def __init__(self, params, input_size, output_embedder, model, token_predictor):
    # Stacked decoder LSTM parameters; input_size is the concatenated
    # decoder input (see the model constructor below).
    self.lstms = du.create_multilayer_lstm_params(
        params.decoder_num_layers,
        input_size,
        params.decoder_state_size,
        model,
        "LSTM-d")
    self.token_predictor = token_predictor
    self.output_embedder = output_embedder
    # Learned embedding used as the decoder input at the first step.
    self.start_token_embedding = du.add_params(
        model, (params.output_embedding_size,), "y-0")
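
# A minimal sketch, assuming du.create_multilayer_lstm_params stacks one
# single-layer DyNet LSTM builder per layer (the actual helper is not shown
# here); each layer above the first consumes the previous layer's state size.
# The function name below is illustrative only.
import dynet as dy

def _sketch_create_multilayer_lstm_params(num_layers, input_size, state_size,
                                          model, name=""):
    lstm_layers = []
    layer_input_size = input_size
    for _ in range(num_layers):
        # VanillaLSTMBuilder(layers, input_dim, hidden_dim, ParameterCollection)
        lstm_layers.append(
            dy.VanillaLSTMBuilder(1, layer_input_size, state_size, model))
        layer_input_size = state_size
    return lstm_layers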
def __init__(self, params, input_vocabulary, output_vocabulary, anonymizer):
    self.params = params
    self._pc = dy.ParameterCollection()

    if params.new_version:
        self.controller = Controller(output_vocabulary)
    else:
        self.controller = None

    # Create the input embeddings
    self.input_embedder = Embedder(self._pc,
                                   params.input_embedding_size,
                                   name="input-embedding",
                                   vocabulary=input_vocabulary,
                                   anonymizer=anonymizer)

    # Create the output embeddings
    self.output_embedder = Embedder(self._pc,
                                    params.output_embedding_size,
                                    name="output-embedding",
                                    vocabulary=output_vocabulary,
                                    anonymizer=anonymizer)

    # Create the encoder; with a discourse-level LSTM, its state is appended
    # to every input token embedding.
    encoder_input_size = params.input_embedding_size
    if params.discourse_level_lstm:
        encoder_input_size += int(params.encoder_state_size / 2)

    self.utterance_encoder = Encoder(params.encoder_num_layers,
                                     encoder_input_size,
                                     params.encoder_state_size,
                                     self._pc)

    # Positional embedder for utterances
    attention_key_size = params.encoder_state_size
    if params.state_positional_embeddings:
        attention_key_size += params.positional_embedding_size
        self.positional_embedder = Embedder(
            self._pc,
            params.positional_embedding_size,
            name="positional-embedding",
            num_tokens=params.maximum_utterances)

    # Create the discourse-level LSTM parameters
    if params.discourse_level_lstm:
        self.discourse_lstms = du.create_multilayer_lstm_params(
            1,
            params.encoder_state_size,
            int(params.encoder_state_size / 2),
            self._pc,
            "LSTM-t")
        self.initial_discourse_state = du.add_params(
            self._pc,
            (int(params.encoder_state_size / 2),),
            "V-turn-state-0")

    # Snippet encoder
    final_snippet_size = 0
    if params.use_snippets and not params.previous_decoder_snippet_encoding:
        snippet_encoding_size = int(params.encoder_state_size / 2)
        final_snippet_size = params.encoder_state_size
        if params.snippet_age_embedding:
            snippet_encoding_size -= int(
                params.snippet_age_embedding_size / 4)
            self.snippet_age_embedder = Embedder(
                self._pc,
                params.snippet_age_embedding_size,
                name="snippet-age-embedding",
                num_tokens=params.max_snippet_age_embedding)
            final_snippet_size = params.encoder_state_size \
                + int(params.snippet_age_embedding_size / 2)

        self.snippet_encoder = Encoder(params.snippet_num_layers,
                                       params.output_embedding_size,
                                       snippet_encoding_size,
                                       self._pc)

    token_predictor = construct_token_predictor(self._pc,
                                                params,
                                                output_vocabulary,
                                                attention_key_size,
                                                final_snippet_size,
                                                anonymizer)

    # Note: decoder_state_size is added to the decoder input size here.
    self.decoder = SequencePredictor(
        params,
        params.output_embedding_size + attention_key_size + params.decoder_state_size,
        self.output_embedder,
        self._pc,
        token_predictor)

    self.trainer = dy.AdamTrainer(self._pc, alpha=params.initial_learning_rate)
    self.dropout = 0.
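
# A minimal sketch, assuming the decoder input at each step is the
# concatenation of the previous output token's embedding, the attention
# context over the encoder keys, and the previous decoder state; this is why
# SequencePredictor above is built with
# output_embedding_size + attention_key_size + decoder_state_size.
# The function name and arguments below are illustrative, not part of the model.
def _sketch_decoder_input(prev_token_embedding, attention_context,
                          prev_decoder_state):
    return dy.concatenate(
        [prev_token_embedding, attention_context, prev_decoder_state])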
def __init__(self, num_layers, input_size, state_size, model):
    self.num_layers = num_layers
    # Bidirectional encoder: the forward and backward stacks each use half of
    # state_size, so the concatenated state has the full state_size.
    self.forward_lstms = create_multilayer_lstm_params(
        self.num_layers, input_size, int(state_size / 2), model, "LSTM-ef")
    self.backward_lstms = create_multilayer_lstm_params(
        self.num_layers, input_size, int(state_size / 2), model, "LSTM-eb")
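
# A minimal sketch, assuming each entry returned by
# create_multilayer_lstm_params behaves like a DyNet LSTM builder with the
# initial_state()/transduce() interface: run the forward and backward stacks
# over the sequence and concatenate the two half-size states per token.
# Names below are illustrative only.
def _sketch_encode_bidirectional(forward_builder, backward_builder,
                                 input_vectors):
    forward_outputs = forward_builder.initial_state().transduce(input_vectors)
    backward_outputs = backward_builder.initial_state().transduce(
        list(reversed(input_vectors)))
    backward_outputs = list(reversed(backward_outputs))
    return [dy.concatenate([fwd, bwd])
            for fwd, bwd in zip(forward_outputs, backward_outputs)]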