def __init__(self, model, params, vocabulary, attention_key_size):
    """Create the parameters for predicting output tokens.

    Args:
        model: parameter collection that owns the created parameters
            (presumably a dy.ParameterCollection -- confirm against callers).
        params: configuration object; reads decoder_state_size.
        vocabulary: output vocabulary; its length sizes the output layer.
        attention_key_size: dimensionality of the attention keys/values.
    """
    self.vocabulary = vocabulary

    # Attention over encoder states, queried by the decoder state.
    self.attention_module = Attention(model,
                                      params.decoder_state_size,
                                      attention_key_size,
                                      attention_key_size)

    # Combines decoder state and attention context into one vector.
    self.state_transform_weights = du.add_params(
        model,
        (params.decoder_state_size + attention_key_size,
         params.decoder_state_size),
        "weights-state-transform")

    # Output projection onto the vocabulary, plus per-token biases.
    self.vocabulary_weights = du.add_params(
        model,
        (params.decoder_state_size, len(vocabulary)),
        "weights-vocabulary")
    # (len(vocabulary),) is the idiomatic one-element tuple; the original
    # tuple([len(vocabulary)]) built a throwaway list first.
    self.vocabulary_biases = du.add_params(
        model, (len(vocabulary),), "biases-vocabulary")
def __init__(self, params, input_size, output_embedder, model, token_predictor):
    """Set up the decoder: its LSTM stack, embedders, and prediction head.

    Args:
        params: configuration object; reads decoder_state_size,
            decoder_num_layers, output_embedding_size, encoder_state_size.
        input_size: dimensionality of each decoder input vector.
        output_embedder: embedder for previously generated output tokens.
        model: parameter collection that owns the created parameters.
        token_predictor: module that scores output tokens from decoder state.
    """
    state_size = params.decoder_state_size
    self.decoder_state_size = state_size

    # Multi-layer decoder LSTM.
    self.lstms = du.create_multilayer_lstm_params(
        params.decoder_num_layers, input_size, state_size, model, "LSTM-d")

    self.token_predictor = token_predictor
    self.output_embedder = output_embedder

    # Learned embedding fed as the first decoder input ("y-0").
    self.start_token_embedding = du.add_params(
        model, (params.output_embedding_size,), "y-0")

    # Decoder states from the most recent prediction; starts empty.
    self.last_decoder_states = []

    # Projects encoder states into decoder-state space.
    self.pick_pos_param = du.add_params(
        model, (params.encoder_state_size, state_size), "pick")
def __init__(self, model, params, vocabulary, attention_key_size, snippet_size):
    """Create parameters for predicting tokens, including snippet scoring.

    Args:
        model: parameter collection that owns the created parameters.
        params: configuration object; reads decoder_state_size.
        vocabulary: output vocabulary, forwarded to TokenPredictor.
        attention_key_size: attention key dimensionality, forwarded to
            TokenPredictor.
        snippet_size: dimensionality of encoded snippets; must be positive.

    Raises:
        ValueError: if snippet_size is not greater than zero.
    """
    # Fail fast: validate before the parent constructor allocates any
    # parameters in the model (the original raised only afterwards).
    if snippet_size <= 0:
        raise ValueError("Snippet size must be greater than zero; was "
                         + str(snippet_size))

    TokenPredictor.__init__(self, model, params, vocabulary, attention_key_size)

    # Scores snippets against the decoder state.
    self.snippet_weights = du.add_params(
        model,
        (params.decoder_state_size, snippet_size),
        "weights-snippet")
def __init__(self, params, input_vocabulary, output_vocabulary, anonymizer):
    """Build the full interaction-level model: embedders, encoders, decoder.

    Args:
        params: hyperparameter namespace (sizes, layer counts, feature flags).
        input_vocabulary: vocabulary over input utterance tokens.
        output_vocabulary: vocabulary over output tokens.
        anonymizer: entity anonymizer shared by the input/output embedders.
    """
    self.params = params
    self._pc = dy.ParameterCollection()

    if params.new_version:
        self.controller = Controller(output_vocabulary)
    else:
        self.controller = None

    # Create the input embeddings
    self.input_embedder = Embedder(self._pc,
                                   params.input_embedding_size,
                                   name="input-embedding",
                                   vocabulary=input_vocabulary,
                                   anonymizer=anonymizer)

    # Create the output embeddings
    self.output_embedder = Embedder(self._pc,
                                    params.output_embedding_size,
                                    name="output-embedding",
                                    vocabulary=output_vocabulary,
                                    anonymizer=anonymizer)

    # Create the encoder.  When the discourse-level LSTM is enabled, its
    # state (half the encoder size) is concatenated to the encoder input.
    # NOTE: "//" (integer division) replaces "/" -- under Python 3 true
    # division yields a float, which is invalid as a parameter dimension.
    encoder_input_size = params.input_embedding_size
    if params.discourse_level_lstm:
        encoder_input_size += params.encoder_state_size // 2

    self.utterance_encoder = Encoder(params.encoder_num_layers,
                                     encoder_input_size,
                                     params.encoder_state_size,
                                     self._pc)

    # Positional embedder for utterances; widens the attention keys.
    attention_key_size = params.encoder_state_size
    if params.state_positional_embeddings:
        attention_key_size += params.positional_embedding_size
        self.positional_embedder = Embedder(
            self._pc,
            params.positional_embedding_size,
            name="positional-embedding",
            num_tokens=params.maximum_utterances)

    # Create the discourse-level LSTM parameters
    if params.discourse_level_lstm:
        self.discourse_lstms = du.create_multilayer_lstm_params(
            1,
            params.encoder_state_size,
            params.encoder_state_size // 2,
            self._pc,
            "LSTM-t")
        self.initial_discourse_state = du.add_params(
            self._pc, (params.encoder_state_size // 2,), "V-turn-state-0")

    # Snippet encoder
    final_snippet_size = 0
    if params.use_snippets and not params.previous_decoder_snippet_encoding:
        snippet_encoding_size = params.encoder_state_size // 2
        final_snippet_size = params.encoder_state_size
        if params.snippet_age_embedding:
            # Reserve room in the encoding for the age embedding.
            snippet_encoding_size -= params.snippet_age_embedding_size // 4
            self.snippet_age_embedder = Embedder(
                self._pc,
                params.snippet_age_embedding_size,
                name="snippet-age-embedding",
                num_tokens=params.max_snippet_age_embedding)
            final_snippet_size = (params.encoder_state_size
                                  + params.snippet_age_embedding_size // 2)

        self.snippet_encoder = Encoder(params.snippet_num_layers,
                                       params.output_embedding_size,
                                       snippet_encoding_size,
                                       self._pc)

    token_predictor = construct_token_predictor(self._pc,
                                                params,
                                                output_vocabulary,
                                                attention_key_size,
                                                final_snippet_size,
                                                anonymizer)

    # Note: the decoder input is widened by decoder_state_size here.
    self.decoder = SequencePredictor(
        params,
        params.output_embedding_size + attention_key_size
        + params.decoder_state_size,
        self.output_embedder,
        self._pc,
        token_predictor)

    self.trainer = dy.AdamTrainer(self._pc, alpha=params.initial_learning_rate)
    self.dropout = 0.
def __init__(self, model, query_size, key_size, value_size):
    """Record attention dimensions and create the query projection.

    Args:
        model: parameter collection that owns the created parameters.
        query_size: dimensionality of the query vectors.
        key_size: dimensionality of the attention keys.
        value_size: dimensionality of the attention values.
    """
    self.key_size = key_size
    self.value_size = value_size

    # Projects a query of query_size into key space for scoring.
    self.query_weights = du.add_params(
        model, (query_size, self.key_size), "weights-attention-q")