def __init__(self, c2i, num_lstm_layers=-1, char_dim=-1, hidden_dim=-1,
             word_embedding_dim=-1, file=None):
    self.c2i = c2i
    self._model = dy.Model()
    # self._model = dy.ParameterCollection()
    if file is None:
        # Char-LSTM parameters
        self.char_lookup = self._model.add_lookup_parameters((len(c2i), char_dim))
        self.char_fwd_lstm = dy.LSTMBuilder(num_lstm_layers, char_dim, hidden_dim, self._model)
        self.char_bwd_lstm = dy.LSTMBuilder(num_lstm_layers, char_dim, hidden_dim, self._model)

        # Post-LSTM parameters
        self.lstm_to_rep_params = self._model.add_parameters((word_embedding_dim, hidden_dim * 2))
        self.lstm_to_rep_bias = self._model.add_parameters(word_embedding_dim)
        self.mlp_out = self._model.add_parameters((word_embedding_dim, word_embedding_dim))
        self.mlp_out_bias = self._model.add_parameters(word_embedding_dim)
    else:
        # Restore saved parameters in the same order they were added above
        model_members = iter(self._model.load(file))
        # pc2 = dy.ParameterCollection()
        # model_members = iter(dy.load(file, pc2))
        self.char_lookup = next(model_members)
        self.char_fwd_lstm = next(model_members)
        self.char_bwd_lstm = next(model_members)
        self.lstm_to_rep_params = next(model_members)
        self.lstm_to_rep_bias = next(model_members)
        self.mlp_out = next(model_members)
        self.mlp_out_bias = next(model_members)
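# --- Sketch (not from the original source): a plausible forward pass implied by
# the parameters above. It embeds each character, runs the forward and backward
# char LSTMs, concatenates their final outputs, and applies the two affine
# layers. The method name `embed_word` and the use of dy.parameter() are
# assumptions, not the author's code.
def embed_word(self, word):
    char_embs = [self.char_lookup[self.c2i[c]] for c in word]
    fwd_out = self.char_fwd_lstm.initial_state().transduce(char_embs)[-1]
    bwd_out = self.char_bwd_lstm.initial_state().transduce(char_embs[::-1])[-1]
    rep = dy.concatenate([fwd_out, bwd_out])
    W = dy.parameter(self.lstm_to_rep_params)
    b = dy.parameter(self.lstm_to_rep_bias)
    O = dy.parameter(self.mlp_out)
    o_b = dy.parameter(self.mlp_out_bias)
    return O * dy.tanh(W * rep + b) + o_b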
def prepare_test_data(enc_file, dec_file):
    encoder_test_file, decoder_test_file = enc_file, dec_file
    encoder_dict, decoder_dict = word2idx_de, word2idx_en
    encoder_sentences = get_data(encoder_test_file)
    decoder_sentences = get_data(decoder_test_file)
    encoder_wids, _ = get_idx(encoder_sentences, encoder_dict)
    decoder_wids, total_dec_toks = get_idx(decoder_sentences, decoder_dict)
    return encoder_sentences, encoder_wids, decoder_sentences, decoder_wids, total_dec_toks

_, valid_enc_ids, _, valid_dec, total_vtoks = prepare_test_data(valid_de_path, valid_en_path)

if TRAIN_SWITCH:
    # Build a fresh attention encoder-decoder for training
    enc_fwd_lstm = dy.LSTMBuilder(no_layers, embedding_size, hidden_size, model)
    enc_bwd_lstm = dy.LSTMBuilder(no_layers, embedding_size, hidden_size, model)
    dec_lstm = dy.LSTMBuilder(no_layers, hidden_size * 2 + embedding_size, hidden_size, model)

    input_lookup = model.add_lookup_parameters((vocab_size_de, embedding_size))
    attention_w1 = model.add_parameters((attention_size, hidden_size * 2))
    attention_w2 = model.add_parameters((attention_size, hidden_size * no_layers * 2))
    attention_v = model.add_parameters((1, attention_size))
    decoder_w = model.add_parameters((vocab_size_en, 3 * hidden_size))
    decoder_b = model.add_parameters((vocab_size_en,))
    output_lookup = model.add_lookup_parameters((vocab_size_en, embedding_size))
else:
    # Restore a previously saved model for decoding
    [enc_fwd_lstm, enc_bwd_lstm, dec_lstm, input_lookup, output_lookup,
     attention_w1, attention_w2, attention_v, decoder_w,
     decoder_b] = model.load(load_model_path)

_, encoder_test_wids, _, _, _ = prepare_test_data(test_de_path, test_en_path)
# _, encoder_test_wids, _, _, _ = prepare_test_data(valid_de_path, valid_en_path)
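# --- Sketch (assumed; the originals are not shown here): plausible definitions
# of the get_data / get_idx helpers called by prepare_test_data. get_idx is
# assumed to map out-of-vocabulary words to an "<unk>" entry and to count total
# tokens, since prepare_test_data uses its second return value that way. The
# "<unk>" token name is an assumption.
def get_data(path):
    with open(path, encoding="utf-8") as f:
        return [line.strip().split() for line in f]

def get_idx(sentences, word2idx):
    wids, total_toks = [], 0
    for sent in sentences:
        wids.append([word2idx.get(w, word2idx["<unk>"]) for w in sent])
        total_toks += len(sent)
    return wids, total_toks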
# validation_num_op_tokens += len(english_list)
# for word in german_list:
#     if word in german_word_vocab.w2i:
#         indexed_german_list.append(german_word_vocab.w2i[word])
#     else:
#         indexed_german_list.append(german_word_vocab.w2i["<UNK>"])
# for word in english_list:
#     if word in english_word_vocab.w2i:
#         indexed_eng_list.append(english_word_vocab.w2i[word])
#     else:
#         indexed_eng_list.append(english_word_vocab.w2i["<UNK>"])
# indexed_valid.append((indexed_german_list, indexed_eng_list))

# Declare and define the enc-dec models
model = dy.Model()
enc_fwd_lstm = dy.LSTMBuilder(lstm_num_of_layers, embeddings_size, state_size, model)
enc_bwd_lstm = dy.LSTMBuilder(lstm_num_of_layers, embeddings_size, state_size, model)
dec_lstm = dy.LSTMBuilder(lstm_num_of_layers, (state_size * 2) + embeddings_size, state_size, model)

# Define the model parameters
input_lookup = model.add_lookup_parameters((german_vocab_size, embeddings_size))
attention_w1 = model.add_parameters((attention_size, state_size * 2))
attention_w2 = model.add_parameters((attention_size, state_size * lstm_num_of_layers * 2))
attention_v = model.add_parameters((1, attention_size))
decoder_w = model.add_parameters((english_vocab_size, state_size + (state_size * 2)))
decoder_b = model.add_parameters((english_vocab_size,))
output_lookup = model.add_lookup_parameters((english_vocab_size, embeddings_size))

# Convert the input (German) sentence into its embedded form
def embed_sentence(sentence):
    # body restored from context: look up the embedding of each word id
    return [input_lookup[w] for w in sentence]
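# --- Sketch (assumed, not part of the original file): the MLP attention these
# parameters imply, following the standard DyNet attention recipe. Here
# `input_mat` is the (2*state_size) x src_len matrix of bidirectional encoder
# states (e.g. dy.concatenate_cols of the encodings), `w1dt` is
# attention_w1 * input_mat precomputed once per sentence, and `state` is the
# decoder LSTM state; state.s() concatenates its memory and hidden vectors,
# matching attention_w2's input dimension.
def attend(input_mat, state, w1dt):
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    w2dt = w2 * dy.concatenate(list(state.s()))
    scores = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(scores)
    return input_mat * att_weights  # weighted context vector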
VOCAB_SIZE_EN = len(word2id_en)
VOCAB_SIZE_DE = len(word2id_de)

LSTM_NUM_OF_LAYERS = 2
EMBEDDINGS_SIZE = 512
STATE_SIZE = 512
ATTENTION_SIZE = 256
BATCH_SIZE = 30
DROPOUT = 0.2

model = dy.Model()
encoder_fwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model)
encoder_bwd_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, EMBEDDINGS_SIZE, STATE_SIZE, model)
input_lookup = model.add_lookup_parameters((VOCAB_SIZE_DE, EMBEDDINGS_SIZE))

# Decoder input is the target embedding concatenated with the attention context
decoder_lstm = dy.LSTMBuilder(LSTM_NUM_OF_LAYERS, STATE_SIZE * 2 + EMBEDDINGS_SIZE, STATE_SIZE, model)
attention_w1 = model.add_parameters((ATTENTION_SIZE, STATE_SIZE * 2))
attention_w2 = model.add_parameters((ATTENTION_SIZE, STATE_SIZE * LSTM_NUM_OF_LAYERS * 2))
attention_v = model.add_parameters((1, ATTENTION_SIZE))
decoder_w = model.add_parameters((VOCAB_SIZE_EN, 3 * STATE_SIZE))
decoder_b = model.add_parameters((VOCAB_SIZE_EN,))
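# --- Sketch (assumed; the encoder itself is not shown in this excerpt): how the
# forward and backward LSTMs above would typically be combined into a
# bidirectional encoding, with DROPOUT applied only during training via the
# builders' set_dropout / disable_dropout methods. The function name and the
# `train` flag are assumptions.
def encode_sentence(embedded, train=True):
    if train:
        encoder_fwd_lstm.set_dropout(DROPOUT)
        encoder_bwd_lstm.set_dropout(DROPOUT)
    else:
        encoder_fwd_lstm.disable_dropout()
        encoder_bwd_lstm.disable_dropout()
    fwd_vecs = encoder_fwd_lstm.initial_state().transduce(embedded)
    bwd_vecs = encoder_bwd_lstm.initial_state().transduce(embedded[::-1])[::-1]
    # each position gets the concatenation of its forward and backward states
    return [dy.concatenate([f, b]) for f, b in zip(fwd_vecs, bwd_vecs)]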
def __init__(self, model, training_src, training_tgt, dev_src, dev_tgt,
             test_src, blind_src, mode='train', modelFileName='', dictFileName=''):
    if mode == 'train':
        self.model = model

        # Pair source/target sentences, sort by descending source length, and batch
        self.training = [(x, y) for (x, y) in zip(training_src, training_tgt)]
        self.training.sort(key=lambda x: -len(x[0]))
        self.training_batch = create_batch(self.training)

        self.dev = [(x, y) for (x, y) in zip(dev_src, dev_tgt)]
        self.dev.sort(key=lambda x: -len(x[0]))
        self.dev_batch = create_batch(self.dev)

        self.test = test_src
        self.blind = blind_src

        # Vocabulary maps built from the training data
        self.src_token_to_id, self.src_id_to_token = self._buildMap(training_src)
        self.tgt_token_to_id, self.tgt_id_to_token = self._buildMap(training_tgt)
        self.src_vocab_size = len(self.src_token_to_id)
        self.tgt_vocab_size = len(self.tgt_token_to_id)

        # Hyperparameters
        self.embed_size = 512
        self.hidden_size = 512
        self.attention_size = 128
        self.layers = 1
        self.max_len = 50

        self.src_lookup = model.add_lookup_parameters((self.src_vocab_size, self.embed_size))
        self.tgt_lookup = model.add_lookup_parameters((self.tgt_vocab_size, self.embed_size))

        # Bidirectional encoder: left-to-right and right-to-left LSTMs
        self.l2r_builder = dy.LSTMBuilder(self.layers, self.embed_size, self.hidden_size, model)
        self.l2r_builder.set_dropout(0.5)
        self.r2l_builder = dy.LSTMBuilder(self.layers, self.embed_size, self.hidden_size, model)
        self.r2l_builder.set_dropout(0.5)

        # Decoder consumes the target embedding plus the attention context
        self.dec_builder = dy.LSTMBuilder(self.layers, self.embed_size + self.hidden_size * 2,
                                          self.hidden_size, model)
        self.dec_builder.set_dropout(0.5)

        # Output projection and attention parameters
        self.W_y = model.add_parameters((self.tgt_vocab_size, self.hidden_size))
        self.b_y = model.add_parameters((self.tgt_vocab_size,))
        self.W1_att_f = model.add_parameters((self.attention_size, self.hidden_size * 2))
        self.W1_att_e = model.add_parameters((self.attention_size, self.hidden_size))
        self.w2_att = model.add_parameters((self.attention_size,))
    if mode == 'test':
        self.model = model
        self.load(modelFileName, dictFileName)
        self.test = test_src
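# --- Sketch (assumed; the original _buildMap is not shown in this excerpt): a
# plausible implementation that assigns each token an id over the training
# corpus. The special-token names and their reserved ids are assumptions.
def _buildMap(self, corpus):
    token_to_id = {'<unk>': 0, '<s>': 1, '</s>': 2}
    for sent in corpus:
        for tok in sent:
            if tok not in token_to_id:
                token_to_id[tok] = len(token_to_id)
    id_to_token = {i: t for t, i in token_to_id.items()}
    return token_to_id, id_to_token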