def decode(self, input_vectors, output):
    tgt_toks = [self.tgt_vocab[tok] for tok in output]

    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)

    s = self.dec_lstm.initial_state()
    s = s.add_input(dynet.concatenate([
        input_vectors[-1],
        dynet.vecInput(self.args.hidden_dim * 2),
        dynet.vecInput(self.pronouncer.args.hidden_dim * 2)
    ]))
    loss = []
    for tok in tgt_toks:
        out_vector = w * s.output() + b
        probs = dynet.softmax(out_vector)
        loss.append(-dynet.log(dynet.pick(probs, tok.i)))

        embed_vector = self.tgt_lookup[tok.i]
        attn_vector = self.attend(input_vectors, s)

        spelling = [self.pronouncer.src_vocab[letter] for letter in tok.s.upper()]
        embedded_spelling = self.pronouncer.embed_seq(spelling)
        pron_vector = self.pronouncer.encode_seq(embedded_spelling)[-1]
        fpv = dynet.nobackprop(pron_vector)

        inp = dynet.concatenate([embed_vector, attn_vector, fpv])
        s = s.add_input(inp)
    loss = dynet.esum(loss)
    return loss

def run_one_doc(model, first_level, emb_doc, doc_labels, w_param, b_param):
    """
    Runs the given model on one document and makes predictions.
    @params:
        first_level is I, O, or P,
        model is the LSTM model,
        emb_doc is a numpy array of embeddings for one document,
        doc_labels is a list of the labels associated with emb_doc,
        w_param is a Dynet parameter multiplied with the layer output,
        b_param is a Dynet parameter added to the product of output and w_param.
    @returns: pred_gold is a list of tuples in the form of (prediction, gold label)
    """
    dy.renew_cg()
    s = model.initial_state()
    i = dy.vecInput(200)
    o = dy.vecInput(200)
    p = dy.vecInput(200)
    si = s.add_input(i)
    so = s.add_input(o)
    sp = s.add_input(p)
    pred_gold = []
    for wdemb, label in zip(emb_doc, doc_labels):
        x = dy.inputVector(wdemb)
        if first_level == 'I':
            s2 = si.add_input(x)
        elif first_level == 'O':
            s2 = so.add_input(x)
        else:
            s2 = sp.add_input(x)
        out_class = dy.softmax((w_param * s2.output()) + b_param)
        chosen_class = np.argmax(out_class.npvalue())
        pred_gold.append((int(chosen_class), int(label)))
    return pred_gold

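A hypothetical driver for run_one_doc (the names model, docs, labels, w and b are assumptions, not from the source), collecting (prediction, gold) pairs over a corpus and computing accuracy:

pairs = []
for emb_doc, doc_labels in zip(docs, labels):
    pairs.extend(run_one_doc(model, 'I', emb_doc, doc_labels, w, b))
accuracy = sum(pred == gold for pred, gold in pairs) / float(len(pairs))
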
def train(self, words, lemmas, gold, bad):
    dy.renew_cg()
    W = dy.parameter(self.pW)
    b = dy.parameter(self.pb)
    losses = []
    gold_scores = []
    bad_scores = []
    for item in gold:
        lf, denotation = item[0], item[1]
        feature = self.extract_feature(words, lemmas, lf, denotation)
        feature_vec = dy.vecInput(self.nfeatures)
        feature_vec.set(feature)
        gold_scores.append(W * feature_vec + b)
    for item in bad:
        lf, denotation = item[0], item[1]
        feature = self.extract_feature(words, lemmas, lf, denotation)
        feature_vec = dy.vecInput(self.nfeatures)
        feature_vec.set(feature)
        bad_scores.append(W * feature_vec + b)
    log_prob = dy.log_softmax(dy.concatenate(gold_scores + bad_scores))
    for i in range(len(gold_scores)):
        losses.append(dy.pick(log_prob, i))
    return -dy.esum(losses)

def forward(self, tokens, parents, children, node_order, inds_for_loss):
    hs_up = [dy.vecInput(self.emb_size) for _ in range(len(tokens))]
    cs_up = [dy.vecInput(self.emb_size) for _ in range(len(tokens))]
    hs_dn = [dy.vecInput(self.emb_size) for _ in range(len(tokens))]
    cs_dn = [dy.vecInput(self.emb_size) for _ in range(len(tokens))]
    for node in node_order:
        h_ch = [hs_up[ch] for ch in children[node]]
        c_ch = [cs_up[ch] for ch in children[node]]
        h_, c_ = self.tree_lstm_up.state(self.emb[tokens[node]], h_ch, c_ch)
        hs_up[node] = h_
        cs_up[node] = c_
    for node in reversed(node_order):
        h_pa = [hs_dn[pa] for pa in parents[node]]
        c_pa = [cs_dn[pa] for pa in parents[node]]
        h_, c_ = self.tree_lstm_dn.state(self.emb[tokens[node]], h_pa, c_pa)
        hs_dn[node] = h_
        cs_dn[node] = c_
    hs_return = [
        dy.affine_transform([self.b, self.W, dy.concatenate([hs_up[i], hs_dn[i]])])
        for i in inds_for_loss
    ]
    # cs_return = [dy.concatenate([cs_up[i], cs_dn[i]]) for i in inds_for_loss]
    return hs_return

def predict(self, word):
    hidden_size = 64
    vocabulary_size = len(self.vocab_to_index)
    input_size = output_size = vocabulary_size
    m = dy.Model()
    W = m.add_parameters((hidden_size, input_size))
    b = m.add_parameters(hidden_size)
    V = m.add_parameters((output_size, hidden_size))  # Softmax weights
    a = m.add_parameters(output_size)  # Softmax bias
    x = dy.vecInput(input_size)
    y = dy.vecInput(output_size)
    h = dy.tanh((W * x) + b)
    output = dy.softmax(V * h)
    x.set(self.word_vectors[word])
    probabilities = output.npvalue()
    predicted_word = np.random.choice(
        self.unique_words,
        p=(probabilities + 0.0002) / sum(probabilities + 0.0002))
    return predicted_word

def build_model(first_level, model, emb_doc, doc_labels, w_param, b_param):
    """
    Runs the model for training, calculating the loss.
    @params:
        first_level is I, O, or P,
        model is the LSTM model,
        emb_doc is a numpy array of embeddings for one document,
        doc_labels is a list of the labels associated with emb_doc,
        w_param is a Dynet parameter multiplied with the layer output,
        b_param is a Dynet parameter added to the product of output and w_param.
    @returns: the sum of the errors computed for the document
    """
    dy.renew_cg()
    s = model.initial_state()
    i = dy.vecInput(200)
    o = dy.vecInput(200)
    p = dy.vecInput(200)
    si = s.add_input(i)
    so = s.add_input(o)
    sp = s.add_input(p)
    loss = []
    for wdemb, label in zip(emb_doc, doc_labels):
        x = dy.inputVector(wdemb)
        # noise for student model; dy.noise returns a new expression,
        # so the result must be assigned back to x to have any effect
        x = dy.noise(x, 0.5)
        if first_level == 'I':
            s2 = si.add_input(x)
        elif first_level == 'O':
            s2 = so.add_input(x)
        else:
            s2 = sp.add_input(x)
        loss.append(dy.pickneglogsoftmax((w_param * s2.output()) + b_param, label))
    return dy.esum(loss)

def decode(vectors, output, decode_char=True):
    # if char --> decode into characters to produce tgt form;
    # else decode into msd's to produce tag sequence
    # if not decode_char: pdb.set_trace()
    output = [EOS] + list(output) + [EOS]
    if decode_char:
        x2id = char2id
        output_x_lookup = output_lookup
        w = decoder_w
        b = decoder_b
        w1 = attention_w1
        x_lstm = dec_lstm
        input_mat = dy.concatenate_cols(vectors)
    else:
        x2id = msd2id_split
        output_x_lookup = output_msd_lookup
        w = decoder_msd_w
        b = decoder_msd_b
        x_lstm = dec_msd_lstm
        input_mat = vectors  # dy.concatenate(vectors)
    output = [x2id[c] for c in output]

    w1dt = None
    last_output_embeddings = output_x_lookup[x2id[EOS]]
    if decode_char:
        s = x_lstm.initial_state().add_input(
            dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    else:
        s = x_lstm.initial_state().add_input(
            dy.concatenate([
                dy.vecInput(STATE_SIZE * 2 + EMBEDDINGS_SIZE),
                last_output_embeddings
            ]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        if decode_char:
            w1dt = w1dt or w1 * input_mat
            vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        else:
            vector = dy.concatenate([input_mat, last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_x_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss

def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss

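These decode and generate functions call an attend helper that is not included in the snippets. A minimal sketch of what it typically looks like, following the standard DyNet attention example and assuming attention_w2 and attention_v parameters exist alongside attention_w1:

def attend(input_mat, state, w1dt):
    w2 = dy.parameter(attention_w2)
    v = dy.parameter(attention_v)
    # input_mat: (2 * STATE_SIZE) x seqlen matrix of encoder states
    # w1dt: precomputed attention_w1 * input_mat
    w2dt = w2 * dy.concatenate(list(state.s()))
    # one unnormalized score per source position
    unnormalized = dy.transpose(v * dy.tanh(dy.colwise_add(w1dt, w2dt)))
    att_weights = dy.softmax(unnormalized)
    # context vector: attention-weighted sum of encoder states
    context = input_mat * att_weights
    return context
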
def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = embed_sentence(in_seq)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(in_seq) * 2):
        if count_EOS == 2:
            break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue
        out += int2char[next_char]
    return out

def task_mlp(self, vec_sen, train, y_s=None):
    """ calculating the mlp function over the sentence representation vector """
    w1 = dy.parameter(self._params["task_w1"])
    b1 = dy.parameter(self._params["task_b1"])
    w2 = dy.parameter(self._params["task_w2"])
    b2 = dy.parameter(self._params["task_b2"])
    if train:
        drop = self._dropout
    else:
        drop = 0
    if y_s is not None:
        v = dy.vecInput(1)
        v.set([y_s])
        in_vec = dy.concatenate([vec_sen, v])
    else:
        in_vec = vec_sen
    out = dy.tanh(dy.dropout(dy.affine_transform([b1, w1, in_vec]), drop))
    out = dy.affine_transform([b2, w2, out])
    return out

def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]
    # hard-coded target indices (overrides the mapping computed above)
    output = [2, 5, 6, 7, 8, 9, 3]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)

    # [2*state_size, sent_len]
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    # last_output_embeddings = output_lookup[char2int[EOS]]
    last_output_embeddings = output_lookup[2]
    # s = dec_lstm.initial_state_from_raw_vectors([np.random.normal(0, 0.1, STATE_SIZE) for i in range(2 * LSTM_NUM_OF_LAYERS)])
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss

def get_loss(self, input_string, output_string):
    # Adding <EOS>
    input_string = self._add_eos(input_string)
    output_string = self._add_eos(output_string)

    # Create a new computation graph
    dy.renew_cg()

    # Vectorizing input and output (character-level, word-level, etc.)
    embedded_string = self._embed_string(input_string)
    # Hidden states of all the slices of the RNN for the input
    encoded_string = self._encode_string(embedded_string)

    # adding to DEC_RNN and getting the states of the decoder
    rnn_state = (self.DEC_RNN.initial_state()).add_input(dy.vecInput(self.enc_state_size))

    loss = list()
    for output_char in output_string:
        # getting the context vector for each character (or word)
        attended_encoding = self._attend(encoded_string, rnn_state)
        # print attended_encoding.dim()
        # con(y{i-1}, attended_encoding)
        # attended_encoding, rnn_state = rnn_state.add_input(attended_encoding)
        # rnn_state = rnn_state.add_input(dy.concatenate(attended_encoding))
        probs = self._get_probs(rnn_state.output())
        # probs = self._get_probs(rnn_state.add_input(attended_encoding).output())
        # -log(probs[output_char]) as loss
        loss.append(-dy.log(dy.pick(probs, output_char)))
    loss = dy.esum(loss)
    return loss

def generate_from_encoding_vector(self, numpy_vec):
    with open("gen.txt", "a+") as f:
        dy.renew_cg()
        gen = ["<S>"]
        encoded = dy.vecInput(len(numpy_vec))
        encoded.set(numpy_vec)
        s = self.decoder.initial_state()
        start_encoded = self.E[self.tok2ind["<S>"]]
        s = s.add_input(dy.concatenate([start_encoded, encoded]))
        counter = 0
        current = "<S>"
        while counter < 50 and current != "<E>":
            counter += 1
            probs, scores = self.predict_word(s.output())
            s = s.add_input(dy.concatenate([self.E[self.tok2ind[current]], encoded]))
            current = self.ind2tok[np.argmax(probs.npvalue())]
            gen.append(current)
        gen = " ".join(gen)
        f.write(gen + "\n")

def decode_to_loss(self, vectors, output):
    w = dy.parameter(self.w_softmax)
    b = dy.parameter(self.b_softmax)
    w1 = dy.parameter(self.attention_source)
    output = list(output)
    encoded_states = dy.concatenate_cols(vectors)
    prev_output_embeddings = self.target_lookup[self.eos_target]
    current_state = self.decoder.initial_state().add_input(
        dy.concatenate([dy.vecInput(self.hidden_size * 2), prev_output_embeddings]))
    losses = []
    attentional_component = w1 * encoded_states
    for next_word in output:
        vector = dy.concatenate([
            self.attention(encoded_states, current_state, attentional_component),
            prev_output_embeddings
        ])
        current_state = current_state.add_input(vector)
        s = dy.affine_transform([b, w, current_state.output()])
        item_loss = dy.pickneglogsoftmax(s, next_word)
        losses.append(item_loss)
        prev_output_embeddings = self.target_lookup[next_word]
    loss = dy.esum(losses)
    return loss

def calc_loss(self, src_seqs, trg_seqs, training=True):
    batch_size = len(src_seqs)
    src_encodings = self.encoder.encode(src_seqs, training=training)
    src_enc_all = dy.concatenate_cols(src_encodings)
    src_trans_att = self.attender.get_src_transformation(src_enc_all)
    state = self.decoder.initialize(src_encodings, training=training)
    ctx_tm1 = dy.vecInput(self.encoder.state_dim)
    losses = []
    max_len = max(map(len, trg_seqs))
    for i in xrange(1, max_len):
        y_tm1 = [trg_seq[i - 1] if i < len(trg_seq) else trg_seq[-1] for trg_seq in trg_seqs]
        ref_y_t = [trg_seq[i] if i < len(trg_seq) else trg_seq[-1] for trg_seq in trg_seqs]
        y_tm1_embed = self.decoder.embedder.embed_item(y_tm1, training=training)
        x = dy.concatenate([y_tm1_embed, ctx_tm1])
        state = state.add_input(x)
        h_t = state.output()
        ctx_t, alpha_t = self.attender.calc_context(src_enc_all, src_trans_att, h_t)
        loss_t = self.decoder.calc_loss(h_t, ctx_t, ref_y_t, training=training)
        mask = dy.inputVector([1 if i < len(trg_seq) else 0 for trg_seq in trg_seqs])
        mask = dy.reshape(mask, (1,), batch_size)
        loss_t = dy.sum_batches(loss_t * mask)
        ctx_tm1 = ctx_t
        losses.append(loss_t)
    loss = dy.esum(losses)
    return loss

def decode(self, vectors, output, end_token, durs):
    # output = [EOS] + list(output) + [EOS]
    # output = [char2int[c] for c in output]

    w = dy.parameter(self.decoder_w)
    b = dy.parameter(self.decoder_b)
    w1 = dy.parameter(self.attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = self.output_lookup[2]
    s = self.dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(self.state_size * 2), last_output_embeddings]))
    loss = []
    dur_loss = []
    c = 1
    for word, dur in zip(output, durs):
        c += 1
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([self.attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        k = s
        # print "Going"
        dloss = self.test_duration(k, dur)
        # print "Back"
        dur_loss.append(dloss)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = self.output_lookup[word]
        loss.append(-dy.log(dy.pick(probs, word)))
    loss = dy.esum(loss)
    return loss, dy.esum(dur_loss)

def decode_to_prediction(self, encoded, max_length):
    w = dy.parameter(self.w_softmax)
    b = dy.parameter(self.b_softmax)
    w1 = dy.parameter(self.attention_source)
    encoded_states = dy.concatenate_cols(encoded)
    attentional_component = w1 * encoded_states
    prev_output_embeddings = self.target_lookup[self.eos_target]
    current_state = self.decoder.initial_state().add_input(
        dy.concatenate([dy.vecInput(self.hidden_size * 2), prev_output_embeddings]))
    result = ""
    for i in range(max_length):
        vector = dy.concatenate([
            self.attention(encoded_states, current_state, attentional_component),
            prev_output_embeddings
        ])
        current_state = current_state.add_input(vector)
        s = dy.affine_transform([b, w, current_state.output()])
        probs = (dy.log_softmax(s)).value()
        next_word = np.argmax(probs)
        prev_output_embeddings = self.target_lookup[next_word]
        if next_word == self.eos_target:
            return result[:-1]
        if next_word in self.targetDictionnary.keys():
            result += self.targetDictionnary[next_word] + " "
        else:
            result += self.targetDictionnary[unk_target] + " "
    return result[:-1]

def _attend(self, input_vectors, state):
    w1 = self.att_w1.expr()
    w2 = self.att_w2.expr()
    v = self.att_v.expr()
    attention_weights = []

    w2dt = w2 * state.h()[-1]
    for input_vector in input_vectors:
        attention_weight = v * dy.tanh(w1 * input_vector + w2dt)
        attention_weights.append(attention_weight)
    attention_weights = dy.softmax(dy.concatenate(attention_weights))
    pos = self.argmax(attention_weights.value())
    # print pos
    att_inp = []
    for x in range(pos - self.attention_window, pos + self.attention_window + 1):
        gaussian_value = self._gaussian(x, pos, self.attention_window)
        # print gaussian_value
        if x >= 0 and x < len(input_vectors):
            vector = input_vectors[x]
        else:
            vector = dy.vecInput(self.config.encoder_size * 2)
        att_inp.append(vector * gaussian_value)
    # output_vectors = dy.esum([vector * attention_weight for vector, attention_weight in zip(input_vectors, attention_weights)])
    output_vectors = dy.esum(att_inp)
    return output_vectors

def train(self, trainning_set):
    for sentence, eid, entity, trigger, label, pos, chars, rule in trainning_set:
        features = self.encode_sentence(sentence, pos, chars)
        loss = []

        entity_embeds = features[entity]

        attention, context = self.self_attend(features)
        ty = dy.vecInput(len(sentence))
        ty.set([0 if i != trigger else 1 for i in range(len(sentence))])
        loss.append(dy.binary_log_loss(dy.reshape(attention, (len(sentence),)), ty))

        h_t = dy.concatenate([context, entity_embeds])
        hidden = dy.tanh(self.lb.expr() * h_t + self.lb_bias.expr())
        out_vector = dy.reshape(dy.logistic(self.lb2.expr() * hidden + self.lb2_bias.expr()), (1,))
        label = dy.scalarInput(label)
        loss.append(dy.binary_log_loss(out_vector, label))

        pres = [0]
        for pattern in rule:
            probs = self.decoder(features, pres)
            loss.append(-dy.log(dy.pick(probs, pattern)))
            pres.append(pattern)

        loss = dy.esum(loss)
        loss.backward()
        self.trainer.update()
        dy.renew_cg()

def generate(input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0:
                break
        return i

    embedded = embed_sentence(input)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    out = ''
    count_EOS = 0
    for i in range(len(input) * 2):
        if count_EOS == 2:
            break
        vector = dy.concatenate([attend(encoded, s), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        probs = probs.vec_value()
        next_char = sample(probs)
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue
        out += int2char[next_char]
    return out

def train(self, trainning_set):
    for sentence, eid, entity, trigger, label, pos, chars, rule in trainning_set:
        features = self.encode_sentence(sentence, pos, chars)
        loss = []

        # entity_embeds = features[entity]

        # attention, context = self.self_attend(features)
        # ty = dy.vecInput(len(sentence))
        # ty.set([0 if i!=trigger else 1 for i in range(len(sentence))])
        # loss.append(dy.binary_log_loss(dy.reshape(attention,(len(sentence),)), ty))

        # h_t = dy.concatenate([context, entity_embeds])
        # hidden = dy.tanh(self.lb * h_t + self.lb_bias)
        # out_vector = dy.reshape(dy.logistic(self.lb2 * hidden + self.lb2_bias), (1,))
        # label = dy.scalarInput(label)
        # loss.append(dy.binary_log_loss(out_vector, label))

        # Get decoding losses
        last_output_embeddings = self.pattern_embeddings[0]
        s = self.decoder_lstm.initial_state().add_input(
            dy.concatenate([dy.vecInput(self.hidden_dim), last_output_embeddings]))
        for pattern in rule:
            h_t = s.output()
            context = self.attend(features, h_t)
            out_vector = self.pt * dy.concatenate([context, h_t]) + self.pt_bias
            probs = dy.softmax(out_vector)
            loss.append(-dy.log(dy.pick(probs, pattern)))
            last_output_embeddings = self.pattern_embeddings[pattern]
            s = s.add_input(dy.concatenate([context, last_output_embeddings]))

        loss = dy.esum(loss)
        loss.backward()
        self.trainer.update()
        dy.renew_cg()

def test(test_list, pWeight, unique_vector, features_total, unique_class):
    input_dy = dy.vecInput(features_total)
    output_list = []
    # print(unique_class)
    for line in test_list:
        test_line = line.split()
        target = test_line[0]
        test_line = test_line[1:]
        test_vector = [0] * features_total
        all_unique = True
        # print(unique_vector)
        for word in test_line:
            # print(word)
            try:
                test_vector[unique_vector.index(word)] = 1
                all_unique = False
            except ValueError:
                # word not in the training vocabulary
                continue
        input_dy.set(test_vector)
        if all_unique:
            print("none")
            # print("%s: %s" % (unique_class.index(target), output))
            # output_list.append(output)
        else:
            output = test_network(pWeight, input_dy)
            # print("%s: %s" % (unique_class.index(target), output))
            # output_list.append(output)
            print("target: %s, output: %s" % (target, unique_class[output]))

def test(test_list, pWeight, unique_vector, features_total):
    input_dy = dy.vecInput(features_total)
    for line in test_list:
        test_line = line.split()
        target = test_line[0]
        test_line = test_line[1:]
        test_vector = [0] * features_total
        all_unique = True
        for word in test_line:
            try:
                test_vector[unique_vector.index(word)] = 1
                all_unique = False
            except ValueError:
                # word not in the training vocabulary
                continue
        input_dy.set(test_vector)
        if all_unique:
            output = 0
            print("%s: %s" % (target, output))
        else:
            output = test_network(pWeight, input_dy)
            print("%s: %s" % (target, output.value()))

def generate(lemma, tag, enc_fwd_lstm, enc_bwd_lstm, dec_lstm, cencoder, cdecoder):
    embedded = embed_sentence(lemma, tag)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    encoded = encoded[-1]
    w1dt = None

    last_output_embeddings = char_lookup[cencoder["#"]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(lemma) * 2):
        if count_EOS == 2:
            break
        vector = dy.concatenate([encoded, last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = char_lookup[next_char]
        if cdecoder[next_char] == "#":
            count_EOS += 1
            continue
        out += cdecoder[next_char]
    return out

def encode(self, w, o, s):
    k = 5
    suffixes, prefixes = [], []
    for i in range(1, k + 1):
        pre, suf = w[:i], w[-i:]
        pre_idx = self.P2I[pre] if pre in self.P2I else self.P2I["<unk>"]
        suf_idx = self.S2I[suf] if suf in self.S2I else self.S2I["<unk>"]
        suf_e = dy.lookup(self.E_suf, suf_idx)
        pre_e = dy.lookup(self.E_pre, pre_idx)
        suffixes.append(suf_e)
        prefixes.append(pre_e)
    word_encoded = self.W2I[w] if w in self.W2I else self.W2I["<unk>"]
    word_e = dy.lookup(self.E, word_encoded)
    exp_out = dy.vecInput(EMBEDDING_SIZE)
    if o == []:
        o = ["<unk>"]
    for out_token in o:
        out_token_encoded = (self.OUTPUT2IND[out_token]
                             if out_token in self.OUTPUT2IND
                             else self.OUTPUT2IND["<unk>"])
        out_embedding = dy.lookup(self.E_output, out_token_encoded)
        exp_out = exp_out + out_embedding
    W = dy.parameter(self.W)
    # use the accumulated sum of output-token embeddings, not just the last one
    return W * dy.concatenate([word_e, dy.esum(suffixes), dy.esum(prefixes), exp_out])

def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(vectors)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    loss = []

    for char in output:
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss

def generate(in_seq, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    embedded = embed_sentence(in_seq)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)
    w1 = dy.parameter(attention_w1)
    input_mat = dy.concatenate_cols(encoded)
    w1dt = None

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))

    out = ''
    count_EOS = 0
    for i in range(len(in_seq) * 2):
        if count_EOS == 2:
            break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt = w1dt or w1 * input_mat
        vector = dy.concatenate([attend(input_mat, s, w1dt), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_char = probs.index(max(probs))
        last_output_embeddings = output_lookup[next_char]
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue
        out += int2char[next_char]
    return out

def train(self, trainning_set):
    for sentence, rule in trainning_set:
        features = self.encode_sentence(sentence)
        loss = []

        # Get decoding losses
        last_output_embeddings = self.pattern_embeddings[0]
        s = self.decoder_lstm.initial_state().add_input(
            dy.concatenate([dy.vecInput(self.hidden_dim), last_output_embeddings]))
        rule.append(1)
        for pattern in rule:
            h_t = s.output()
            context = self.attend(features, h_t)
            out_vector = self.pt.expr() * dy.concatenate([context, h_t]) + self.pt_bias.expr()
            probs = dy.softmax(out_vector)
            loss.append(-dy.log(dy.pick(probs, pattern)))
            last_output_embeddings = self.pattern_embeddings[pattern]
            s = s.add_input(dy.concatenate([context, last_output_embeddings]))

        loss = dy.esum(loss)
        loss.backward()
        self.trainer.update()
        dy.renew_cg()

def attend(self, H_e, h_t):
    # Note: each score s below is a single scalar, so dy.softmax(s) always
    # evaluates to 1; as written, this returns the unweighted mean of the
    # encoder states rather than a softmax-weighted context vector.
    context_vector = dy.vecInput(self.hidden_dim)
    for h_e in H_e:
        s = dy.transpose(h_t) * self.attention_weight.expr() * h_e
        a = dy.softmax(s)
        context_vector += h_e * a
    return context_vector / len(H_e)

def Train_Morph(self):
    self.trainer.set_sparse_updates(False)
    start = time.time()
    for iWord, word in enumerate(list(self.morph_dict.keys())):
        if iWord % 2000 == 0 and iWord != 0:
            print("Processing word number: %d" % iWord,
                  ", Time: %.2f" % (time.time() - start))
            start = time.time()
        morph_seg = self.morph_dict[word]
        morph_vec = self.__getWordVector(morph_seg)
        if self.ext_embeddings is None:
            vec_gold = self.wlookup[int(self.vocab.get(word, 0))].vec_value()
        elif word in self.ext_embeddings:
            vec_gold = self.ext_embeddings[word]
        else:
            vec_gold = None
        if vec_gold is not None:
            y_gold = dynet.vecInput(self.wdims)
            y_gold.set(vec_gold)
            mErrs = self.cosine_proximity(morph_vec, y_gold)
            mErrs.backward()
            self.trainer.update()
        dynet.renew_cg()

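The cosine_proximity helper called above is not part of the snippet. A minimal sketch under the assumption that it returns the negative cosine similarity between the morpheme-composed vector and the gold word vector:

def cosine_proximity(self, pred, gold):
    # negative cosine similarity; minimizing it pulls pred toward gold
    dot = dynet.dot_product(pred, gold)
    norm = dynet.sqrt(dynet.squared_norm(pred) * dynet.squared_norm(gold))
    return -dynet.cdiv(dot, norm)
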
def generate(input, enc_fwd_lstm, enc_bwd_lstm, dec_lstm):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0:
                break
        return i

    embedded = embed_sentence(input)
    encoded = encode_sentence(enc_fwd_lstm, enc_bwd_lstm, embedded)

    w = pc.parameter(decoder_w)
    b = pc.parameter(decoder_b)

    s = dec_lstm.initial_state().add_input(pc.vecInput(STATE_SIZE * 2))
    out = ''
    count_EOS = 0
    for i in range(len(input) * 2):
        if count_EOS == 2:
            break
        vector = attend(encoded, s)
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = pc.softmax(out_vector)
        probs = probs.vec_value()
        next_char = sample(probs)
        if int2char[next_char] == EOS:
            count_EOS += 1
            continue
        out += int2char[next_char]
    return out

def generate(self, src_seq, sampled=False):
    def sample(probs):
        rnd = random.random()
        for i, p in enumerate(probs):
            rnd -= p
            if rnd <= 0:
                break
        return i

    dynet.renew_cg()

    embedded = self.embed_seq(src_seq)
    input_vectors = self.encode_seq(embedded)

    w = dynet.parameter(self.decoder_w)
    b = dynet.parameter(self.decoder_b)

    s = self.dec_lstm.initial_state()
    s = s.add_input(dynet.concatenate([
        input_vectors[-1],
        dynet.vecInput(self.args.hidden_dim * 2),
        dynet.vecInput(self.pronouncer.args.hidden_dim * 2)
    ]))
    out = []
    for i in range(1 + len(src_seq) * 5):
        out_vector = w * s.output() + b
        probs = dynet.softmax(out_vector)
        probs = probs.vec_value()
        next_symbol = sample(probs) if sampled else max(enumerate(probs), key=lambda x: x[1])[0]
        out.append(self.tgt_vocab[next_symbol])
        if self.tgt_vocab[next_symbol] == self.tgt_vocab.END_TOK:
            break

        embed_vector = self.tgt_lookup[out[-1].i]
        attn_vector = self.attend(input_vectors, s)

        spelling = [self.pronouncer.src_vocab[letter] for letter in out[-1].s.upper()]
        embedded_spelling = self.pronouncer.embed_seq(spelling)
        pron_vector = self.pronouncer.encode_seq(embedded_spelling)[-1]
        fpv = dynet.nobackprop(pron_vector)

        inp = dynet.concatenate([embed_vector, attn_vector, fpv])
        s = s.add_input(inp)
    return out

def generate(self, pre_context, pos_context, entity):
    embedded = self.embed_sentence(pre_context)
    pre_encoded = self.encode_sentence(self.encpre_fwd_lstm, self.encpre_bwd_lstm, embedded)

    embedded = self.embed_sentence(pos_context)
    pos_encoded = self.encode_sentence(self.encpos_fwd_lstm, self.encpos_bwd_lstm, embedded)

    w = dy.parameter(self.decoder_w)
    b = dy.parameter(self.decoder_b)

    w1_pre = dy.parameter(self.attention_w1_pre)
    h_pre = dy.concatenate_cols(pre_encoded)
    w1dt_pre = None

    w1_pos = dy.parameter(self.attention_w1_pos)
    h_pos = dy.concatenate_cols(pos_encoded)
    w1dt_pos = None

    last_output_embeddings = self.output_lookup[self.output2int[self.EOS]]
    entity_embedding = self.input_lookup[self.input2int[entity]]
    s = self.dec_lstm.initial_state().add_input(
        dy.concatenate([
            dy.vecInput(self.STATE_SIZE * 2),
            last_output_embeddings,
            entity_embedding
        ]))

    out = []
    count_EOS = 0
    for i in range(self.config['GENERATION']):
        if count_EOS == 2:
            break
        # w1dt can be computed and cached once for the entire decoding phase
        w1dt_pre = w1dt_pre or w1_pre * h_pre
        w1dt_pos = w1dt_pos or w1_pos * h_pos

        attention_pre = self.attend(h_pre, s, w1dt_pre, self.attention_w2_pre, self.attention_v_pre)
        attention_pos = self.attend(h_pos, s, w1dt_pos, self.attention_w2_pos, self.attention_v_pos)

        vector = dy.concatenate([
            self.hier_attend(attention_pre, attention_pos, s),
            last_output_embeddings,
            entity_embedding
        ])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector).vec_value()
        next_word = probs.index(max(probs))
        last_output_embeddings = self.output_lookup[next_word]
        if self.int2output[next_word] == self.EOS:
            count_EOS += 1
            continue
        out.append(self.int2output[next_word])
    return out

def decode(dec_lstm, vectors, output):
    output = [EOS] + list(output) + [EOS]
    output = [char2int[c] for c in output]

    w = dy.parameter(decoder_w)
    b = dy.parameter(decoder_b)

    last_output_embeddings = output_lookup[char2int[EOS]]
    s = dec_lstm.initial_state().add_input(
        dy.concatenate([dy.vecInput(STATE_SIZE * 2), last_output_embeddings]))
    loss = []
    for char in output:
        vector = dy.concatenate([attend(vectors, s), last_output_embeddings])
        s = s.add_input(vector)
        out_vector = w * s.output() + b
        probs = dy.softmax(out_vector)
        last_output_embeddings = output_lookup[char]
        loss.append(-dy.log(dy.pick(probs, char)))
    loss = dy.esum(loss)
    return loss

def create_network_return_best(inputs):
    '''
    inputs is a list of numbers
    '''
    dy.renew_cg()
    W = dy.parameter(pW)
    b = dy.parameter(pB)
    if len(inputs) > documentLength:
        inputs = inputs[0:documentLength]
    emb_vectors = [lookup[i] for i in inputs]
    while len(emb_vectors) < documentLength:
        pad = dy.vecInput(embDimension)
        pad.set(np.zeros(embDimension))
        emb_vectors.append(pad)
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax((W * net_input) + b)
    return np.argmax(net_output.npvalue())

def create_network_return_loss(inputs, expected_output):
    '''
    inputs is a list of numbers
    '''
    dy.renew_cg()
    W = dy.parameter(pW)  # from parameters to expressions
    b = dy.parameter(pB)
    if len(inputs) > documentLength:
        inputs = inputs[0:documentLength]
    emb_vectors = [lookup[i] for i in inputs]
    while len(emb_vectors) < documentLength:
        pad = dy.vecInput(embDimension)
        pad.set(np.zeros(embDimension))
        emb_vectors.append(pad)
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax((W * net_input) + b)
    loss = -dy.log(dy.pick(net_output, expected_output))
    return loss

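A hedged usage sketch for the two helpers above (trainer, train_data, and test_data are assumed names, not from the source): build the loss expression for each training example, run the forward pass, backpropagate, update, then predict with the argmax helper.

for inputs, label in train_data:
    loss = create_network_return_loss(inputs, label)
    loss.value()   # run the forward pass
    loss.backward()
    trainer.update()

predictions = [create_network_return_best(inputs) for inputs, _ in test_data]
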
HIDDEN_SIZE = 8
ITERATIONS = 2000

m = dy.Model()
trainer = dy.SimpleSGDTrainer(m)

W = m.add_parameters((HIDDEN_SIZE, 2))
b = m.add_parameters(HIDDEN_SIZE)
V = m.add_parameters((1, HIDDEN_SIZE))
a = m.add_parameters(1)

if len(sys.argv) == 2:
    m.populate_from_textfile(sys.argv[1])

x = dy.vecInput(2)
y = dy.scalarInput(0)
h = dy.tanh((W * x) + b)
if xsent:
    y_pred = dy.logistic((V * h) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1
    F = 0
else:
    y_pred = (V * h) + a
    loss = dy.squared_distance(y_pred, y)
    T = 1
    F = -1

for iter in range(ITERATIONS):
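    # The original snippet is truncated at this loop. A plausible body, modeled
    # on the standard DyNet xor example (an assumption, not from this source):
    mloss = 0.0
    for mi in range(4):
        x1 = mi % 2
        x2 = (mi // 2) % 2
        x.set([T if x1 else F, T if x2 else F])
        y.set(T if x1 != x2 else F)
        mloss += loss.scalar_value()
        loss.backward()
        trainer.update()
    mloss /= 4.
    print("loss: %0.9f" % mloss)
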
trainer = dy.SimpleSGDTrainer(m)

pW1 = m.add_parameters((HIDDEN_SIZE, 2), device="GPU:1")
pb1 = m.add_parameters(HIDDEN_SIZE, device="GPU:1")
pW2 = m.add_parameters((HIDDEN_SIZE, HIDDEN_SIZE), device="GPU:0")
pb2 = m.add_parameters(HIDDEN_SIZE, device="GPU:0")
pV = m.add_parameters((1, HIDDEN_SIZE), device="CPU")
pa = m.add_parameters(1, device="CPU")

if len(sys.argv) == 2:
    m.populate_from_textfile(sys.argv[1])

dy.renew_cg()
W1, b1, W2, b2, V, a = dy.parameter(pW1, pb1, pW2, pb2, pV, pa)
x = dy.vecInput(2, "GPU:1")
y = dy.scalarInput(0, "CPU")
h1 = dy.tanh((W1 * x) + b1)
h1_gpu0 = dy.to_device(h1, "GPU:0")
h2 = dy.tanh((W2 * h1_gpu0) + b2)
h2_cpu = dy.to_device(h2, "CPU")
if xsent:
    y_pred = dy.logistic((V * h2_cpu) + a)
    loss = dy.binary_log_loss(y_pred, y)
    T = 1
    F = 0
else:
    y_pred = (V * h2_cpu) + a
    loss = dy.squared_distance(y_pred, y)
    T = 1
    F = -1
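
A sketch of one training step on this multi-device graph (an assumption, not part of the original snippet): set the inputs, evaluate the loss on the CPU, and backpropagate through the GPU:1 -> GPU:0 -> CPU chain.

x.set([T, F])
y.set(F)
print(loss.scalar_value())
loss.backward()
trainer.update()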