def estimate_joint_priors(self, qp_pairs, smooth=False):
    model = self.model
    zsize = model.zsize
    pseudocount = model.pseudocount
    joint_samples = []
    qZ_samples = []
    pZ_samples = []
    for (qZ_X, pZ_Y) in qp_pairs:
        joint_samples.append(qZ_X * dy.transpose(pZ_Y))
        qZ_samples.append(qZ_X)
        pZ_samples.append(pZ_Y)
    if smooth:
        v_unif = dy.inputTensor(np.ones((zsize)) / zsize)
        A_unif = dy.inputTensor(np.ones((zsize, zsize)) / (zsize**2))
        for _ in range(pseudocount):
            joint_samples.append(A_unif)
            qZ_samples.append(v_unif)
            pZ_samples.append(v_unif)
    joint = dy.average(joint_samples)
    qZ = dy.average(qZ_samples)
    pZ = dy.average(pZ_samples)
    return joint, qZ, pZ

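# A minimal, self-contained sketch (not from the original code) of the smoothing
# logic above: averaging the outer-product samples together with `pseudocount`
# uniform matrices keeps the joint a proper distribution. Assumes vanilla DyNet
# and NumPy; the sample distributions here are made up.
import numpy as np
import dynet as dy

zsize, pseudocount = 4, 2
dy.renew_cg()

qs = [dy.inputTensor(np.random.dirichlet(np.ones(zsize))) for _ in range(3)]
ps = [dy.inputTensor(np.random.dirichlet(np.ones(zsize))) for _ in range(3)]

# Outer products q * p^T, as in the loop above; each one sums to 1.
joint_samples = [q * dy.transpose(p) for q, p in zip(qs, ps)]
A_unif = dy.inputTensor(np.ones((zsize, zsize)) / zsize**2)
joint = dy.average(joint_samples + [A_unif] * pseudocount)

print(joint.npvalue().sum())  # ~1.0: the smoothed joint still sums to one
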
def Train(instances, itercount):
    dy.renew_cg()
    ontoparser.initialize_graph_nodes(train=True)
    loss = []
    errors = 0.0
    for instance in instances:
        fexpr, sexpr, groundtruth = instance
        # context insensitive embeddings or local embeddings
        subtype = [sb.lower() for sb in fexpr.split()]  # if sb.lower() not in stop]
        supertype = [sp.lower() for sp in sexpr.split()]  # if sp.lower() not in stop]
        fembs, DSTATUS_X = ontoparser.get_linear_embd(subtype)
        sembs, DSTATUS_Y = ontoparser.get_linear_embd(supertype)
        # if (DSTATUS_X is False) or (DSTATUS_Y is False): continue
        fembs = fembs[0] if len(fembs) == 1 else dy.average(fembs)
        sembs = sembs[0] if len(sembs) == 1 else dy.average(sembs)
        x = dy.concatenate([fembs, sembs])
        # e_dist = dy.squared_distance(fembs, sembs)
        e_dist = 1 - distance.cosine(fembs.npvalue(), sembs.npvalue())
        # weighted_x = x * e_dist
        output = ontoparser.W2 * (dy.rectify(ontoparser.W1 * x) + ontoparser.b1) + ontoparser.b2
        prediction = np.argmax(output.npvalue())
        loss.append(dy.pickneglogsoftmax(output, ontoparser.meta.tdmaps[groundtruth]))
        # if ((ontoparser.meta.rmaps[prediction] == "Hypernym") and ("Hypernym" != groundtruth)) and (e_dist < 0.5):
        #     loss[-1] += -log(0.6)
        errors += 0 if groundtruth == ontoparser.meta.rmaps[prediction] else 1
    return loss, errors

def predict_hyp(self, subtype, supertype):
    dy.renew_cg()
    self.initialize_graph_nodes()
    subtype = [self.lmtzr.lemmatize(sb) for sb in subtype.split() if sb not in self.stop]
    supertype = [self.lmtzr.lemmatize(sp) for sp in supertype.split() if sp not in self.stop]
    if subtype == supertype:
        return
    # context insensitive embeddings or local embeddings
    fembs, DSTATUS_X = self.get_linear_embd(subtype)
    sembs, DSTATUS_Y = self.get_linear_embd(supertype)
    if len(fembs) < 1 or len(sembs) < 1:
        return
    if (DSTATUS_X is False) or (DSTATUS_Y is False):
        return
    fembs = fembs[0] if len(fembs) == 1 else dy.average(fembs)
    sembs = sembs[0] if len(sembs) == 1 else dy.average(sembs)
    x = dy.concatenate([fembs, sembs])
    # e_dist = dy.squared_distance(fembs, sembs)
    # e_dist = distance.euclidean(fembs.npvalue(), sembs.npvalue())
    e_dist = 1 - distance.cosine(fembs.npvalue(), sembs.npvalue())
    # weighted_x = x * e_dist
    output = dy.softmax(self.W2 * (dy.rectify(self.W1 * x) + self.b1) + self.b2)
    prediction = np.argmax(output.npvalue())
    confidence = np.max(output.npvalue())
    return self.meta.rmaps[prediction], confidence, e_dist

def _predict(self, batch, train=True):
    # load the network parameters
    W_hid = dy.parameter(self.W_hid)
    b_hid = dy.parameter(self.b_hid)
    w_clf = dy.parameter(self.w_clf)
    b_clf = dy.parameter(self.b_clf)

    probas = []
    # predict the probability of positive sentiment for each sentence
    for _, sent in batch:
        sent_embed = [dy.lookup(self.embed, w) for w in sent]
        dropout_embed = []
        # $@$ Task 3: apply dropout to the word embeddings to regularize training
        if train:
            for embed in sent_embed:
                embed = dy.dropout(embed, 0.5)
                dropout_embed.append(embed)
            sent_embed = dy.average(dropout_embed)
        else:
            sent_embed = dy.average(sent_embed)
        # hid = tanh(b + W * sent_embed)
        # but it's faster to use affine_transform in dynet
        hid = dy.affine_transform([b_hid, W_hid, sent_embed])
        hid = dy.tanh(hid)
        y_score = dy.affine_transform([b_clf, w_clf, hid])
        y_proba = dy.logistic(y_score)
        probas.append(y_proba)
    return probas

def __call__(self, query, options, gold, lengths, query_no):
    if len(options) == 1:
        return None, 0

    final = []
    if args.word_vectors:
        qvecs = [dy.lookup(self.pEmbedding, w) for w in query]
        qvec_max = dy.emax(qvecs)
        qvec_mean = dy.average(qvecs)
    for otext, features in options:
        if not args.no_features:
            inputs = dy.inputTensor(features)
        if args.word_vectors:
            ovecs = [dy.lookup(self.pEmbedding, w) for w in otext]
            ovec_max = dy.emax(ovecs)
            ovec_mean = dy.average(ovecs)
            if args.no_features:
                inputs = dy.concatenate([qvec_max, qvec_mean, ovec_max, ovec_mean])
            else:
                inputs = dy.concatenate([inputs, qvec_max, qvec_mean, ovec_max, ovec_mean])
        if args.drop > 0:
            inputs = dy.dropout(inputs, args.drop)
        h = inputs
        for pH, pB in zip(self.hidden, self.bias):
            h = dy.affine_transform([pB, pH, h])
            if args.nonlin == "linear":
                pass
            elif args.nonlin == "tanh":
                h = dy.tanh(h)
            elif args.nonlin == "cube":
                h = dy.cube(h)
            elif args.nonlin == "logistic":
                h = dy.logistic(h)
            elif args.nonlin == "relu":
                h = dy.rectify(h)
            elif args.nonlin == "elu":
                h = dy.elu(h)
            elif args.nonlin == "selu":
                h = dy.selu(h)
            elif args.nonlin == "softsign":
                h = dy.softsign(h)
            elif args.nonlin == "swish":
                h = dy.cmult(h, dy.logistic(h))
        final.append(dy.sum_dim(h, [0]))

    final = dy.concatenate(final)
    nll = -dy.log_softmax(final)
    dense_gold = []
    for i in range(len(options)):
        dense_gold.append(1.0 / len(gold) if i in gold else 0.0)
    answer = dy.inputTensor(dense_gold)
    loss = dy.transpose(answer) * nll
    predicted_link = np.argmax(final.npvalue())
    return loss, predicted_link

def __init__(self, nmodel, qinfo, vw, init_example=True):
    SqaState.__init__(self, qinfo)
    self.path_score_expression = dt.scalarInput(0)
    self.score = 0
    self.nm = nmodel
    self.vw = vw
    self.H = dt.parameter(self.nm.pH)
    if init_example:
        UNK = self.vw.w2i["_UNK_"]

        # vectors of question words
        self.ques_emb = [self.nm.E[self.vw.w2i.get(w, UNK)]
                         for w in self.qinfo.ques_word_sequence]
        # self.ques_avg_emb = dt.average(self.ques_emb)
        # self.ques_emb = dt.concatenate_cols([self.nm.E[self.vw.w2i.get(w, UNK)] for w in self.qinfo.ques_word_sequence])

        # avg. vectors of column names
        self.headers_embs = []
        for colname_word_sequence in self.qinfo.headers_word_sequences:
            colname_emb = dt.average([self.nm.E[self.vw.w2i.get(w, UNK)]
                                      for w in colname_word_sequence])
            self.headers_embs.append(colname_emb)

        # avg. vectors of table entries
        self.entries_embs = []
        for row_word_sequences in self.qinfo.entries_word_sequences:
            row_embs = []
            for cell_word_sequence in row_word_sequences:
                row_embs.append(dt.average([self.nm.E[self.vw.w2i.get(w, UNK)]
                                            for w in cell_word_sequence]))
            self.entries_embs.append(row_embs)

        self.NulW = dt.parameter(self.nm.NulW)
        self.ColW = dt.parameter(self.nm.ColW)
        self.SelColW = dt.parameter(self.nm.SelColW)
        self.SelColWhereW = dt.parameter(self.nm.SelColWhereW)

        # question LSTM
        f_init, b_init = [b.initial_state() for b in self.nm.builders]
        wembs = [self.nm.E[self.vw.w2i.get(w, UNK)] for w in self.qinfo.ques_word_sequence]
        self.fw = [x.output() for x in f_init.add_inputs(wembs)]
        self.bw = [x.output() for x in b_init.add_inputs(reversed(wembs))]
        self.bw.reverse()

def evaluate(self, input_sentences, labels):
    dy.renew_cg()
    self.word_rnn.disable_dropout()
    self.sent_rnn.disable_dropout()

    embed_sents = []
    for input_sentence in input_sentences:
        input_sentence = self._preprocess_input(input_sentence, self.word_to_ix)
        # input_sentence = [self.word_to_ix['<start>']] + input_sentence + [self.word_to_ix['<end>']]
        embed_words = self._embed_sentence(input_sentence)
        word_rnn_outputs = self._run_rnn(self.word_rnn, embed_words)
        sent_embed = dy.average(word_rnn_outputs)
        embed_sents.append(sent_embed)

    rnn_outputs = self._run_rnn(self.sent_rnn, embed_sents)
    doc_output_w = dy.parameter(self.doc_output_w)
    doc_output_b = dy.parameter(self.doc_output_b)
    doc_output = dy.tanh(doc_output_w * dy.average(rnn_outputs) + doc_output_b)

    probs = []
    sum_output = dy.zeros(self.args.sent_hidden_dim)
    pred_labels = []
    correct = 0
    total = 0
    loss = dy.zeros(1)
    for i, rnn_output in enumerate(rnn_outputs):
        abspos_embed = dy.lookup(self.abspos_embeddings, self.abspos_ix[i])
        relpos_embed = dy.lookup(self.relpos_embeddings, self.relpos_ix[i])
        prob = self._get_probs(rnn_output, doc_output, sum_output, abspos_embed, relpos_embed)
        sum_output += dy.cmult(prob, rnn_output)
        pred_label = self._predict(prob)
        pred_labels.append(pred_label)
        if pred_label == labels[i]:
            correct += 1
        total += 1
        if labels[i] == 1:
            loss -= dy.log(prob)
        else:
            loss -= dy.log(dy.scalarInput(1) - prob)
    return loss.value(), pred_labels, correct, total

def train(self, training_set):
    loss_chunk = 0
    loss_all = 0
    total_chunk = 0
    total_all = 0
    losses = []
    for datapoint in training_set:
        query = datapoint[0]
        eq = dy.average([self.word_embeddings[self.w2i[w]] if w in self.w2i
                         else self.word_embeddings[0] for w in query])
        hyper = datapoint[1]
        eh = dy.average([self.word_embeddings[self.w2i[w]] if w in self.w2i
                         else self.word_embeddings[0] for w in hyper])
        t = dy.scalarInput(datapoint[2])
        Ps = []
        for i in range(self.k):
            Ps.append(self.Phis[i].expr() * eq)
        P = dy.transpose(dy.concatenate_cols(Ps))
        s = P * eh
        y = dy.reshape(dy.logistic(self.W.expr() * s + self.b.expr()), (1,))
        losses.append(dy.binary_log_loss(y, t))

        # process losses in chunks
        if len(losses) > 50:
            loss = dy.esum(losses)
            l = loss.scalar_value()
            loss.backward()
            self.trainer.update()
            dy.renew_cg()
            losses = []
            loss_chunk += l
            loss_all += l
            total_chunk += 1
            total_all += 1

    # consider any remaining losses
    if len(losses) > 0:
        loss = dy.esum(losses)
        loss.scalar_value()
        loss.backward()
        self.trainer.update()
        dy.renew_cg()
    print(f'loss: {loss_all/total_all:.4f}')

def _calc_scores_embedded_mlp(self, sentences, W_emb, W_mlp, b_mlp, V_mlp, a_mlp, meta_data=None):
    """
    Calculate the score of an MLP network (at a specific state along the learning phase).

    :param sentences: list
        list of lists of sentences (already represented as numbers, not letters)
    :param W_emb: lookup parameter (dynet obj). size: (emb_size x nwords)
        matrix holding the word embedding values
    :param W_mlp: model parameter (dynet obj). size: (hid_size, emb_size + meta_data_dim)
        matrix holding the weights of the MLP phase
    :param b_mlp: model parameter (dynet obj). size: (hid_size,)
        vector holding the intercept weights for each hidden state
    :param V_mlp: model parameter (dynet obj). size: (2, hid_size)
        matrix holding the weights of the logistic regression phase.
        It has 2 rows because this is a binary classification
    :param a_mlp: model parameter (dynet obj). size: (1,)
        intercept value for the logistic regression phase
    :param meta_data: dict or None
        meta data features for the model. If None, meta data is not used
    :return: dynet expression. size: (2,)
        prediction of the instance being a drawing one according to the model
        (vector of 2; the first entry is the probability of being a drawing team)
    """
    dy.renew_cg()
    # sentences_len = len(sentences)
    word_embs = [[dy.lookup(W_emb, w) for w in words] for words in sentences]
    # taking the average over all words
    first_layer_avg = dy.average([dy.average(w_em) for w_em in word_embs])
    # case we don't wish to use meta features for the model
    if meta_data is None:
        h = dy.tanh((W_mlp * first_layer_avg) + b_mlp)
        prediction = dy.logistic((V_mlp * h) + a_mlp)
    else:
        meta_data_ordered = [value for key, value in sorted(meta_data.items())]
        meta_data_vector = dy.inputVector(meta_data_ordered)
        first_layer_avg_and_meta_data = dy.concatenate([first_layer_avg, meta_data_vector])
        h = dy.tanh((W_mlp * first_layer_avg_and_meta_data) + b_mlp)
        prediction = dy.logistic((V_mlp * h) + a_mlp)
    return prediction

def get_hier_bilstm_avg(self, input_seqs, flstm1, blstm1, flstm2, blstm2, update_flag=True):
    seqreps = []
    for input_seq in input_seqs:
        seqreps.append(dy.average(self.get_bilstm_all(input_seq, flstm1, blstm1, update_flag)))
    return dy.average(self.get_bilstm_all(seqreps, flstm2, blstm2, update_flag))

def beam_train_max_margin_with_answer_guidence(self, init_state, gold_ans):
    # Perform two beam searches: one for the prediction and the other for the
    # state-action search.
    # max reward: y = argmax r(y), with the help of gold_ans
    # max score:  y' = argmax f(x, y') - R(y')
    # loss = max(f(x, y') - f(x, y) + R(y) - R(y'), 0)

    # end_state_list = self.beam_predict(init_state)
    end_state_list = self.beam_predict_max_violation(init_state, gold_ans)  # have to use this to make it work....
    reward_list = [x.reward(gold_ans) for x in end_state_list]
    violation_list = [s.path_score_expression.value() - reward
                      for s, reward in zip(end_state_list, reward_list)]

    # find the best-scoring sequence with minimal reward
    best_score_state_idx = violation_list.index(max(violation_list))
    best_score_state = end_state_list[best_score_state_idx]
    best_score_state_reward = reward_list[best_score_state_idx]

    loss_value = 0
    if self.only_one_best:
        best_states = self.beam_find_actions_with_answer_guidence(init_state, gold_ans)
        if best_states == []:
            return 0, []
        best_reward_state = best_states[0]
        # print("debug: found best_reward_state: qid =", best_reward_state.qinfo.seq_qid, best_reward_state)
        best_reward_state_reward = best_reward_state.reward(gold_ans)
        # print("debug: best_reward_state_reward =", best_reward_state_reward)
        loss = dt.rectify(best_score_state.path_score_expression
                          - best_reward_state.path_score_expression
                          + dt.scalarInput(best_reward_state_reward - best_score_state_reward))
    else:
        best_states = self.beam_find_actions_with_answer_guidence(init_state, gold_ans)
        best_states_rewards = [s.reward(gold_ans) for s in best_states]
        max_reward = max(best_states_rewards)
        best_states = [s for s, r in zip(best_states, best_states_rewards) if r == max_reward]
        loss = dt.average([dt.rectify(best_score_state.path_score_expression
                                      - best_reward_state.path_score_expression
                                      + dt.scalarInput(max_reward - best_score_state_reward))
                           for best_reward_state in best_states])

    loss_value = loss.value()
    loss.backward()
    self.neural_model.learner.update()
    # print("debug: beam_train_max_margin_with_answer_guidence done. loss_value =", loss_value)
    return loss_value, best_states

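# The loss above is a structured hinge, loss = max(f(x, y') - f(x, y) + R(y) - R(y'), 0),
# built with dt.rectify. A hedged, standalone illustration of just that expression
# (plain DyNet with toy scores and rewards, not the original state objects):
import dynet as dy  # imported as `dt` in the snippet above

dy.renew_cg()
best_score = dy.scalarInput(3.2)          # f(x, y'): highest-scoring prediction
best_reward_score = dy.scalarInput(2.7)   # f(x, y):  best-reward sequence
reward_gap = 1.0 - 0.4                    # R(y) - R(y')

# Positive only when the model prefers the lower-reward path by more than the margin.
loss = dy.rectify(best_score - best_reward_score + dy.scalarInput(reward_gap))
print(loss.value())  # 1.1 for these toy numbers
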
def learn(self, seq):
    dy.renew_cg()
    softmax_list, aux_softmax_list = self._predict(seq, runtime=False)
    losses = []
    for entry, softmax, aux_softmax in zip(seq, softmax_list, aux_softmax_list):
        # if self.tagset == 'upos':
        #     label_index = self.encodings.label2int[entry.upos]
        # elif self.tagset == 'xpos':
        #     label_index = self.encodings.label2int[entry.xpos]
        # else:
        #     label_index = self.encodings.label2int[entry.attrs]
        upos_index = self.encodings.upos2int[entry.upos]
        xpos_index = self.encodings.xpos2int[entry.xpos]
        attrs_index = self.encodings.attrs2int[entry.attrs]

        losses.append(-dy.log(dy.pick(softmax[0], upos_index)))
        losses.append(-dy.log(dy.pick(softmax[1], xpos_index)))
        losses.append(-dy.log(dy.pick(softmax[2], attrs_index)))
        losses.append(-dy.log(dy.pick(aux_softmax[0], upos_index)) * (self.aux_softmax_weight / 3))
        losses.append(-dy.log(dy.pick(aux_softmax[1], xpos_index)) * (self.aux_softmax_weight / 3))
        losses.append(-dy.log(dy.pick(aux_softmax[2], attrs_index)) * (self.aux_softmax_weight / 3))

    loss = dy.average(losses)
    loss_val = loss.value()
    loss.backward()
    self.trainer.update()
    return loss_val

def internal_eval(batches, transducer, vocab, previous_predicted_actions,
                  check_condition=True, name='train'):
    then = time.time()
    print('evaluating on {} data...'.format(name))

    number_correct = 0.
    total_loss = 0.
    predictions = []
    pred_acts = []
    i = 0  # counter of samples
    for j, batch in enumerate(batches):
        dy.renew_cg()
        batch_loss = []
        for sample in batch:
            feats = sample.pos, sample.feats
            loss, prediction, predicted_actions = transducer.transduce(
                sample.lemma, feats, external_cg=True)

            predictions.append(prediction)
            pred_acts.append(predicted_actions)
            batch_loss.extend(loss)

            # evaluation
            correct_prediction = False
            if (prediction in vocab.word and vocab.word.w2i[prediction] == sample.word):
                correct_prediction = True
                number_correct += 1

            if check_condition:
                # display the prediction for this sample if it differs from the
                # prediction of the previous epoch or if it is an error
                if predicted_actions != previous_predicted_actions[i] or not correct_prediction:
                    # print('BEFORE: ', datasets.action2string(previous_predicted_actions[i], vocab))
                    print('THIS TIME: ', datasets.action2string(predicted_actions, vocab))
                    print('TRUE: ', sample.act_repr)
                    print('PRED: ', prediction)
                    print('WORD: ', sample.word_str)
                    print('X' if correct_prediction else 'V')

            # increment counter of samples
            i += 1

        batch_loss = -dy.average(batch_loss)
        total_loss += batch_loss.scalar_value()
        # report progress
        if j > 0 and j % 100 == 0:
            print('\t\t...{} batches'.format(j))

    accuracy = number_correct / i
    print('\t...finished in {:.3f} sec'.format(time.time() - then))
    return accuracy, total_loss, predictions, pred_acts

def decode(self, tokens, constraints=[], train_mode=False):
    loss = 0
    errs = []
    fr_vecs = [self.special[0]] + [t.vecs[self.vec_key] for t in tokens]
    to_vecs = [self.special[1]] + [t.vecs[self.vec_key] for t in tokens]
    score_mat = self.biaffine.attend(fr_vecs, to_vecs)
    scores = score_mat.npvalue()

    if train_mode:
        oids = [0] + [t['original_id'] for t in tokens]
        gold_path = np.argsort(oids).tolist() + [0]
        trans_mat = dy.transpose(score_mat)
        for i, j in zip(gold_path, gold_path[1:]):
            errs.append(dy.hinge(score_mat[i], j))
            errs.append(dy.hinge(trans_mat[j], i))
        if errs:
            loss = dy.average(errs)

    costs = (1000 * (scores.max() - scores)).astype(int).tolist()
    solution = solve_tsp(costs, constraints, self.args.guided_local_search)  # first is best
    if not solution:
        # self.log('no solution, remove constraints')
        solution = solve_tsp(costs, [], self.args.guided_local_search)
        assert solution != []
    seq = [tokens[i - 1] for i in solution[1:-1]]
    return {'loss': loss, 'seq': seq}

def il_training_batch_update(batch, *args):
    # How to update model parameters from a batch of training samples
    # with IL (imitation learning) training?
    dy.renew_cg()
    epoch = args[0]
    e = 1 - decay(epoch - pretrain_epochs) if epoch >= pretrain_epochs else 0.
    if verbose and e:
        print('Sampling probability = {:.3f}'.format(e))
    batch_loss = []
    for sample in batch:
        feats = sample.pos, sample.feats
        # @TODO This will fail if a target character has never been seen
        # in lemmas and parameter tying is not used!
        loss, prediction, predicted_actions = self.transducer.transduce(
            lemma=sample.lemma,
            feats=feats,
            oracle_actions={'loss': loss_expression,
                            'rollout_mixin_beta': rollout_mixin_beta,
                            'global_rollout': global_rollout,
                            'target_word': sample.actions,  # support for legacy, buggy experiments
                            'optimal': optimal_oracle,
                            'bias_inserts': bias_inserts},
            sampling=e,
            external_cg=True,
            verbose=verbose)
        batch_loss.extend(loss)

    batch_loss = -dy.average(batch_loss)
    if l2:
        batch_loss += l2 * self.transducer.l2_norm(with_embeddings=False)
    loss = batch_loss.scalar_value()  # forward
    batch_loss.backward()             # backward
    self.trainer.update()
    return loss

def bow_snippets(self, token, snippets=None):
    """Bag-of-words embedding for snippets."""
    if snippet_handler.is_snippet(token):
        """
        Somewhere in this branch the program goes wrong on the server (it is
        fine on my own machine): the token is predicted to be a snippet and
        wrongly ends up here. Just ignore the assertion error.
        """
        try:
            assert snippets
        except AssertionError:
            return self(token)

        snippet_sequence = []
        for snippet in snippets:
            if snippet.name == token:
                snippet_sequence = snippet.sequence
                break
        assert snippet_sequence

        snippet_embeddings = [self(subtoken) for subtoken in snippet_sequence]
        return dy.average(snippet_embeddings)
    else:
        return self(token)

def _predict(self, batch, train=True):
    # load the network parameters
    W_hid = dy.parameter(self.W_hid)
    b_hid = dy.parameter(self.b_hid)
    w_clf = dy.parameter(self.w_clf)
    b_clf = dy.parameter(self.b_clf)

    probas = []
    # predict the probability of positive sentiment for each sentence
    for _, sent in batch:
        sent_embed = [dy.lookup(self.embed, w) for w in sent]
        sent_embed = dy.average(sent_embed)
        # hid = tanh(b + W * sent_embed)
        # but it's faster to use affine_transform in dynet
        hid = dy.affine_transform([b_hid, W_hid, sent_embed])
        hid = dy.tanh(hid)
        y_score = dy.affine_transform([b_clf, w_clf, hid])
        y_proba = dy.logistic(y_score)
        probas.append(y_proba)
    return probas

def adapt_user(s2s, trainer, train_src, train_trg, test_src, opt):
    timer = utils.Timer()
    log = utils.Logger(opt.verbose)
    n_train = len(train_src)
    n_tokens = (sum(map(len, train_trg)) - len(train_trg))
    # Train for n_iter
    timer.restart()
    best_ppl = np.inf
    for epoch in range(opt.num_epochs):
        timer.tick()
        dy.renew_cg()
        losses = []
        # Add losses for all samples
        for x, y in zip(train_src, train_trg):
            losses.append(s2s.calculate_user_loss([x], [y], [0], update_mode=opt.update_mode))
        loss = dy.average(losses)
        # Backward + update
        loss.backward()
        trainer.update()
        # Print loss etc...
        train_loss = loss.value() / n_tokens
        train_ppl = np.exp(train_loss)
        trainer.status()
        elapsed = timer.tick()
        log.info(" Training_loss=%f, ppl=%f, time=%f s, tok/s=%.1f" %
                 (train_loss, train_ppl, elapsed, n_tokens / elapsed))
        if train_ppl < best_ppl:
            best_ppl = train_ppl
            translations = evaluate_model(s2s, test_src, opt.beam_size)
        else:
            log.info("Early stopping after %d iterations" % (epoch + 1))
            break
    return translations

def sent_rep(sent):
    tokens = w_tokenizer(sent)
    tokens_rep_list = []
    for token in tokens:
        tokens_rep_list.append(token_lookup(token))
    return dy.average(tokens_rep_list)

def __call__(self, H, is_train=True):
    """
    :param H: a list of hidden-state vectors, one per time step
    :param is_train: apply dropout to the attention matrices during training
    :return: the attention-weighted hidden states (and a placeholder weight list)
    """
    seq_len = len(H)
    if is_train:
        # in the training phase, perform dropout
        W1 = dy.dropout(self.W1, self.dropout_rate)
        W2 = dy.dropout(self.W2, self.dropout_rate)
    else:
        W1 = self.W1
        W2 = self.W2
    pool = dy.average(H)
    aspect_attentions = []
    Weights = []
    for t in range(seq_len):
        ht = H[t]
        scores = dy.tanh(dy.transpose(ht) * W1 * pool + self.bd)
        # print(scores.value())
        Weights.append(scores.value())
        ht_hat = dy.cmult(dy.softmax(scores), ht)
        # print(ht_hat.value())
        aspect_attentions.append(ht_hat)
    Weights_np = []
    return aspect_attentions, Weights_np

def _loss(outputs, labels):
    losses = [dy.pickneglogsoftmax_batch(out, label)
              for out, label in zip(outputs, labels)]
    loss = dy.mean_batches(dy.average(losses))
    return loss

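# A hedged usage sketch for _loss above (shapes and data invented, assuming
# _loss is in scope as a plain function): each element of `outputs` is a
# batched score expression and each element of `labels` holds the gold class
# ids for that batch.
import numpy as np
import dynet as dy

dy.renew_cg()
n_classes, batch_size, seq_len = 5, 3, 2

outputs = [dy.inputTensor(np.random.rand(n_classes, batch_size), batched=True)
           for _ in range(seq_len)]
labels = [[1, 0, 4], [2, 2, 3]]

loss = _loss(outputs, labels)  # scalar: averaged over steps, then over the batch
print(loss.value())
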
def compose(self, embeds: Union[dy.Expression, List[dy.Expression]]) -> dy.Expression:
    if type(embeds) != list:
        return dy.mean_batches(embeds)
    else:
        return dy.average(embeds)

def _step(self, loader, update, log, reporting_fns, verbose=None, output=None, txts=None):
    steps = len(loader)
    pg = create_progress_bar(steps)
    cm = ConfusionMatrix(self.labels)
    epoch_loss = 0
    epoch_div = 0
    preds, losses, ys = [], [], []
    dy.renew_cg()
    for i, batch_dict in enumerate(pg(loader), 1):
        inputs = self.model.make_input(batch_dict)
        y = inputs.pop('y')
        pred = self.model.forward(inputs)
        preds.append(pred)
        loss = self.model.loss(pred, y)
        losses.append(loss)
        ys.append(y)
        if i % self.autobatchsz == 0:
            loss = dy.average(losses)
            preds = dy.concatenate_cols(preds)
            batchsz = len(losses)
            lossv = loss.npvalue().item() * batchsz
            epoch_loss += lossv
            epoch_div += batchsz
            _add_to_cm(cm, np.array(ys), preds.npvalue())
            update(loss)
            log(self.optimizer.global_step, lossv, batchsz, reporting_fns)
            preds, losses, ys = [], [], []
            dy.renew_cg()
    loss = dy.average(losses)
    preds = dy.concatenate_cols(preds)
    batchsz = len(losses)
    epoch_loss += loss.npvalue().item() * batchsz
    epoch_div += batchsz
    _add_to_cm(cm, np.array(ys), preds.npvalue())
    update(loss)
    metrics = cm.get_all_metrics()
    metrics['avg_loss'] = epoch_loss / float(epoch_div)
    verbose_output(verbose, cm)
    return metrics

def average_pooling(encoded_sequence):
    averages = []
    for col in range(encoded_sequence[0].dim()[0][0]):
        avg = []
        for row in range(len(encoded_sequence)):
            avg.append(encoded_sequence[row][col])
        averages.append(dy.average(avg))
    return dy.concatenate(averages)

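# Since dy.average already averages expressions element-wise, the per-column
# loop above could likely be collapsed into a single call. A small check of
# that equivalence (a sketch, assuming average_pooling above is in scope and
# the inputs are plain DyNet vectors):
import numpy as np
import dynet as dy

dy.renew_cg()
seq = [dy.inputTensor(np.random.rand(4)) for _ in range(6)]

pooled_loop = average_pooling(seq)  # per-dimension loop from above
pooled_avg = dy.average(seq)        # element-wise average in one call

print(np.allclose(pooled_loop.npvalue(), pooled_avg.npvalue()))  # True
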
def create_network_return_best(self, x):
    dy.renew_cg()
    emb_vectors = [self.lookup[self.corpus.get(item, len(self.corpus))] for item in x]
    calc_avg = dy.average(emb_vectors)
    emb_vectors_mean = dy.reshape(calc_avg, (1, self.dim))
    z1 = (emb_vectors_mean * self._pW1) + self._pB1
    a1 = dy.tanh(z1)
    net_output = dy.softmax(dy.reshape((a1 * self._kW1) + self._kB1, (self.numClasses,)))
    return np.argmax(net_output.npvalue())

def score_expression(self, qwVecs, numWdPos):
    if numWdPos == 0:
        kwVec = qwVecs[numWdPos + 1]
    elif numWdPos == 1:
        kwVec = qwVecs[0]
    else:
        kwVec = dt.average(qwVecs[numWdPos - 2:numWdPos])
    ret = dt.dot_product(kwVec, self.OpW)
    return ret

def calc_errors(self, batch: List[Tuple]):
    dy.renew_cg()
    errors_exp = dy.concatenate([dy.average(list(self.model.forward(_in, _out)))
                                 for _in, _out in batch])
    errors = errors_exp.value()
    if len(batch) == 1:
        errors = [errors]
    return np.array(errors)

def __init__(self, nmodel, qinfo, vw, init_example=True):
    SqaState.__init__(self, qinfo)
    self.path_score_expression = dt.scalarInput(0)
    self.score = 0
    self.nm = nmodel
    self.vw = vw
    if init_example:
        UNK = self.vw.w2i["_UNK_"]
        self.ques_word_sequence = self.qinfo.ques_word_sequence()

        # vectors of question words
        # self.ques_emb = [self.nm.E[self.vw.w2i.get(w, UNK)] for w in self.ques_word_sequence]
        self.ques_emb = dt.concatenate_cols([self.nm.E[self.vw.w2i.get(w, UNK)]
                                             for w in self.ques_word_sequence])
        # self.ques_avg_emb = dt.average(self.ques_emb)

        # avg. vectors of column names
        self.headers_embs = []
        for colname_word_sequence in self.qinfo.headers_word_sequences():
            colname_emb = dt.average([self.nm.E[self.vw.w2i.get(w, UNK)]
                                      for w in colname_word_sequence])
            self.headers_embs.append(colname_emb)

        # avg. vectors of table entries
        self.entries_embs = []
        for row_word_sequences in self.qinfo.entries_word_sequences():
            row_embs = []
            for cell_word_sequence in row_word_sequences:
                row_embs.append(dt.average([self.nm.E[self.vw.w2i.get(w, UNK)]
                                            for w in cell_word_sequence]))
            self.entries_embs.append(row_embs)

        self.R = dt.parameter(self.nm.R)
        self.NulW = dt.parameter(self.nm.NulW)

def test(sqnorm_original_value, assert_equal):
    dy.renew_cg()
    inputs = make_inputs()
    avg = dy.average(common.get_bilstm_all(inputs, flstm, blstm))
    sqnorm = dy.squared_norm(avg)
    if assert_equal:
        self.assertAlmostEqual(sqnorm_original_value, sqnorm.value(), places=10)
    else:
        self.assertNotAlmostEqual(sqnorm_original_value, sqnorm.value(), places=10)

def calc_loss(self, src, trg, loss_calculator):
    sub_losses = collections.defaultdict(list)
    for model in self.models:
        for loss_name, loss in model.calc_loss(src, trg, loss_calculator).loss_values.items():
            sub_losses[loss_name].append(loss)
    model_loss = LossBuilder()
    for loss_name, losslist in sub_losses.items():
        # TODO: dy.average(losslist) _or_ dy.esum(losslist) / len(self.models) ?
        # -- might not be the same if not all models return all losses
        model_loss.add_loss(loss_name, dy.average(losslist))
    return model_loss

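# On the TODO above: when every model contributes a loss to `losslist`, the two
# forms agree; they only differ if some models skip a loss, because the
# denominators then diverge. A quick standalone check with toy values:
import dynet as dy

dy.renew_cg()
losslist = [dy.scalarInput(v) for v in (0.3, 1.2, 0.9)]

avg = dy.average(losslist)
esum_over_n = dy.esum(losslist) / len(losslist)
print(avg.value(), esum_over_n.value())  # both 0.8
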
def test():
    correct = 0
    dy.renew_cg()
    losses = []
    for lbl, img in test_data:
        losses.append(network.create_network_return_loss(img, lbl, dropout=False))
        if lbl == network.create_network_return_best(img, dropout=False):
            correct += 1
    mbloss = dy.average(losses)
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        mbloss.value(), correct, len(test_data), 100. * correct / len(test_data)))

def _step(self, loader, update, log, reporting_fns, verbose=None):
    steps = len(loader)
    pg = create_progress_bar(steps)
    cm = ConfusionMatrix(self.labels)
    epoch_loss = 0
    epoch_div = 0
    preds, losses, ys = [], [], []
    dy.renew_cg()
    for i, batch_dict in enumerate(pg(loader), 1):
        inputs = self.model.make_input(batch_dict)
        y = inputs.pop('y')
        pred = self.model.forward(inputs)
        preds.append(pred)
        loss = self.model.loss(pred, y)
        losses.append(loss)
        ys.append(y)
        if i % self.autobatchsz == 0:
            loss = dy.average(losses)
            preds = dy.concatenate_cols(preds)
            batchsz = len(losses)
            lossv = loss.npvalue().item() * batchsz
            epoch_loss += lossv
            epoch_div += batchsz
            _add_to_cm(cm, np.array(ys), preds.npvalue())
            update(loss)
            log(self.optimizer.global_step, lossv, batchsz, reporting_fns)
            preds, losses, ys = [], [], []
            dy.renew_cg()
    loss = dy.average(losses)
    preds = dy.concatenate_cols(preds)
    batchsz = len(losses)
    epoch_loss += loss.npvalue().item() * batchsz
    epoch_div += batchsz
    _add_to_cm(cm, np.array(ys), preds.npvalue())
    update(loss)
    metrics = cm.get_all_metrics()
    metrics['avg_loss'] = epoch_loss / float(epoch_div)
    verbose_output(verbose, cm)
    return metrics

def train(epoch):
    random.shuffle(train_data)
    i = 0
    epoch_start = time.time()
    while i < len(train_data):
        dy.renew_cg()
        losses = []
        for lbl, img in train_data[i:i + args.batch_size]:
            loss = network.create_network_return_loss(img, lbl, dropout=True)
            losses.append(loss)
        mbloss = dy.average(losses)
        if (int(i / args.batch_size)) % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, i, len(train_data), 100. * i / len(train_data), mbloss.value()))
        mbloss.backward()
        trainer.update()
        i += args.batch_size
    epoch_end = time.time()
    print("{} s per epoch".format(epoch_end - epoch_start))

def _train(self, ts, **kwargs):
    self.model.train = True
    reporting_fns = kwargs.get('reporting_fns', [])
    epoch_loss = 0
    epoch_norm = 0
    auto_norm = 0
    metrics = {}
    steps = len(ts)
    last = steps
    losses = []
    i = 1
    pg = create_progress_bar(steps)
    dy.renew_cg()
    for batch_dict in pg(ts):
        inputs = self.model.make_input(batch_dict)
        y = inputs.pop('y')
        pred = self.model.compute_unaries(inputs)
        bsz = self._get_batchsz(y)
        if self.autobatchsz is None:
            losses = self.model.loss(pred, y)
            loss = dy.mean_batches(losses)
            lossv = loss.npvalue().item()
            report_loss = lossv * bsz
            epoch_loss += report_loss
            epoch_norm += bsz
            self.nstep_agg += report_loss
            self.nstep_div += bsz
            loss.backward()
            self.optimizer.update()
            dy.renew_cg()
            # TODO: Abstract this somewhat, or else once we have a batched tagger have 2 trainers
            if (self.optimizer.global_step + 1) % self.nsteps == 0:
                metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                self.report(
                    self.optimizer.global_step + 1, metrics, self.nstep_start,
                    'Train', 'STEP', reporting_fns, self.nsteps
                )
                self.reset_nstep()
        else:
            loss = self.model.loss(pred, y)
            losses.append(loss)
            self.nstep_div += bsz
            epoch_norm += bsz
            auto_norm += bsz
            if i % self.autobatchsz == 0 or i == last:
                loss = dy.average(losses)
                lossv = loss.npvalue().item()
                loss.backward()
                self.optimizer.update()
                report_loss = lossv * auto_norm
                epoch_loss += report_loss
                self.nstep_agg += report_loss
                losses = []
                dy.renew_cg()
                if (self.optimizer.global_step + 1) % self.nsteps == 0:
                    metrics = self.calc_metrics(self.nstep_agg, self.nstep_div)
                    self.report(
                        self.optimizer.global_step + 1, metrics, self.nstep_start,
                        'Train', 'STEP', reporting_fns, self.nsteps
                    )
                    self.reset_nstep()
                auto_norm = 0
        i += 1

    metrics = self.calc_metrics(epoch_loss, epoch_norm)
    return metrics