def main():
    zeroshot_vocab2int, zeroshot_int2vocab, lang_idx_dict = load_vocabs()
    x = tf.placeholder(tf.int32, shape=[hp.batch_size, hp.maxlen])
    y = tf.placeholder(tf.int32, shape=[hp.batch_size, hp.maxlen])
    is_train = tf.constant(False, tf.bool, name='is_train')
    model = Transformer(x, y, zeroshot_int2vocab, zeroshot_int2vocab, is_train)

    with tf.Session() as sess:
        sess.run([
            tf.global_variables_initializer(),
            eval_iterator.initializer,
            tf.tables_initializer()
        ])
        model.evaluate(sess, eval_iterator)
def eval(task_name):
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, _, Texts, Labels = load_test_data()
    word2idx, idx2word = load_vocabs()

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ## Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            ## Get model name
            print('Model dir:', hp.logdir)
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]  # model name
            print("Model name:", mname)

            ## Inference
            if not os.path.exists('results'):
                os.mkdir('results')
            with codecs.open("results/" + mname, "w", "utf-8") as fout:
                list_of_refs, hypotheses = [], []
                print("Iterator:", len(X), hp.batch_size)
                predict_label = []
                for i in range(len(X) // hp.batch_size + 1):
                    print('Step:\t', i, '/', len(X) // hp.batch_size)

                    ### Get mini-batches
                    x = X[i * hp.batch_size:(i + 1) * hp.batch_size]
                    sentences = Texts[i * hp.batch_size:(i + 1) * hp.batch_size]
                    labels = Labels[i * hp.batch_size:(i + 1) * hp.batch_size]

                    preds = sess.run(g.preds, {g.x: x})
                    if task_name == 'classification':
                        # Class indices for the classification report
                        preds = [int(p) for p in preds]
                    predict_label.extend(preds)

                    ### Write to file
                    for sent, label, pred in zip(sentences, labels, preds):  # sentence-wise
                        fout.write("- sent: " + sent + "\n")
                        fout.write('- label: {}, -predict: {}\n'.format(label, pred))
                        fout.flush()

                        # BLEU bookkeeping (seq2seq only). Decode the predicted ids;
                        # the reference is assumed to be the ground-truth text in `sent`.
                        if task_name == 'seq2seq':
                            got = " ".join(idx2word[idx] for idx in pred).split("</S>")[0].strip()
                            ref = sent.split()
                            hypothesis = got.split()
                            if len(ref) > 3 and len(hypothesis) > 3:
                                list_of_refs.append([ref])
                                hypotheses.append(hypothesis)

                ## Calculate metrics
                if task_name == 'seq2seq':
                    score = corpus_bleu(list_of_refs, hypotheses)
                    fout.write("Bleu Score = " + str(100 * score))
                elif task_name == 'classification':
                    assert len(Labels) == len(predict_label), \
                        'The lengths of the labels and predictions are not aligned.'
                    res = classification_report(Labels, predict_label)
                    print(res)
                    fout.write(res + '\n')
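# Minimal sketch (illustration only, not part of this repo's modules): how the
# metrics used in eval() consume their inputs. corpus_bleu expects one list of
# reference token lists per hypothesis, which is why the loop above appends
# [ref]; classification_report expects flat label / prediction sequences. The
# sample data below is made up.
from nltk.translate.bleu_score import corpus_bleu
from sklearn.metrics import classification_report

def _metrics_demo():
    refs = [[['the', 'black', 'cat', 'sat', 'on', 'the', 'mat']],
            [['transformers', 'use', 'self', 'attention', 'for', 'encoding']]]
    hyps = [['the', 'black', 'cat', 'sat', 'on', 'the', 'mat'],
            ['transformers', 'use', 'attention', 'for', 'encoding']]
    print('BLEU:', corpus_bleu(refs, hyps))
    print(classification_report([0, 1, 1, 0], [0, 1, 0, 0]))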
def __init__(self, is_training=True):
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x1, self.x2, self.y, self.num_batch = get_batch_data()  # (N, T)
        else:  # inference
            self.x1 = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
            self.x2 = tf.placeholder(tf.int32, shape=(None, hp.maxlen))

        self.l2_loss = tf.constant(0.0)

        # Define decoder inputs.
        # For the sentence-relationship task we encode sent1 to e1, then decode (e1 + sent2)
        # to capture a richer semantic relationship across the corpus.
        self.decoder_inputs = tf.concat(
            (tf.ones_like(self.x2[:, :1]) * 2, self.x2[:, :-1]), -1)  # 2:<S>

        # Load vocabulary
        word2idx, idx2word = load_vocabs()

        # Initialize transformer
        transformer = vanilla_transformer(hp, is_training)

        # Encode both sentences with the shared encoder
        self.encode1 = transformer.encode(self.x1, len(word2idx))
        self.encode2 = transformer.encode(self.x2, len(word2idx))

        # Average the two encodings and normalize
        self.enc = tf.divide(tf.add(self.encode1, self.encode2), 2)
        self.enc = normalize(self.enc)

        # Decoder
        self.dec = transformer.decode(self.decoder_inputs, self.enc,
                                      len(word2idx), hp.p_maxlen)

        # Combine encoder and decoder outputs
        self.logits = tf.add(self.enc, tf.multiply(self.enc, self.dec))
        self.logits = tf.layers.flatten(self.logits)
        self.h_drop = tf.nn.dropout(self.logits, hp.dropout_keep_prob)

        with tf.name_scope("output_logit"):
            W = tf.get_variable(
                "W",
                shape=[hp.maxlen * hp.hidden_units, len(hp.relations)],
                initializer=tf.contrib.layers.xavier_initializer())
            b = tf.Variable(tf.constant(0.1, shape=[len(hp.relations)]), name="b")
            self.l2_loss += tf.nn.l2_loss(W)
            self.l2_loss += tf.nn.l2_loss(b)
            self.logits = tf.nn.xw_plus_b(self.h_drop, W, b, name="logit")
            self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1))

        if is_training:
            self.y_hotting = tf.one_hot(self.y, depth=len(hp.relations))

            # Accuracy
            self.cpl = tf.equal(tf.convert_to_tensor(self.y, tf.int32), self.preds)
            self.cpl = tf.to_int32(self.cpl)
            self.acc = tf.reduce_sum(self.cpl) / tf.to_int32(
                tf.reduce_sum(self.y_hotting))
            tf.summary.scalar('acc', self.acc)

            # Loss (cross-entropy plus L2 regularization on the output layer)
            self.loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.y_hotting)
            self.mean_loss = (tf.reduce_sum(self.loss) +
                              self.l2_loss * hp.reg_lambda) / tf.reduce_sum(
                                  self.y_hotting)

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                    beta1=0.9,
                                                    beta2=0.98,
                                                    epsilon=1e-8)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            self.merged = tf.summary.merge_all()
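# Minimal sketch of the `normalize` helper assumed above: standard layer
# normalization as in the vanilla Transformer modules. The actual helper in
# this repo may differ in defaults or scoping.
import tensorflow as tf

def normalize(inputs, epsilon=1e-8, scope="ln", reuse=None):
    # Applies layer normalization over the last dimension of `inputs`.
    with tf.variable_scope(scope, reuse=reuse):
        params_shape = inputs.get_shape()[-1:]
        mean, variance = tf.nn.moments(inputs, [-1], keep_dims=True)
        beta = tf.Variable(tf.zeros(params_shape))
        gamma = tf.Variable(tf.ones(params_shape))
        normalized = (inputs - mean) / ((variance + epsilon) ** 0.5)
        outputs = gamma * normalized + beta
    return outputs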
if __name__ == '__main__':
    # Load vocabulary
    word2idx, idx2word = load_vocabs()

    # Construct graph
    g = Graph("train")
    print("Graph loaded")

    # Start session
    sv = tf.train.Supervisor(graph=g.graph,
                             logdir=hp.logdir,
                             save_model_secs=0)
    with sv.managed_session() as sess:
        with open('acc_mean_loss.rec', 'w') as rec:
            for epoch in range(1, hp.num_epochs + 1):
                if sv.should_stop():
                    break
                for step in tqdm(range(g.num_batch), total=g.num_batch,
                                 ncols=70, leave=False, unit='b'):
                    # One training step; also fetch accuracy and mean loss
                    # so they can be logged to acc_mean_loss.rec.
                    _, acc, mean_loss = sess.run([g.train_op, g.acc, g.mean_loss])
                    rec.write('{}\t{}\t{}\n'.format(step, acc, mean_loss))
                # Checkpoint at the end of every epoch.
                gs = sess.run(g.global_step)
                sv.saver.save(sess, hp.logdir + '/model_epoch_%02d_gs_%d' % (epoch, gs))
def eval(task_name):
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    test_data = pd.read_csv(hp.testfile)
    questions, contents, q_lens, p_lens, start_pos, end_pos = load_test_data()
    raw_passages = list(test_data['content'])
    reference_answers = list(test_data['answer'])
    word2idx, idx2word = load_vocabs()

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ## Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            ## Get model name
            print('Model dir:', hp.logdir)
            mname = open(hp.logdir + '/checkpoint', 'r').read().split('"')[1]  # model name
            print("Model name:", mname)

            ## Inference
            if not os.path.exists('results'):
                os.mkdir('results')
            with codecs.open("results/" + mname, "w", "utf-8") as fout:
                pred_dict, ref_dict = {}, {}
                ques_id = 0
                eval_dict = {
                    'bleu_1': [],
                    'bleu_2': [],
                    'bleu_3': [],
                    'bleu_4': []
                }
                for i in range(len(questions) // hp.batch_size):
                    print("Iterator: {} / {}".format(
                        i, len(questions) // hp.batch_size))

                    ### Get mini-batches
                    q = questions[i * hp.batch_size:(i + 1) * hp.batch_size]
                    p = contents[i * hp.batch_size:(i + 1) * hp.batch_size]
                    q_length = q_lens[i * hp.batch_size:(i + 1) * hp.batch_size]
                    p_length = p_lens[i * hp.batch_size:(i + 1) * hp.batch_size]
                    s_pos = start_pos[i * hp.batch_size:(i + 1) * hp.batch_size]
                    e_pos = end_pos[i * hp.batch_size:(i + 1) * hp.batch_size]
                    passages = raw_passages[i * hp.batch_size:(i + 1) * hp.batch_size]
                    ref_answers = reference_answers[i * hp.batch_size:(i + 1) * hp.batch_size]

                    feed_dict = {
                        g.q: q,
                        g.p: p,
                        g.q_length: q_length,
                        g.p_length: p_length,
                        g.start_label: s_pos,
                        g.end_label: e_pos
                    }
                    start_probs, end_probs = sess.run(
                        [g.start_probs, g.end_probs], feed_dict)

                    ### Write to file
                    for start_prob, end_prob, passage, ref in zip(
                            start_probs, end_probs, passages, ref_answers):
                        pred_span, prob = find_best_answer_for_passage(
                            start_prob, end_prob)
                        pred_answer = passage[pred_span[0]:pred_span[1] + 1]
                        if not len(pred_answer) > 0:
                            continue
                        pred_dict[str(ques_id)] = [pred_answer]
                        ref_dict[str(ques_id)] = [ref]
                        ques_id += 1

                        fout.write('-ref: ' + ref + '\n')
                        fout.write("-pred: " + pred_answer + '\n')

                        # Character-level BLEU-1..4 between prediction and reference
                        b1, b2, b3, b4 = bleu(list(pred_answer), list(ref), 1), \
                                         bleu(list(pred_answer), list(ref), 2), \
                                         bleu(list(pred_answer), list(ref), 3), \
                                         bleu(list(pred_answer), list(ref), 4)
                        eval_dict['bleu_1'].append(b1)
                        eval_dict['bleu_2'].append(b2)
                        eval_dict['bleu_3'].append(b3)
                        eval_dict['bleu_4'].append(b4)

                for metric in eval_dict:
                    fout.write(metric + '\t' + str(np.mean(eval_dict[metric])) + '\n')
                    print(metric + '\t' + str(np.mean(eval_dict[metric])))
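# Minimal sketch of `find_best_answer_for_passage` as assumed by the loop above:
# the usual pointer-network span search that maximizes start_prob * end_prob
# under a maximum answer length. The length cap is an assumption; the repo's
# actual helper may differ.
def find_best_answer_for_passage(start_probs, end_probs, max_answer_len=100):
    best_start, best_end, max_prob = -1, -1, 0.0
    passage_len = len(start_probs)
    for start_idx in range(passage_len):
        for length in range(1, max_answer_len + 1):
            end_idx = start_idx + length - 1
            if end_idx >= passage_len:
                break
            prob = start_probs[start_idx] * end_probs[end_idx]
            if prob > max_prob:
                best_start, best_end, max_prob = start_idx, end_idx, prob
    return (best_start, best_end), max_prob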
def __init__(self, is_training=True):
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.x, self.label, self.num_batch = get_batch_data()  # (N, T)
            self.y = tf.one_hot(self.label, depth=hp.n_class)
        else:  # inference
            self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
            self.label = tf.placeholder(tf.int32, shape=(None, hp.n_class))

        # Load vocabulary
        word2idx, idx2word = load_vocabs()

        # Encoder
        with tf.variable_scope("encoder"):
            ## Embedding
            self.enc = embedding(self.x,
                                 vocab_size=len(word2idx),
                                 num_units=hp.hidden_units,
                                 scale=True,
                                 scope="enc_embed")

            ## Positional Encoding
            if hp.sinusoid:
                self.enc += positional_encoding(self.x,
                                                num_units=hp.hidden_units,
                                                zero_pad=False,
                                                scale=False,
                                                scope="enc_pe")
            else:
                self.enc += embedding(
                    tf.tile(tf.expand_dims(tf.range(tf.shape(self.x)[1]), 0),
                            [tf.shape(self.x)[0], 1]),
                    vocab_size=hp.maxlen,
                    num_units=hp.hidden_units,
                    zero_pad=False,
                    scale=False,
                    scope="enc_pe")

            ## Dropout
            self.enc = tf.layers.dropout(
                self.enc,
                rate=hp.dropout_rate,
                training=tf.convert_to_tensor(is_training))

            ## Blocks
            for i in range(hp.num_blocks):
                with tf.variable_scope("num_blocks_{}".format(i)):
                    ### Multihead Attention
                    self.enc = multihead_attention(
                        queries=self.enc,
                        keys=self.enc,
                        num_units=hp.hidden_units,
                        num_heads=hp.num_heads,
                        dropout_rate=hp.dropout_rate,
                        is_training=is_training,
                        causality=False)

                    ### Feed Forward
                    self.enc = feedforward(
                        self.enc,
                        num_units=[4 * hp.hidden_units, hp.hidden_units])

        # Final linear projection
        self.enc = tf.reduce_sum(self.enc, axis=2)  # (N, T, C) -> (N, T)
        self.enc = tf.layers.batch_normalization(self.enc, training=is_training)
        self.logits = tf.layers.dense(self.enc, hp.n_class)  # (N, n_class)
        self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1))

        if is_training:
            # Accuracy
            self.cpl = tf.equal(tf.convert_to_tensor(self.label, tf.int32),
                                self.preds)
            self.cpl = tf.to_int32(self.cpl)
            self.acc = tf.reduce_sum(self.cpl) / tf.reduce_sum(
                tf.to_int32(self.y))
            tf.summary.scalar('acc', self.acc)

            # Loss
            self.y_smoothed = label_smoothing(self.y)
            self.loss = tf.nn.softmax_cross_entropy_with_logits(
                logits=self.logits, labels=self.y_smoothed)
            self.mean_loss = tf.reduce_sum(self.loss) / tf.reduce_sum(self.y)

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                    beta1=0.9,
                                                    beta2=0.98,
                                                    epsilon=1e-8)
            self.train_op = self.optimizer.minimize(
                self.mean_loss, global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.mean_loss)
            self.merged = tf.summary.merge_all()
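# Minimal sketch of the `label_smoothing` helper used above, assuming the
# standard formulation from the vanilla Transformer modules: mix the one-hot
# targets with a uniform distribution over the K classes. epsilon=0.1 is the
# usual default and an assumption here.
def label_smoothing(inputs, epsilon=0.1):
    K = inputs.get_shape().as_list()[-1]  # number of classes (last dimension)
    return ((1 - epsilon) * inputs) + (epsilon / K)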
from data_load import load_vocabs, eval_iterator
from model import *

if __name__ == '__main__':
    zeroshot_vocab2int, zeroshot_int2vocab, lang_idx_dict = load_vocabs()
    x = tf.placeholder(tf.int32, shape=[hp.batch_size, hp.maxlen])
    y = tf.placeholder(tf.int32, shape=[hp.batch_size, hp.maxlen])
    is_train = tf.constant(False, tf.bool, name='is_train')
    model = Transformer(x, y, zeroshot_int2vocab, zeroshot_int2vocab, is_train)

    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(),
                  eval_iterator.initializer,
                  tf.tables_initializer()])
        model.evaluate(sess, eval_iterator)
def __init__(self, is_training=True):
    self.graph = tf.Graph()
    with self.graph.as_default():
        if is_training:
            self.q, self.p, self.q_length, self.p_length, \
                self.start_label, self.end_label, self.num_batch = get_batch_data()
            self.dropout_keep_prob = hp.dropout_keep_prob
        else:  # inference
            self.q = tf.placeholder(tf.int32, [None, hp.q_maxlen])
            self.p = tf.placeholder(tf.int32, [None, hp.p_maxlen])
            self.q_length = tf.placeholder(tf.int32, [None])
            self.p_length = tf.placeholder(tf.int32, [None])
            self.start_label = tf.placeholder(tf.int32, [None])
            self.end_label = tf.placeholder(tf.int32, [None])
            self.dropout_keep_prob = hp.dropout_keep_prob

        self.l2_loss = tf.constant(0.0)

        # Define decoder inputs
        self.decoder_inputs = tf.concat(
            (tf.ones_like(self.p[:, :1]) * 2, self.p[:, :-1]), -1)  # 2:<S>

        # Load vocabulary
        word2idx, idx2word = load_vocabs()

        # Initialize transformer
        transformer = vanilla_transformer(hp, is_training)

        ### Encode question and passage
        self.q_encodes = transformer.encode(self.q, len(word2idx))
        self.p_encodes = transformer.encode(self.p, len(word2idx))

        # Concatenate features to attend p with q:
        # first pad q_encodes to the length of p_encodes
        pad_dim = hp.p_maxlen - hp.q_maxlen
        pad_ = tf.zeros([tf.shape(self.q_encodes)[0], pad_dim, hp.hidden_units],
                        dtype=self.q_encodes.dtype)
        self.padded_q_encodes = tf.concat([self.q_encodes, pad_], 1)
        # Normalization
        self.padded_q_encodes = normalize(self.padded_q_encodes)

        # Decoder
        self.dec = transformer.decode(self.decoder_inputs, self.padded_q_encodes,
                                      len(word2idx), hp.p_maxlen)
        # Replace the passage representation with the decoder output
        self.p_encodes = self.dec

        """
        The core of the RC model: get the question-aware passage encoding
        """
        match_layer = AttentionFlowMatchLayer(hp.hidden_units)
        self.match_p_encodes, _ = match_layer.match(self.p_encodes, self.q_encodes,
                                                    self.p_length, self.q_length)

        # Pooling or bi-rnn to fuse the passage encodes
        if hp.Passage_fuse == 'Pooling':
            # Pooling layer
            self.match_p_encodes = tf.keras.layers.MaxPool1D(
                pool_size=4, strides=None, padding='valid')(self.match_p_encodes)
            self.match_p_encodes = tf.reshape(self.match_p_encodes,
                                              [-1, hp.p_maxlen, hp.hidden_units])
            # Normalization
            self.match_p_encodes = tf.layers.batch_normalization(self.match_p_encodes)
            if hp.use_dropout:
                self.match_p_encodes = tf.nn.dropout(self.match_p_encodes,
                                                     self.dropout_keep_prob)
        elif hp.Passage_fuse == 'bi-rnn':
            self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes,
                                         self.p_length, hp.hidden_units,
                                         layer_num=1, concat=False)
            if hp.use_dropout:
                self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes,
                                                    self.dropout_keep_prob)
            # Use the fused passage encodes downstream
            self.match_p_encodes = self.fuse_p_encodes

        decoder = PointerNetDecoder(hp.hidden_units)
        self.start_probs, self.end_probs = decoder.decode(self.match_p_encodes,
                                                          self.q_encodes)

        if is_training:
            self.start_loss = self.sparse_nll_loss(probs=self.start_probs,
                                                   labels=self.start_label)
            self.end_loss = self.sparse_nll_loss(probs=self.end_probs,
                                                 labels=self.end_label)
            self.all_params = tf.trainable_variables()
            self.loss = tf.reduce_mean(tf.add(self.start_loss, self.end_loss))
            if hp.weight_decay > 0:
                with tf.variable_scope('l2_loss'):
                    l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in self.all_params])
                self.loss += hp.weight_decay * l2_loss

            # Training Scheme
            self.global_step = tf.Variable(0, name='global_step', trainable=False)
            self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                    beta1=0.9,
                                                    beta2=0.98,
                                                    epsilon=1e-8)
            self.train_op = self.optimizer.minimize(self.loss,
                                                    global_step=self.global_step)

            # Summary
            tf.summary.scalar('mean_loss', self.loss)
            self.merged = tf.summary.merge_all()
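# Minimal sketch of the `sparse_nll_loss` method referenced above, assuming the
# usual negative log-likelihood over the predicted start/end distributions (as
# in DuReader-style pointer-network readers). The epsilon value and scope name
# are assumptions.
import tensorflow as tf

def sparse_nll_loss(self, probs, labels, epsilon=1e-9, scope=None):
    with tf.name_scope(scope, "log_loss"):
        labels = tf.one_hot(labels, tf.shape(probs)[1], axis=1)
        losses = -tf.reduce_sum(labels * tf.log(probs + epsilon), 1)
    return losses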