def main():
    zeroshot_vocab2int, zeroshot_int2vocab, lang_idx_dict = load_vocabs()
    x = tf.placeholder(tf.int32, shape=[hp.batch_size, hp.maxlen])
    y = tf.placeholder(tf.int32, shape=[hp.batch_size, hp.maxlen])
    is_train = tf.constant(False, tf.bool, name='is_train')
    model = Transformer(x, y, zeroshot_int2vocab, zeroshot_int2vocab, is_train)

    with tf.Session() as sess:

        sess.run([
            tf.global_variables_initializer(), eval_iterator.initializer,
            tf.tables_initializer()
        ])

        model.evaluate(sess, eval_iterator)
Example #2
def eval(task_name):
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    X, _, Texts, Labels = load_test_data()

    word2idx, idx2word = load_vocabs()

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ## Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            ## Get model name
            print('Model dir:', hp.logdir)
            mname = open(hp.logdir + '/checkpoint',
                         'r').read().split('"')[1]  # model name
            print("Model name:", mname)

            ## Inference
            if not os.path.exists('results'): os.mkdir('results')
            with codecs.open("results/" + mname, "w", "utf-8") as fout:
                list_of_refs, hypotheses = [], []
                print("Iterator:", len(X), hp.batch_size)

                predict_label = []
                for i in range(len(X) // hp.batch_size + 1):
                    print('Step:\t', i, '/', len(X) // hp.batch_size)
                    ### Get mini-batches
                    x = X[i * hp.batch_size:(i + 1) * hp.batch_size]
                    sentences = Texts[i * hp.batch_size:(i + 1) *
                                      hp.batch_size]
                    labels = Labels[i * hp.batch_size:(i + 1) * hp.batch_size]

                    preds = sess.run(g.preds, {g.x: x})
                    preds = [int(x) for x in preds]
                    predict_label.extend(preds)

                    ### Write to file
                    for sent, label, pred in zip(sentences, labels,
                                                 preds):  # sentence-wise
                        #got = " ".join(idx2word[idx] for idx in pred).split("</S>")[0].strip()
                        fout.write("- sent: " + sent + "\n")
                        fout.write('- label: {}, -predict: {} \n'.format(
                            label, pred))
                        fout.flush()

                        # bleu score
                        if task_name == 'seq2seq':
                            # for the seq2seq task, `label` is assumed to hold the target
                            # sentence and `pred` a sequence of output word indices
                            got = " ".join(idx2word[idx] for idx in pred).split("</S>")[0].strip()
                            ref = label.split()
                            hypothesis = got.split()
                            if len(ref) > 3 and len(hypothesis) > 3:
                                list_of_refs.append([ref])
                                hypotheses.append(hypothesis)

                ## Calculate bleu score
                if task_name == 'seq2seq':
                    score = corpus_bleu(list_of_refs, hypotheses)
                    fout.write("Bleu Score = " + str(100 * score))
                elif task_name == 'classfication':
                    assert len(Labels) == len(predict_label), \
                        'The lengths of the labels and predictions are not aligned.'

                    res = classification_report(Labels, predict_label)
                    print(res)
                    fout.write(res + '\n')
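For reference, a minimal self-contained sketch of the corpus-level BLEU call used above, assuming corpus_bleu comes from nltk.translate.bleu_score; the toy tokens are purely illustrative.

from nltk.translate.bleu_score import corpus_bleu

# each hypothesis gets a list of reference token lists
list_of_refs = [[["the", "cat", "sat", "on", "the", "mat"]]]
hypotheses = [["the", "cat", "sat", "on", "the", "rug"]]

score = corpus_bleu(list_of_refs, hypotheses)  # uniform 1-4 gram weights by default
print("Bleu Score = " + str(100 * score))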
Example #3
    def __init__(self, is_training=True):
        self.graph = tf.Graph()
        with self.graph.as_default():
            if is_training:
                self.x1, self.x2, self.y, self.num_batch = get_batch_data()
                #self.x, self.label, self.num_batch = get_batch_data() # (N, T)
                #self.y = tf.one_hot(self.label, depth = hp.n_class)

            else:  # inference
                self.x1 = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
                self.x2 = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
                #self.label = tf.placeholder(tf.int32, shape = (None, hp.n_class))
                #self.y = tf.placeholder(tf.int32, shape = (None, hp.n_class))
                #self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen))

            self.l2_loss = tf.constant(0.0)
            # define decoder inputs
            #for the sentence-relationship task we encode sent1 to e1, then decode (e1 + sent2)
            #to capture a richer semantic relationship across the pair
            self.decoder_inputs = tf.concat(
                (tf.ones_like(self.x2[:, :1]) * 2, self.x2[:, :-1]),
                -1)  # 2:<S>

            # Load vocabulary
            word2idx, idx2word = load_vocabs()

            # initialize transformer
            transformer = vanilla_transformer(hp, is_training)

            #encode
            self.encode1, self.encode2 = transformer.encode(self.x1, len(word2idx)), \
                transformer.encode(self.x2, len(word2idx))

            #concated
            self.enc = tf.divide(tf.add(self.encode1, self.encode2), 2)
            self.enc = normalize(self.enc)

            #for the sentence-relationship task we encode sent1 to e1, then decode (e1 + sent2)
            #to capture a richer semantic relationship across the pair

            # Decoder
            self.dec = transformer.decode(self.decoder_inputs, self.enc,
                                          len(word2idx), hp.p_maxlen)

            self.logits = tf.add(self.enc, tf.multiply(self.enc, self.dec))
            #self.logits = self.enc

            #self.logits = tf.layers.dense(self.logits, 64, activation = 'tanh')
            self.logits = tf.layers.flatten(self.logits)
            #self.logits = tf.reshape(self.logits, [64, -1])
            self.h_drop = tf.nn.dropout(
                self.logits,
                hp.dropout_keep_prob if is_training else 1.0)  # no dropout at inference

            with tf.name_scope("output_logit"):
                W = tf.get_variable(
                    "W",
                    shape=[hp.maxlen * hp.hidden_units,
                           len(hp.relations)],
                    initializer=tf.contrib.layers.xavier_initializer())

                b = tf.Variable(tf.constant(0.1, shape=[len(hp.relations)]),
                                name="b")
                self.l2_loss += tf.nn.l2_loss(W)
                self.l2_loss += tf.nn.l2_loss(b)
                self.logits = tf.nn.xw_plus_b(self.h_drop, W, b, name="logit")
                #self.preds = tf.argmax(self.scores, 1, name="predictions")

            self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1))

            if is_training:
                self.y_hotting = tf.one_hot(self.y, depth=len(hp.relations))

                #Accuracy
                self.cpl = tf.equal(tf.convert_to_tensor(self.y, tf.int32),
                                    self.preds)
                self.cpl = tf.to_int32(self.cpl)
                self.acc = tf.reduce_sum(self.cpl) / tf.to_int32(
                    tf.reduce_sum(self.y_hotting))
                tf.summary.scalar('acc', self.acc)

                # Loss
                #self.y_smoothed = label_smoothing(self.y_hotting)
                self.loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.logits, labels=self.y_hotting)
                self.mean_loss = (tf.reduce_sum(
                    self.loss) + self.l2_loss * hp.reg_lambda) / tf.reduce_sum(
                        self.y_hotting)

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                        beta1=0.9,
                                                        beta2=0.98,
                                                        epsilon=1e-8)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()
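The decoder_inputs line above builds the decoder input by dropping the last target token and prepending the <S> index (2, per the "2:<S>" comment). A minimal NumPy sketch of that shift on toy index sequences:

import numpy as np

x2 = np.array([[11, 12, 13, 0],   # a toy batch of padded index sequences
               [21, 22,  0, 0]])

decoder_inputs = np.concatenate(
    [np.ones_like(x2[:, :1]) * 2,  # <S> in front of every sequence
     x2[:, :-1]], axis=-1)

print(decoder_inputs)
# [[ 2 11 12 13]
#  [ 2 21 22  0]]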
Example #4
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                        beta1=0.9,
                                                        beta2=0.98,
                                                        epsilon=1e-8)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()


if __name__ == '__main__':
    # Load vocabulary
    word2idx, idx2word = load_vocabs()

    # Construct graph
    g = Graph("train")
    print("Graph loaded")

    # Start session
    sv = tf.train.Supervisor(graph=g.graph,
                             logdir=hp.logdir,
                             save_model_secs=0)
    with sv.managed_session() as sess:
        with open('acc_mean_loss.rec', 'w') as rec:
            for epoch in range(1, hp.num_epochs + 1):
                if sv.should_stop(): break
                for step in tqdm(range(g.num_batch),
                                 total=g.num_batch,
Example #5
def eval(task_name):
    # Load graph
    g = Graph(is_training=False)
    print("Graph loaded")

    # Load data
    test_data = pd.read_csv(hp.testfile)
    questions, contents, q_lens, p_lens, start_pos, end_pos = load_test_data()
    raw_passages = list(test_data['content'])
    reference_answers = list(test_data['answer'])

    word2idx, idx2word = load_vocabs()

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            ## Restore parameters
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            ## Get model name
            print('Model dir:', hp.logdir)
            mname = open(hp.logdir + '/checkpoint',
                         'r').read().split('"')[1]  # model name
            print("Model name:", mname)

            ## Inference
            if not os.path.exists('results'): os.mkdir('results')
            with codecs.open("results/" + mname, "w", "utf-8") as fout:

                pred_answers, ref_answers = [], []
                pred_dict, ref_dict = {}, {}
                ques_id = 0
                eval_dict = {
                    'bleu_1': [],
                    'bleu_2': [],
                    'bleu_3': [],
                    'bleu_4': []
                }

                for i in range(len(questions) // hp.batch_size):
                    print("Iterator: {} / {}".format(
                        i,
                        len(questions) // hp.batch_size))

                    ### Get mini-batches
                    q = questions[i * hp.batch_size:(i + 1) * hp.batch_size]
                    p = contents[i * hp.batch_size:(i + 1) * hp.batch_size]
                    q_length = q_lens[i * hp.batch_size:(i + 1) *
                                      hp.batch_size]
                    p_length = p_lens[i * hp.batch_size:(i + 1) *
                                      hp.batch_size]
                    s_pos = start_pos[i * hp.batch_size:(i + 1) *
                                      hp.batch_size]
                    e_pos = end_pos[i * hp.batch_size:(i + 1) * hp.batch_size]
                    passages = raw_passages[i * hp.batch_size:(i + 1) *
                                            hp.batch_size]
                    ref_answers = reference_answers[i * hp.batch_size:(i + 1) *
                                                    hp.batch_size]

                    feed_dict = {
                        g.q: q,
                        g.p: p,
                        g.q_length: q_length,
                        g.p_length: p_length,
                        g.start_label: s_pos,
                        g.end_label: e_pos
                    }

                    start_probs, end_probs = sess.run(
                        [g.start_probs, g.end_probs], feed_dict)

                    ### Write to file
                    for start_prob, end_prob, passage, ref in zip(
                            start_probs, end_probs, passages, ref_answers):
                        pred_span, prob = find_best_answer_for_passage(
                            start_prob, end_prob)
                        pred_answer = passage[pred_span[0]:pred_span[1] + 1]

                        if not len(pred_answer) > 0: continue

                        pred_dict[str(ques_id)] = [pred_answer]
                        ref_dict[str(ques_id)] = [ref]
                        ques_id += 1

                        fout.write('-ref: ' + ref + '\n')
                        fout.write('-pred: ' + pred_answer + '\n')

                        b1, b2, b3, b4 = bleu(list(pred_answer), list(ref), 1), \
                                         bleu(list(pred_answer), list(ref), 2), \
                                         bleu(list(pred_answer), list(ref), 3), \
                                         bleu(list(pred_answer), list(ref), 4)

                        eval_dict['bleu_1'].append(b1)
                        eval_dict['bleu_2'].append(b2)
                        eval_dict['bleu_3'].append(b3)
                        eval_dict['bleu_4'].append(b4)

                for metric in eval_dict:
                    fout.write(metric + '\t' +
                               str(np.mean(eval_dict[metric])) + '\n')
                    print(metric + '\t' + str(np.mean(eval_dict[metric])))
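The per-order scores above come from a project-local bleu helper that is not shown here. A rough, self-contained sketch of what such a BLEU-n call typically computes, using NLTK's sentence_bleu with uniform weights over 1..n grams (the smoothing choice and the character-level tokens mirroring list(pred_answer) / list(ref) are assumptions):

from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def bleu_n(hypothesis_tokens, reference_tokens, n):
    # uniform weights over 1..n grams; smoothing keeps short answers from collapsing to zero
    weights = tuple([1.0 / n] * n)
    return sentence_bleu([reference_tokens], hypothesis_tokens,
                         weights=weights,
                         smoothing_function=SmoothingFunction().method1)

b2 = bleu_n(list("北京奥运会"), list("北京奥运会开幕式"), 2)  # character-level, as above
print(b2)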
Example #6
    def __init__(self, is_training=True):
        self.graph = tf.Graph()
        with self.graph.as_default():
            if is_training:
                self.x, self.label, self.num_batch = get_batch_data()  # (N, T)
                self.y = tf.one_hot(self.label, depth=hp.n_class)
            else:  # inference
                self.x = tf.placeholder(tf.int32, shape=(None, hp.maxlen))
                self.label = tf.placeholder(tf.int32, shape=(None, hp.n_class))
                #self.y = tf.placeholder(tf.int32, shape = (None, hp.n_class))
                #self.y = tf.placeholder(tf.int32, shape=(None, hp.maxlen))

            # define decoder inputs
            #self.decoder_inputs = tf.concat((tf.ones_like(self.y[:, :1])*2, self.y[:, :-1]), -1) # 2:<S>

            # Load vocabulary
            word2idx, idx2word = load_vocabs()

            # Encoder
            with tf.variable_scope("encoder"):
                ## Embedding
                self.enc = embedding(self.x,
                                     vocab_size=len(word2idx),
                                     num_units=hp.hidden_units,
                                     scale=True,
                                     scope="enc_embed")

                ## Positional Encoding
                if hp.sinusoid:
                    self.enc += positional_encoding(self.x,
                                                    num_units=hp.hidden_units,
                                                    zero_pad=False,
                                                    scale=False,
                                                    scope="enc_pe")
                else:
                    self.enc += embedding(tf.tile(
                        tf.expand_dims(tf.range(tf.shape(self.x)[1]), 0),
                        [tf.shape(self.x)[0], 1]),
                                          vocab_size=hp.maxlen,
                                          num_units=hp.hidden_units,
                                          zero_pad=False,
                                          scale=False,
                                          scope="enc_pe")

                ## Dropout
                self.enc = tf.layers.dropout(
                    self.enc,
                    rate=hp.dropout_rate,
                    training=tf.convert_to_tensor(is_training))

                ## Blocks
                for i in range(hp.num_blocks):
                    with tf.variable_scope("num_blocks_{}".format(i)):
                        ### Multihead Attention
                        self.enc = multihead_attention(
                            queries=self.enc,
                            keys=self.enc,
                            num_units=hp.hidden_units,
                            num_heads=hp.num_heads,
                            dropout_rate=hp.dropout_rate,
                            is_training=is_training,
                            causality=False)

                        ### Feed Forward
                        self.enc = feedforward(
                            self.enc,
                            num_units=[4 * hp.hidden_units, hp.hidden_units])

            # Final linear projection
            #print(self.enc.shape) #4, 500, 512
            self.enc = tf.reduce_sum(self.enc, axis=2)  #4, 500
            self.enc = tf.layers.batch_normalization(self.enc, training=is_training)
            self.logits = tf.layers.dense(self.enc, hp.n_class)  #4, 2
            #print(self.logits.shape)
            self.preds = tf.to_int32(tf.argmax(self.logits, axis=-1))

            if is_training:
                #Accuracy
                self.cpl = tf.equal(tf.convert_to_tensor(self.label, tf.int32),
                                    self.preds)
                self.cpl = tf.to_int32(self.cpl)
                self.acc = tf.reduce_sum(self.cpl) / tf.reduce_sum(
                    tf.to_int32(self.y))
                tf.summary.scalar('acc', self.acc)

                # Loss
                self.y_smoothed = label_smoothing(self.y)
                self.loss = tf.nn.softmax_cross_entropy_with_logits(
                    logits=self.logits, labels=self.y_smoothed)
                self.mean_loss = tf.reduce_sum(self.loss) / tf.reduce_sum(
                    self.y)
                #self.mean_loss = tf.reduce_sum(self.loss*self.istarget) / (tf.reduce_sum(self.istarget))

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                        beta1=0.9,
                                                        beta2=0.98,
                                                        epsilon=1e-8)
                self.train_op = self.optimizer.minimize(
                    self.mean_loss, global_step=self.global_step)

                # Summary
                tf.summary.scalar('mean_loss', self.mean_loss)
                self.merged = tf.summary.merge_all()
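When hp.sinusoid is set, the encoder above adds a project-local positional_encoding; a minimal NumPy sketch of the standard sinusoidal encoding from "Attention Is All You Need", which such a helper is presumably based on:

import numpy as np

def sinusoidal_positional_encoding(maxlen, num_units):
    # PE[pos, 2i]   = sin(pos / 10000^(2i/num_units))
    # PE[pos, 2i+1] = cos(pos / 10000^(2i/num_units))
    pos = np.arange(maxlen)[:, None]                   # (maxlen, 1)
    i = np.arange(num_units)[None, :]                  # (1, num_units)
    angle = pos / np.power(10000.0, (2 * (i // 2)) / num_units)
    pe = np.zeros((maxlen, num_units))
    pe[:, 0::2] = np.sin(angle[:, 0::2])
    pe[:, 1::2] = np.cos(angle[:, 1::2])
    return pe                                          # broadcastable over the batch

pe = sinusoidal_positional_encoding(50, 512)           # e.g. maxlen=50, hidden_units=512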
Example #7
from data_load import load_vocabs, eval_iterator
from model import *


if __name__ == '__main__':
    
    zeroshot_vocab2int, zeroshot_int2vocab, lang_idx_dict = load_vocabs()
    x = tf.placeholder(tf.int32, shape=[hp.batch_size, hp.maxlen])
    y = tf.placeholder(tf.int32, shape=[hp.batch_size, hp.maxlen])
    is_train = tf.constant(False, tf.bool, name='is_train')
    model = Transformer(x, y, zeroshot_int2vocab, zeroshot_int2vocab, is_train)

    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(), eval_iterator.initializer, tf.tables_initializer()])
        model.evaluate(sess, eval_iterator)
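eval_iterator above is provided by data_load; a minimal sketch of how such an initializable TF1 iterator is typically built (the function name and the data preparation are illustrative assumptions):

import tensorflow as tf

def make_eval_iterator(source_ids, target_ids, batch_size):
    # source_ids / target_ids: int32 arrays of shape (N, maxlen)
    dataset = tf.data.Dataset.from_tensor_slices((source_ids, target_ids))
    dataset = dataset.batch(batch_size, drop_remainder=True)
    return dataset.make_initializable_iterator()

# usage mirrors the script above: run iterator.initializer once,
# then run iterator.get_next() to pull one batch at a time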
Example #8
    def __init__(self, is_training=True):
        self.graph = tf.Graph()
        with self.graph.as_default():
            if is_training:
                self.q, self.p, self.q_length, self.p_length, \
                self.start_label, self.end_label, self.num_batch = get_batch_data() 
                self.dropout_keep_prob = hp.dropout_keep_prob

            else: # inference
                self.q = tf.placeholder(tf.int32, [None, hp.q_maxlen])
                self.p = tf.placeholder(tf.int32, [None, hp.p_maxlen])
                self.q_length = tf.placeholder(tf.int32, [None])
                self.p_length = tf.placeholder(tf.int32, [None])
                self.start_label = tf.placeholder(tf.int32, [None])
                self.end_label = tf.placeholder(tf.int32, [None])

            self.dropout_keep_prob = hp.dropout_keep_prob
            self.l2_loss = tf.constant(0.0)
            # define decoder input
            self.decoder_inputs = tf.concat((tf.ones_like(self.p[:, :1])*2, self.p[:, :-1]), -1) # 2:<S>

            # Load vocabulary    
            word2idx, idx2word = load_vocabs()

            # initialize transformer
            transformer = vanilla_transformer(hp, is_training)
            ### encode
            self.q_encodes, self.p_encodes = transformer.encode(self.q, len(word2idx)), \
                transformer.encode(self.p, len(word2idx))

            # concatenate features to attend p with q:
            # first pad q_encodes to the length of p_encodes
            pad_dim = hp.p_maxlen - hp.q_maxlen
            pad_ = tf.zeros([tf.shape(self.q_encodes)[0], pad_dim, hp.hidden_units], dtype = self.q_encodes.dtype)
            self.padded_q_encodes = tf.concat([self.q_encodes, pad_,], 1)
            #normalization
            self.padded_q_encodes = normalize(self.padded_q_encodes)

            # Decoder
            self.dec = transformer.decode(self.decoder_inputs, self.padded_q_encodes, len(word2idx), hp.p_maxlen)

            # fix paragraph tensor with self.dec
            self.p_encodes = self.dec

            """
            The core of RC model, get the question-aware passage encoding
            """
            match_layer = AttentionFlowMatchLayer(hp.hidden_units)
            self.match_p_encodes, _ = match_layer.match(self.p_encodes, self.q_encodes,
                                                        self.p_length, self.q_length)

            # pooling or bi-rnn to fuse the passage encodes
            if hp.Passage_fuse == 'Pooling':
                #pooling layer
                self.match_p_encodes = \
                tf.keras.layers.MaxPool1D(pool_size=4, strides=None, padding='valid')\
                                        (self.match_p_encodes)

                self.match_p_encodes = tf.reshape(self.match_p_encodes, [-1, hp.p_maxlen, hp.hidden_units])
                #normalization
                self.match_p_encodes = tf.layers.batch_normalization(self.match_p_encodes)
                if hp.use_dropout:
                    self.match_p_encodes = tf.nn.dropout(self.match_p_encodes, self.dropout_keep_prob)
            elif hp.Passage_fuse == 'bi-rnn':
                self.fuse_p_encodes, _ = rnn('bi-lstm', self.match_p_encodes, self.p_length,
                                             hp.hidden_units, layer_num=1, concat = False)
                if hp.use_dropout:
                    self.fuse_p_encodes = tf.nn.dropout(self.fuse_p_encodes, self.dropout_keep_prob)
                # hand the fused encodes to the pointer-net decoder below
                self.match_p_encodes = self.fuse_p_encodes


            decoder = PointerNetDecoder(hp.hidden_units)
            self.start_probs, self.end_probs = decoder.decode(self.match_p_encodes,
                                                              self.q_encodes)

                
            if is_training:  
                self.start_loss = self.sparse_nll_loss(probs=self.start_probs, labels=self.start_label)
                self.end_loss = self.sparse_nll_loss(probs=self.end_probs, labels=self.end_label)
                self.all_params = tf.trainable_variables()
                self.loss = tf.reduce_mean(tf.add(self.start_loss, self.end_loss))
                if hp.weight_decay > 0:
                    with tf.variable_scope('l2_loss'):
                        l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in self.all_params])
                    self.loss += hp.weight_decay * l2_loss



                # Training Scheme
                self.global_step = tf.Variable(0, name='global_step', trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr, beta1=0.9, beta2=0.98, epsilon=1e-8)
                self.train_op = self.optimizer.minimize(self.loss, global_step=self.global_step)
                   
                # Summary 
                tf.summary.scalar('mean_loss', self.loss)
                self.merged = tf.summary.merge_all()
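Example #5 turns start/end distributions like these into an answer span via find_best_answer_for_passage; a minimal sketch of the selection rule such a helper typically implements, maximizing start_prob[s] * end_prob[e] with s <= e (the max_answer_len cap is an assumption):

import numpy as np

def find_best_answer_for_passage(start_probs, end_probs, max_answer_len=100):
    # return ((start, end), prob) maximizing start_probs[s] * end_probs[e]
    # subject to s <= e <= s + max_answer_len - 1
    best_span, best_prob = (0, 0), 0.0
    for s, p_start in enumerate(start_probs):
        for e in range(s, min(len(end_probs), s + max_answer_len)):
            prob = p_start * end_probs[e]
            if prob > best_prob:
                best_span, best_prob = (s, e), prob
    return best_span, best_prob

span, prob = find_best_answer_for_passage(np.array([0.1, 0.7, 0.2]),
                                          np.array([0.1, 0.2, 0.7]))
# span == (1, 2), prob == 0.49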