Example no. 1
    def __init__(self, is_training=True):
        self.graph = tf.Graph()
        self.vocab_size = len(
            load_doc_vocab()[0])  # load_doc_vocab returns: de2idx, idx2de

        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.num_batch = get_batch_data()  # (N, T)
            else:  # inference
                self.x = tf.placeholder(tf.int32,
                                        shape=(None, hp.article_maxlen))
                self.y = tf.placeholder(tf.int32,
                                        shape=(None, hp.summary_maxlen))

            self.decoder_inputs = tf.concat(
                (tf.ones_like(self.y[:, :1]) * 2, self.y[:, :-1]),
                -1)  # decoder inputs: prepend <S> (id 2) and drop the last target token

            self._add_encoder(is_training=is_training)
            self.ml_loss = self._add_ml_loss(is_training=is_training)
            self.loss = self.ml_loss

            if is_training:
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                        beta1=0.9,
                                                        beta2=0.98,
                                                        epsilon=1e-8)

                grads_and_vars_ml = self.optimizer.compute_gradients(
                    loss=self.ml_loss)
                grad_ml, vars_ml = zip(
                    *grads_and_vars_ml)  # parse grad and var

                # add gradient clipping
                clipped_grad_ml, globle_norm_ml = tf.clip_by_global_norm(
                    grad_ml, hp.maxgradient)
                self.globle_norm_ml = globle_norm_ml
                self.train_op_ml = self.optimizer.apply_gradients(
                    grads_and_vars=zip(clipped_grad_ml, vars_ml),
                    global_step=self.global_step)
                '''
                # training without gradient clipping
                self.train_op_ml  = self.optimizer.apply_gradients(grads_and_vars=grads_and_vars_ml,
                                                                   global_step=self.global_step)
                '''

                # Summary
                tf.summary.scalar('globle_norm_ml', globle_norm_ml)
                tf.summary.scalar('loss', self.loss)

                self.merged = tf.summary.merge_all()

        self.filewriter = tf.summary.FileWriter(hp.tb_dir + '/train',
                                                self.graph)
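
For reference, a minimal self-contained sketch of the same compute/clip/apply pattern used above, assuming TensorFlow 1.x; the toy loss, the learning rate 1e-3, and the clip norm 5.0 are stand-ins, not the repository's hp.lr and hp.maxgradient:

import tensorflow as tf

w = tf.Variable([3.0, -4.0], name='w')
loss = tf.reduce_sum(tf.square(w))  # toy scalar loss
global_step = tf.Variable(0, name='global_step', trainable=False)

optimizer = tf.train.AdamOptimizer(learning_rate=1e-3, beta1=0.9, beta2=0.98, epsilon=1e-8)
grads_and_vars = optimizer.compute_gradients(loss=loss)
grads, variables = zip(*grads_and_vars)  # split gradients from their variables

# rescale all gradients jointly so their global norm does not exceed 5.0
clipped_grads, global_norm = tf.clip_by_global_norm(grads, 5.0)
train_op = optimizer.apply_gradients(grads_and_vars=list(zip(clipped_grads, variables)),
                                     global_step=global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    norm_before, _ = sess.run([global_norm, train_op])
    print(norm_before)  # 10.0 for this toy loss, so the applied update was clipped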
Example no. 2
def train():
    try:
        if not os.path.exists(hp.logdir):
            os.mkdir(hp.logdir)
    except OSError:
        tf.logging.info('making logdir failed')

    # Load vocabulary
    de2idx, idx2de = load_doc_vocab()
    en2idx, idx2en = load_sum_vocab()

    print("Constructing graph...")
    train_g = Graph("train")

    print("Start training...")
    with train_g.graph.as_default():
        saver = tf.train.Saver(max_to_keep=1)
        # sv = tf.train.Supervisor(logdir=hp.logdir, saver=None)
        sv = tf.train.Supervisor(logdir=hp.logdir, saver=saver)

        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        config.gpu_options.allow_growth = True

        with sv.managed_session(config=config) as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            print("Start training: epoches={}, num batch={}".format(hp.num_epochs, train_g.num_batch))
            for epoch in range(1, hp.num_epochs+1):
                print("Starting {}-th epoch".format(epoch))

                if sv.should_stop():
                    break

                for step in range(train_g.num_batch):
                    true_step = step + (epoch - 1) * train_g.num_batch

                    if true_step % hp.train_record_steps == 0:
                        summary, _ = sess.run([train_g.merged, train_g.train_op_ml])
                        train_g.filewriter.add_summary(summary, true_step)
                    else:
                        sess.run(train_g.train_op_ml)

                    if true_step % hp.checkpoint_steps == 0:
                        # sv.saver.save(sess, hp.logdir + '/model_epoch_%02d_step_%d' % (epoch, true_step))
                        saver.save(sess, hp.logdir + '/model_epoch_%02d_step_%d' % (epoch, true_step))

                    if true_step > 0 and true_step % hp.eval_record_steps == 0:
                        eval(cur_step=true_step, write_file=False)

                    # end of per-step loop body
                # end of per-epoch loop body
                # eval(cur_step=true_step, write_file=True)
    print("Done")
Example no. 3
def train():
    try:
        if not os.path.exists(hp.logdir):
            tf.logging.info('making logdir')
            os.mkdir(hp.logdir)
    except OSError:
        tf.logging.info('making logdir failed')

    # Load vocabulary
    de2idx, idx2de = load_doc_vocab()
    en2idx, idx2en = load_sum_vocab()

    print("Constructing graph...")
    train_g = Graph("train")

    print("Start training...")
    with train_g.graph.as_default():
        sv = tf.train.Supervisor(logdir=hp.logdir)
        config = tf.ConfigProto(allow_soft_placement=True,
                                log_device_placement=False)
        config.gpu_options.allow_growth = True

        with sv.managed_session(config=config) as sess:
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            print("Start training: epoches={}, num batch={}".format(
                hp.num_epochs, train_g.num_batch))
            for epoch in range(1, hp.num_epochs + 1):
                print("Starting {}-th epoch".format(epoch))

                if epoch == 1:
                    sess.run(train_g.eta.initializer)  # explicitly init eta
                    train_g.subset_saver.restore(
                        sess, tf.train.latest_checkpoint(hp.pretrain_logdir))
                    print("Restored previous training model!")

                # skip the RL objective during the first num_ml_epoch epochs, for efficiency
                if epoch <= hp.num_ml_epoch:
                    train_op = train_g.train_op_ml
                else:
                    train_op = train_g.train_op_mix

                    cur_eta = sess.run(train_g.eta)
                    '''
                    if cur_eta <= 0.9:
                        sess.run(train_g.update_eta)
                        print("increasing eta by 0.1, current eta = {} ".format(cur_eta + 0.1))
                    '''
                    print(
                        "not increasing eta: current eta = {}".format(cur_eta))
                if sv.should_stop():
                    break

                for step in range(train_g.num_batch):
                    true_step = step + (epoch - 1) * train_g.num_batch

                    if true_step % hp.train_record_steps == 0:
                        outp = [
                            train_g.loss, train_g.acc, train_g.rouge,
                            train_g.reward_diff, train_g.clipped_reward_diff,
                            train_g.globle_norm_ml, train_op, train_g.merged
                        ]
                        loss, acc, rouge, reward_diff, clipped_reward_diff, norm_ml, _, summary = sess.run(
                            outp)

                        # visualize
                        nsml.report(
                            step=true_step,
                            train_loss=float(loss),
                            train_accuracy=float(acc),
                            rouge=float(rouge),
                            norm_ml=float(norm_ml),
                            reward_diff=float(reward_diff),
                            clipped_reward_diff=float(clipped_reward_diff))
                        train_g.filewriter.add_summary(summary, true_step)

                    else:
                        sess.run(train_op)

                    if true_step % hp.checkpoint_steps == 0:
                        sv.saver.save(
                            sess, hp.logdir + '/model_epoch_%02d_step_%d' %
                            (epoch, true_step))

                    if true_step > 0 and true_step % hp.eval_record_steps == 0:
                        eval(cur_step=true_step, write_file=False)
                        # nsml.report(step=true_step, blue_score=float(blue_score))

                    # end of per-step loop body
                # end of per-epoch loop body
                eval(cur_step=true_step, write_file=True)
    print("Done")
Example no. 4
def eval(type='eval', cur_step=0, write_file=True):
    # Load graph
    g = Graph(is_training=False)
    print("Eval Graph loaded")

    # Load data
    # X, Sources, Targets = load_data(type='eval')
    X, Sources, Targets = load_data(type=type)

    de2idx, idx2de = load_doc_vocab()
    word2idx, idx2word = load_sum_vocab()

    # Start session
    with g.graph.as_default():
        sv = tf.train.Supervisor()
        with sv.managed_session(config=tf.ConfigProto(
                allow_soft_placement=True)) as sess:
            sv.saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored!")

            mname = open(hp.logdir + '/checkpoint',
                         'r').read().split('"')[1]  # model name

            ## Inference
            if not os.path.exists('results'): os.mkdir('results')

            if write_file:
                ### Write to file
                fout2 = codecs.open("results/eval-pred", "w", "utf-8")
                fout3 = codecs.open("results/eval-title", "w", "utf-8")
                fout = codecs.open("results/eval-" + type + "_" + mname, "w",
                                   "utf-8")

            list_of_refs, hypotheses = [], []
            num_batch = len(X) // hp.batch_size
            print("num batch: ", num_batch, "len(X): ", len(X))
            for i in range(num_batch):
                ### Get mini-batches
                x = X[i * hp.batch_size:(i + 1) * hp.batch_size]
                sources = Sources[i * hp.batch_size:(i + 1) * hp.batch_size]
                targets = Targets[i * hp.batch_size:(i + 1) * hp.batch_size]

                ### Autoregressive inference
                preds = np.zeros((hp.batch_size, hp.summary_maxlen), np.int32)
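                # Greedy autoregressive decoding: each pass re-runs the model on the
                # partially filled summary and copies out only position j, so later
                # positions condition on the model's own earlier predictions.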
                for j in range(hp.summary_maxlen):
                    _preds = sess.run(g.preds, {g.x: x, g.y: preds})
                    preds[:, j] = _preds[:, j]

                for source, target, pred in zip(sources, targets,
                                                preds):  # sentence-wise
                    got = " ".join(idx2word[idx]
                                   for idx in pred).split("</S>")[0].strip()

                    if write_file:
                        sentence_to_write = "-source: {}\n-expected: {}\n-got: {}\n\n".format(
                            source, target, got)

                        print(sentence_to_write)
                        fout.write(sentence_to_write)
                        fout2.write(got.strip() + '\n')
                        fout3.write(target.strip() + '\n')

                        fout.flush()
                        fout2.flush()
                        fout3.flush()

                    # collect reference/hypothesis pairs for scoring
                    ref = target.split()
                    hypothesis = got.split()
                    if len(ref) > 3 and len(hypothesis) > 3:
                        list_of_refs.append(ref)
                        hypotheses.append(hypothesis)

            ## Calculate ROUGE
            rouge = rouge_l_sentence_level(hypotheses, list_of_refs)
            # arguments: (eval_sentences, ref_sentences)

            rouge = np.mean(rouge)
            nsml.report(step=cur_step, eval_rouge=float(rouge))

    return None
Example no. 5
    def __init__(self, is_training=True):
        self.graph = tf.Graph()
        # de2idx, idx2de = load_doc_vocab()
        # self.vocab_size = len(de2idx)
        self.vocab_size = len(load_doc_vocab()[0])

        with self.graph.as_default():
            if is_training:
                self.x, self.y, self.num_batch = get_batch_data()  # (N, T)
            else:  # inference
                self.x = tf.placeholder(tf.int32,
                                        shape=(None, hp.article_maxlen))
                self.y = tf.placeholder(tf.int32,
                                        shape=(None, hp.summary_maxlen))

            self.decoder_inputs = tf.concat(
                (tf.ones_like(self.y[:, :1]) * 2, self.y[:, :-1]),
                -1)  # decoder inputs: prepend <S> (id 2) and drop the last target token

            self._add_encoder(is_training=is_training)
            self.ml_loss = self._add_ml_loss(is_training=is_training)

            if is_training:
                self.eta = tf.Variable(initial_value=hp.eta_init,
                                       dtype=tf.float32,
                                       trainable=False,
                                       name='eta')
                # eta is non-trainable, so it is only updated via this explicit assign op
                self.update_eta = tf.assign(self.eta, self.eta + 0.1)

                self.rl_loss = self._add_rl_loss()
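                # Mixed objective: eta interpolates between the RL loss and the
                # maximum-likelihood loss (eta = 0 recovers pure ML training).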
                self.loss = self.eta * self.rl_loss + (1 -
                                                       self.eta) * self.ml_loss

                # Training Scheme
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                self.optimizer = tf.train.AdamOptimizer(learning_rate=hp.lr,
                                                        beta1=0.9,
                                                        beta2=0.98,
                                                        epsilon=1e-8)

                grads_and_vars_mix = self.optimizer.compute_gradients(
                    loss=self.loss)
                grads_and_vars_ml = self.optimizer.compute_gradients(
                    loss=self.ml_loss)

                grad_mix, vars_mix = zip(
                    *grads_and_vars_mix)  # parse grad and var
                grad_ml, vars_ml = zip(
                    *grads_and_vars_ml)  # parse grad and var

                # add gradient clipping
                clipped_grad_mix, globle_norm_mix = tf.clip_by_global_norm(
                    grad_mix, hp.maxgradient)
                clipped_grad_ml, globle_norm_ml = tf.clip_by_global_norm(
                    grad_ml, hp.maxgradient)
                self.globle_norm_ml = globle_norm_ml
                self.train_op_mix = self.optimizer.apply_gradients(
                    grads_and_vars=zip(clipped_grad_mix, vars_mix),
                    global_step=self.global_step)
                self.train_op_ml = self.optimizer.apply_gradients(
                    grads_and_vars=zip(clipped_grad_ml, vars_ml),
                    global_step=self.global_step)
                '''
                # below: training without gradient clipping
                self.train_op_mix = self.optimizer.apply_gradients(grads_and_vars=grads_and_vars_mix,
                                                                   global_step=self.global_step)
                self.train_op_ml  = self.optimizer.apply_gradients(grads_and_vars=grads_and_vars_ml,
                                                                   global_step=self.global_step)
                '''

                # Summary
                tf.summary.scalar('globle_norm_ml', globle_norm_ml)
                tf.summary.histogram(name='reward_diff',
                                     values=self.reward_diff)
                tf.summary.histogram(name='clipped_reward_diff',
                                     values=self.clipped_reward_diff)
                tf.summary.scalar('rl_loss', self.rl_loss)
                tf.summary.scalar('ml_loss', self.ml_loss)
                tf.summary.scalar('loss', self.loss)
                self.merged = tf.summary.merge_all()

                # prepare a Saver that restores all variables except eta
                all_var = tf.get_collection(key=tf.GraphKeys.GLOBAL_VARIABLES)
                all_var.remove(self.eta)
                self.subset_saver = tf.train.Saver(var_list=all_var)

        self.filewriter = tf.summary.FileWriter(hp.tb_dir + '/train',
                                                self.graph)
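
Similarly, a minimal sketch of the eta mechanism in isolation, assuming TensorFlow 1.x; the constant loss values and the initial value 0.0 are placeholders for the model's rl_loss/ml_loss tensors and hp.eta_init:

import tensorflow as tf

eta = tf.Variable(0.0, dtype=tf.float32, trainable=False, name='eta')  # excluded from optimization
update_eta = tf.assign(eta, eta + 0.1)  # eta only changes when this op is run explicitly

rl_loss = tf.constant(2.0)  # stand-ins for the model's RL and ML loss tensors
ml_loss = tf.constant(1.0)
loss = eta * rl_loss + (1 - eta) * ml_loss

with tf.Session() as sess:
    sess.run(eta.initializer)
    print(sess.run(loss))   # 1.0 -> pure ML loss while eta == 0
    sess.run(update_eta)
    print(sess.run(loss))   # 1.1 -> 10% RL / 90% ML after one eta update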