Example #1
0
def main(_):
    """Log the configured param, then run the action chosen by FLAGS.action.

    The "echo" action logs FLAGS.echo_text at WARNING level; the "echo_bool"
    action logs whether FLAGS.just_do_it is set. Any other action only logs
    the param.
    """
    logging.info("param = %s", FLAGS.param)
    action = FLAGS.action
    if action == "echo":
        logging.warning(FLAGS.echo_text)
        return
    if action == "echo_bool":
        answer = "Yes!" if FLAGS.just_do_it else "No :("
        logging.info("Just do it? %s", answer)
Example #2
0
 def test_integration_log(self):  # pylint: disable=no-self-use
     """This test simply should not return any error."""
     logging.info("This is a info test message.")
     logging.warning("This is a warning test message.")
     logging.error("This is an error message.")
     logging.log(logging.INFO, "This is just another info test message.")
     logging.debug("This is a debug test message.")
Example #3
0
def read_data(source_path, buckets, max_size=None):
    """Read token-id lines from a source file and sort them into buckets.

    Every line is parsed as whitespace-separated integers. The target
    sequence of a sample is a copy of its source sequence with EOS_ID
    appended, so each sample is a (source, source + [EOS_ID]) pair.

    Args:
        source_path: path to the file with one line of token-ids per sample.
        buckets: sequence of (source_size, target_size) pairs. A sample goes
            into the first bucket with len(source) < source_size and
            len(target) < target_size; samples that fit no bucket are dropped.
        max_size: maximum number of lines to read, all others are ignored;
            if 0 or None, the file is read completely (no limit).

    Returns:
        data_set: a list of length len(buckets); data_set[n] is a list of
            [source_ids, target_ids] pairs that fit into the n-th bucket.
    """
    data_set = [[] for _ in buckets]
    with tf.gfile.GFile(source_path, mode="r") as source_file:
        source = source_file.readline()
        counter = 0
        while source and (not max_size or counter < max_size):
            counter += 1
            if counter % 100000 == 0:
                logging.info("  reading data line %d", counter)
                sys.stdout.flush()
            # Parse the line once; the target is the source plus EOS.
            source_ids = [int(x) for x in source.split()]
            target_ids = source_ids + [EOS_ID]
            for bucket_id, (source_size, target_size) in enumerate(buckets):
                if (len(source_ids) < source_size
                        and len(target_ids) < target_size):
                    data_set[bucket_id].append([source_ids, target_ids])
                    break
            source = source_file.readline()
    return data_set
 def load_model_from_files(  # pylint: disable=too-many-arguments
         cls,
         model_file,
         checkpoint_dir,
         forward_only,
         restore_all_vars=True,
         pretrain_model_path="",
         hparams_dict=None,
         sess=None):
     """Build a model from a JSON definition and restore or init its weights.

     Hyper-parameters start from build_base_hparams(), are overridden by the
     JSON content of model_file (if that file exists) and finally by
     hparams_dict. Weights come from, in order of precedence: the pretrained
     checkpoint, the latest checkpoint in checkpoint_dir, or a fresh
     initialization.

     Args:
         model_file: path to the JSON model definition; may not exist yet.
         checkpoint_dir: directory searched for the latest checkpoint.
         forward_only: forwarded to the model constructor.
         restore_all_vars: if True restore through model.saver_sup, otherwise
             initialize all variables and restore through model.saver_unsup.
         pretrain_model_path: checkpoint path, or a directory whose "weights"
             sub-directory holds a checkpoint; empty to disable.
         hparams_dict: mapping of hyper-parameter overrides; None means no
             overrides.
         sess: session to use; defaults to the default session.

     Returns:
         The constructed model.

     Raises:
         ValueError: if pretrain_model_path is a directory without a
             checkpoint in its "weights" sub-directory.
     """
     # Avoid a shared mutable default argument.
     if hparams_dict is None:
         hparams_dict = {}

     hparams = build_base_hparams()
     if os.path.exists(model_file):
         logging.info("Loading seq2seq model definition from %s...",
                      model_file)
         with open(model_file, "r") as fobj:
             model_dict = json.load(fobj)
         # JSON stores the buckets as lists; convert them back to tuples.
         model_dict["buckets"] = [
             tuple(_bucket) for _bucket in model_dict["buckets"]
         ]
         hparams.set_from_map(model_dict)
     else:
         logging.info("Initializing a fresh training...")
     hparams.set_from_map(hparams_dict)
     model = cls(hparams, forward_only)

     # Load model weights.
     ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
     sess = sess or tf.get_default_session()
     if pretrain_model_path:
         if tf.gfile.IsDirectory(pretrain_model_path):
             weights_dir = os.path.join(pretrain_model_path, "weights")
             pretrain_ckpt = tf.train.get_checkpoint_state(weights_dir)
             if pretrain_ckpt is None:
                 raise ValueError(
                     "No checkpoint found in pretrained model directory: %s"
                     % weights_dir)
             pretrain_model_path = pretrain_ckpt.model_checkpoint_path
         logging.info("Loading pretrained model weights from checkpoint: %s",
                      pretrain_model_path)
         restore_path = pretrain_model_path
     elif ckpt:
         logging.info("Loading model weights from checkpoint_dir: %s",
                      checkpoint_dir)
         restore_path = ckpt.model_checkpoint_path
     else:
         logging.info("Initialize fresh parameters...")
         sess.run(tf.global_variables_initializer())
         return model

     if restore_all_vars:
         model.saver_sup.restore(sess, restore_path)
     else:
         # This is an ugly workaround to load weights for part of the model:
         # initialize everything first, then overwrite what the saver covers.
         sess.run(tf.global_variables_initializer())
         model.saver_unsup.restore(sess, restore_path)
     return model
Example #5
0
def train(hparams, data_hparams):
    """Build the DiscoveryModel graph and run the monitored training loop.

    Each iteration runs the train op and, when FLAGS.train_dir is set, writes
    the train summary. Whenever the global step fetched alongside the train
    op is a multiple of FLAGS.steps_per_checkpoint, the validation and test
    iterators are re-initialized and their summary ops are run once.

    Args:
        hparams: model hyper-parameters, forwarded to DiscoveryModel.
        data_hparams: data hyper-parameters; skip_at_symbol is read here and
            the object is forwarded to make_train_data and DiscoveryModel.
    """
    vocab = Vocabulary.get_default_vocab(not data_hparams.skip_at_symbol)

    train_data, val_data, test_data = make_train_data(
        json.loads(FLAGS.dataset_spec), vocab, data_hparams, FLAGS.epochs)
    model = DiscoveryModel(data_hparams, hparams, vocab)
    train_outputs, _, _ = model.build_train_graph(train_data)
    seq_loss_op, train_op = model.build_train_loss(train_data, train_outputs)
    with tf.control_dependencies([val_data.initializer,
                                  test_data.initializer]):
        _, val_ctr_smile_op, val_sampled_smiles_op = model.build_val_net(
            val_data.get_next())
        model.build_test_net(val_ctr_smile_op, val_sampled_smiles_op,
                             test_data.get_next())

    train_summary_ops = tf.summary.merge(tf.get_collection("train_summaries"))
    val_summary_ops = tf.summary.merge(tf.get_collection("val_summaries"))
    test_summary_ops = tf.summary.merge(tf.get_collection("test_summaries"))

    stale_global_step_op = tf.train.get_or_create_global_step()
    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=FLAGS.train_dir or None,
            save_checkpoint_steps=FLAGS.steps_per_checkpoint or None,
            log_step_count_steps=FLAGS.steps_per_checkpoint or None) as sess:
        if FLAGS.train_dir:
            summary_writer = tf.summary.FileWriterCache.get(FLAGS.train_dir)
        else:
            summary_writer = None
        while not sess.should_stop():
            stale_global_step, _, _, train_summary = sess.run([
                stale_global_step_op, seq_loss_op, train_op, train_summary_ops
            ])
            if summary_writer is not None:
                summary_writer.add_summary(train_summary, stale_global_step)
            # Run validation and test. A zero/unset interval disables them
            # instead of raising ZeroDivisionError on the modulo.
            if (FLAGS.steps_per_checkpoint
                    and stale_global_step % FLAGS.steps_per_checkpoint == 0):
                try:
                    sess.run([val_data.initializer, test_data.initializer])
                    # The monitored training session will pick up the
                    # summaries and automatically add them.
                    sess.run([val_summary_ops, test_summary_ops])
                except tf.errors.OutOfRangeError:
                    # Must be handled before the generic clause below,
                    # otherwise this handler is unreachable.
                    logging.info("Test finished. Continue training.")
                except Exception as ex:
                    logging.error(str(ex))
                    raise
        logging.info("Coordinator request to stop.")
Example #6
0
def main(_):
    """Dispatch on FLAGS.action.

    "sample" is not implemented. "fp" runs fp_decode() FLAGS.repeat_num times,
    each on a fresh default graph, and logs the collected accuracies. Any
    other action is reported on stdout.
    """
    action = FLAGS.action
    if action == "sample":
        raise NotImplementedError
    if action != "fp":
        print("Unsupported action: %s" % FLAGS.action)
        return

    runs = []
    for _ in range(FLAGS.repeat_num):
        # Every repetition starts from an empty default graph.
        tf.reset_default_graph()
        runs.append(fp_decode())
    em_acc, acc = zip(*runs)
    em_acc_text = ", ".join(["%.8f" % value for value in em_acc])
    acc_text = ", ".join(["%.8f" % value for value in acc])
    logging.info("EM Acc: %s" % em_acc_text)
    logging.info("Acc: %s" % acc_text)
Example #7
0
def read_data_labels(source_path,
                     label_path,
                     reg_flag,
                     num_prop,
                     buckets,
                     max_size=None):  # pylint: disable=too-many-locals
    """Read token-id lines and their labels and sort them into buckets.

    The source and label files are read in lockstep, one line per sample.
    The target sequence of a sample is a copy of its source sequence with
    EOS_ID appended. Reading stops at the end of either file, at the first
    line that is empty after stripping, or once max_size lines were read.

    Args:
        source_path: path to the file with one line of token-ids per sample.
        label_path: path to the labels, one line per sample.
        reg_flag: if truthy labels are parsed as floats, otherwise as a
            single int.
        num_prop: with reg_flag set and num_prop > 1 the label line is parsed
            as a whitespace-separated list of floats; otherwise as one value.
        buckets: sequence of (source_size, target_size) pairs. A sample goes
            into the first bucket with len(source) < source_size and
            len(target) < target_size; samples that fit no bucket are dropped.
        max_size: maximum number of lines to read, all others are ignored;
            if 0 or None, data files are read completely (no limit).

    Returns:
        data_set: a list of length len(buckets); data_set[n] is a list of
            [source_ids, target_ids, label_ids] triples that fit into the
            n-th bucket.
    """
    data_set = [[] for _ in buckets]
    with tf.gfile.GFile(source_path) as source_file, tf.gfile.GFile(
            label_path) as label_file:  # pylint: disable=bad-continuation
        source = source_file.readline().strip()
        label = label_file.readline().strip()
        counter = 0
        while (source and label) and (not max_size or counter < max_size):
            counter += 1
            if counter % 100000 == 0:
                logging.info("  reading data line %d", counter)
                sys.stdout.flush()
            # Parse the line once; the target is the source plus EOS.
            source_ids = [int(x) for x in source.split()]
            if reg_flag:
                if num_prop > 1:
                    label_ids = [float(x) for x in label.split()]
                else:
                    label_ids = float(label)
            else:
                label_ids = int(label)
            target_ids = source_ids + [EOS_ID]
            for bucket_id, (source_size, target_size) in enumerate(buckets):
                if (len(source_ids) < source_size
                        and len(target_ids) < target_size):
                    data_set[bucket_id].append(
                        [source_ids, target_ids, label_ids])
                    break
            source = source_file.readline().strip()
            label = label_file.readline().strip()
    return data_set
def show_event_file(event_file):
    """Show the string-tensor summaries recorded at step FLAGS.step.

    With FLAGS.tag set, the first matching summary of each event at that step
    is printed to stdout (with ", " collapsed to ","). Without it, the tag
    and content of every string-tensor summary are logged.

    Args:
        event_file: path of the event file to read.
    """
    try:
        events = tf.train.summary_iterator(event_file)
    except Exception:  # pylint: disable=broad-except
        # The original message had no placeholder, so formatting it raised
        # TypeError instead of reporting the file.
        # NOTE(review): read errors may only surface while iterating below,
        # depending on the TensorFlow version -- confirm.
        logging.error("Corrupted file: %s", event_file)
        return
    for event in events:
        if event.step != FLAGS.step:
            continue
        for value in event.summary.value:
            if not (value.tensor and value.tensor.string_val):
                continue
            if FLAGS.tag and FLAGS.tag != value.tag:
                continue
            if FLAGS.tag:
                print("\n".join(value.tensor.string_val).replace(", ", ","))
                # Only the first match within this event is printed.
                break
            logging.info(value.tag)
            logging.info("\n".join(value.tensor.string_val))
Example #9
0
def fp_decode():
    """Decode ALL samples from FLAGS.data_path and report the accuracies.

    Every sample is decoded through a FingerprintFetcher. When
    FLAGS.output_path is set, the fingerprint of each sample is written to
    that file as one space-separated line.

    Returns:
        A (final_em_acc, final_acc) tuple: the fraction of samples whose
        decoded output equals the input, and the mean of the per-sample
        "accuracy" values. Both are 0.0 for an empty dataset.
    """
    import os  # Local import: only needed for the os.devnull fallback.

    # Nothing is written without an output path, so open the null device
    # instead of leaving an empty temporary file behind on every call.
    output_path = FLAGS.output_path or os.devnull
    with tf.Session() as sess, open(output_path, "w") as fout:
        all_smiles = SMISingleTaskReader(
            dataset_cols=FLAGS.dataset_headers.split(","),
            cls_thres=FLAGS.cls_thres).read(FLAGS.data_path)
        total = len(all_smiles)
        fetcher = FingerprintFetcher(FLAGS.model_dir, FLAGS.vocab_path, sess)
        exact_match_num = 0
        acc_count = 0
        # Note here the idx is the row index in the dataset.
        # So it might not be robust to dataset shuffle.
        for idx, (smile, label) in all_smiles.iterrows():
            seq2seq_fp, output_smile, acc = fetcher.decode(smile, label)
            acc_count += acc["accuracy"]
            if output_smile == smile:
                exact_match_num += 1
            if FLAGS.output_path:
                fout.write(" ".join([str(fp_bit)
                                     for fp_bit in seq2seq_fp]) + "\n")
            if idx % 200 == 0 and idx:
                logging.info("Progress: %d/%d", idx, total)
        if total:
            final_em_acc = float(exact_match_num) / total
            final_acc = float(acc_count) / total
        else:
            # Avoid ZeroDivisionError when the data file holds no samples.
            logging.warning("No samples found in %s.", FLAGS.data_path)
            final_em_acc = final_acc = 0.0
        logging.info("Exact match count: %d/%d, %.4f%%", exact_match_num,
                     total, 100. * final_em_acc)
        logging.info("Accuracy: %d/%d, %.4f%%", acc_count, total,
                     100. * final_acc)
    return final_em_acc, final_acc
 def save_model_to_files(  # pylint: disable=too-many-arguments
         self,
         model_file,
         checkpoint_file,
         save_all_vars,
         sess=None,
         verbose=False):
     """Write the hyper-parameters to JSON and checkpoint the weights.

     The fields listed in MODEL_PARAMETER_FIELDS are dumped to model_file.
     Weights are saved to checkpoint_file only when its parent directory
     exists, through saver_sup if save_all_vars is set, else saver_unsup.
     """
     if verbose:
         logging.info("Save model defintion to %s..." % model_file)
     model_dict = {}
     for field in self.MODEL_PARAMETER_FIELDS:
         model_dict[field] = getattr(self, field)
     with open(model_file, "w") as fobj:
         json.dump(model_dict, fobj)

     checkpoint_dir = os.path.dirname(checkpoint_file)
     if not os.path.exists(checkpoint_dir):
         if verbose:
             logging.info(
                 "Skip save weights to %s since the dir does not exist." %
                 checkpoint_dir)
         return

     if verbose:
         logging.info("Save weights to %s..." % checkpoint_file)
     sess = sess or tf.get_default_session()
     saver = self.saver_sup if save_all_vars else self.saver_unsup
     saver.save(sess, checkpoint_file, global_step=self.global_step)
Example #11
0
def train(  # pylint: disable=too-many-locals,too-many-statements,too-many-arguments
        train_data, test_data, train_labels, test_labels, restore_all_vars,
        save_all_vars):
    """Train a Seq3SeqModel, periodically checkpointing and evaluating it.

    The loop runs while the model's learning rate stays above
    FLAGS.min_lr_threshold and, when FLAGS.max_step is set, until that many
    steps were taken. Every FLAGS.steps_per_checkpoint steps the averaged
    loss is logged, the learning rate is decayed if the loss is worse than
    each of the last three recorded losses, the model is saved to
    FLAGS.model_dir and the whole test set is evaluated.

    Args:
        train_data: path to the training token-id file.
        test_data: path to the test token-id file.
        train_labels: path to the training labels; labels are only used when
            both train_labels and test_labels are truthy.
        test_labels: path to the test labels.
        restore_all_vars: forwarded to Seq3SeqModel.load_model_from_dir.
        save_all_vars: forwarded to model.save_model_to_dir.

    Raises:
        ValueError: if no training sample fits into any bucket.
    """
    model_dir = FLAGS.model_dir
    batch_size = FLAGS.batch_size

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.allow_growth = True

    hparams_override = json.loads(FLAGS.hparams) if FLAGS.hparams else dict()
    # Override some hparams results.
    hparams_override["label_states"] = (bool(train_labels)
                                        and bool(test_labels))
    hparams_override["batch_size"] = hparams_override.get(
        "batch_size", batch_size)

    with tf.Session(config=config) as sess:
        with tf.device("/gpu:%d" % FLAGS.gpu):
            # Create model.
            model = seq3seq_model.Seq3SeqModel.load_model_from_dir(
                model_dir,
                False,
                restore_all_vars,
                pretrain_model_path=FLAGS.pretrain_model_path,
                hparams_dict=hparams_override,
                sess=sess)
        if FLAGS.reset_lr > 0.:
            logging.info("Resetting LR to %.10f...", FLAGS.reset_lr)
            sess.run(model.learning_rate_op.assign(FLAGS.reset_lr))
        if FLAGS.reset_global_step:
            logging.info("Reset global step to 0.")
            sess.run(model.global_step.assign(0))

        buckets = model.buckets
        reg = model.reg
        num_prop = model.num_prop
        alpha = model.alpha  # Get coefficient for combined loss function
        label_states = model.hparams.label_states

        # Read data into buckets and compute their sizes.
        if label_states:
            logging.info("Reading train data from %s...", train_data)
            train_label_set = read_data_labels(train_data, train_labels, reg,
                                               num_prop, buckets)
            logging.info("Reading test data from %s...", test_data)
            test_label_set = read_data_labels(test_data, test_labels, reg,
                                              num_prop, buckets)
        else:
            logging.info("Reading train data from %s...", train_data)
            train_label_set = read_data(train_data, buckets)
            logging.info("Reading test data from %s...", test_data)
            test_label_set = read_data(test_data, buckets)

        train_bucket_sizes = [
            len(train_label_set[b]) for b in range(len(buckets))
        ]
        train_total_size = float(sum(train_bucket_sizes))
        if not train_total_size:
            # Fail with a clear message instead of a ZeroDivisionError below.
            raise ValueError(
                "No training sample from %s fits into any bucket." %
                train_data)
        train_bucket_prob = [
            size / train_total_size for size in train_bucket_sizes
        ]

        # This is the training loop.
        step_time, loss = 0.0, 0.0
        current_step = 0
        previous_losses = []
        if FLAGS.summary_dir:
            train_writer = tf.summary.FileWriter(
                os.path.join(FLAGS.summary_dir, "train"), sess.graph)
            test_writer = tf.summary.FileWriter(
                os.path.join(FLAGS.summary_dir, "test"), sess.graph)
        else:
            logging.warning(
                "You do not specify any summary directory. Reliance on log file"
                " might be unstable and dangerous.")
            train_writer = None
            test_writer = None

        while model.learning_rate_op.eval() > FLAGS.min_lr_threshold:
            # Choose a bucket at random, weighted by the share of training
            # samples it holds.
            bucket_id = np.random.choice(len(train_bucket_prob),
                                         p=train_bucket_prob)

            # Get a batch and make a step.
            start_time = time.time()

            encoder_inputs, decoder_inputs, labels, target_weights = model.get_batch(
                train_label_set, bucket_id, label_states)
            _, step_loss, step_loss_sup = model.step(  # pylint: disable=unused-variable
                sess, encoder_inputs, decoder_inputs, target_weights,
                bucket_id, train_writer, False, False, labels)
            # Both running values are averages over one checkpoint interval.
            step_time += (time.time() -
                          start_time) / FLAGS.steps_per_checkpoint
            loss += ((1.0 - alpha) * step_loss +
                     alpha * step_loss_sup) / FLAGS.steps_per_checkpoint
            current_step += 1
            # Once in a while, we save checkpoint, print statistics, and run evals.
            if current_step % FLAGS.steps_per_checkpoint == 0:
                # Print statistics for the previous interval; guard the
                # exponential against overflow.
                perplexity = math.exp(
                    float(loss)) if loss < 300 else float("inf")

                logging.info(
                    "global step %d learning rate %.6f step-time %.6f "
                    "perplexity %.6f", model.global_step.eval(),
                    model.learning_rate_op.eval(), step_time, perplexity)
                logging.info("  loss_unsupervised: %s   loss_supervised: %s",
                             step_loss, step_loss_sup)
                # Decrease learning rate if no improvement was seen over last 3 times.
                if len(previous_losses) > 2 and loss > max(
                        previous_losses[-3:]):
                    sess.run(model.learning_rate_decay_op)
                previous_losses.append(loss)
                # Save checkpoint and zero timer and loss.
                model.save_model_to_dir(model_dir, save_all_vars, sess=sess)
                step_time, loss = 0.0, 0.0
                # Run a full evaluation on the test dataset.
                eval_dataset(test_label_set,
                             model,
                             label_states,
                             num_prop,
                             test_writer=test_writer,
                             sess=sess)
                sys.stdout.flush()
            if FLAGS.max_step and current_step >= FLAGS.max_step:
                break
Example #12
0
def eval_dataset(test_label_set,
                 model,
                 label_states,
                 num_prop,
                 test_writer=None,
                 sess=None):
    """Perform an evaluation on the test dataset and log the metrics.

    Every non-empty bucket is evaluated batch by batch in forward-only mode.
    Loss, exact-match accuracy and the supervised metrics returned by
    model.step are accumulated per bucket, logged per bucket, and finally
    summarized over all buckets through add_eval_summary.

    Args:
        test_label_set: list indexed by bucket id; each entry is the list of
            samples of that bucket.
        model: model providing batch_size, get_batch, step, test_summary_ops
            and global_step.
        label_states: forwarded to model.get_batch.
        num_prop: number of predicted properties. With 1 a single accumulator
            is used; otherwise one accumulator per property, and the loss and
            exact-match accuracy are stored in the first one.
        test_writer: forwarded to model.step and add_eval_summary.
        sess: session to use; defaults to the default session.
    """
    sess = sess or tf.get_default_session()
    single_prop = num_prop == 1
    if single_prop:
        acms = AccumulatorWithBuckets()
    else:
        acms = [AccumulatorWithBuckets() for _ in range(num_prop)]

    for bucket_id in range(len(test_label_set)):
        length_test_set = len(test_label_set[bucket_id])
        if length_test_set == 0:
            logging.info("  eval: empty bucket %d", bucket_id)
            continue

        # Loss and exact-match accuracy live in the single accumulator, or
        # in the first property's accumulator.
        primary_acm = acms if single_prop else acms[0]
        batch_size = model.batch_size
        # Iterate all the data inside the bucket.
        for start_idx in range(0, length_test_set, batch_size):
            # TODO(zhengxu): Provide an option to eval a subset of each bucket for speed.
            # Only the current bucket's slot is filled for get_batch.
            tmp_data = [None] * len(test_label_set)
            actual_data_len = (min(length_test_set, start_idx + batch_size) -
                               start_idx)
            tmp_data[bucket_id] = test_label_set[bucket_id][
                start_idx:start_idx + actual_data_len]
            encoder_inputs, decoder_inputs, eval_labels, target_weights = (
                model.get_batch(tmp_data, bucket_id, label_states))
            _, eval_loss, eval_acc_sup, output_logits = model.step(
                sess,
                encoder_inputs,
                decoder_inputs,
                target_weights,
                bucket_id,
                test_writer,
                forward_only=True,
                output_encoder_states=False,
                encoder_labels=eval_labels)
            if eval_acc_sup is not None:
                if single_prop:
                    for idx in eval_acc_sup:
                        acms.get(idx,
                                 bucket_id).accumulate(actual_data_len,
                                                       eval_acc_sup[idx])
                else:
                    for i in range(num_prop):
                        for idx in eval_acc_sup[i]:
                            acms[i].get(idx, bucket_id).accumulate(
                                actual_data_len, eval_acc_sup[i][idx])
            if eval_loss is not None:
                primary_acm.get("eval_loss",
                                bucket_id).accumulate(actual_data_len,
                                                      eval_loss)
            input_ph, output_ph, em_acc_op, summary_op = model.test_summary_ops[
                bucket_id]
            em_acc, _ = sess.run(
                [em_acc_op, summary_op],
                feed_dict={
                    input_ph: np.array(encoder_inputs),
                    output_ph: np.array(output_logits)
                })
            if em_acc is not None:
                primary_acm.get("em_acc",
                                bucket_id).accumulate(actual_data_len, em_acc)

        # Guard the exponential with the accumulated bucket loss that is
        # actually exponentiated, not with the loss of the last batch.
        bucket_loss = float(primary_acm.get("eval_loss", bucket_id).value)
        eval_ppx = math.exp(bucket_loss) if bucket_loss < 300 else float("inf")
        logging.info("  eval: bucket %d perplexity %.6f", bucket_id, eval_ppx)
        if single_prop:
            logging.info("  eval: " + ",".join([
                "%s %.6e " % (key, val[bucket_id].value)
                for key, val in acms.acumulators.items()
            ]))
        else:
            for i in range(num_prop):
                logging.info("  eval: Property(%d) " % (i + 1) + ",".join([
                    "%s %.6e " % (key, val[bucket_id].value)
                    for key, val in acms[i].acumulators.items()
                ]))

    # Add summary and calculate the overall evaluation metrics. The global
    # step is read through the session used for the evaluation itself.
    global_step = model.global_step.eval(session=sess)
    if single_prop:
        overall_acms = add_eval_summary(test_writer, global_step,
                                        acms.acumulators)
        logging.info("  eval: overall " + ", ".join(
            ["%s %.4e" % (k, v.value) for k, v in overall_acms.items()]))
    else:
        overall_acms = []
        for i in range(num_prop):
            overall_acms.append(
                add_eval_summary(test_writer, global_step,
                                 acms[i].acumulators))
            logging.info("  eval: overall Property(%d) " % (i + 1) + ", ".join(
                ["%s %.4e" % (k, v.value)
                 for k, v in overall_acms[i].items()]))
    def __init__(  # pylint: disable=too-many-locals, too-many-arguments, too-many-branches, super-init-not-called, too-many-statements
            self,
            hparams,
            forward_only=False,
            num_samples=512,
            dtype=tf.float32):
        """Create the model.
        Args:
            hparams: Hyperparameters used to contruct the nerual network.
            num_samples: number of samples for sampled softmax.
            forward_only: if set, we do not construct the backward pass in the model.
            dtype: the data type to use to store internal variables.
        """
        self.hparams = hparams
        self.source_vocab_size = hparams.source_vocab_size
        self.target_vocab_size = hparams.target_vocab_size
        self.buckets = hparams.buckets
        self.size = hparams.size
        self.num_layers = hparams.num_layers
        self.max_gradient_norm = hparams.max_gradient_norm
        self.batch_size = hparams.batch_size
        self.learning_rate = hparams.learning_rate
        self.learning_rate_decay_factor = hparams.learning_rate_decay_factor
        self.learning_rate_op = tf.Variable(float(self.learning_rate),
                                            trainable=False,
                                            dtype=dtype)
        self.learning_rate_decay_op = self.learning_rate_op.assign(
            self.learning_rate_op * hparams.learning_rate_decay_factor)
        self.dropout_rate = hparams.dropout_rate
        self.label_states = hparams.label_states
        self.alpha = hparams.alpha  # Get coefficient for combined loss function
        self.global_step = tf.Variable(0, trainable=False)
        self.reg = hparams.reg
        self.num_prop = hparams.num_prop

        logging.info("Initializing model with hparams: %s" %
                     str(self.hparams.to_json()))

        size = hparams.size
        buckets = hparams.buckets
        dropout_rate = hparams.dropout_rate
        num_layers = hparams.num_layers

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None
        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.target_vocab_size:
            w_t = tf.get_variable("proj_w",
                                  [self.target_vocab_size, hparams.size],
                                  dtype=dtype)
            w = tf.transpose(w_t)
            b = tf.get_variable("proj_b", [self.target_vocab_size],
                                dtype=dtype)
            output_projection = (w, b)

            def sampled_loss(labels, logits):
                """Sampleed loss function."""
                labels = tf.reshape(labels, [-1, 1])
                # We need to compute the sampled_softmax_loss using 32bit floats to
                # avoid numerical instabilities.
                local_w_t = tf.cast(w_t, tf.float32)
                local_b = tf.cast(b, tf.float32)
                local_inputs = tf.cast(logits, tf.float32)
                return tf.cast(
                    tf.nn.sampled_softmax_loss(local_w_t, local_b, labels,
                                               local_inputs, num_samples,
                                               self.target_vocab_size), dtype)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        def single_cell():
            """internal single cell for RNN"""
            cell_cls_name = "%sCell" % hparams.rnn_cell
            cell_cls = getattr(tf.contrib.rnn, cell_cls_name)
            ret = cell_cls(hparams.size)
            ret = tf.nn.rnn_cell.DropoutWrapper(ret,
                                                input_keep_prob=dropout_rate,
                                                output_keep_prob=dropout_rate)
            return ret

        self._fp_tensors = []

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            """Sequence to sequence function."""
            cell = single_cell()
            if num_layers > 1:
                cell = tf.contrib.rnn.MultiRNNCell(
                    [single_cell() for _ in range(num_layers)])
            outputs, encoder_state, decoder_state = embedding_attention_seq2seq(
                encoder_inputs,
                decoder_inputs,
                cell,
                num_encoder_symbols=hparams.source_vocab_size,
                num_decoder_symbols=hparams.target_vocab_size,
                embedding_size=size,
                output_projection=output_projection,
                feed_previous=do_decode,
                dtype=dtype)
            self._fp_tensors.append(encoder_state)
            return outputs, decoder_state

        def pred_net(bucket_id, encoder_labels):
            """Build prediction network."""

            fp_tensor = self.get_fingerprint_tensor(bucket_id)

            # Prediction network definition.
            pred_net_cls = getattr(pred_models, hparams.pred_net_type)
            if (self.num_prop == 1):
                pred = pred_net_cls(hparams)(fp_tensor, reuse=(bucket_id > 0))
            elif (self.num_prop > 1):
                pred_mprop = pred_net_cls(hparams)(fp_tensor,
                                                   reuse=(bucket_id > 0))
                pred = pred_mprop[0]

            # Prediction loss.
            loss_cls = getattr(losses, hparams.loss_type)
            loss_sup = loss_cls(hparams)(
                input_tensor=pred if self.num_prop == 1 else pred_mprop,
                label_tensor=encoder_labels)

            # Metrics.
            metric_cls = getattr(metrics, hparams.metric_type)
            if (self.num_prop == 1):
                metric_ops = metric_cls(hparams)(input_tensor=pred,
                                                 label_tensor=encoder_labels)
            elif (self.num_prop > 1):
                metric_ops_mprop = []
                pred_mprop = tf.transpose(pred_mprop, [1, 0])
                for i in range(self.num_prop):
                    metric_ops_mprop.append(
                        metric_cls(hparams)(input_tensor=pred_mprop[i],
                                            label_tensor=encoder_labels[i]))
                metric_ops = metric_ops_mprop

            return pred, loss_sup, metric_ops

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []
        self.encoder_labels = []
        for i in range(buckets[-1][0]):  # Last bucket is the biggest one.
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))
        if self.label_states:
            if self.reg and self.num_prop > 1:
                for i in range(self.num_prop):
                    self.encoder_labels.append(
                        tf.placeholder(tf.float32,
                                       shape=[None],
                                       name="label{0}".format(i)))
            else:
                self.encoder_labels.append(
                    tf.placeholder(tf.float32 if self.reg else tf.int32,
                                   shape=[None],
                                   name="label{0}".format(0)))
        for i in range(buckets[-1][1] + 1):
            self.decoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="decoder{0}".format(i)))
            self.target_weights.append(
                tf.placeholder(dtype, shape=[None],
                               name="weight{0}".format(i)))
        # Our targets are decoder inputs shifted by one.
        targets = [
            self.decoder_inputs[i + 1]
            for i in range(len(self.decoder_inputs) - 1)
        ]

        # Training outputs and losses.
        if forward_only:
            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, True),
                softmax_loss_function=softmax_loss_function)
            # If we use output projection, we need to project outputs for decoding.
            if output_projection is not None:
                for b in range(len(buckets)):
                    self.outputs[b] = [
                        tf.matmul(output, output_projection[0]) +
                        output_projection[1] for output in self.outputs[b]
                    ]

        else:
            self.outputs, self.losses = tf.contrib.legacy_seq2seq.model_with_buckets(
                self.encoder_inputs,
                self.decoder_inputs,
                targets,
                self.target_weights,
                buckets,
                lambda x, y: seq2seq_f(x, y, False),
                softmax_loss_function=softmax_loss_function)

        if self.label_states:
            self.loss_supervised = [None] * len(buckets)
            self.pred = [None] * len(buckets)
            self.sup_metrics = [None] * len(buckets)
            for bucket_id in range(len(buckets)):
                self.pred[bucket_id], self.loss_supervised[bucket_id],\
                self.sup_metrics[bucket_id] = (
                    pred_net(bucket_id, self.encoder_labels))

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        self.summary_ops = []
        self.test_summary_ops = []
        # TODO(zhengxu): This is a workaround to avoid initializing the test
        # summaries from the train script.
        # Append test summaries.
        self.test_summary_ops = [
            self.get_em_acc_op(bucket_id) for bucket_id in range(len(buckets))
        ]
        if not forward_only:
            self.gradient_norms = []
            self.updates = []
            lr_summary_op = tf.summary.scalar("learning_rate",
                                              self.learning_rate_op)
            opt = tf.train.GradientDescentOptimizer(self.learning_rate_op)
            for b in range(len(buckets)):
                loss = self.losses[b] if hparams.use_recovery else 0.
                if self.label_states:
                    loss = (1.0 - self.alpha
                            ) * loss + self.alpha * self.loss_supervised[b]
                gradients = tf.gradients(loss, params)
                clipped_gradients, norm = tf.clip_by_global_norm(
                    gradients, hparams.max_gradient_norm)
                self.gradient_norms.append(norm)
                self.updates.append(
                    opt.apply_gradients(zip(clipped_gradients, params),
                                        global_step=self.global_step))
                bucket_summary_ops = [
                    # Global gradient norm for this bucket.
                    tf.summary.scalar("global_norm_%d" % b, norm),
                    # Unsupervised (recovery) loss for this bucket.
                    tf.summary.scalar("loss_unsup_%d" % b, self.losses[b]),
                    # Learning rate summary op.
                    lr_summary_op
                ]
                if self.label_states:
                    bucket_summary_ops.append([
                        # Supervised (Classification) Loss.
                        tf.summary.scalar("loss_sup_%d" %
                                          b, self.loss_supervised[b]),
                        # Total loss (Multi-task loss).
                        tf.summary.scalar("total_loss_%d" % b, loss)
                    ] + [
                        # Supervised task evaluation metric.
                        tf.summary.scalar("%s_%d" % (k, b), v)
                        for k, v in self.sup_metrics[b].items()
                    ] if self.num_prop == 1 else [
                        tf.summary.scalar("%s_%d_%d" % (k, b, i), v)
                        for i in range(self.num_prop)
                        for k, v in self.sup_metrics[b][i].items()
                    ])
                self.summary_ops.append(tf.summary.merge(bucket_summary_ops))

        variables_to_restore = [
            v for v in tf.global_variables() if v.name.split('/')[0] != 'pred'
        ]
        self.saver_sup = tf.train.Saver(tf.global_variables(),
                                        save_relative_paths=True)
        self.saver_unsup = tf.train.Saver(variables_to_restore,
                                          save_relative_paths=True)