Example #1
from ignite.engine import Events  # the event hooks below are pytorch-ignite

def train(args):
    iters, vocab = get_iterator(args)

    model = get_model(args, vocab)
    loss_fn = get_loss(args, vocab)
    optimizer = get_optimizer(args, model)

    trainer = get_trainer(args, model, loss_fn, optimizer)
    metrics = get_metrics(args, vocab)
    evaluator = get_evaluator(args, model, loss_fn, metrics)

    logger = get_logger(args)

    @trainer.on(Events.STARTED)
    def on_training_started(engine):
        print("Begin Training")

    @trainer.on(Events.ITERATION_COMPLETED)
    def log_iter_results(engine):
        log_results(logger, 'train/iter', engine.state, engine.state.iteration)

    @trainer.on(Events.EPOCH_COMPLETED)
    def evaluate_epoch(engine):
        log_results(logger, 'train/epoch', engine.state, engine.state.epoch)
        state = evaluate_once(evaluator, iterator=iters['val'])
        log_results(logger, 'valid/epoch', state, engine.state.epoch)

    trainer.run(iters['train'], max_epochs=args.max_epochs)
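
The helpers `evaluate_once` and `log_results` are not shown in this example; below is a minimal sketch of what the call sites imply, assuming an ignite `Engine` evaluator and a TensorBoard-style logger (names and signatures are inferred, not the original implementations):

def evaluate_once(evaluator, iterator):
    # One full pass over the iterator; attached metrics end up in state.metrics.
    return evaluator.run(iterator)

def log_results(logger, prefix, state, step):
    # Log every computed metric under the given prefix at the given step.
    for name, value in state.metrics.items():
        logger.add_scalar('{}/{}'.format(prefix, name), value, step)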
Example #2
    def check_dataloader(self, **kwargs):
        args = self._default_args(**kwargs)

        iters, vocab = get_iterator(args)
        for batch in iters['train']:
            import ipdb
            ipdb.set_trace()  # XXX DEBUG
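
`get_iterator` itself is not part of these snippets; its contract, implied by the call sites, is a dict of per-split iterators plus a vocab. A hedged sketch with stand-in `build_vocab`/`build_dataset` helpers (both hypothetical):

from torch.utils.data import DataLoader

def get_iterator(args, vocab=None):
    # build_vocab / build_dataset are hypothetical stand-ins for the
    # project's own data loading; only the returned shape matters here.
    if vocab is None:
        vocab = build_vocab(args)
    iters = {
        split: DataLoader(build_dataset(args, vocab, split),
                          batch_size=args.batch_size,
                          shuffle=(split == 'train'))
        for split in ('train', 'val')
    }
    return iters, vocab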
Example #3
import torch

def get_model_ckpt(args):
    ckpt_available = args.ckpt_name is not None
    vocab = None
    if ckpt_available:
        name = args.ckpt_name if args.ckpt_name.endswith('*') else '{}*'.format(args.ckpt_name)
        ckpt_paths = sorted(args.ckpt_path.glob(name))
        assert len(ckpt_paths) > 0, "no ckpt candidate for {}".format(
            args.ckpt_path / args.ckpt_name)
        ckpt_path = ckpt_paths[0]  # crude heuristic: first match in sorted order is treated as the best ckpt
        print("loading from {}".format(ckpt_path))
        dt = torch.load(ckpt_path)
        args.update(dt['args'])
        vocab = dt['vocab']

    iters, vocab = get_iterator(args, vocab)
    model = get_model(args, vocab)

    if ckpt_available:
        model.load_state_dict(dt['model'])
    return args, model, iters, vocab, ckpt_available
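
The keys read above ('args', 'vocab', 'model') imply a save routine along the following lines; this is a hypothetical counterpart, not code from the original repo:

def save_ckpt(args, model, vocab, name):
    dt = {
        'args': args,                 # hyperparameters restored via args.update
        'vocab': vocab,               # must match the model's embeddings
        'model': model.state_dict(),  # weights for load_state_dict
    }
    torch.save(dt, args.ckpt_path / name)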
Example #4
    def build(self, mode):
        # get an iterator that iterates the dataset and batches data in the expected format
        if mode != tf.estimator.ModeKeys.TRAIN and mode != tf.estimator.ModeKeys.PREDICT:
            raise ValueError("mode must be a key in tf.estimator.ModeKeys")
        index_table = tf.contrib.lookup.index_table_from_file(
            param.vocab_file, num_oov_buckets=0,
            default_value=1)  # create index_table to map a string to a integer
        data_file = param.data_file
        self.it_train = get_iterator(data_file, index_table)

        x, y_in, y_out, x_seq_length, y_seq_length = self.it_train.get_next()
        # encoder/decoder input ids and their sequence lengths from the iterator
        #x = tf.Print(x, [x, y_in, y_out, x_seq_length, y_seq_length])

        y_l = tf.shape(y_in)[1]  # max decoder input length in this minibatch

        # build encoder/decoder input layers: look up token embeddings and add the position encoding
        encoding = position_encoding_init(param.d_model, param.max_length)
        print(encoding)
        encoder_input = input_layer(encoding, x, param.vocab_size,
                                    x_seq_length, param.d_model,
                                    param.keep_prob, "input")
        decoder_input = input_layer(encoding,
                                    y_in,
                                    param.vocab_size,
                                    y_seq_length,
                                    param.d_model,
                                    param.keep_prob,
                                    "input",
                                    reuse=True)  # reuse embedding

        # build encoder blocks, self-attention use encoder_input as both queries and keys
        for i in range(param.num_encoder_blocks):
            encoder_input = encoder_block(encoder_input, encoder_input,
                                          param.d_qkv, param.d_ff,
                                          param.num_heads, param.keep_prob,
                                          x_seq_length, x_seq_length,
                                          "encoder_block_%d" % i)
        encoder_output = encoder_input
        # build decoder blocks: self-attention uses decoder_input as queries and keys; vanilla (encoder-decoder) attention uses the encoder's output as keys
        for i in range(param.num_decoder_blocks):
            decoder_input = decoder_block(decoder_input, encoder_output,
                                          decoder_input, param.d_qkv,
                                          param.d_ff, param.num_heads,
                                          param.keep_prob, x_seq_length,
                                          y_seq_length, "decoder_block_%d" % i)
        with tf.variable_scope("last_projection"):
            decoder_output = tf.layers.dense(decoder_input, param.vocab_size)
        with tf.variable_scope("loss"):
            mask = tf.sequence_mask(y_seq_length, y_l)
            #mask = tf.Print(mask, [mask], summarize=1000)
            if mode == tf.estimator.ModeKeys.TRAIN:
                labels = tf.one_hot(y_out, param.vocab_size)
                labels_smoothed = label_smoothing(labels)
                loss = tf.nn.softmax_cross_entropy_with_logits(
                    labels=labels_smoothed, logits=decoder_output)
                #loss = tf.Print(loss, [loss, labels_smoothed], summarize=1000)
                loss = loss * tf.to_float(mask)  # batch, y_l
                self.loss = tf.reduce_sum(loss) / (tf.to_float(
                    tf.reduce_sum(y_seq_length)))  # per token loss
                tf.summary.scalar('loss', self.loss)
                self.global_step = tf.Variable(0,
                                               name='global_step',
                                               trainable=False)
                # optimizer
                self.optimizer = tf.train.AdamOptimizer(
                    learning_rate=param.learning_rate * 10,
                    beta1=0.9,
                    beta2=0.98,
                    epsilon=1e-8)

                tvars = tf.trainable_variables()
                # clip gradients of the per-token loss, not the unreduced tensor
                grads, _ = tf.clip_by_global_norm(
                    tf.gradients(self.loss, tvars), 10)
                self.train_op = self.optimizer.apply_gradients(
                    zip(grads, tvars), global_step=self.global_step)

            elif mode == tf.estimator.ModeKeys.PREDICT:
                mask = tf.expand_dims(mask, 2)  # batch, y_l, 1
                pred = tf.argmax(
                    decoder_output * tf.to_float(mask),
                    -1)[:, -1]  # (batch,): the last position along the length dim is the predicted next token

        self.merged = tf.summary.merge_all()
        self.init_op = tf.group(self.it_train.initializer,
                                tf.global_variables_initializer(),
                                tf.tables_initializer())

        def decode():
            # Sequential decoding is only used at inference; during training the model decodes in parallel.
            # Batched decoding should use a helper like maybe_finished, which pads finished samples with the
            # pad value and keeps decoding the rest until all samples finish or the max decode step is reached.
            pass
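
`position_encoding_init` is not shown in this example; below is a minimal sketch assuming the standard sinusoidal encoding from "Attention Is All You Need" (the original implementation may differ):

import numpy as np

def position_encoding_init(d_model, max_length):
    pos = np.arange(max_length)[:, None]       # (max_length, 1)
    dim = np.arange(d_model)[None, :]          # (1, d_model)
    angle = pos / np.power(10000.0, (2 * (dim // 2)) / np.float32(d_model))
    enc = np.zeros((max_length, d_model), dtype=np.float32)
    enc[:, 0::2] = np.sin(angle[:, 0::2])      # even dimensions: sine
    enc[:, 1::2] = np.cos(angle[:, 1::2])      # odd dimensions: cosine
    return enc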

Example #5
if __name__ == '__main__':
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Params Preparation
    print_args(FLAGS)
    entity_table, entity, entity_size = load_vocab(FLAGS.entity_vocab)
    relation_table, _, relation_size = load_vocab(FLAGS.relation_vocab)
    FLAGS.entity_size = entity_size
    FLAGS.relation_size = relation_size

    # Model Preparation
    mode = tf.estimator.ModeKeys.TRAIN
    iterator = get_iterator(FLAGS.data_file,
                            entity,
                            entity_table,
                            relation_table,
                            FLAGS.batch_size,
                            shuffle_buffer_size=FLAGS.shuffle_buffer_size)
    if FLAGS.model_name.lower() == "transe":
        model = TransE(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "distmult":
        model = DISTMULT(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "transh":
        model = TransH(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "transr":
        model = TransR(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "transd":
        model = TransD(iterator, FLAGS)
    elif FLAGS.model_name.lower() == "stranse":
        model = STransE(iterator, FLAGS)


Example #6
if __name__ == '__main__':
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Params Preparation
    print_args(FLAGS)
    vocab_table, _, vocab_size = load_vocab(FLAGS.vocab_file)
    FLAGS.vocab_size = vocab_size

    # Model Preparation
    padding = FLAGS.model_type == 1
    mode = tf.estimator.ModeKeys.TRAIN
    iterator = get_iterator(
        FLAGS.train_file, vocab_table, FLAGS.batch_size,
        q_max_len=FLAGS.question_max_len,
        a_max_len=FLAGS.answer_max_len,
        num_buckets=FLAGS.num_buckets,
        shuffle_buffer_size=FLAGS.shuffle_buffer_size,
        padding=padding,
    )
    if FLAGS.model_type == 1:
        model = AP_CNN(iterator, FLAGS, mode)
    else:
        model = AP_biLSTM(iterator, FLAGS, mode)

    train()
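
The `get_iterator` signatures in Examples #6 and #7 (length limits, `num_buckets`, `padding`) suggest a `tf.data` pipeline with length bucketing. A hedged sketch, where `parse_line` is a hypothetical stand-in for the per-example parsing (TF 1.x API assumed):

def get_iterator(data_file, vocab_table, batch_size,
                 q_max_len=None, a_max_len=None, num_buckets=1,
                 shuffle_buffer_size=10000, padding=False):
    # parse_line (hypothetical) maps a text line to (q_ids, a_ids, q_len, a_len)
    dataset = tf.data.TextLineDataset(data_file)
    dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.map(
        lambda line: parse_line(line, vocab_table, q_max_len, a_max_len))

    def key_fn(q, a, q_len, a_len):
        # assign each example to a bucket by question length
        bucket_width = (q_max_len + num_buckets - 1) // num_buckets
        return tf.to_int64(tf.minimum(q_len // bucket_width, num_buckets - 1))

    def reduce_fn(unused_key, windowed):
        # pad to fixed lengths when padding=True, else to the batch maximum
        return windowed.padded_batch(batch_size, padded_shapes=(
            [q_max_len if padding else None],
            [a_max_len if padding else None], [], []))

    dataset = dataset.apply(tf.data.experimental.group_by_window(
        key_fn, reduce_fn, window_size=batch_size))
    return dataset.make_initializable_iterator()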

Example #7
                save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
                model.save(sess, save_path)
                print("Epoch {}, saved checkpoint to {}".format(
                    epoch + 1, save_path))


if __name__ == '__main__':
    # os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
    # Params Preparation
    print_args(FLAGS)
    vocab_table, _, vocab_size = load_vocab(FLAGS.vocab_file)
    FLAGS.vocab_size = vocab_size

    # Model Preparation
    mode = tf.estimator.ModeKeys.TRAIN
    iterator = get_iterator(
        FLAGS.train_file,
        vocab_table,
        FLAGS.batch_size,
        s0_max_len=FLAGS.s0_max_len,
        s1_max_len=FLAGS.s1_max_len,
        num_buckets=FLAGS.num_buckets,
        shuffle_buffer_size=FLAGS.shuffle_buffer_size,
        padding=True,
    )
    if FLAGS.model_name.lower() == "bcnn":
        model = BCNN(iterator, FLAGS, mode)
    else:
        model = ABCNN(iterator, FLAGS, mode, FLAGS.model_type)

    train()
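
The checkpoint-saving fragment at the top of this example implies a `train()` loop roughly like the following; the session setup, `FLAGS.num_epochs`, and the `model.train_op`/`model.loss`/`model.save` names are assumptions inferred from the fragment:

def train():
    with tf.Session() as sess:
        sess.run([tf.global_variables_initializer(),
                  tf.tables_initializer(), iterator.initializer])
        for epoch in range(FLAGS.num_epochs):  # FLAGS.num_epochs assumed
            while True:
                try:
                    sess.run([model.train_op, model.loss])
                except tf.errors.OutOfRangeError:
                    sess.run(iterator.initializer)  # rewind for the next epoch
                    break
            save_path = os.path.join(FLAGS.model_dir, "model.ckpt")
            model.save(sess, save_path)
            print("Epoch {}, saved checkpoint to {}".format(epoch + 1, save_path))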
Example #8
            # save model
            if FLAGS.savemodel and (epoch + 1) % FLAGS.save_model_per_epochs == 0:
                model_name = "model_{}_{}".format(
                    epoch + 1,
                    time.strftime('%Y%m%d%H%M%S', time.localtime(time.time())))
                ckpt_path = os.path.join(FLAGS.checkpointDir, model_name)
                model.savedmodel(sess, signature, ckpt_path)
                print("Export SavedModel with acc={} to {}".format(
                    acc, ckpt_path))


if __name__ == '__main__':
    # tf.set_min_vlog_level(1)
    # Params Preparation
    print_args(FLAGS)
    vocab_table, _, vocab_size = load_vocab(FLAGS.vocab_file)
    FLAGS.vocab_size = vocab_size

    # Model Preparation
    # placeholders let one graph switch data files and modes at run time
    data_file_placeholder = tf.placeholder(tf.string, [])
    mode = tf.placeholder(tf.string, [])

    iterator = get_iterator(data_file_placeholder,
                            vocab_table,
                            FLAGS.batch_size,
                            question_max_len=FLAGS.question_max_len,
                            answer_max_len=FLAGS.answer_max_len,
                            shuffle_buffer_size=FLAGS.num_samples)
    model = parse_model(iterator, FLAGS, mode)
    train()
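
Because the iterator here is built from placeholders, the data file (and the mode consumed by the model) are supplied at run time. A hedged usage sketch; `FLAGS.train_file` and `model.train_op` are assumptions:

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(), tf.tables_initializer()])
    # point the same graph at a particular data file
    sess.run(iterator.initializer,
             feed_dict={data_file_placeholder: FLAGS.train_file})
    # feed the mode string alongside each training step
    sess.run(model.train_op,
             feed_dict={mode: tf.estimator.ModeKeys.TRAIN})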