Example #1
def train_model(model, train_data, valid_data, fields, optim):

    # If repeat=True, the iterator cycles forever: 'for b, batch in enumerate(train_iter):'
    # never terminates unless you break out manually (see the sketch below).
    train_iter = table.IO.OrderedIterator(dataset=train_data,
                                          batch_size=opt.batch_size,
                                          device=opt.gpuid[0],
                                          repeat=False)
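
    # A minimal sketch of that pitfall (hypothetical names, illustration only):
    #     endless_iter = table.IO.OrderedIterator(dataset=train_data, repeat=True, ...)
    #     for b, batch in enumerate(endless_iter):
    #         ...                     # never ends on its own
    #         if b >= max_steps:      # so you must break manually
    #             break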

    # train=False already implies repeat=False, so repeat does not need to be set here.
    # sort_within_batch=False: examples within a batch are ordered by ascending length;
    # with True they would be in descending order instead.
    # sort=True orders the examples of the whole dataset, and train=False disables random
    # shuffling, so validation batches are deterministic. Training order is (and should be)
    # random, which is why the train iterator above sets neither sort nor sort_within_batch.
    valid_iter = table.IO.OrderedIterator(dataset=valid_data,
                                          batch_size=opt.batch_size,
                                          device=opt.gpuid[0],
                                          train=False,
                                          sort=True,
                                          sort_within_batch=False)

    # Default agg_sample_rate is 0.5; default smooth_eps is 0.
    # The loss is a sum of negative log likelihoods, optionally label-smoothed via
    # smooth_eps (see the sketch after this example).
    train_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, smooth_eps=model.opt.smooth_eps).cuda()
    valid_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, smooth_eps=model.opt.smooth_eps).cuda()

    trainer = table.Trainer(model, train_iter, valid_iter, train_loss,
                            valid_loss, optim)

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('')

        if opt.fix_word_vecs:
            if (epoch >= opt.update_word_vecs_after):
                model.q_encoder.embeddings.set_update(True)
            else:
                model.q_encoder.embeddings.set_update(False)

        # 1. Train for one epoch on the training set.
        train_stats = trainer.train(epoch, report_func)
        print('Train accuracy: %s' % train_stats.accuracy(True))

        # 2. Validate on the validation set.
        valid_stats = trainer.validate()
        print('Validation accuracy: %s' % valid_stats.accuracy(True))

        # 3. Log to remote server.
        # train_stats.log("train", logger, optim.lr, epoch)
        # valid_stats.log("valid", logger, optim.lr, epoch)

        # 4. Update the learning rate
        trainer.epoch_step(None, epoch)

        # 5. Drop a checkpoint if needed.
        if epoch >= opt.start_checkpoint_at:
            trainer.drop_checkpoint(opt, epoch, fields, valid_stats)
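
The comments in Example #1 describe the loss as a summed negative log likelihood with optional label smoothing controlled by smooth_eps. TableLossCompute itself is not shown on this page, so the following is only a minimal sketch of what such a smoothed NLL could look like in plain PyTorch; the function name smoothed_nll and the uniform-mixture smoothing scheme are assumptions for illustration, not the repository's implementation.

import torch.nn.functional as F

def smoothed_nll(log_probs, target, smooth_eps=0.0):
    # log_probs: (N, K) log-softmax outputs; target: (N,) gold class indices.
    nll = F.nll_loss(log_probs, target, reduction='sum')
    if smooth_eps == 0.0:
        return nll
    # Mix the one-hot targets with a uniform distribution over the K classes.
    K = log_probs.size(1)
    uniform = -log_probs.sum() / K
    return (1.0 - smooth_eps) * nll + smooth_eps * uniform
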
Example #2
def train_model(model, train_data, valid_data, fields, optim):
    train_iter = table.IO.OrderedIterator(
        dataset=train_data, batch_size=opt.batch_size, device=device, repeat=True)
    valid_iter = table.IO.OrderedIterator(
        dataset=valid_data, batch_size=opt.batch_size, device=device, repeat=True, train=False, sort=True, sort_within_batch=False)

    train_loss = table.Loss.TableLossCompute(opt.agg_sample_rate, opt, smooth_eps=model.opt.smooth_eps)
    if torch.cuda.is_available():
        train_loss = train_loss.cuda()
    valid_loss = table.Loss.TableLossCompute(opt.agg_sample_rate, opt, smooth_eps=model.opt.smooth_eps)
    if torch.cuda.is_available():
        valid_loss = valid_loss.cuda()

    trainer = table.Trainer(model, train_iter, valid_iter,
                            train_loss, valid_loss, optim)

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('Started Epoch: ' + str(epoch))
        print("num examples: " + str(len(train_iter)))

        if opt.fix_word_vecs:
            if (epoch >= opt.update_word_vecs_after):
                model.q_encoder.embeddings.set_update(True)
            else:
                model.q_encoder.embeddings.set_update(False)

        # 1. Train for one epoch on the training set.
        train_stats = trainer.train(epoch, report_func)
        print('Train accuracy: %s' % train_stats.accuracy(True))

        # 2. Validate on the validation set.
        valid_stats = trainer.validate()
        print('Validation accuracy: %s' % valid_stats.accuracy(True))

        # 3. Log to remote server.
        # train_stats.log("train", logger, optim.lr, epoch)
        # valid_stats.log("valid", logger, optim.lr, epoch)

        # 4. Update the learning rate
        trainer.epoch_step(None, epoch)

        # 5. Drop a checkpoint if needed.
        if epoch >= opt.start_checkpoint_at:
            trainer.drop_checkpoint(opt, epoch, fields, valid_stats)
        print('Completed Epoch: ' + str(epoch))
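
Example #2 guards each .cuda() call with torch.cuda.is_available() so the same code runs on CPU-only machines. That pattern can be factored into a small helper; maybe_cuda is a hypothetical name used here for illustration, not part of the repository.

import torch

def maybe_cuda(module):
    # Move a module to the GPU only when one is present; otherwise leave it on the CPU.
    return module.cuda() if torch.cuda.is_available() else module

# Usage, mirroring the guarded calls above:
#     train_loss = maybe_cuda(table.Loss.TableLossCompute(
#         opt.agg_sample_rate, opt, smooth_eps=model.opt.smooth_eps))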
Example #3
def train_model(model, train_data, valid_data, fields, optim):
    train_iter = table.IO.OrderedIterator(dataset=train_data,
                                          batch_size=opt.batch_size,
                                          device=opt.gpuid[0],
                                          repeat=False)
    valid_iter = table.IO.OrderedIterator(dataset=valid_data,
                                          batch_size=opt.batch_size,
                                          device=opt.gpuid[0],
                                          train=False,
                                          sort=True,
                                          sort_within_batch=False)

    train_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, smooth_eps=model.opt.smooth_eps).cuda()
    valid_loss = table.Loss.TableLossCompute(
        opt.agg_sample_rate, smooth_eps=model.opt.smooth_eps).cuda()

    trainer = table.Trainer(model, train_iter, valid_iter, train_loss,
                            valid_loss, optim)

    for epoch in range(opt.start_epoch, opt.epochs + 1):
        print('')

        if opt.fix_word_vecs:
            if (epoch >= opt.update_word_vecs_after):
                model.q_encoder.embeddings.set_update(True)
            else:
                model.q_encoder.embeddings.set_update(False)

        # 1. Train for one epoch on the training set.
        train_stats = trainer.train(epoch, report_func)
        print('Train accuracy: %s' % train_stats.accuracy(True))

        # 2. Validate on the validation set.
        valid_stats = trainer.validate()
        print('Validation accuracy: %s' % valid_stats.accuracy(True))

        # 3. Log to remote server.
        # train_stats.log("train", logger, optim.lr, epoch)
        # valid_stats.log("valid", logger, optim.lr, epoch)

        # 4. Update the learning rate
        trainer.epoch_step(None, epoch)

        # 5. Drop a checkpoint if needed.
        if epoch >= opt.start_checkpoint_at:
            trainer.drop_checkpoint(opt, epoch, fields, valid_stats)
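
All four examples toggle embedding updates through model.q_encoder.embeddings.set_update(...) when opt.fix_word_vecs is set, so pretrained word vectors stay frozen until epoch update_word_vecs_after. set_update is the repository's own method; in plain PyTorch the same effect is usually achieved by flipping requires_grad on the embedding parameters, as in this sketch (toggle_embedding_update is a hypothetical helper):

def toggle_embedding_update(embeddings, update):
    # Freeze (update=False) or unfreeze (update=True) every embedding parameter.
    for p in embeddings.parameters():
        p.requires_grad_(update)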
Example #4
def train(model, train_data, valid_data, fields, optim):
    experiment.set_model_graph(str(model))

    train_iter = table.IO.OrderedIterator(
        dataset=train_data, batch_size=args.batch_size, device=args.gpu_id[0], repeat=False
    )

    valid_iter = table.IO.OrderedIterator(
        dataset=valid_data, batch_size=args.batch_size, device=args.gpu_id[0], train=False, sort=True, sort_within_batch=False
    )

    train_loss = table.Loss.LossCompute(smooth_eps=model.args.smooth_eps).cuda()
    valid_loss = table.Loss.LossCompute(smooth_eps=model.args.smooth_eps).cuda()

    trainer = table.Trainer(model, train_iter, valid_iter, train_loss, valid_loss, optim, summary_writer, experiment)

    logger.debug("Training from epoch %d, total: %d" % (args.start_epoch, args.epochs))

    for epoch in range(args.start_epoch, args.epochs + 1):
        if args.fix_word_vecs:
            model.q_encoder.embeddings.set_update(epoch >= args.update_word_vecs_after)

        train_stats = trainer.train(epoch, fields, report_func)
        logger.info('Train accuracy: %s' % train_stats.accuracy(return_str=True))

        for k, v in train_stats.accuracy(return_str=False).items():
            summary_writer.add_scalar("train/accuracy/%s" % k, v / 100.0, trainer.global_timestep)
            experiment.log_metric("train/accuracy/%s" % k, v / 100.0, step=trainer.global_timestep)

        valid_stats = trainer.validate(epoch, fields)
        logger.info('Validation accuracy: %s' % valid_stats.accuracy(return_str=True))

        for k, v in valid_stats.accuracy(return_str=False).items():
            summary_writer.add_scalar("valid/accuracy/%s" % k, v / 100.0, trainer.global_timestep)
            experiment.log_metric("valid/accuracy/%s" % k, v / 100.0, step=trainer.global_timestep)

        # Update the learning rate
        trainer.epoch_step(eval_metric=None, epoch=epoch)

        experiment.log_epoch_end(epoch_cnt=epoch)

        if epoch >= args.start_checkpoint_at:
            trainer.drop_checkpoint(args, epoch, fields, valid_stats)

    logger.info('Training done')
    summary_writer.close()
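
Example #4 also logs per-key accuracies to a TensorBoard summary_writer and a Comet-style experiment object, both constructed outside the function shown here. A minimal sketch of the TensorBoard side, using the standard torch.utils.tensorboard API (the log directory, tag, and values are assumptions):

from torch.utils.tensorboard import SummaryWriter

writer = SummaryWriter(log_dir='runs/example')  # hypothetical log directory
# Accuracies in the example are percentages, hence the division by 100.
writer.add_scalar('valid/accuracy/total', 85.0 / 100.0, global_step=1000)
writer.close()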