# Imports assumed by these d2l-style snippets; project-local helpers
# (log, load_data, get_batch_loss, test_acc, split_and_load, ...) not shown.
from mxnet import autograd, gluon, init, np, npx
from d2l import mxnet as d2l


def train_s2s_ch9(model, data_iter, lr, num_epochs, ctx):
    model.initialize(init.Xavier(), force_reinit=True, ctx=ctx)
    trainer = gluon.Trainer(model.collect_params(), 'adam',
                            {'learning_rate': lr})
    loss = MaskedSoftmaxCELoss()  # defined in d2l chapter 9
    animator = d2l.Animator(xlabel='epoch', ylabel='loss',
                            xlim=[1, num_epochs], ylim=[0, 0.25])
    for epoch in range(1, num_epochs + 1):
        timer = d2l.Timer()
        metric = d2l.Accumulator(2)  # loss_sum, num_tokens
        for batch in data_iter:
            X, X_vlen, Y, Y_vlen = [x.as_in_context(ctx) for x in batch]
            # Teacher forcing: feed Y[:, :-1] in, predict Y[:, 1:]
            Y_input, Y_label, Y_vlen = Y[:, :-1], Y[:, 1:], Y_vlen - 1
            with autograd.record():
                Y_hat, _ = model(X, Y_input, X_vlen, Y_vlen)
                l = loss(Y_hat, Y_label, Y_vlen)
            l.backward()
            d2l.grad_clipping(model, 1)
            num_tokens = Y_vlen.sum()
            trainer.step(num_tokens)
            metric.add(l.sum(), num_tokens)
        if epoch % 10 == 0:
            animator.add(epoch, (metric[0]/metric[1],))

    print('loss %.3f, %d tokens/sec on %s' % (
        metric[0] / metric[1], metric[1] / timer.stop(), ctx))
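
A minimal usage sketch, assuming the d2l machine-translation pipeline and encoder-decoder classes from the same chapter (`d2l.load_data_nmt`, `d2l.Seq2SeqEncoder`, `d2l.Seq2SeqDecoder`, `d2l.EncoderDecoder`); treat these names and hyperparameters as assumptions if your d2l version differs:

embed_size, num_hiddens, num_layers, dropout = 32, 32, 2, 0.0
batch_size, num_steps = 64, 10
lr, num_epochs, ctx = 0.005, 300, d2l.try_gpu()
train_iter, src_vocab, tgt_vocab = d2l.load_data_nmt(batch_size, num_steps)
encoder = d2l.Seq2SeqEncoder(len(src_vocab), embed_size, num_hiddens,
                             num_layers, dropout)
decoder = d2l.Seq2SeqDecoder(len(tgt_vocab), embed_size, num_hiddens,
                             num_layers, dropout)
model = d2l.EncoderDecoder(encoder, decoder)
train_s2s_ch9(model, train_iter, lr, num_epochs, ctx)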
Example #2
def train_ch13(net,
               train_iter,
               test_iter,
               loss,
               trainer,
               num_epochs,
               ctx_list=d2l.try_all_gpus(),
               split_f=d2l.split_batch):
    num_batches, timer = len(train_iter), d2l.Timer()
    animator = d2l.Animator(xlabel='epoch',
                            xlim=[0, num_epochs],
                            ylim=[0, 1],
                            legend=['train loss', 'train acc', 'test acc'])
    for epoch in range(num_epochs):
        # Sum of training loss, sum of training accuracy,
        # no. of examples, no. of predictions
        metric = d2l.Accumulator(4)
        for i, (features, labels) in enumerate(train_iter):
            timer.start()
            l, acc = train_batch_ch13(net, features, labels, loss, trainer,
                                      ctx_list, split_f)
            metric.add(l, acc, labels.shape[0], labels.size)
            timer.stop()
            if (i + 1) % max(1, num_batches // 5) == 0:
                animator.add(
                    epoch + i / num_batches,
                    (metric[0] / metric[2], metric[1] / metric[3], None))
        test_acc = d2l.evaluate_accuracy_gpus(net, test_iter, split_f)
        animator.add(epoch + 1, (None, None, test_acc))
    print('loss %.3f, train acc %.3f, test acc %.3f' %
          (metric[0] / metric[2], metric[1] / metric[3], test_acc))
    print('%.1f examples/sec on %s' %
          (metric[2] * num_epochs / timer.sum(), ctx_list))
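
train_batch_ch13 is referenced but not defined in this snippet; in the d2l book it is roughly the following (a sketch of that helper, which may differ from the exact version this example was paired with):

def train_batch_ch13(net, features, labels, loss, trainer, ctx_list, split_f):
    # Split the batch across devices, run forward/backward on each shard
    Xs, ys = split_f(features, labels, ctx_list)
    with autograd.record():
        pys = [net(X) for X in Xs]
        ls = [loss(py, y) for py, y in zip(pys, ys)]
    for l in ls:
        l.backward()
    trainer.step(labels.shape[0])
    train_loss_sum = sum([float(l.sum().asnumpy()) for l in ls])
    train_acc_sum = sum(d2l.accuracy(py, y) for py, y in zip(pys, ys))
    return train_loss_sum, train_acc_sum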
Example #3
def train_recsys_rating(net, train_iter, test_iter, loss, trainer, num_epochs,
                        ctx_list=d2l.try_all_gpus(), evaluator=None,
                        **kwargs):
    timer = d2l.Timer()
    animator = d2l.Animator(xlabel='epoch', xlim=[1, num_epochs], ylim=[0, 2],
                            legend=['train loss', 'test RMSE'])
    for epoch in range(num_epochs):
        metric, l = d2l.Accumulator(3), 0.
        for i, values in enumerate(train_iter):
            timer.start()
            input_data = []
            values = values if isinstance(values, list) else [values]
            for v in values:
                input_data.append(gluon.utils.split_and_load(v, ctx_list))
            train_feat = input_data[0:-1] if len(values) > 1 else input_data
            train_label = input_data[-1]
            with autograd.record():
                preds = [net(*t) for t in zip(*train_feat)]
                ls = [loss(p, s) for p, s in zip(preds, train_label)]
            for l_ in ls:
                l_.backward()
            l += sum([l_.asnumpy() for l_ in ls]).mean() / len(ctx_list)
            trainer.step(values[0].shape[0])
            metric.add(l, values[0].shape[0], values[0].size)
            timer.stop()
        if len(kwargs) > 0:  # used in the AutoRec section
            test_rmse = evaluator(net, test_iter, kwargs['inter_mat'],
                                  ctx_list)
        else:
            test_rmse = evaluator(net, test_iter, ctx_list)
        train_l = l / (i + 1)
        animator.add(epoch + 1, (train_l, test_rmse))
    print('train loss %.3f, test RMSE %.3f'
          % (metric[0] / metric[1], test_rmse))
    print('%.1f examples/sec on %s'
          % (metric[2] * num_epochs / timer.sum(), ctx_list))
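
evaluator is a caller-supplied callback with the signature evaluator(net, test_iter, ctx_list) (plus inter_mat for AutoRec). A minimal RMSE evaluator sketch for the plain matrix-factorization case; the helper name and the (users, items, ratings) batch layout are assumptions:

def rmse_evaluator(net, test_iter, ctx_list):
    # Accumulate squared error of net(user, item) against held-out ratings
    se, cnt = 0.0, 0
    for users, items, ratings in test_iter:
        u, i, r = (gluon.utils.split_and_load(v, ctx_list, even_split=False)
                   for v in (users, items, ratings))
        for uu, ii, rr in zip(u, i, r):
            diff = net(uu, ii) - rr.astype('float32')
            se += float((diff ** 2).sum().asnumpy())
            cnt += diff.size
    return (se / cnt) ** 0.5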
Example #4
def train_ranking(net,
                  train_iter,
                  test_iter,
                  loss,
                  trainer,
                  test_seq_iter,
                  num_users,
                  num_items,
                  num_epochs,
                  ctx_list,
                  evaluator,
                  candidates,
                  eval_step=1):
    timer, hit_rate, auc = d2l.Timer(), 0, 0
    animator = d2l.Animator(xlabel='epoch',
                            xlim=[1, num_epochs],
                            ylim=[0, 1],
                            legend=['test hit rate', 'test AUC'])
    for epoch in range(num_epochs):
        metric, l = d2l.Accumulator(3), 0.
        for i, values in enumerate(train_iter):
            timer.start()  # pairs with timer.stop() at the end of this batch
            input_data = []
            for v in values:
                input_data.append(gluon.utils.split_and_load(v, ctx_list))
            with autograd.record():
                p_pos = [net(*t) for t in zip(*input_data[0:-1])]
                p_neg = [
                    net(*t) for t in zip(*input_data[0:-2], input_data[-1])
                ]
                ls = [loss(p, n) for p, n in zip(p_pos, p_neg)]
            for l_ in ls:
                l_.backward(retain_graph=False)
            l += sum([l_.asnumpy() for l_ in ls]).mean() / len(ctx_list)
            trainer.step(values[0].shape[0])
            metric.add(l, values[0].shape[0], values[0].size)
            timer.stop()
        with autograd.predict_mode():
            if (epoch + 1) % eval_step == 0:
                hit_rate, auc = evaluator(net, test_iter, test_seq_iter,
                                          candidates, num_users, num_items,
                                          ctx_list)
                animator.add(epoch + 1, (hit_rate, auc))
    print('train loss %.3f, test hit rate %.3f, test AUC %.3f' %
          (metric[0] / metric[1], hit_rate, auc))
    print('%.1f examples/sec on %s' %
          (metric[2] * num_epochs / timer.sum(), ctx_list))
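
The loss consumed here is pairwise: it receives the scores of a positive item and a sampled negative item. The d2l recommender chapters define a BPR loss for exactly this contract; its definition is roughly the following (np/npx are mxnet.np and mxnet.npx, imported at the top):

class BPRLoss(gluon.loss.Loss):
    """Bayesian Personalized Ranking loss: -log sigmoid(pos - neg)."""
    def __init__(self, weight=None, batch_axis=0, **kwargs):
        super(BPRLoss, self).__init__(weight, batch_axis, **kwargs)

    def forward(self, positive, negative):
        distances = positive - negative
        return -np.sum(np.log(npx.sigmoid(distances)), 0, keepdims=True)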
Example #5
# (The snippet starts mid-expression: these lines are the tail of a
# gluon.Trainer construction. The header is reconstructed below; the 'adam'
# optimizer is an assumption based on the sibling examples.)
trainer = gluon.Trainer(net.collect_params(), 'adam', {
    'learning_rate': lr,
    'wd': wd
})

num_batches, timer = len(DataLoader_Single_train), d2l.Timer()
animator = d2l.Animator(
    xlabel='epoch',
    xlim=[0, num_epochs],
    ylim=[0, 1],
    legend=['train loss', 'train acc', 'val loss', 'val acc'])

ctx = d2l.try_all_gpus()
for epoch in range(num_epochs):
    # Training loop
    # Sum of training loss, sum of training accuracy,
    # no. of examples, no. of predictions
    metric = d2l.Accumulator(4)
    for i, (Xs_in, ys_in) in enumerate(DataLoader_Single_train):
        print("Training iteration: " + str(i))
        timer.start()
        Xs = gluon.utils.split_and_load(Xs_in.astype("float32"), ctx)
        ys = gluon.utils.split_and_load(ys_in.astype("float32"), ctx)
        with autograd.record():
            # Channels-last (NHWC) -> channels-first (NCHW) for the conv net
            pys = [net(X.transpose(axes=(0, 3, 1, 2))) for X in Xs]
            ls = [loss(py, y) for py, y in zip(pys, ys)]
        for l in ls:
            l.backward()
        trainer.step(ys_in.shape[0])
        train_loss_sum = sum([float(l.sum().asnumpy()[0]) for l in ls])
        train_acc_sum = sum(
            d2l.accuracy(py.asnumpy(), y.asnumpy()) for py, y in zip(pys, ys))
        l, acc = train_loss_sum, train_acc_sum
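
The fragment breaks off mid-loop. Given the Accumulator(4) and the animator legend above, the remainder presumably mirrors train_ch13; a hedged completion sketch (everything below is reconstructed, not from the source):

        metric.add(l, acc, ys_in.shape[0], ys_in.size)
        timer.stop()
        if (i + 1) % max(1, num_batches // 5) == 0:
            animator.add(epoch + i / num_batches,
                         (metric[0] / metric[2], metric[1] / metric[3],
                          None, None))
    # A validation pass over a held-out loader would follow here to fill in
    # the 'val loss' and 'val acc' series declared in the animator legend.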
Example #6
def train_model(args, reporter=None):
    """Training process."""
    logger.enable_log_file('valid.log')

    ctx = d2l.try_all_gpus()

    log.info('Loading data from %s...', args.data_file)
    (train_iter, pos_weight, validate_feats, validate_valids, test_feats,
     test_valids) = load_data(args.data_file, args.batch_size)
    validate_feats = np.array(validate_feats, ctx=ctx[0])
    validate_valids = np.array(validate_valids, ctx=ctx[0])
    test_feats = np.array(test_feats, ctx=ctx[0])
    test_valids = np.array(test_valids, ctx=ctx[0])

    # Initialize loss function (pos_weight is logged but not applied here).
    log.info('Positive weight for CELoss: %.2f', pos_weight)
    valid_loss = gluon.loss.SoftmaxCrossEntropyLoss()

    fnet = [
        'Valid Hiddens: {}'.format(args.cls_hiddens),
        'Dropout: {}'.format(args.dropout),
    ]
    log.info('Network\n\t%s', '\n\t'.join(fnet))
    fparams = [
        'Batch Size: {}'.format(args.batch_size),
        'Epochs: {}'.format(args.epochs), 'Learning Rate: {}'.format(args.lr),
        'Weight Decay: {}'.format(args.wd)
    ]
    log.info('Hyper-Parameters:\n\t%s', '\n\t'.join(fparams))

    net = ValidNet(args.cls_hiddens, args.dropout)
    net.hybridize()
    net.initialize(init.Xavier(), ctx=ctx)
    log.info('Model initialized on %s', str(ctx))

    # Initialize trainer.
    trainer = gluon.Trainer(net.collect_params(), 'adam', {
        'learning_rate': args.lr,
        'wd': args.wd
    })

    log.info('Training...')
    metric = d2l.Accumulator(2)
    for epoch in range(args.epochs):
        # if epoch % 10 == 0:
        #     args.lr *= 0.1
        #     trainer.set_learning_rate(args.lr)
        #     log.info('Reset learning rate to %e', args.lr)
        if reporter is None:
            log.info('Epoch %d', epoch)
            progress = tqdm.tqdm(
                train_iter,
                bar_format='{desc} {percentage:3.0f}%|{bar:50}{r_bar}')
        else:
            progress = train_iter
        for iter_idx, (batch_feat, batch_valids) in enumerate(progress):
            np_feat = split_and_load(batch_feat, ctx, even_split=False)[0]
            np_valid = split_and_load(batch_valids, ctx, even_split=False)[0]
            with autograd.record():
                ls = get_batch_loss(net, valid_loss, np_feat, np_valid)
            ls.backward()
            l_mean = float(ls)
            trainer.step(1)
            metric.add(l_mean, 1)
            if reporter is None and iter_idx % 30 == 0:
                progress.set_description_str(desc='Loss {:.3f}'.format(l_mean),
                                             refresh=True)
        #val_acc = test_acc(net, validate_feats, validate_valids, print_log=False)
        #if reporter is None:
        #    log.info('Epoch %d: Loss %.3f, Valid Error %.2f%%', epoch,
        #             metric[0] / metric[1], val_acc)
        #else:
        #    reporter(epoch=epoch, accuracy=val_acc)

    #if reporter is None:
    #    log.info('Final loss %.3f', metric[0] / metric[1])

    log.info('Testing...')
    test_acc(net, test_feats, test_valids)
    return net
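
get_batch_loss and test_acc are project-local helpers not shown in this snippet. Given how get_batch_loss is called above (single-context shard, scalar result read back via float(ls)), a plausible sketch is:

def get_batch_loss(net, loss_fn, feat, valid):
    # Forward the shard and average the per-example CE loss so the
    # result is a scalar that ls.backward() and float(ls) can consume.
    preds = net(feat)
    return loss_fn(preds, valid).mean()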
Example #7
def train_bert(args, reporter=None):
    """Training process."""
    logger.enable_log_file('train-{}'.format(
        os.path.basename(args.data_file).replace('.csv', '.log')))

    ctx = d2l.try_all_gpus()

    log.info('Loading data from %s...', args.data_file)
    (train_iter, pos_weight, validate_feats, validate_thrpts, test_feats,
     test_thrpts, train_thrpt_avg,
     train_thrpt_std) = load_data(args.data_file, args.batch_size,
                                  args.center_hiddens)
    validate_feats = np.array(validate_feats, ctx=ctx[0])
    validate_thrpts = np.array(validate_thrpts, ctx=ctx[0])
    test_feats = np.array(test_feats, ctx=ctx[0])
    test_thrpts = np.array(test_thrpts, ctx=ctx[0])
    #log.info('Positive weight for CELoss: %.2f', pos_weight)

    fnet = [
        'Center Heads: {}'.format(args.center_heads),
        'Center FFN Hiddens: {}'.format(args.center_ffn_hiddens),
        'Center Layers: {}'.format(args.center_layers),
        'Reg Encoder FFN Hiddens: {}'.format(args.reg_encode_ffn_hiddens),
        'Reg Encoder Layers: {}'.format(args.reg_encode_layers),
        'Regression Hiddens: {}'.format(args.reg_hiddens),
        'Valid Hiddens: {}'.format(args.cls_hiddens),
    ]
    log.info('Network\n\t%s', '\n\t'.join(fnet))
    fparams = [
        'Batch Size: {}'.format(args.batch_size),
        'Dropout: {}'.format(args.dropout), 'Epochs: {}'.format(args.epochs),
        'Learning Rate: {}'.format(args.lr), 'Weight Decay: {}'.format(args.wd)
    ]
    log.info('Hyper-Parameters:\n\t%s', '\n\t'.join(fparams))

    net = BERTModel(args.center_hiddens, args.center_ffn_hiddens,
                    args.center_heads, args.center_layers,
                    args.reg_encode_ffn_hiddens, args.reg_encode_layers,
                    args.reg_hiddens, args.cls_hiddens, args.dropout)
    net.initialize(init.Xavier(), ctx=ctx)
    log.info('Model initialized on %s', str(ctx))

    # Initialize trainer.
    trainer = gluon.Trainer(net.collect_params(), 'adam', {
        'learning_rate': args.lr,
        'wd': args.wd
    })

    # Initialize loss functions.
    valid_loss = gluon.loss.SoftmaxCELoss()  # weight=pos_weight disabled for now
    thrpt_loss = gluon.loss.L2Loss()

    log.info('Training...')
    metric = d2l.Accumulator(2)
    for epoch in range(args.epochs):
        # if epoch % 10 == 0:
        #     args.lr *= 0.1
        #     trainer.set_learning_rate(args.lr)
        #     log.info('Reset learning rate to %e', args.lr)
        if reporter is None:
            log.info('Epoch %d', epoch)
            progress = tqdm.tqdm(
                train_iter,
                bar_format='{desc} {percentage:3.0f}%|{bar:50}{r_bar}')
        else:
            progress = train_iter
        for iter_idx, (batch_feat, batch_thrpt,
                       batch_label) in enumerate(progress):
            np_feat = split_and_load(batch_feat, ctx, even_split=False)[0]
            np_thrpt = split_and_load(batch_thrpt, ctx, even_split=False)[0]
            np_label = split_and_load(batch_label, ctx, even_split=False)[0]
            with autograd.record():
                ls = get_batch_loss(net, valid_loss, thrpt_loss, np_feat,
                                    np_thrpt, np_label)
            ls.backward()
            trainer.step(1)
            l_mean = float(ls)
            metric.add(l_mean, 1)
            if reporter is None and iter_idx % 30 == 0:
                progress.set_description_str(desc='Loss {:.3f}'.format(l_mean),
                                             refresh=True)
        val_acc = test_acc(net,
                           validate_feats,
                           validate_thrpts,
                           train_thrpt_avg,
                           train_thrpt_std,
                           print_log=False)
        if reporter is None:
            log.info(
                'Epoch %d: Loss %.3f, Valid Error %.2f%%, Thrpt Error %.3f (std %.3f)',
                epoch, metric[0] / metric[1], val_acc[0], val_acc[1][0],
                val_acc[1][1])
        else:
            # FIXME: Not working now
            reporter(epoch=epoch, accuracy=val_acc)

    if reporter is None:
        log.info('Final loss %.3f', metric[0] / metric[1])

    log.info('Testing...')
    test_acc(net, test_feats, test_thrpts, train_thrpt_avg, train_thrpt_std)
    return net
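
Here get_batch_loss combines two heads: a validity classifier scored with valid_loss and a throughput regressor scored with thrpt_loss. Its body is not shown; a hedged sketch of the contract implied by the call site:

def get_batch_loss(net, valid_loss, thrpt_loss, feat, thrpt, label):
    # Assumes the network returns (validity logits, throughput prediction)
    valid_logits, thrpt_pred = net(feat)
    return (valid_loss(valid_logits, label) +
            thrpt_loss(thrpt_pred, thrpt)).mean()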
Example #8
def train_bert(args, reporter=None):
    """Training process."""
    logger.enable_log_file('train-{}'.format(
        os.path.basename(args.data_file).replace('.csv', '.log')))

    ctx = d2l.try_all_gpus()

    log.info('Loading data from %s...', args.data_file)
    (train_iter, validate_feats, validate_thrpts, test_feats, test_thrpts,
     _) = load_data(args.data_file, args.batch_size, args.num_cls)
    validate_feats = np.array(validate_feats, ctx=ctx[0])
    validate_thrpts = np.array(validate_thrpts, ctx=ctx[0])
    test_feats = np.array(test_feats, ctx=ctx[0])
    test_thrpts = np.array(test_thrpts, ctx=ctx[0])

    fnet = [
        'Regression Hiddens: {}'.format(args.reg_hiddens),
        'Output Classes: {}'.format(args.num_cls)
    ]
    log.info('Network\n\t%s', '\n\t'.join(fnet))
    fparams = [
        'Batch Size: {}'.format(args.batch_size),
        'Dropout: {}'.format(args.dropout), 'Epochs: {}'.format(args.epochs),
        'Learning Rate: {}'.format(args.lr), 'Weight Decay: {}'.format(args.wd)
    ]
    log.info('Hyper-Parameters:\n\t%s', '\n\t'.join(fparams))

    net = ThrptPred(args.reg_hiddens, args.num_cls, args.dropout)
    net.initialize(init.Xavier(), ctx=ctx)
    log.info('Model initialized on %s', str(ctx))

    # Initialize trainer.
    trainer = gluon.Trainer(net.collect_params(), 'adam', {
        'learning_rate': args.lr,
        'wd': args.wd
    })

    # Initialize loss functions.
    thrpt_loss = gluon.loss.SoftmaxCrossEntropyLoss()
    # Per-class weights 1, 3, 5, ...: errors on higher classes cost more
    sample_weight = np.array(list(range(1, 2 * args.num_cls, 2)), ctx=ctx[0])

    log.info('Training...')
    metric = d2l.Accumulator(2)
    for epoch in range(args.epochs):
        # if epoch % 10 == 0:
        #     args.lr *= 0.1
        #     trainer.set_learning_rate(args.lr)
        #     log.info('Reset learning rate to %e', args.lr)
        if reporter is None:
            log.info('Epoch %d', epoch)
            progress = tqdm.tqdm(
                train_iter,
                bar_format='{desc} {percentage:3.0f}%|{bar:50}{r_bar}')
        else:
            progress = train_iter
        for iter_idx, (batch_feat, batch_thrpt) in enumerate(progress):
            np_feat = split_and_load(batch_feat, ctx, even_split=False)[0]
            np_thrpt = split_and_load(batch_thrpt, ctx, even_split=False)[0]
            with autograd.record():
                ls = get_batch_loss(net, thrpt_loss, np_feat, np_thrpt,
                                    sample_weight)
            ls.backward()
            trainer.step(1)
            l_mean = float(ls)
            metric.add(l_mean, 1)
            if reporter is None and iter_idx % 30 == 0:
                progress.set_description_str(desc='Loss {:.3f}'.format(l_mean),
                                             refresh=True)
        val_acc = test_acc(net,
                           validate_feats,
                           validate_thrpts,
                           print_log=False)
        if reporter is None:
            log.info('Epoch %d: Loss %.3f, Accuracy %.2f%%', epoch,
                     metric[0] / metric[1], val_acc)
        else:
            reporter(epoch=epoch, accuracy=val_acc)

    if reporter is None:
        log.info('Final loss %.3f', metric[0] / metric[1])

    log.info('Testing...')
    test_acc(net, test_feats, test_thrpts)
    return net