Example #1
    parser.add_argument('--model-prefix', type=str,
                        help='the prefix of the model to save')
    parser.add_argument('--num-epochs', type=int, default=10,
                        help='the number of training epochs')
    parser.add_argument('--load-epoch', type=int,
                        help="load the model on an epoch using the model-prefix")
    parser.add_argument('--kv-store', type=str, default='local',
                        help='the kvstore type')
    parser.add_argument('--lr-factor', type=float, default=1,
                        help='multiply the lr by this factor every lr-factor-epoch epochs')
    parser.add_argument('--lr-factor-epoch', type=float, default=1,
                        help='the epoch interval at which to scale the lr; may be fractional, e.g. 0.5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss

    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    else:
        data_shape = (1, 28, 28)
        net = get_lenet()

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape), mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape))
Example #2
def main():
    global args
    args = parser.parse_args()
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    setup_logging(os.path.join(save_path, 'log.txt'))

    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')
    logging.debug("run arguments: %s", args)
    logging.info("using pretrained cnn %s", args.cnn)

    cnn = resnet.__dict__[args.cnn](pretrained=True)

    # loading annotations into memory...
    # Done (t=0.49s)
    # creating index...
    # index created!
    vocab = build_vocab()  # len(vocab) 10003

    model = CaptionModel(cnn,
                         vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    # loading  annotations into memory...
    # Done (t=0.47s)
    # creating index...
    # index created!
    train_data = get_iterator(
        get_coco_data(vocab, train=True),
        batch_size=20,  #batch_size=args.batch_size    # default 128
        max_length=25,  #max_length=args.max_length    # default 30
        shuffle=True,
        num_workers=args.workers)

    # loading  annotations into memory...
    # Done (t=0.37s)
    # creating index...
    # index created!
    val_data = get_iterator(
        get_coco_data(vocab, train=False),
        batch_size=20,  #batch_size=args.eval_batch_size  # default 128
        max_length=25,  #max_length=args.max_length    # default 30
        shuffle=False,
        num_workers=args.workers)

    #if 'cuda' in args.type:
    #    cudnn.benchmark = True
    #    model.cuda()

    optimizer = select_optimizer(  # args.optimizer  SGD
        args.optimizer,
        params=model.parameters(),
        lr=args.lr)  # args.lr = 0.1  float

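    # hyperparameter schedule: the lr decays by a factor of lr_decay every epoch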
    regime = lambda e: {
        'lr': args.lr * (args.lr_decay**e),
        'momentum': args.momentum,
        'weight_decay': args.weight_decay
    }

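    # start with the CNN frozen; it is unfrozen once epoch >= args.finetune_epoch (see the loop below)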
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
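        # one full pass over `data`; weights are updated only when training=True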
        use_cuda = 'cuda' in args.type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()
        end = time.time()

        for i, (imgs, (captions, lengths)) in enumerate(data):  # len(data): number of batches (2587)
            time.sleep(1)  # debug: artificially slow each iteration
            if i == 3:  # debug: stop after a few batches
                break
            data_time.update(time.time() - end)

            #if use_cuda:
            #    imgs = imgs.cuda()
            #    captions = captions.cuda(async=True)

            imgs = Variable(imgs, volatile=not training)
            captions = Variable(captions, volatile=not training)

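            # teacher forcing: inputs are the caption minus its last step, targets the packed full caption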
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            pred, _ = model(imgs, input_captions, lengths)
            err = loss(pred, target_captions)
            # perplexity.update(math.exp(err.data[0]))
            perplexity.update(math.exp(err.item()))

            if training:
                optimizer.zero_grad()
                err.backward()
                clip_grad_norm(model.rnn.parameters(), args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                logging.info(
                    '{phase} - Epoch: [{0}][{1}/{2}]\t'
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                    'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                        epoch,
                        i,
                        len(data),
                        phase='TRAINING' if training else 'EVALUATING',
                        batch_time=batch_time,
                        data_time=data_time,
                        perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):  # e.g. range(0, 10)
        if epoch >= args.finetune_epoch:  # e.g. finetune_epoch == 3
            model.finetune_cnn(True)

        optimizer = adjust_optimizer(optimizer, epoch, regime)

        # Train
        train_perp = forward(model,
                             train_data,
                             training=True,
                             optimizer=optimizer)

        # Evaluate
        val_perp = forward(model, val_data, training=False)

        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'.format(
                         epoch + 1, train_perp=train_perp, val_perp=val_perp))
        model.save_checkpoint(checkpoint_file % (epoch + 1))
Example #3
    parser.add_argument('--kv-store', type=str, default='local',
                        help='the kvstore type')
    parser.add_argument('--lr-factor', type=float, default=1,
                        help='multiply the lr by this factor every lr-factor-epoch epochs')
    parser.add_argument('--lr-factor-epoch', type=float, default=1,
                        help='the epoch interval at which to scale the lr; may be fractional, e.g. 0.5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data

    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        net = get_network_from_json_file(args.network)

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data), mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))
Example #4
def main(args):
    print("Loading data")
    dataset = args.data.rstrip('/').split('/')[-1]
    if dataset in ['yahoo', 'yelp']:
        with_label = True
    else:
        with_label = False
    corpus = Corpus(
        args.data, max_vocab_size=args.max_vocab,
        max_length=args.max_length, with_label=with_label
    )
    pad_id = corpus.word2idx[PAD_TOKEN]
    vocab_size = len(corpus.word2idx)
    print("\ttraining data size: ", len(corpus.train))
    print("\tvocabulary size: ", vocab_size)
    print("Constructing model")
    print(args)
    device = torch.device('cpu' if args.nocuda else 'cuda')
    model = TopGenVAE(
        vocab_size, args.embed_size, args.hidden_size, args.code_size,
        args.num_topics, args.dropout
    ).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr, weight_decay=args.wd)
    best_loss = None

    train_iter = get_iterator(corpus.train, args.batch_size, True,  device)
    valid_iter = get_iterator(corpus.valid, args.batch_size, False, device)
    test_iter  = get_iterator(corpus.test,  args.batch_size, False, device)
    print("\nStart training")
    try:
        for epoch in range(1, args.epochs+1):
            epoch_start_time = time.time()
            (tr_seq_loss, tr_bow_loss, tr_kld_z, tr_kld_t,
             tr_seq_ppl, tr_bow_ppl) = train(
                 train_iter, model, pad_id, optimizer, epoch
             )
            (va_seq_loss, va_bow_loss, va_kld_z, va_kld_t,
             va_seq_ppl, va_bow_ppl) = evaluate(
                 valid_iter, model, pad_id
             )
            print('-' * 90)
            meta = "| epoch {:2d} | time {:5.2f}s ".format(epoch, time.time()-epoch_start_time)
            print(meta + "| train loss {:5.2f} {:5.2f} ({:5.2f} {:5.2f}) "
                  "| train ppl {:5.2f} {:5.2f}".format(
                      tr_seq_loss, tr_bow_loss, tr_kld_z, tr_kld_t,
                      tr_seq_ppl, tr_bow_ppl))
            print(len(meta)*' ' + "| valid loss {:5.2f} {:5.2f} ({:5.2f} {:5.2f}) "
                  "| valid ppl {:5.2f} {:5.2f}".format(
                      va_seq_loss, va_bow_loss, va_kld_z, va_kld_t,
                      va_seq_ppl, va_bow_ppl), flush=True)
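            # model selection: keep the checkpoint with the lowest combined validation loss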
            epoch_loss = va_seq_loss + va_bow_loss + va_kld_z + va_kld_t
            if best_loss is None or epoch_loss < best_loss:
                best_loss = epoch_loss
                with open(get_savepath(args), 'wb') as f:
                    torch.save(model, f)
                
    except KeyboardInterrupt:
        print('-' * 90)
        print('Exiting from training early')


    with open(get_savepath(args), 'rb') as f:
        model = torch.load(f)
    (te_seq_loss, te_bow_loss, te_kld_z, te_kld_t,
     te_seq_ppl, te_bow_ppl) = evaluate(test_iter, model, pad_id)
    print('=' * 90)
    print("| End of training | test loss {:5.2f} {:5.2f} ({:5.2f} {:5.2f}) "
          "| test ppl {:5.2f} {:5.2f}".format(
              te_seq_loss, te_bow_loss, te_kld_z, te_kld_t,
              te_seq_ppl, te_bow_ppl))
    print('=' * 90)
Example #5
def main():
    k = 10
    repeat = 4
    epochs = 30
    batchsize = 256
    learning_rate = 1e-4
    # dataset_object = tf.keras.datasets.mnist
    dataset_object = tf.keras.datasets.cifar10
    (x_train, y_train), (x_test, y_test) = dataset_object.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    print('x_train:', x_train.shape, x_train.min(), x_train.max())
    print('y_train:', y_train.shape, y_train.min(), y_train.max())

    train_iterator = get_iterator(x_train, y_train, batchsize=batchsize)
    x_batch, x_perturb, y_batch = next(train_iterator)

    print('xbatch', x_batch.shape, 'xperturb', x_perturb.shape, 'ybatch',
          y_batch.shape)

    model = ResNetModel(k=k)
    print('x_batch:', x_batch.shape)
    z = model(x_batch, head='main', verbose=True)
    for z_ in z:
        print('z:', z_.shape)
    z = model(x_batch, head='aux')
    for z_ in z:
        print('z:', z_.shape)
    model.summary()

    # optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    plt.figure(figsize=(3, 3), dpi=300)
    ax = plt.gca()
    ax.set_xlim([-1, 1])
    ax.set_ylim([-1, 1])
    main_losses = []
    aux_losses = []
    for e in range(epochs):
        # rebuild the optimizer each epoch with a 1 / (2 * (e + 1)) learning-rate decay
        optimizer = tf.train.AdamOptimizer(
            learning_rate=learning_rate / (2 * (e + 1)))
        # mnist_generator = generate_mnist(x_train, y_train, batchsize=batchsize)

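        # alternate which head is trained: 'main' on even epochs, 'aux' on odd epochs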
        if e % 2 == 0:
            trainhead = 'main'
        else:
            trainhead = 'aux'

        train_iterator = get_iterator(x_train,
                                      y_train,
                                      batchsize=batchsize,
                                      repeat=repeat)
        # 'step' avoids shadowing the cluster count k defined at the top of main()
        for step, (x_batch, x_perturb, y_batch) in enumerate(train_iterator):
            # if k % 2 == 0:
            #   trainhead = 'main'
            # else:
            #   trainhead = 'aux'

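            # IID_loss rewards matching cluster assignments for an image and its perturbed copy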
            with tf.GradientTape() as tape:
                z = model(x_batch, head=trainhead)
                zp = model(x_perturb, head=trainhead)

                losses = [IID_loss(z_, zp_) for z_, zp_ in zip(z, zp)]
                loss = tf.reduce_mean(losses)
                grads = tape.gradient(loss, model.trainable_variables)

            if step % 2 == 0:
                main_losses.append(loss.numpy())
            else:
                aux_losses.append(loss.numpy())

            optimizer.apply_gradients(zip(grads, model.trainable_variables))

            if step % 100 == 0:
                # take the last head and write
                save_images(x_batch, z[0], 'clusters/{}'.format(e), n=5)
                print('e: {} step: {} loss={}'.format(e, step, loss.numpy()))
                for i in range(1):
                    zmax = tf.argmax(z[i], axis=-1).numpy()
                    zpmax = tf.argmax(zp[i], axis=-1).numpy()
                    acc = (zmax == zpmax).mean()
                    print('\tacc={}'.format(acc), np.unique(zmax),
                          np.unique(zpmax))

        # Each epoch
        ztest = {r: [] for r in range(1)}
        ylabel = []
        # test_iterator = get_iterator(x_train, y_train, batchsize=batchsize, repeat=1)
        test_iterator = get_iterator(x_test,
                                     y_test,
                                     batchsize=batchsize,
                                     repeat=1)
        for j, (x_batch, x_perturb, y_batch) in enumerate(test_iterator):
            for i, h in enumerate(model(x_batch, head='main')):
                ztest[i].append(h)
            ylabel.append(y_batch)

        # ztest = np.concatenate(ztest, axis=0)
        ylabel = np.squeeze(np.concatenate(ylabel))
        print('ylabel', ylabel.shape)
        for r in range(1):
            ztest[r] = np.concatenate(ztest[r], axis=0)
            print('ztest', ztest[r].shape)
            convex_combo(ztest[r], ylabel, ax,
                         'pointcloud/{}_{}.png'.format(r, e))

    with open('losses_main.txt', 'w+') as f:
        for l in main_losses:
            f.write('{}\n'.format(l))

    with open('losses_aux.txt', 'w+') as f:
        for l in aux_losses:
            f.write('{}\n'.format(l))
Example #6
    parser.add_argument(
        '--lr-factor-epoch',
        type=float,
        default=1,
        help='the epoch interval at which to scale the lr; may be fractional, e.g. 0.5')
    return parser.parse_args()


if __name__ == '__main__':
    args = parse_args()
    use_caffe_loss = args.caffe_loss
    use_caffe_data = args.caffe_data

    data_shape = ()
    if args.network == 'mlp':
        data_shape = (784, )
        net = get_mlp()
    elif args.network == 'lenet':
        if not use_caffe_data:
            data_shape = (1, 28, 28)
        net = get_lenet()
    else:
        net = get_network_from_json_file(args.network)

    # train
    if use_caffe_loss:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data),
                        mx.metric.Caffe())
    else:
        train_model.fit(args, net, get_iterator(data_shape, use_caffe_data))
Example #7
def main():
    global args
    args = parser.parse_args()
    if args.save == '':
        args.save = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    if args.pretrained != 1:
        # when training the CNN from scratch, fine-tune it from the first epoch
        # (and expect to train for more epochs overall)
        args.finetune_epoch = 0

    setup_logging(os.path.join(save_path, 'log.txt'))
    checkpoint_file = os.path.join(save_path, 'checkpoint_epoch_%s.pth.tar')

    logging.debug("run arguments: %s", args)
    if args.pretrained == 1:
        logging.info("using pretrained cnn %s", args.cnn)
        cnn = resnet.__dict__[args.cnn](pretrained=True)
    else:
        logging.info("using from-scratch cnn %s", args.cnn)
        cnn = resnet.__dict__[args.cnn](pretrained=False)

    vocab = build_vocab()
    model = CaptionModel(cnn, vocab,
                         embedding_size=args.embedding_size,
                         rnn_size=args.rnn_size,
                         num_layers=args.num_layers,
                         share_embedding_weights=args.share_weights)

    train_data = get_iterator(get_coco_data(vocab, train=True),
                              batch_size=args.batch_size,
                              max_length=args.max_length,
                              shuffle=True,
                              num_workers=args.workers)
    val_data = get_iterator(get_coco_data(vocab, train=False),
                            batch_size=args.eval_batch_size,
                            max_length=args.max_length,
                            shuffle=False,
                            num_workers=args.workers)

    if 'cuda' in args.type:
        cudnn.benchmark = True
        model.cuda()

    optimizer = select_optimizer(
        args.optimizer, params=model.parameters(), lr=args.lr)
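    # exponential lr decay schedule, applied each epoch via adjust_optimizer below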
    regime = lambda e: {'lr': args.lr * (args.lr_decay ** e),
                        'momentum': args.momentum,
                        'weight_decay': args.weight_decay}
    model.finetune_cnn(False)

    def forward(model, data, training=True, optimizer=None):
        use_cuda = 'cuda' in args.type
        loss = nn.CrossEntropyLoss()
        perplexity = AverageMeter()
        batch_time = AverageMeter()
        data_time = AverageMeter()

        if training:
            model.train()
        else:
            model.eval()

        end = time.time()
        for i, (imgs, (captions, lengths)) in enumerate(data):
            data_time.update(time.time() - end)
            if use_cuda:
                imgs = imgs.cuda()
                captions = captions.cuda(non_blocking=True)  # 'async' became a reserved word in Python 3.7
            imgs = Variable(imgs, volatile=not training)
            captions = Variable(captions, volatile=not training)
            input_captions = captions[:-1]
            target_captions = pack_padded_sequence(captions, lengths)[0]

            pred, _ = model(imgs, input_captions, lengths)
            err = loss(pred, target_captions)
            perplexity.update(math.exp(err.item()))  # err.data[0] fails on recent PyTorch

            if training:
                optimizer.zero_grad()
                err.backward()
                clip_grad_norm(model.rnn.parameters(), args.grad_clip)
                optimizer.step()

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()
            if i % args.print_freq == 0:
                logging.info('{phase} - Epoch: [{0}][{1}/{2}]\t'
                             'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                             'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                             'Perplexity {perp.val:.4f} ({perp.avg:.4f})'.format(
                                 epoch, i, len(data),
                                 phase='TRAINING' if training else 'EVALUATING',
                                 batch_time=batch_time,
                                 data_time=data_time, perp=perplexity))

        return perplexity.avg

    for epoch in range(args.start_epoch, args.epochs):
        if epoch >= args.finetune_epoch:
            model.finetune_cnn(True)
        optimizer = adjust_optimizer(
            optimizer, epoch, regime)
        # Train
        train_perp = forward(
            model, train_data, training=True, optimizer=optimizer)
        # Evaluate
        val_perp = forward(model, val_data, training=False)

        logging.info('\n Epoch: {0}\t'
                     'Training Perplexity {train_perp:.4f} \t'
                     'Validation Perplexity {val_perp:.4f} \n'
                     .format(epoch + 1, train_perp=train_perp, val_perp=val_perp))
        if epoch % args.save_freq == 0 or epoch == args.epochs-1:
            model.save_checkpoint(checkpoint_file % (epoch + 1))
Example #8
def main(args):
    print("Loading data")
    dataset = args.data.rstrip('/').split('/')[-1]
    corpus = Corpus(args.data,
                    max_vocab_size=args.max_vocab,
                    max_length=args.max_length)
    pad_id = corpus.word2idx[PAD_TOKEN]
    sos_id = corpus.word2idx[SOS_TOKEN]
    vocab_size = len(corpus.word2idx)
    print(args)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device_id)
    cent_name = dataset + "_centers.pt"
    centers = None
    if args.center:
        centers = torch.load(cent_name).to(device)
        # centers.requires_grad = False
        centers = centers.detach()
    model = LstmVAE(vocab_size,
                    args.embed_dim,
                    args.hidden_dim,
                    args.code_dim,
                    args.dropout,
                    centers=centers,
                    enc_type=args.enc_type,
                    de_type=args.de_type,
                    dist=args.dist,
                    fix=args.fix,
                    device=device).to(device)
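    # optionally attach normalizing flows to the latent code for a more flexible posterior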
    if args.flow:
        flow = NormalizingFlows(args.code_dim,
                                n_flows=args.n_flows,
                                reg=args.reg,
                                band=args.band).to(device)
        model.add_flow(flow)

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 betas=(args.beta, 0.999),
                                 weight_decay=args.wd)

    train_iter = get_iterator(corpus.train, args.batch_size, True, device)
    valid_iter = get_iterator(corpus.valid, args.batch_size, False, device)
    test_iter = get_iterator(corpus.test, args.batch_size, False, device)

    start_epoch = 1
    if args.load:
        start_epoch, model, optimizer = load_checkpoint(
            model, optimizer, device, args.save_name)

    print("\nStart training")
    try:
        for epoch in range(start_epoch, args.epochs + 1):
            (re_loss, kl_divergence, flow_kld, mi1, mi2, mmd_loss, nll_ppl,
             nll, iw_nll, sum_log_j, start, batch_time) = run(args,
                                                              train_iter,
                                                              model,
                                                              pad_id,
                                                              optimizer,
                                                              epoch,
                                                              train=True)
            if args.save:
                save_checkpoint(model, optimizer, epoch, args.save_name)
            print('-' * 90)
            meta = "| epoch {:2d} ".format(epoch)
            print(
                meta +
                "| train loss {:5.2f} ({:5.2f}) ({:5.2f}) | train ppl {:5.2f} ({:5.2f} {:5.2f}) | mmd {:5.2f} | mi E {:5.2f} | mi R {:5.2f} | log J {:5.2f}"
                "| Time {batch_time.val:5.2f} ({batch_time.avg:5.2f})\t".
                format(re_loss,
                       kl_divergence,
                       flow_kld,
                       nll_ppl,
                       nll,
                       iw_nll,
                       mmd_loss,
                       mi1,
                       mi2,
                       sum_log_j,
                       batch_time=batch_time))

            (re_loss, kl_divergence, flow_kld, mi1, mi2, mmd_loss, nll_ppl,
             nll, iw_nll, sum_log_j, _, _) = run(args,
                                                 valid_iter,
                                                 model,
                                                 pad_id,
                                                 optimizer,
                                                 epoch,
                                                 train=False)

            print(
                len(meta) * ' ' +
                "| valid loss {:5.2f} ({:5.2f}) ({:5.2f}) | valid ppl {:5.2f} ({:5.2f} {:5.2f})"
                " | mmd {:5.2f} | mi E {:5.2f} | mi R {:5.2f} | log J {:5.2f} \t"
                .format(re_loss,
                        kl_divergence,
                        flow_kld,
                        nll_ppl,
                        nll,
                        iw_nll,
                        mmd_loss,
                        mi1,
                        mi2,
                        sum_log_j),
                flush=True)  # flush is a print() argument, not a str.format() one

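            # halve the learning rate at epochs 15 and 35 on the yahoo dataset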
            if dataset in ['yahoo'] and epoch in [15, 35]:
                for param_group in optimizer.param_groups:
                    param_group['lr'] *= 0.5

    except KeyboardInterrupt:
        print('-' * 50)
        print('Quit training')

    (re_loss, kl_divergence, flow_kld, mi1, mi2, mmd_loss, nll_ppl, nll,
     iw_nll, sum_log_j, _, _) = run(args,
                                    test_iter,
                                    model,
                                    pad_id,
                                    optimizer,
                                    epoch,
                                    train=False)
    print('=' * 90)
    print(
        "| Test results | test loss {:5.2f} ({:5.2f}) ({:5.2f}) | test ppl {:5.2f} ({:5.2f} {:5.2f}) | test mmd {:5.2f} | mi E {:5.2f} | mi R {:5.2f} | log J {:5.2f} "
        .format(re_loss, kl_divergence, flow_kld, nll_ppl, nll, iw_nll,
                mmd_loss, mi1, mi2, sum_log_j))
    print('=' * 90)

    with open(args.test_log_name, 'a') as fd:
        print('=' * 90, file=fd)
        print(
            "{} | dist {} | ende {} | em {} | | kla {} | mmd {} | flow {} | center {} | n flow {} | ker {} | reg {} | band {} | t {} | mmd w {} | iw {} | gpu {} | log {} |"
            .format(dataset, args.dist, args.de_type, args.embed_dim, args.kla,
                    args.mmd, args.flow, args.center, args.n_flows,
                    args.kernel, args.reg, args.band, args.t, args.mmd_w,
                    args.iw, args.device_id, args.test_log_name),
            file=fd)
        print('-' * 90, file=fd)
        print(
            "| Test results | test loss {:5.2f} ({:5.2f}) ({:5.2f}) | test ppl {:5.2f} ({:5.2f} {:5.2f}) | test mmd {:5.2f} | mi E {:5.2f} | mi R {:5.2f} | log J {:5.2f}"
            .format(re_loss, kl_divergence, flow_kld, nll_ppl, nll, iw_nll,
                    mmd_loss, mi1, mi2, sum_log_j),
            file=fd)
        print('=' * 90, file=fd)
Example #9
def main(args):
    print("Loading data")
    dataset = args.data.rstrip('/').split('/')[-1]
    if dataset in ['yahoo']:
        with_label = True
    else:
        with_label = False
    if dataset in ['yahoo']:
        corpus = CorpusYahoo(args.data,
                             max_vocab_size=args.max_vocab,
                             max_length=args.max_length,
                             with_label=with_label)
        pad_id = corpus.word2idx['_PAD']
    else:
        corpus = Corpus(args.data,
                        max_vocab_size=args.max_vocab,
                        max_length=args.max_length,
                        with_label=with_label)
        pad_id = corpus.word2idx[PAD_TOKEN]
    vocab_size = len(corpus.word2idx)
    print("\ttraining data size: ", len(corpus.train))
    print("\tvocabulary size: ", vocab_size)
    print("Constructing model")
    print(args)
    device = torch.device('cpu' if args.nocuda else 'cuda')
    if not args.nocuda:
        torch.cuda.set_device(args.cuda)
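    # pick the model variant: MultiNormalVAE when --diag is set, otherwise LstmVAE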
    if args.diag:
        model = MultiNormalVAE(vocab_size,
                               args.embed_size,
                               args.hidden_size,
                               args.code_size,
                               args.dropout,
                               batch_size=args.batch_size,
                               decomp=args.method,
                               copula=args.copula)
    else:
        model = LstmVAE(vocab_size,
                        args.embed_size,
                        args.hidden_size,
                        args.code_size,
                        args.dropout,
                        batch_size=args.batch_size,
                        decomp=args.method,
                        copula=args.copula)
    if args.multi:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.wd)
    best_loss = None

    train_iter = get_iterator(corpus.train, args.batch_size, True, device)
    valid_iter = get_iterator(corpus.valid, args.batch_size, False, device)
    test_iter = get_iterator(corpus.test, args.batch_size, False, device)

    start_epoch = 0
    if args.load:
        start_epoch, model, optimizer, losslogger = load_checkpoint(
            model, optimizer, device, args.save_name)

    tr_loggers = []
    va_loggers = []
    print("\nStart training")
    try:
        for epoch in range(start_epoch, args.epochs):
            epoch_start_time = time.time()
            (tr_seq_loss, tr_bow_loss, tr_kld, tr_mi, tr_tc, tr_dwkl,
             tr_seq_ppl, tr_bow_ppl, tr_log_copula, tr_mmd,
             batch_time) = train(train_iter, model, pad_id, optimizer, epoch)
            (va_seq_loss, va_bow_loss, va_kld, va_mi, va_tc, va_dwkl,
             va_seq_ppl, va_bow_ppl, va_log_copula,
             va_mmd) = evaluate(valid_iter, model, pad_id)

            tr_losslogger = {
                "epoch": epoch,
                "seq_loss": tr_seq_loss,
                "bow_loss": tr_bow_loss,
                "kld": tr_kld,
                "mutual info": tr_mi,
                "tc": tr_tc,
                "dwkl": tr_dwkl,
                "seq_ppl": tr_seq_ppl,
                "bow_ppl": tr_bow_ppl,
                "log_copula": tr_log_copula,
                "mmd": tr_mmd,
                "time": batch_time
            }
            tr_loggers.append(tr_losslogger)

            losslogger = {
                "epoch": epoch,
                "seq_loss": va_seq_loss,
                "bow_loss": va_bow_loss,
                "kld": va_kld,
                "mutual info": va_mi,
                "tc": va_tc,
                "dwkl": va_dwkl,
                "seq_ppl": va_seq_ppl,
                "bow_ppl": va_bow_ppl,
                "log_copula": va_log_copula,
                "mmd": va_mmd,
                "time": batch_time
            }
            va_loggers.append(losslogger)

            save_checkpoint(model, optimizer, losslogger, args.save_name)
            print('-' * 90)
            meta = "| epoch {:2d} | time {:5.2f}s ".format(
                epoch,
                time.time() - epoch_start_time)
            print(
                meta + "| train loss {:5.2f} {:5.2f} ({:5.2f}) "
                "| {:5.2f} {:5.2f} {:5.2f} "
                "| train ppl {:5.2f} {:5.2f} | log copula {:5.2f} | mmd {:5.2f}"
                "| Time {batch_time.val:5.2f} ({batch_time.avg:5.2f})\t".
                format(tr_seq_loss,
                       tr_bow_loss,
                       tr_kld,
                       tr_mi,
                       tr_tc,
                       tr_dwkl,
                       tr_seq_ppl,
                       tr_bow_ppl,
                       tr_log_copula,
                       tr_mmd,
                       batch_time=batch_time))
            print(
                len(meta) * ' ' + "| valid loss {:5.2f} {:5.2f} ({:5.2f}) "
                "| {:5.2f} {:5.2f} {:5.2f} "
                "| valid ppl {:5.2f} {:5.2f} | valid log copula {:5.2f} | valid mmd {:5.2f}"
                "| joint NLL {:5.2f}".format(
                    va_seq_loss, va_bow_loss, va_kld, va_mi, va_tc, va_dwkl,
                    va_seq_ppl, va_bow_ppl, va_log_copula, va_mmd,
                    va_seq_loss + va_kld - va_log_copula),
                flush=True)
            epoch_loss = va_seq_loss + va_kld
            if best_loss is None or epoch_loss < best_loss:
                best_loss = epoch_loss
                # with open(get_savepath(args), 'wb') as f:
                #     torch.save(model, f)

    except KeyboardInterrupt:
        print('-' * 90)
        print('Exiting from training early')

    save_logger(tr_loggers, va_loggers, args.loss_name)

    # with open(get_savepath(args), 'rb') as f:
    #     model = torch.load(f)
    (te_seq_loss, te_bow_loss, te_kld, te_mi, te_tc, te_dwkl, te_seq_ppl,
     te_bow_ppl, te_log_copula, te_mmd) = evaluate(test_iter, model, pad_id)
    print('=' * 90)
    print("| End of training | test loss {:5.2f} {:5.2f} ({:5.2f}) "
          "| {:5.2f} {:5.2f} {:5.2f} "
          "| test ppl {:5.2f} {:5.2f}"
          "| test log copula {:5.2f}"
          "| test mmd {:5.2f}"
          "| test nll {:5.2f}".format(te_seq_loss, te_bow_loss, te_kld, te_mi,
                                      te_tc, te_dwkl, te_seq_ppl, te_bow_ppl,
                                      te_log_copula, te_mmd,
                                      te_seq_loss + te_kld - te_log_copula))
    print('=' * 90)

    te_losslogger = {
        "seq_loss": te_seq_loss,
        "bow_loss": te_bow_loss,
        "kld": te_kld,
        "seq_ppl": te_seq_ppl,
        "bow_ppl": te_bow_ppl,
        "log_copula": te_log_copula,
        "mmd": te_mmd,
    }