Code example #1
0
File: train.py  Project: CrhistyanSilva/localbitsback
def run(args):
    """Train a Flow++ model on ImageNet64 and evaluate the best checkpoint.

    Creates ``args.out_dir`` for logs/checkpoints, optionally resumes the
    learning-rate/scheduler state from the JSON file ``args.state_parameters``,
    trains for ``args.epochs`` epochs, evaluates on the validation set at
    intervals, checkpoints the best model by validation bpd, and finally
    reloads and evaluates that checkpoint.

    ``train``, ``evaluate`` and ``load_imagenet_data`` are project helpers
    defined elsewhere in this codebase.
    """
    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    # Filesystem-safe timestamp signature (second resolution).
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    os.makedirs(args.out_dir, exist_ok=True)
    snap_dir = args.out_dir

    with open(os.path.join(snap_dir, 'log.txt'), 'a') as ff:
        print('\nMODEL SETTINGS: \n', args, '\n', file=ff)

    # SAVING
    # FIX: ``snap_dir + '.config'`` wrote '<out_dir>.config' *next to* the
    # directory (snap_dir has no trailing separator); join so the config
    # lands inside it, matching the sibling training scripts.
    torch.save(args, os.path.join(snap_dir, '.config'))

    # Load snapshot parameters
    parameters_dict = None
    if args.state_parameters is not None:
        assert os.path.isfile(args.state_parameters)
        # FIX: close the JSON file handle (was ``json.load(open(...))``).
        with open(args.state_parameters) as fp:
            parameters_dict = json.load(fp)
        # Resume from the last learning rate the scheduler reached.
        args.learning_rate = parameters_dict['scheduler']['_last_lr'][0]

    args.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Device:', args.device)

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    dataset = load_imagenet_data(os.path.expanduser(args.imagenet64_data_path))
    validation_dataset = load_imagenet_data(os.path.expanduser(args.imagenet64_valid_data_path))

    train_loader = torch.utils.data.DataLoader(dataset, batch_size=args.batch_size, shuffle=True, drop_last=False)
    val_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=args.batch_size, shuffle=True,
                                             drop_last=False)

    args.input_size = [3, 64, 64]
    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through args
    print(args.input_size)

    from compression.models.load_flowpp_imagenet64 import Imagenet64Model

    # Load model: freshly initialized, or restored from a checkpoint file.
    if args.imagenet64_model is None:
        model = Imagenet64Model(force_float32_cond=True).eval()
    else:
        model_ctor = compression.models.load_imagenet64_model
        model_filename = os.path.expanduser(args.imagenet64_model)
        model = model_ctor(model_filename, force_float32_cond=True, from_torch=args.from_torch)

    model.to(device=args.device)

    model_sample = model

    optimizer = optim.Adam(model.parameters(), lr=args.learning_rate)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=args.step_size, gamma=args.gamma)

    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping
    best_val_bpd = np.inf
    best_val_loss = np.inf

    if args.state_parameters is None:
        last_epoch = 1
        run_number = 1
    else:
        # Resume epoch counter, run number, and scheduler state.
        last_epoch = parameters_dict['epoch']
        run_number = parameters_dict['run_number'] + 1
        scheduler.load_state_dict(parameters_dict['scheduler'])

    train_times = []
    model.double()  # train in float64

    for epoch in range(last_epoch, args.epochs + 1):
        t_start = time.time()
        if parameters_dict is not None:
            # Resumed run: tell train() where to pick up within the epoch.
            tr_loss, tr_bpd = train(epoch, train_loader, model, optimizer, args, scheduler,
                                    True, parameters_dict['batch_idx'], run_number)
        else:
            tr_loss, tr_bpd = train(epoch, train_loader, model, optimizer, args, scheduler, False)
        train_bpd.append(tr_bpd)
        train_times.append(time.time() - t_start)
        print('One training epoch took %.2f seconds' % (time.time() - t_start))

        if epoch < 5 or epoch % args.evaluate_interval_epochs == 0:
            # FIX: join the log path; ``snap_dir + 'log.txt'`` produced
            # '<out_dir>log.txt' because snap_dir has no trailing separator.
            v_loss, v_bpd = evaluate(
                val_loader, model, model_sample, args,
                epoch=epoch, file=os.path.join(snap_dir, 'log.txt'))

            val_bpd.append(v_bpd)

            # FIX: checkpoint the best model; the final evaluation below
            # loads 'a.model', which was previously never written anywhere.
            if v_bpd < best_val_bpd:
                torch.save(model, os.path.join(snap_dir, 'a.model'))
                torch.save(optimizer, os.path.join(snap_dir, 'a.optimizer'))
                print('->model saved<-')

            best_val_bpd = min(v_bpd, best_val_bpd)
            best_val_loss = min(v_loss, best_val_loss)

            print('(BEST: val bpd {:.4f}, val loss {:.4f})\n'.format(best_val_bpd, best_val_loss))
            print(f'VALIDATION: loss: {v_loss}, bpd: {v_bpd}')

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

    train_bpd = np.hstack(train_bpd)
    val_bpd = np.array(val_bpd)

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

    # ==================================================================================================================
    # EVALUATION
    # ==================================================================================================================
    final_model = torch.load(os.path.join(snap_dir, 'a.model'))
    # FIX: the old call referenced an undefined ``test_loader`` (its loader
    # was commented out) and used a stale 5-positional-argument signature.
    # No test split is loaded by this script, so evaluate the best checkpoint
    # on the validation set using the same evaluate() signature as above.
    test_loss, test_bpd = evaluate(
        val_loader, final_model, final_model, args,
        epoch=epoch, file=os.path.join(snap_dir, 'test_log.txt'))

    print('Test loss / bpd: %.2f / %.2f' % (test_loss, test_bpd))
Code example #2
0
def run(args, kwargs):
    """Train a flow model and report test-set bits-per-dimension.

    Loads data via ``load_dataset``, builds model/optimizer/scheduler via
    ``setup``, trains for ``args.epochs`` epochs (optionally restarting from
    ``args.restart_from_epoch``), checkpoints the best model by validation
    bpd, and finally evaluates the saved checkpoint on the test set — with an
    additional 1000-sample importance-weighted estimate for non-residual
    models.
    """

    # Only for the residual networks (resflow/sylvester) comparison.
    args.grad_norm_enabled = True

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    model, model_sample, optimizer, scheduler = setup(args)

    writer = tensorboardX.SummaryWriter(logdir=args.snap_dir)

    snap_dir = args.snap_dir
    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping
    best_val_bpd = np.inf
    best_train_bpd = np.inf
    epoch = 0

    train_times = []

    # NOTE(review): eval() immediately followed by train() — presumably to
    # settle lazily-initialized modules before training; confirm intent.
    model.eval()
    model.train()

    starting_epoch = 1
    if args.restart_from_epoch is not None:
        starting_epoch = args.restart_from_epoch

    for epoch in range(starting_epoch, args.epochs + 1):
        t_start = time.time()
        tr_bpd = train(epoch, train_loader, model, optimizer, args)
        scheduler.step()
        train_bpd.append(tr_bpd)
        writer.add_scalar('train bpd', train_bpd[-1], epoch)
        train_times.append(time.time() - t_start)
        print('One training epoch took %.2f seconds' % (time.time() - t_start))

        if epoch in [1, 5, 10] or epoch % args.evaluate_interval_epochs == 0:
            tr_bpd = evaluate(train_loader, model, args, iw_samples=1)
            v_bpd = evaluate(val_loader, model, args, iw_samples=1)

            # Logging message.
            with open(snap_dir + 'log.txt', 'a') as ff:
                msg = 'epoch {}\ttrain bpd {:.3f}\tval bpd {:.3f}\t'.format(
                    epoch, tr_bpd, v_bpd)
                print(msg, file=ff)

            plot_samples(model_sample, args, epoch, v_bpd)

            val_bpd.append(v_bpd)
            writer.add_scalar('val bpd', v_bpd, epoch)

            # Model save based on val performance
            if v_bpd < best_val_bpd:
                best_train_bpd = tr_bpd
                best_val_bpd = v_bpd

                try:
                    # Unwrap DataParallel so the checkpoint loads standalone.
                    if hasattr(model, 'module'):
                        torch.save(model.module, snap_dir + 'a.model')
                    else:
                        torch.save(model, snap_dir + 'a.model')
                    torch.save(optimizer, snap_dir + 'a.optimizer')
                    print('->model saved<-')
                except Exception as e:
                    # FIX: was a bare ``except:``, which also swallowed
                    # KeyboardInterrupt/SystemExit and hid the reason.
                    print('Saving was unsuccessful.', e)

            print('(BEST: train bpd {:.4f}, val bpd {:.4f})\n'.format(
                best_train_bpd, best_val_bpd))

            if math.isnan(v_bpd):
                raise ValueError('NaN encountered!')

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
        mean_train_time, std_train_time))

    # ========================================================================
    # EVALUATION
    # ========================================================================
    final_model = torch.load(snap_dir + 'a.model')

    test_bpd = evaluate(test_loader, final_model, args)

    with open(snap_dir + 'log.txt', 'a') as ff:
        msg = 'epoch {}\ttest negative elbo bpd {:.4f}'.format(epoch, test_bpd)
        print(msg, file=ff)

    if 'residual' in args.model_type:
        # Residual flows only give a log-det estimate, so IW eval is skipped.
        print('Importance weighted eval needs exact determinants.')

    else:
        test_bpd = evaluate(test_loader, final_model, args, iw_samples=1000)

        with open(snap_dir + 'log.txt', 'a') as ff:
            msg = 'epoch {}\ttest negative log_px bpd {:.4f}'.format(
                epoch, test_bpd)
            print(msg, file=ff)
Code example #3
0
def run(args, kwargs):
    """Train a (Sylvester/IAF/planar/no-flow) VAE and evaluate it.

    Builds a descriptive snapshot directory from the flow settings, trains
    with Adamax and loss-based early stopping, plots the training curve, and
    finally evaluates the best saved model on the validation set (and on the
    test set when ``args.testing`` is set).

    ``train``, ``evaluate``, ``load_dataset``, ``plot_training_curve`` and
    the ``VAE`` module are project helpers defined elsewhere.
    """

    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    # Filesystem-safe timestamp signature (second resolution).
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    # Encode dataset, flow type, GPU id and flow hyper-parameters in the dir name.
    snapshots_path = os.path.join(args.out_dir, 'vae_' + args.dataset + '_')
    snap_dir = snapshots_path + args.flow + '_gpunum_' + str(args.gpu_num)

    if args.flow != 'no_flow':
        snap_dir += '_' + 'num_flows_' + str(args.num_flows)
    if args.flow == 'orthogonal':
        snap_dir = snap_dir + '_num_vectors_' + str(args.num_ortho_vecs)
    elif args.flow == 'householder':
        snap_dir = snap_dir + '_num_householder_' + str(args.num_householder)
    elif args.flow == 'iaf':
        snap_dir = snap_dir + '_madehsize_' + str(args.made_h_size)

    snap_dir = snap_dir + '__' + args.model_signature + '/'

    args.snap_dir = snap_dir

    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    # SAVING
    torch.save(args, snap_dir + args.flow + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through args

    if args.flow == 'no_flow':
        model = VAE.VAE(args)
    elif args.flow == 'planar':
        model = VAE.PlanarVAE(args)
    elif args.flow == 'iaf':
        model = VAE.IAFVAE(args)
    elif args.flow == 'orthogonal':
        model = VAE.OrthogonalSylvesterVAE(args)
    elif args.flow == 'householder':
        model = VAE.HouseholderSylvesterVAE(args)
    elif args.flow == 'triangular':
        model = VAE.TriangularSylvesterVAE(args)
    else:
        raise ValueError('Invalid flow choice')

    if args.cuda:
        print("Model on GPU")
        model.cuda()

    print(model)

    optimizer = optim.Adamax(model.parameters(), lr=args.learning_rate, eps=1.e-7)

    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_loss = []
    val_loss = []

    # for early stopping
    best_loss = np.inf
    best_bpd = np.inf
    # e counts epochs since the last validation improvement (patience).
    e = 0
    epoch = 0

    train_times = []

    for epoch in range(1, args.epochs + 1):

        t_start = time.time()
        tr_loss = train(epoch, train_loader, model, optimizer, args)
        train_loss.append(tr_loss)
        train_times.append(time.time()-t_start)
        print('One training epoch took %.2f seconds' % (time.time()-t_start))

        v_loss, v_bpd = evaluate(val_loader, model, args, epoch=epoch)

        val_loss.append(v_loss)

        # early-stopping
        if v_loss < best_loss:
            # Improvement: reset patience and checkpoint the model.
            e = 0
            best_loss = v_loss
            if args.input_type != 'binary':
                # bpd is only meaningful for non-binary inputs.
                best_bpd = v_bpd
            print('->model saved<-')
            torch.save(model, snap_dir + args.flow + '.model')
            # torch.save(model, snap_dir + args.flow + '_' + args.architecture + '.model')

        elif (args.early_stopping_epochs > 0) and (epoch >= args.warmup):
            # No improvement after warmup: consume patience; stop when spent.
            e += 1
            if e > args.early_stopping_epochs:
                break

        if args.input_type == 'binary':
            print('--> Early stopping: {}/{} (BEST: loss {:.4f})\n'.format(e, args.early_stopping_epochs, best_loss))

        else:
            print('--> Early stopping: {}/{} (BEST: loss {:.4f}, bpd {:.4f})\n'.format(e, args.early_stopping_epochs,
                                                                                   best_loss, best_bpd))

        if math.isnan(v_loss):
            raise ValueError('NaN encountered!')

    train_loss = np.hstack(train_loss)
    val_loss = np.array(val_loss)

    # NOTE(review): snap_dir already ends with '/', so this path contains
    # '//' — harmless on POSIX but worth cleaning up.
    plot_training_curve(train_loss, val_loss, fname=snap_dir + '/training_curve_%s.pdf' % args.flow)

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time))

    # ==================================================================================================================
    # EVALUATION
    # ==================================================================================================================

    test_score_file = snap_dir + 'test_scores.txt'

    # Append run summary to the shared cross-experiment log.
    with open('experiment_log.txt', 'a') as ff:
        print(args, file=ff)
        print('Stopped after %d epochs' % epoch, file=ff)
        print('Average train time per epoch: %.2f +/- %.2f' % (mean_train_time, std_train_time), file=ff)

    # Reload the best checkpoint saved by early stopping above.
    final_model = torch.load(snap_dir + args.flow + '.model')

    if args.testing:
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args)
        test_loss, test_bpd = evaluate(test_loader, final_model, args, testing=True)

        with open('experiment_log.txt', 'a') as ff:
            print('FINAL EVALUATION ON VALIDATION SET\n'
                  'ELBO (VAL): {:.4f}\n'.format(validation_loss), file=ff)
            print('FINAL EVALUATION ON TEST SET\n'
                  'NLL (TEST): {:.4f}\n'.format(test_loss), file=ff)
            if args.input_type != 'binary':
                print('FINAL EVALUATION ON VALIDATION SET\n'
                      'ELBO (VAL) BPD : {:.4f}\n'.format(validation_bpd), file=ff)
                print('FINAL EVALUATION ON TEST SET\n'
                      'NLL (TEST) BPD: {:.4f}\n'.format(test_bpd), file=ff)


    else:
        validation_loss, validation_bpd = evaluate(val_loader, final_model, args)
        # save the test score in case you want to look it up later.
        _, _ = evaluate(test_loader, final_model, args, testing=True, file=test_score_file)

        with open('experiment_log.txt', 'a') as ff:
            print('FINAL EVALUATION ON VALIDATION SET\n'
                  'ELBO (VALIDATION): {:.4f}\n'.format(validation_loss), file=ff)
            if args.input_type != 'binary':
                print('FINAL EVALUATION ON VALIDATION SET\n'
                      'ELBO (VAL) BPD : {:.4f}\n'.format(validation_bpd), file=ff)
def run(args, kwargs):
    """Train a discrete-lowerbound flow with flow/uniform dequantization.

    Builds the snapshot directory, constructs the prior flow ``model_pv``,
    the dequantization distribution ``model_qu_x`` (plus an optional context
    network ``model_hx`` when ``args.dequantize_distribution == 'flow'``),
    trains with LR warmup + exponential decay, checkpoints the best model by
    validation bpd, and reports test bpd (ELBO and a 1000-sample
    importance-weighted estimate).
    """
    # Would probably help, but experiments were done before.
    args.grad_norm_enabled = False

    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # Evaluate more frequently on the large imagenet datasets.
    if 'imagenet' in args.dataset and args.evaluate_interval_epochs > 5:
        args.evaluate_interval_epochs = 5

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    # Filesystem-safe timestamp signature (minute resolution).
    args.model_signature = str(datetime.datetime.now())[0:16].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    snapshots_path = args.out_dir
    snap_dir = snapshots_path + '/'

    snap_dir += args.exp_name + args.dataset + '_' + 'flows_' + str(
        args.n_subflows)

    snap_dir = snap_dir + '_' + args.model_signature + '/'

    args.snap_dir = snap_dir

    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    with open(snap_dir + 'log.txt', 'a') as ff:
        print('\nMODEL SETTINGS: \n', args, '\n', file=ff)

    writer = tensorboardX.SummaryWriter(logdir=snap_dir)

    # SAVING
    torch.save(args, snap_dir + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through
    # args

    # Prior flow p(v) over images normalized to roughly [-0.5, 0.5].
    model_pv = Flow(args,
                    args.input_size,
                    n_levels=args.n_levels,
                    n_subflows=args.n_subflows,
                    use_splitprior=args.use_splitprior,
                    n_context=None,
                    normalize_translation=128.,
                    normalize_scale=256.)

    if args.dequantize_distribution == 'uniform':
        # Uniform dequantization needs no context network.
        model_hx = None
        model_qu_x = Uniform(args.input_size)

    elif args.dequantize_distribution == 'flow':
        # Context network h(x) that conditions the dequantization flow.
        model_hx = torch.nn.Sequential(
            Normalize_without_ldj(translation=128., scale=256.),
            DenseNet(args,
                     input_size=(3, args.input_size[1], args.input_size[2]),
                     n_inputs=3,
                     n_outputs=args.n_context,
                     depth=4,
                     growth=32,
                     dropout_p=args.dropout_p),
            torch.nn.Conv2d(args.n_context,
                            args.n_context,
                            kernel_size=2,
                            stride=2,
                            padding=0),
            DenseNet(args,
                     n_inputs=args.n_context,
                     input_size=(3, args.input_size[1], args.input_size[2]),
                     n_outputs=args.n_context,
                     depth=4,
                     growth=32,
                     dropout_p=args.dropout_p),
        )
        # q(u|x): a conditional flow squashed through a sigmoid to (0, 1).
        model_qu_x = TemplateDistribution(
            transformations=[ReverseTransformation(Sigmoid())],
            distribution=Flow(args,
                              args.input_size,
                              n_levels=args.dequantize_levels,
                              n_subflows=args.dequantize_subflows,
                              n_context=args.n_context,
                              use_splitprior=False,
                              normalize_translation=0.,
                              normalize_scale=1.,
                              parametrize_inverse=True))
    else:
        raise ValueError

    model = DiscreteLowerboundModel(model_pv, model_qu_x, model_hx)

    args.device = torch.device(
        "cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(args.device)
    # Keep an unwrapped handle for sampling (DataParallel wraps `model`).
    model_sample = model
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = torch.nn.DataParallel(model, dim=0)

    def lr_lambda(epoch):
        # Linear warmup for the first args.warmup epochs, then exponential decay.
        factor = min(1., (epoch + 1) / args.warmup) * np.power(
            args.lr_decay, epoch)
        print('Learning rate factor:', factor)
        return factor

    optimizer = optim.Adamax(model.parameters(),
                             lr=args.learning_rate,
                             eps=1.e-7)
    scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer,
                                                  lr_lambda,
                                                  last_epoch=-1)

    # Log the number of params.
    number_of_params = np.sum(
        [np.prod(tensor.size()) for tensor in model.parameters()])
    fn = snap_dir + 'log.txt'
    with open(fn, 'a') as ff:
        msg = 'Number of Parameters: {}'.format(number_of_params)
        print(msg, file=ff)
        print(msg)

    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping
    best_val_bpd = np.inf
    best_train_bpd = np.inf
    epoch = 0

    train_times = []

    # NOTE(review): eval() immediately followed by train() — presumably to
    # settle lazily-initialized modules before training; confirm intent.
    model.eval()
    model.train()

    for epoch in range(1, args.epochs + 1):
        t_start = time.time()
        tr_bpd = train(epoch, train_loader, model, optimizer, args)
        scheduler.step()
        train_bpd.append(tr_bpd)
        writer.add_scalar('train bpd', train_bpd[-1], epoch)
        train_times.append(time.time() - t_start)
        print('One training epoch took %.2f seconds' % (time.time() - t_start))

        if epoch < 25 or epoch % args.evaluate_interval_epochs == 0:
            tr_bpd = evaluate(train_loader, model, args, iw_samples=1)
            v_bpd = evaluate(val_loader, model, args, iw_samples=1)

            # Logging message.
            with open(snap_dir + 'log.txt', 'a') as ff:
                msg = 'epoch {}\ttrain bpd {:.3f}\tval bpd {:.3f}\t'.format(
                    epoch, tr_bpd, v_bpd)
                print(msg, file=ff)

            # Sample and time sampling.
            # FIX: torch.cuda.synchronize() raises on CPU-only hosts even
            # though the device selection above explicitly supports CPU.
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            start_sample = time.time()
            plot_samples(model_sample, args, epoch, v_bpd)
            if torch.cuda.is_available():
                torch.cuda.synchronize()
            print('Sampling took {} seconds'.format(time.time() -
                                                    start_sample))

            val_bpd.append(v_bpd)
            writer.add_scalar('val bpd', v_bpd, epoch)

            # Model save based on val performance
            if v_bpd < best_val_bpd:
                best_train_bpd = tr_bpd
                best_val_bpd = v_bpd

                try:
                    # Unwrap DataParallel so the checkpoint loads standalone.
                    if hasattr(model, 'module'):
                        torch.save(model.module, snap_dir + 'a.model')
                    else:
                        torch.save(model, snap_dir + 'a.model')
                    torch.save(optimizer, snap_dir + 'a.optimizer')
                    print('->model saved<-')
                except Exception as e:
                    # FIX: was a bare ``except:``, which also swallowed
                    # KeyboardInterrupt/SystemExit and hid the reason.
                    print('Saving was unsuccessful.', e)

            print('(BEST: train bpd {:.4f}, val bpd {:.4f})\n'.format(
                best_train_bpd, best_val_bpd))

            if math.isnan(v_bpd):
                raise ValueError('NaN encountered!')

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: {:.2f} +/- {:.2f}'.format(
        mean_train_time, std_train_time))

    # ========================================================================
    # EVALUATION
    # ========================================================================
    final_model = torch.load(snap_dir + 'a.model')

    test_bpd = evaluate(test_loader, final_model, args)

    with open(snap_dir + 'log.txt', 'a') as ff:
        msg = 'epoch {}\ttest negative elbo bpd {:.4f}'.format(epoch, test_bpd)
        print(msg, file=ff)

    test_bpd = evaluate(test_loader, final_model, args, iw_samples=1000)

    with open(snap_dir + 'log.txt', 'a') as ff:
        msg = 'epoch {}\ttest negative log_px bpd {:.4f}'.format(
            epoch, test_bpd)
        print(msg, file=ff)
Code example #5
0
def run(args, kwargs):
    """Train a discrete flow model and evaluate the best checkpoint.

    Builds the snapshot directory, performs a data-dependent initialization
    pass on CPU, trains with LR warmup + exponential decay, checkpoints on
    best *training* bpd, and finally evaluates the saved checkpoint on the
    test set.

    ``train``, ``evaluate``, ``load_dataset`` and ``models.Model`` are
    project helpers defined elsewhere.
    """

    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    # Evaluate more frequently on the large imagenet datasets.
    if 'imagenet' in args.dataset and args.evaluate_interval_epochs > 5:
        args.evaluate_interval_epochs = 5

    # ==================================================================================================================
    # SNAPSHOTS
    # ==================================================================================================================
    # Filesystem-safe timestamp signature (second resolution).
    args.model_signature = str(datetime.datetime.now())[0:19].replace(' ', '_')
    args.model_signature = args.model_signature.replace(':', '_')

    # Encode variable/distribution type, dataset and flow sizes in the dir name.
    snapshots_path = os.path.join(
        args.out_dir, args.variable_type + '_' + args.distribution_type + args.dataset)
    snap_dir = snapshots_path

    snap_dir += '_' + 'flows_' + \
        str(args.n_flows) + '_levels_' + str(args.n_levels)

    snap_dir = snap_dir + '__' + args.model_signature + '/'

    args.snap_dir = snap_dir

    if not os.path.exists(snap_dir):
        os.makedirs(snap_dir)

    with open(snap_dir + 'log.txt', 'a') as ff:
        print('\nMODEL SETTINGS: \n', args, '\n', file=ff)

    # SAVING
    torch.save(args, snap_dir + '.config')

    # ==================================================================================================================
    # LOAD DATA
    # ==================================================================================================================
    train_loader, val_loader, test_loader, args = load_dataset(args, **kwargs)

    # ==================================================================================================================
    # SELECT MODEL
    # ==================================================================================================================
    # flow parameters and architecture choice are passed on to model through args
    print(args.input_size)

    import models.Model as Model

    model = Model.Model(args)
    args.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.set_temperature(args.temperature)
    model.enable_hard_round(args.hard_round)

    # Keep an unwrapped handle for sampling (DataParallel wraps `model`).
    model_sample = model

    # ====================================
    # INIT
    # ====================================
    # data dependend initialization on CPU
    # A single forward pass before moving to GPU / wrapping in DataParallel.
    for batch_idx, data in enumerate(train_loader):
        model(data)
        break

    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = torch.nn.DataParallel(model, dim=0)

    model.to(args.device)

    def lr_lambda(epoch):
        # Linear warmup for the first args.warmup epochs, then exponential decay.
        return min(1., (epoch+1) / args.warmup) * np.power(args.lr_decay, epoch)
    optimizer = optim.Adamax(
        model.parameters(), lr=args.learning_rate, eps=1.e-7)
    scheduler = torch.optim.lr_scheduler.LambdaLR(
        optimizer, lr_lambda, last_epoch=-1)

    # ==================================================================================================================
    # TRAINING
    # ==================================================================================================================
    train_bpd = []
    val_bpd = []

    # for early stopping
    best_val_bpd = np.inf
    best_train_bpd = np.inf
    epoch = 0

    train_times = []

    # NOTE(review): eval() immediately followed by train() — presumably to
    # settle lazily-initialized modules before training; confirm intent.
    model.eval()
    model.train()

    for epoch in range(1, args.epochs + 1):
        t_start = time.time()
        # NOTE(review): scheduler.step() *before* train() follows the
        # pre-1.1 PyTorch convention; on PyTorch >= 1.1 this skips the first
        # LR value and triggers a warning — confirm the intended order.
        scheduler.step()
        tr_loss, tr_bpd = train(epoch, train_loader, model, optimizer, args)
        train_bpd.append(tr_bpd)
        train_times.append(time.time()-t_start)
        print('One training epoch took %.2f seconds' % (time.time()-t_start))

        if epoch < 25 or epoch % args.evaluate_interval_epochs == 0:
            v_loss, v_bpd = evaluate(
                train_loader, val_loader, model, model_sample, args,
                epoch=epoch, file=snap_dir + 'log.txt')

            val_bpd.append(v_bpd)

            # Model save based on TRAIN performance (is heavily correlated with validation performance.)
            if np.mean(tr_bpd) < best_train_bpd:
                best_train_bpd = np.mean(tr_bpd)
                best_val_bpd = v_bpd
                torch.save(model, snap_dir + 'a.model')
                torch.save(optimizer, snap_dir + 'a.optimizer')
                print('->model saved<-')

            print('(BEST: train bpd {:.4f}, test bpd {:.4f})\n'.format(
                best_train_bpd, best_val_bpd))

            if math.isnan(v_loss):
                raise ValueError('NaN encountered!')

    train_bpd = np.hstack(train_bpd)
    val_bpd = np.array(val_bpd)

    # training time per epoch
    train_times = np.array(train_times)
    mean_train_time = np.mean(train_times)
    std_train_time = np.std(train_times, ddof=1)
    print('Average train time per epoch: %.2f +/- %.2f' %
          (mean_train_time, std_train_time))

    # ==================================================================================================================
    # EVALUATION
    # ==================================================================================================================
    # Reload the best checkpoint saved during training.
    final_model = torch.load(snap_dir + 'a.model')
    test_loss, test_bpd = evaluate(
        train_loader, test_loader, final_model, final_model, args,
        epoch=epoch, file=snap_dir + 'test_log.txt')

    print('Test loss / bpd: %.2f / %.2f' % (test_loss, test_bpd))
Code example #6
0
def main(main_args=None):
    """Entry point; pass ``main_args`` to run this script as a function
    from another module instead of the command line."""
    # Parse experiment settings; this also sets up the snapshot directory.
    args, loader_kwargs = parse_args(main_args)

    # Build the data loaders (may also update args, e.g. with input sizes).
    logger.info('LOADING DATA:')
    train_loader, val_loader, test_loader, args = load_image_dataset(args, **loader_kwargs)

    # Persist the full configuration for reproducibility.
    logger.info(f'EXPERIMENT SETTINGS:\n{args}\n')
    torch.save(args, os.path.join(args.snap_dir, 'config.pt'))

    # Model, optimizer and LR scheduler.
    model = init_model(args)
    optimizer, scheduler = init_optimizer(model, args)
    n_params = sum(p.nelement() for p in model.parameters())
    logger.info(f"MODEL:\nNumber of model parameters={n_params}\n{model}\n")

    # Optionally warm-start from a pre-trained checkpoint.
    if args.load:
        logger.info(f'LOADING CHECKPOINT FROM PRE-TRAINED MODEL: {args.load}')
        init_with_args = args.flow == "boosted" and args.loaded_init_component is not None and args.loaded_all_trained is not None
        load(model, optimizer, args.load, args, init_with_args)

    # Train unless a checkpoint was supplied together with zero epochs.
    needs_training = args.epochs > 0 or args.load is None
    if needs_training:
        logger.info('TRAINING:')
        if args.tensorboard:
            logger.info(f'Follow progress on tensorboard: tb {args.snap_dir}')
        train_loss, val_loss = train(train_loader, val_loader, model, optimizer, scheduler, args)

    # Validation: reload the best checkpoint saved during training, then score it.
    logger.info('VALIDATION:')
    if needs_training:
        load(model, optimizer, args.snap_dir + 'model.pt', args)
    val_loss, val_rec, val_kl = evaluate(val_loader, model, args, results_type='Validation')

    # Final test-set evaluation, including importance-sampled likelihood.
    if args.testing:
        logger.info("TESTING:")
        test_loss, test_rec, test_kl = evaluate(test_loader, model, args, results_type='Test')
        test_nll = evaluate_likelihood(test_loader, model, args, S=args.nll_samples, MB=args.nll_mb, results_type='Test')
コード例 #7
0
def run(args):
    """Train, validate and test a flow-based VAE, then dump metrics to JSON.

    Args:
        args: parsed experiment configuration (flow type, epoch count,
            learning rate, output directory, VampPrior settings, ...).

    Raises:
        ValueError: if ``args.flow`` names an unknown flow type.
    """
    print('\nMODEL SETTINGS: \n', args, '\n')
    print("Random Seed: ", args.manual_seed)

    train_loader, val_loader, test_loader, args = load_dataset(args)

    encoder = MLP_encoder(args)
    decoder = MLP_decoder(args)

    # Select the VAE variant by flow type. Fail fast on an unknown value
    # instead of hitting an UnboundLocalError on `model` further down.
    if args.flow == "planar":
        model = VAE.PlanarVAE(encoder, decoder, args)
    elif args.flow == "NICE":  # NICE-planar
        model = VAE.NICEVAE_amor(encoder, decoder, args)
    elif args.flow == "NICE_MLP":
        model = VAE.NICEVAE(encoder, decoder, args)
    elif args.flow == "syl_orthogonal":
        model = VAE.Sylvester_ortho_VAE(encoder, decoder, args)
    elif args.flow == "real":
        model = VAE.RealNVPVAE(encoder, decoder, args)
    else:
        raise ValueError(f"Unknown flow type: {args.flow!r}")

    if args.vampprior:
        # Draw one batch of real training examples to initialize the
        # VampPrior pseudo-inputs; otherwise the model learns them freely.
        pseudo_loader = torch.utils.data.DataLoader(train_loader.dataset,
                                                    batch_size=args.num_pseudos,
                                                    shuffle=True)
        pseudo_inputs = next(iter(pseudo_loader))[0] if args.data_as_pseudo else None
        model.init_pseudoinputs(pseudo_inputs)

    if args.cuda:
        print("Model on GPU")
        model.cuda()

    print(model)

    optimizer = optim.RMSprop(model.parameters(),
                              lr=args.learning_rate,
                              momentum=0.9)

    #### Training
    train_loss = []
    val_loss = []

    t = time.time()
    for epoch in range(1, args.epochs + 1):
        tr_loss = train(epoch, train_loader, model, optimizer, args)
        train_loss.append(tr_loss.mean())

        v_loss = evaluate(val_loader, model, args)
        val_loss.append(v_loss)

    train_loss = np.hstack(train_loss)
    val_loss = np.hstack(val_loss)
    results = {
        "train_loss": train_loss.tolist(),
        "val_loss": val_loss.tolist()
    }

    #### Testing
    # Kept for its logging side effects; the returned value is not used.
    validation_loss = evaluate(val_loader, model, args)
    test_loss, log_likelihood = evaluate(test_loader,
                                         model,
                                         args,
                                         testing=True)
    results["ELBO"] = test_loss
    results["log_likelihood"] = log_likelihood

    elapsed = time.time() - t
    results["Running time"] = elapsed

    # Save the results. NOTE: plain string concatenation is deliberate —
    # an out_dir with/without a trailing separator must behave as before.
    json_dir = args.out_dir + f"{args.flow}perm_k_{args.num_flows}_RMSProp_lr{args.learning_rate}_4"
    print("Saving data at: " + json_dir)
    output_folder = pathlib.Path(json_dir)
    output_folder.mkdir(parents=True, exist_ok=True)
    results_json = json.dumps(results, indent=4, sort_keys=True)
    (output_folder / "results.json").write_text(results_json)