def psi_init(L, hidden_layer_sizes, nout, Form='euler'):
    natural_ordering = True
    model_r = MADE(L, hidden_layer_sizes, nout,
                   num_masks=1, natural_ordering=natural_ordering)
    model_i = MADE(L, hidden_layer_sizes, nout,
                   num_masks=1, natural_ordering=natural_ordering)

    ppsi = Psi(model_r, model_i, L, form=Form, autoregressive=True)

    return ppsi
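A quick, hypothetical call for context (the system size and layer widths below are placeholders; only the constructor shown above is assumed):

# Hypothetical usage: a 10-site system with two hidden layers of 64 units.
ppsi = psi_init(L=10, hidden_layer_sizes=[64, 64], nout=10, Form='euler')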
Example #2
def MakeMade(scale,
             cols_to_train,
             seed,
             dataset,
             fixed_ordering=None,
             special_orders=(),
             layers=4,
             residual=False,
             dropout=False,
             per_row_dropout=False,
             prefix_dropout=False,
             fixed_dropout_ratio=False,
             disable_learnable_unk=False,
             input_no_emb_if_leq=True,
             embs_tied=False,
             embed_size=32):
    # TODO: if passed in a single heuristic order, be sure to InvertOrder().
    num_masks = 1
    if len(special_orders):
        num_masks = len(special_orders)
    model = MADE(
        nin=len(cols_to_train),
        hidden_sizes=[scale] * layers
        if layers > 0 else [512, 256, 512, 128, 1024],
        nout=sum([c.DistributionSize() for c in cols_to_train]),
        input_bins=[c.DistributionSize() for c in cols_to_train],
        input_encoding="embed",
        output_encoding="embed",
        seed=seed,
        do_direct_io_connections=False,
        natural_ordering=seed is None,
        residual_connections=residual,
        embed_size=embed_size,
        fixed_ordering=fixed_ordering,
        dropout_p=dropout or per_row_dropout or prefix_dropout,
        fixed_dropout_p=fixed_dropout_ratio,
        num_masks=num_masks,
        per_row_dropout_p=per_row_dropout,
        prefix_dropout=prefix_dropout,
        disable_learnable_unk=disable_learnable_unk,
        input_no_emb_if_leq=input_no_emb_if_leq,
        embs_tied=embs_tied,
    ).to(get_device())

    if len(special_orders):
        print('assigning to model.orderings:')
        print(special_orders)
        model.orderings = special_orders

    return model
Example #3
def MakeModel(scale, cols_to_train, seed, fixed_ordering=None):

    if args.inv_order:
        print('Inverting order!!!!!!!!!!')
        fixed_ordering = InvertOrder(fixed_ordering)

    return MADE(
        nin=len(cols_to_train),
        hidden_sizes=[
            scale,
        ] * 4,
        nout=sum([c.DistributionSize() for c in cols_to_train]),
        input_bins=[c.DistributionSize() for c in cols_to_train],
        input_encoding="embed",
        output_encoding="embed",
        embed_size=64,
        # input_no_emb_if_leq=False,
        embs_tied=args.embs_tied,
        input_no_emb_if_leq=True,
        seed=seed,
        natural_ordering=seed is None,
        residual_connections=args.residual,
        fixed_ordering=fixed_ordering,
        do_direct_io_connections=args.direct_io,
        dropout_p=args.dropout,
    ).to(DEVICE)
Example #4
    def __init__(self, n_z, n_h, n_made):
        super(InverseAutoregressiveBlock, self).__init__()

        # made: take as inputs: z_{t-1}, h; output: m_t, s_t
        self.made = MADE(num_input=n_z,
                         num_output=n_z * 2,
                         num_hidden=n_made,
                         num_context=n_h)
        self.sigmoid_arg_bias = nn.Parameter(torch.ones(n_z) * 2)
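For orientation, a sketch of the usual inverse-autoregressive update that the comment above describes; the call convention `made(z, h)` returning the concatenated (m_t, s_t) is an assumption, not this repository's actual forward method:

import torch

def iaf_step(made, z, h, sigmoid_arg_bias):
    # Hypothetical single IAF step (a sketch, not the repository's forward()):
    # split the MADE output into shift m_t and gate pre-activation s_t.
    m, s = made(z, h).chunk(2, dim=-1)
    gate = torch.sigmoid(s + sigmoid_arg_bias)
    # z_t = gate * z_{t-1} + (1 - gate) * m_t
    z_new = gate * z + (1.0 - gate) * m
    # log-determinant of the transformation, summed over dimensions
    log_det = torch.log(gate).sum(dim=-1)
    return z_new, log_det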
Example #5
    def __init__(self, data_dim, mask, hidden_dims, n_hidden, made_rev):
        super(NewCouplingLayer, self).__init__()
        self.mask = mask == 1
        self.made_rev = made_rev

        self.n_1 = np.ceil(data_dim / 2).astype(int)
        self.n_2 = np.floor(data_dim / 2).astype(int)

        self.made = MADE(n_in=self.n_1, hidden_dims=[n_hidden], gaussian=True)
        self.scale = ScaleTranslate(
            self.n_1, self.n_2, n_hidden, hidden_dims, actfun="tanh"
        )
        self.translate = ScaleTranslate(self.n_1, self.n_2, n_hidden, hidden_dims)
Example #6
def execute_one_round():
    
    args = {
        'data_name': config.data_name,
        'train_size': config.train_size,
        'valid_size': config.validation_size,
        'test_size': config.test_size,
    }
    if args['data_name'] == 'grid':
        args['width'] = config.width
        args['height'] = config.height
    elif args['data_name'] == 'Boltzmann':
        args['n'] = config.n_boltzmann
        args['m'] = config.m_boltzmann
    elif args['data_name'] == 'k_sparse':
        args['n'] = config.n_of_k_sparse
        args['sparsity_degree'] = config.sparsity_degree
    elif args['data_name'] == 'BayesNet':
        args['n'] = config.n_of_BayesNet
        args['par_num'] = config.par_num_of_BayesNet
    elif args['data_name'].startswith('mnist'):
        args['digit'] = config.digit
    
    data = get_data(args)

    print('data loaded')

    # if config.generate_samples:
    #     n = len(data['train_data'])
    #     for i in range(n):
    #         im = Image.fromarray(255*data['train_data'][i,:].reshape([config.height, config.width]))
    #         im.convert('RGB').save(config.generated_samples_dir+'train_' + str(i)+'.png')

    model = MADE()

    print('model initiated')
    
    model.fit(data['train_data'], data['valid_data'])
        
    pred = model.predict(data['test_data'])
        
    res = dict()    
    res['NLL'], res['KL'] = evaluate(pred, data['test_data_probs'])
    print('KL: ' + str(res['KL']), file=sys.stderr)
    print('NLL: ' + str(res['NLL']), file=sys.stderr)
    sys.stderr.flush()
    res['train_end_epochs'] = model.train_end_epochs
    res['num_of_connections'] = model.num_of_connections()

    if config.generate_samples:
        n = config.num_of_generated_samples_each_execution
        generated_samples = model.generate(n).reshape(n, config.height, config.width)
        for i in range(n):
            im = Image.fromarray(255*generated_samples[i,:,:])
            im.convert('RGB').save(config.generated_samples_dir+str(i)+'.png')
    return res
Example #7
def MakeMadeDmv(cols_to_train, seed, fixed_ordering=None):

    if args.inv_order:
        print('Inverting order!!!!!!!!!!')
        fixed_ordering = InvertOrder(fixed_ordering)

    if args.special_dmv_arch:
        return MADE(
            nin=len(cols_to_train),
            hidden_sizes=[256] * 5,
            nout=sum([c.DistributionSize() for c in cols_to_train]),
            input_bins=[c.DistributionSize() for c in cols_to_train],
            input_encoding="embed",
            output_encoding="embed",
            embed_size=128,
            input_no_emb_if_leq=True,
            embs_tied=True,
            seed=seed,
            do_direct_io_connections=True,  #args.direct_io,
            natural_ordering=seed is None,
            residual_connections=args.residual,
            fixed_ordering=fixed_ordering,
            dropout_p=args.dropout,
        ).to(DEVICE)

    hiddens = [args.fc_hiddens] * args.layers
    natural_ordering = False

    if args.layers == 0:
        # Default ckpt.
        hiddens = [512, 256, 512, 128, 1024]
        natural_ordering = True

    model = MADE(
        nin=len(cols_to_train),
        hidden_sizes=hiddens,
        residual_connections=args.residual,
        nout=sum([c.DistributionSize() for c in cols_to_train]),
        input_bins=[c.DistributionSize() for c in cols_to_train],
        input_encoding="embed"
        if args.dataset in ["dmv-full", "kdd", "synthetic"] else "binary",
        output_encoding="embed"
        if args.dataset in ["dmv-full", "kdd", "synthetic"] else "one_hot",
        seed=seed,
        do_direct_io_connections=args.direct_io,
        natural_ordering=seed is None,
        fixed_ordering=fixed_ordering,
        dropout_p=args.dropout,
        num_masks=max(1, args.special_orders),
    ).to(DEVICE)

    # XXX this is copied from train_many_orderings
    if args.special_orders > 0:
        special_orders = [
            # # MutInfo Max Marg
            # np.array([6, 1, 4, 0, 7, 3, 5, 2, 10, 9, 8]),
            # # CL Max Marg/Dom
            # np.array([6, 1, 4, 0, 5, 7, 3, 2, 10, 9, 8]),
            # # Random
            # np.random.RandomState(0).permutation(np.arange(11)),
        ][:args.special_orders]
        k = len(special_orders)
        for i in range(k, args.special_orders):
            special_orders.append(
                np.random.RandomState(i - k + 1).permutation(
                    np.arange(len(cols_to_train))))
        print('Special orders', np.array(special_orders))

        if args.inv_order:
            for i, order in enumerate(special_orders):
                special_orders[i] = np.asarray(InvertOrder(order))
            print('Inverted special orders:', special_orders)

        model.orderings = special_orders

    if args.use_query_order:
        model.use_query_order = True

    if args.use_best_order:
        model.use_best_order = True

    if args.use_worst_order:
        model.use_worst_order = True

    return model
Example #8
def BuckyBall():
    start_time = time.time()

    init_out_dir()
    print_args()

    if args.ham == 'buckey':
        ham = buckyball_2(args.beta)
#    elif args.ham == 'sk':
#        ham = SKModel(args.n, args.beta, args.device, seed=args.seed)
#    elif args.ham == 'full':
#        ham = FullModel()
#    elif args.ham == 'buckey':
#        ham = buckyball_2(args.beta)
    else:
        raise ValueError('Unknown ham: {}'.format(args.ham))
    #ham.J.requires_grad = False

    net = MADE(**vars(args))
    net.to(args.device)
    my_log('{}\n'.format(net))

    params = list(net.parameters())
    params = list(filter(lambda p: p.requires_grad, params))
    nparams = int(sum([np.prod(p.shape) for p in params]))
    my_log('Total number of trainable parameters: {}'.format(nparams))

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(params, lr=args.lr)
    elif args.optimizer == 'sgdm':
        optimizer = torch.optim.SGD(params, lr=args.lr, momentum=0.9)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(params, lr=args.lr, alpha=0.99)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(params, lr=args.lr, betas=(0.9, 0.999))
    elif args.optimizer == 'adam0.5':
        optimizer = torch.optim.Adam(params, lr=args.lr, betas=(0.5, 0.999))
    else:
        raise ValueError('Unknown optimizer: {}'.format(args.optimizer))

    init_time = time.time() - start_time
    my_log('init_time = {:.3f}'.format(init_time))

    my_log('Training...')
    sample_time = 0
    train_time = 0
    start_time = time.time()
    if args.beta_anneal_to < args.beta:
        args.beta_anneal_to = args.beta
    beta = args.beta
    while beta <= args.beta_anneal_to:
        for step in range(args.max_step):
            optimizer.zero_grad()

            sample_start_time = time.time()
            with torch.no_grad():
                sample, x_hat = net.sample(args.batch_size)
            assert not sample.requires_grad
            assert not x_hat.requires_grad
            sample_time += time.time() - sample_start_time

            train_start_time = time.time()

            log_prob = net.log_prob(sample)
            with torch.no_grad():
                energy = ham.energy(sample)
                loss = log_prob + beta * energy
            assert not energy.requires_grad
            assert not loss.requires_grad
            loss_reinforce = torch.mean((loss - loss.mean()) * log_prob)
            loss_reinforce.backward()

            if args.clip_grad > 0:
                # Manual re-implementation of nn.utils.clip_grad_norm_(params, args.clip_grad):
                # compute the total gradient norm first, then rescale all gradients once.
                parameters = list(filter(lambda p: p.grad is not None, params))
                max_norm = float(args.clip_grad)
                norm_type = 2
                total_norm = 0
                for p in parameters:
                    param_norm = p.grad.data.norm(norm_type)
                    total_norm += param_norm.item()**norm_type
                total_norm = total_norm**(1 / norm_type)
                clip_coef = max_norm / (total_norm + args.epsilon)
                if clip_coef < 1:
                    for p in parameters:
                        p.grad.data.mul_(clip_coef)

            optimizer.step()

            train_time += time.time() - train_start_time

            if args.print_step and step % args.print_step == 0:
                free_energy_mean = loss.mean() / beta / args.n
                free_energy_std = loss.std() / beta / args.n
                entropy_mean = -log_prob.mean() / args.n
                energy_mean = energy.mean() / args.n
                mag = sample.mean(dim=0)
                mag_mean = mag.mean()
                if step > 0:
                    sample_time /= args.print_step
                    train_time /= args.print_step
                used_time = time.time() - start_time
                my_log(
                    'beta = {:.3g}, # {}, F = {:.8g}, F_std = {:.8g}, S = {:.5g}, E = {:.5g}, M = {:.5g}, sample_time = {:.3f}, train_time = {:.3f}, used_time = {:.3f}'
                    .format(
                        beta,
                        step,
                        free_energy_mean.item(),
                        free_energy_std.item(),
                        entropy_mean.item(),
                        energy_mean.item(),
                        mag_mean.item(),
                        sample_time,
                        train_time,
                        used_time,
                    ))
                sample_time = 0
                train_time = 0

        with open(args.fname, 'a', newline='\n') as f:
            f.write('{} {} {:.3g} {:.8g} {:.8g} {:.8g} {:.8g}\n'.format(
                args.n,
                args.seed,
                beta,
                free_energy_mean.item(),
                free_energy_std.item(),
                energy_mean.item(),
                entropy_mean.item(),
            ))

        if args.ham == 'hop':
            ensure_dir(args.out_filename + '_sample/')
            np.savetxt('{}_sample/sample{:.2f}.txt'.format(
                args.out_filename, beta),
                       sample.cpu().numpy(),
                       delimiter=' ',
                       fmt='%d')
            np.savetxt('{}_sample/log_prob{:.2f}.txt'.format(
                args.out_filename, beta),
                       log_prob.cpu().detach().numpy(),
                       delimiter=' ',
                       fmt='%.5f')

        beta += args.beta_inc
Example #9
state_dir = 'out'

ham_args, features = get_ham_args_features()
state_filename = '{state_dir}/{ham_args}/{features}/out{args.out_infix}_save/10000.state'.format(
    **locals())

target_layer = 1
num_channel = 1
out_dir = '../support/fig/filters/{ham_args}/{features}/layer{target_layer}'.format(
    **locals())

if __name__ == '__main__':
    ensure_dir(out_dir + '/')

    if args.net == 'made':
        net = MADE(**vars(args))
    elif args.net == 'pixelcnn':
        net = PixelCNN(**vars(args))
    else:
        raise ValueError('Unknown net: {}'.format(args.net))
    net.to(args.device)
    print('{}\n'.format(net))

    print(state_filename)
    state = torch.load(state_filename, map_location=args.device)
    net.load_state_dict(state['net'])

    sample = torch.zeros([num_channel, 1, args.L, args.L], requires_grad=True)
    nn.init.normal_(sample)

    optimizer = torch.optim.Adam([sample], lr=1e-3, weight_decay=1)
Example #10
def train(train_data, test_data, image_shape):
    """ Trains MADE model on binary image dataset.
        Arguments:
        train_data: A (n_train, H, W, 1) uint8 numpy array of binary images with values in {0, 1}
        test_data: An (n_test, H, W, 1) uint8 numpy array of binary images with values in {0, 1}
        image_shape: (H, W), height and width of the image

        Returns:
        - a (# of training iterations,) numpy array of train_losses evaluated every minibatch
        - a (# of epochs + 1,) numpy array of test_losses evaluated once at initialization and after each epoch
        - a numpy array of size (100, H, W, 1) of samples with values in {0, 1}
    """

    use_cuda = True
    device = torch.device('cuda') if use_cuda else torch.device('cpu')

    train_data = torch.from_numpy(
        train_data.reshape(
            (train_data.shape[0],
             train_data.shape[1] * train_data.shape[2]))).float().to(device)
    test_data = torch.from_numpy(
        test_data.reshape(
            (test_data.shape[0],
             test_data.shape[1] * test_data.shape[2]))).float().to(device)

    def nll_loss(batch, output):
        # binary_cross_entropy_with_logits is numerically stabler than
        # applying sigmoid followed by binary_cross_entropy
        return F.binary_cross_entropy_with_logits(output, batch)

    H, W = image_shape
    input_dim = H * W

    made = MADE(input_dim).to(device)
    epochs = 10
    lr = 0.005
    batch_size = 32

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=batch_size,
                                               shuffle=True)
    optimizer = torch.optim.Adam(made.parameters(), lr=lr)

    with torch.no_grad():
        init_test_loss = nll_loss(test_data, made(test_data))
    train_losses = []
    test_losses = [init_test_loss.item()]

    # Training
    for epoch in range(epochs):
        for batch in train_loader:
            optimizer.zero_grad()
            output = made(batch)
            loss = nll_loss(batch, output)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        with torch.no_grad():
            test_loss = nll_loss(test_data, made(test_data))
        test_losses.append(test_loss.item())
        print(f'{epoch + 1}/{epochs} epochs')

    # Generate samples
    made.eval()
    samples = torch.zeros(size=(100, H * W)).to(device)
    with torch.no_grad():
        for i in range(H * W):
            logits = made(samples)
            probas = torch.sigmoid(logits)
            pixel_i_samples = torch.bernoulli(probas[:, i])
            samples[:, i] = pixel_i_samples

    return np.array(train_losses), np.array(test_losses), samples.reshape(
        (100, H, W, 1)).detach().cpu().numpy()
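A minimal way to exercise this trainer, using random binary placeholder data (the shapes and sizes here are hypothetical, chosen only to match the docstring's interface):

import numpy as np

# Hypothetical smoke test with random 20x20 binary "images".
H, W = 20, 20
rng = np.random.RandomState(0)
train_imgs = (rng.rand(1000, H, W, 1) > 0.5).astype(np.uint8)
test_imgs = (rng.rand(200, H, W, 1) > 0.5).astype(np.uint8)

train_losses, test_losses, samples = train(train_imgs, test_imgs, (H, W))
print(train_losses.shape, test_losses.shape, samples.shape)  # -> (iters,), (epochs + 1,), (100, H, W, 1)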
Example #11
import torch
import tensorflow as tf
#import edward2 as ed
from org_edward2_made import MADE as ed_MADE_org
import numpy as np
from disc_utils import one_hot, one_hot_argmax, multiplicative_inverse, one_hot_add, one_hot_minus, one_hot_multiply
from made import MADE
vocab_size = 90
input_shape = [10, 4, vocab_size]
tf_made = ed_MADE_org(vocab_size, hidden_dims=[20, 20])
tf_made.build(input_shape)
print(tf_made.built)
#print(tf_made.network.get_weights())
print('making torch_MADE from ed2 conversion')
torch_made = MADE(input_shape, vocab_size, hidden_dims=[20, 20, 20])
print('torch made model', torch_made)

inp = torch.ones(input_shape)
res = torch_made(inp)
print('res shape', res.shape)
print('inputs::::', inp[0, 0, :], inp[0, 1, :], inp[0, 2, :])
print('outputs::::', res[0, 0, :], res[0, 1, :], res[0, 2, :])

inp_tf = tf.ones(input_shape)
res_tf = tf_made(inp_tf)
print('res shape', res_tf.shape)
print('inputs::::', inp_tf[0, 0, :], inp_tf[0, 1, :], inp_tf[0, 2, :])
print('outputs::::', res_tf[0, 0, :], res_tf[0, 1, :], res_tf[0, 2, :])
Example #12
    if args.gpu is not None:
        cuda.get_device(args.gpu).use()

    # reproducibility is good
    np.random.seed(42)

    # load the dataset
    print("loading binarized mnist from", args.data_path)
    mnist = np.load(args.data_path)
    xtr, xte = mnist['train_data'], mnist['valid_data']

    # construct model and ship to GPU
    hidden_list = list(map(int, args.hiddens.split(',')))
    model = MADE(xtr.shape[1],
                 hidden_list,
                 xtr.shape[1],
                 num_masks=args.num_masks,
                 gpu=args.gpu)
    if args.gpu is not None:
        model.to_gpu(args.gpu)
        xtr = cuda.to_gpu(xtr)
        xte = cuda.to_gpu(xte)

    # set up the optimizer
    opt = chainer.optimizers.Adam(alpha=1e-3, weight_decay_rate=1e-4)
    opt.setup(model)

    # start the training
    for epoch in range(100):
        print("epoch %d" % (epoch, ))
        run_epoch(
Example #13
    mnist = np.load(args.data_path)
    xtr, xva = mnist['train_data'], mnist['valid_data']
    # split validation set in validation + test set
    num_val = xva.shape[0] // 2
    xte = xva[:num_val, :]
    xva = xva[num_val:, :]
    xtr = torch.from_numpy(xtr).cuda()
    xva = torch.from_numpy(xva).cuda()
    xte = torch.from_numpy(xte).cuda()
    print('training_set: ' + str(xtr.shape))
    print('validation_set: ' + str(xva.shape))
    print('test_set: ' + str(xte.shape))
    # construct model and ship to GPU
    hidden_list = list(map(int, args.hiddens.split(',')))
    model = MADE(xtr.size(1),
                 hidden_list,
                 xtr.size(1),
                 num_masks=args.num_masks)
    print(model)
    print("number of model parameters:",
          sum([np.prod(p.size()) for p in model.parameters()]))
    model.cuda()

    # set up the optimizer
    #opt = torch.optim.Adagrad(model.parameters(), lr=1e-2, eps=1e-6)
    opt = torch.optim.Adadelta(model.parameters())
    epochs_no_improve = 0
    best_loss = math.inf
    best_epoch = 0
    path = './experiments/' + args.data_path + '/'
    for n in hidden_list:
        path += '_' + str(n)
Example #14
    n_RV = 374  # number of RVs
    scope_list = np.arange(n_RV)
    scope_temp = np.delete(scope_list, np.where(scope_list % 34 == 17))
    init_scope = list(np.delete(scope_temp, np.where(scope_temp % 34 == 33)))
    # modify data to remove 0 (imag) columns
    data_train = data_train[:, init_scope]
    data_pos = data_pos[:, init_scope]
    data_neg = data_neg[:, init_scope]

    xtr = torch.from_numpy(data_train).float().cuda()
    xte = torch.from_numpy(data_pos).float().cuda()
    xod = torch.from_numpy(data_neg).float().cuda()

    # construct model and ship to GPU
    hidden_list = list(map(int, args.hiddens.split(',')))
    model = MADE(xtr.size(1), hidden_list, xtr.size(1) * 2, num_masks=args.num_masks)
    print("number of model parameters:", sum([np.prod(p.size()) for p in model.parameters()]))
    model.cuda()

    # set up the optimizer
    opt = torch.optim.Adam(model.parameters(), args.learning_rate, weight_decay=args.weight_decay)
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=45, gamma=0.1)

    # list to store loss
    loss_tr = []
    loss_te = []
    loss_od = []
    # start the training
    for epoch in range(args.epoch):
        scheduler.step(epoch)
        loss_tr.append(run_epoch('train'))
Example #15
max_epochs = 1000
# -----------------------------------

# Get dataset.
data = get_data(dataset_name)
train = torch.from_numpy(data.train.x)
# Get data loaders.
train_loader, val_loader, test_loader = get_data_loaders(data, batch_size)
# Get model.
n_in = data.n_dims
if model_name.lower() == "maf":
    model = MAF(n_in, n_mades, hidden_dims)
elif model_name.lower() == "made":
    model = MADE(n_in,
                 hidden_dims,
                 random_order=random_order,
                 seed=seed,
                 gaussian=True)
# Get optimiser.
optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-6)

# Format name of model save file.
save_name = f"{model_name}_{dataset_name}_{'_'.join(str(d) for d in hidden_dims)}.pt"
# Initialise list for plotting.
epochs_list = []
train_losses = []
val_losses = []
# Initialise early stopping.
i = 0
max_loss = np.inf
# Training loop.
Example #16
    if verbose:
        print(f"{split} Average epoch loss: {np.mean(losses)}")
    return losses


if __name__ == "__main__":
    # load the dataset from some path
    mnist = np.load("binarized_mnist.npz")
    x_train, x_test = mnist["train_data"], mnist["valid_data"]
    x_train = torch.as_tensor(x_train).cuda()
    x_test = torch.as_tensor(x_test).cuda()

    hidden_list = [500]
    resample_every = 20

    model = MADE(x_train.size(1), hidden_list, x_train.size(1))
    print(
        f"number of model parameters: {np.sum([np.prod(p.size()) for p in model.parameters()])}"
    )
    model.cuda()

    opt = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=50, gamma=0.1)

    # The training
    for epoch in range(100):
        print(f"Epoch {epoch}")
        scheduler.step()

        # get an estimate of the test loss
        run_one_epoch("test", upto=5)
Example #17
state_filename = '{state_dir}/{model_args}/{features}/out{args.out_infix}_save/10000.state'.format(
    **locals())


def get_mean_err(count, x_sum, x_sqr_sum):
    x_mean = x_sum / count
    x_sqr_mean = x_sqr_sum / count
    x_std = sqrt(abs(x_sqr_mean - x_mean**2))
    x_err = x_std / sqrt(count)
    x_ufloat = ufloat(x_mean, x_err)
    return x_ufloat


if __name__ == '__main__':
    if args.net == 'made':
        net = MADE(**vars(args))
    elif args.net == 'pixelcnn':
        net = PixelCNN(**vars(args))
    else:
        raise ValueError('Unknown net: {}'.format(args.net))
    net.to(args.device)
    print('{}\n'.format(net))

    print(state_filename)
    state = torch.load(state_filename, map_location=args.device)
    ignore_param(state['net'], net)
    net.load_state_dict(state['net'])

    F_sum = 0
    F_sqr_sum = 0
    S_sum = 0
Example #18
    # reproducibility is good
    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)

    # load the dataset
    print("loading binarized mnist from", args.data_path)
    mnist = np.load(args.data_path)
    xtr, xte = mnist['train_data'], mnist['valid_data']
    xtr = torch.from_numpy(xtr).cuda()
    xte = torch.from_numpy(xte).cuda()

    # construct model and ship to GPU
    hidden_list = list(map(int, args.hiddens.split(',')))
    model = MADE(xtr.size(1),
                 hidden_list,
                 xtr.size(1),
                 num_masks=args.num_masks)
    print("number of model parameters:",
          sum([np.prod(p.size()) for p in model.parameters()]))
    model.cuda()

    # set up the optimizer
    opt = torch.optim.Adam(model.parameters(), 1e-3, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=45, gamma=0.1)

    # start the training
    for epoch in range(100):
        print("epoch %d" % (epoch, ))
        scheduler.step(epoch)
        run_epoch(
            'test',
Example #19
    def __init__(self, dim: int, hidden_dims: List[int], reverse: bool):
        super(MAFLayer, self).__init__()
        self.dim = dim
        self.made = MADE(dim, hidden_dims, gaussian=True, seed=None)
        self.reverse = reverse
Example #20
def main():
    start_time = time.time()

    init_out_dir()
    if args.clear_checkpoint:
        clear_checkpoint()
    last_step = get_last_checkpoint_step()
    if last_step >= 0:
        my_log('\nCheckpoint found: {}\n'.format(last_step))
    else:
        clear_log()
    print_args()

    if args.net == 'made':
        net = MADE(**vars(args))
    elif args.net == 'pixelcnn':
        net = PixelCNN(**vars(args))
    elif args.net == 'bernoulli':
        net = BernoulliMixture(**vars(args))
    else:
        raise ValueError('Unknown net: {}'.format(args.net))
    net.to(args.device)
    my_log('{}\n'.format(net))

    params = list(net.parameters())
    params = list(filter(lambda p: p.requires_grad, params))
    nparams = int(sum([np.prod(p.shape) for p in params]))
    my_log('Total number of trainable parameters: {}'.format(nparams))
    named_params = list(net.named_parameters())

    if args.optimizer == 'sgd':
        optimizer = torch.optim.SGD(params, lr=args.lr)
    elif args.optimizer == 'sgdm':
        optimizer = torch.optim.SGD(params, lr=args.lr, momentum=0.9)
    elif args.optimizer == 'rmsprop':
        optimizer = torch.optim.RMSprop(params, lr=args.lr, alpha=0.99)
    elif args.optimizer == 'adam':
        optimizer = torch.optim.Adam(params, lr=args.lr, betas=(0.9, 0.999))
    elif args.optimizer == 'adam0.5':
        optimizer = torch.optim.Adam(params, lr=args.lr, betas=(0.5, 0.999))
    else:
        raise ValueError('Unknown optimizer: {}'.format(args.optimizer))

    if args.lr_schedule:
        # 0.92**80 ~ 1e-3
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, factor=0.92, patience=100, threshold=1e-4, min_lr=1e-6)

    if last_step >= 0:
        state = torch.load('{}_save/{}.state'.format(args.out_filename,
                                                     last_step))
        ignore_param(state['net'], net)
        net.load_state_dict(state['net'])
        if state.get('optimizer'):
            optimizer.load_state_dict(state['optimizer'])
        if args.lr_schedule and state.get('scheduler'):
            scheduler.load_state_dict(state['scheduler'])

    init_time = time.time() - start_time
    my_log('init_time = {:.3f}'.format(init_time))

    my_log('Training...')
    sample_time = 0
    train_time = 0
    start_time = time.time()
    for step in range(last_step + 1, args.max_step + 1):
        optimizer.zero_grad()

        sample_start_time = time.time()
        with torch.no_grad():
            sample, x_hat = net.sample(args.batch_size)
        assert not sample.requires_grad
        assert not x_hat.requires_grad
        sample_time += time.time() - sample_start_time

        train_start_time = time.time()

        log_prob = net.log_prob(sample)
        # 0.998**9000 ~ 1e-8
        beta = args.beta * (1 - args.beta_anneal**step)
        with torch.no_grad():
            energy = ising.energy(sample, args.ham, args.lattice,
                                  args.boundary)
            loss = log_prob + beta * energy
        assert not energy.requires_grad
        assert not loss.requires_grad
        loss_reinforce = torch.mean((loss - loss.mean()) * log_prob)
        loss_reinforce.backward()

        if args.clip_grad:
            nn.utils.clip_grad_norm_(params, args.clip_grad)

        optimizer.step()

        if args.lr_schedule:
            scheduler.step(loss.mean())

        train_time += time.time() - train_start_time

        if args.print_step and step % args.print_step == 0:
            free_energy_mean = loss.mean() / args.beta / args.L**2
            free_energy_std = loss.std() / args.beta / args.L**2
            entropy_mean = -log_prob.mean() / args.L**2
            energy_mean = energy.mean() / args.L**2
            mag = sample.mean(dim=0)
            mag_mean = mag.mean()
            mag_sqr_mean = (mag**2).mean()
            if step > 0:
                sample_time /= args.print_step
                train_time /= args.print_step
            used_time = time.time() - start_time
            my_log(
                'step = {}, F = {:.8g}, F_std = {:.8g}, S = {:.8g}, E = {:.8g}, M = {:.8g}, Q = {:.8g}, lr = {:.3g}, beta = {:.8g}, sample_time = {:.3f}, train_time = {:.3f}, used_time = {:.3f}'
                .format(
                    step,
                    free_energy_mean.item(),
                    free_energy_std.item(),
                    entropy_mean.item(),
                    energy_mean.item(),
                    mag_mean.item(),
                    mag_sqr_mean.item(),
                    optimizer.param_groups[0]['lr'],
                    beta,
                    sample_time,
                    train_time,
                    used_time,
                ))
            sample_time = 0
            train_time = 0

            if args.save_sample:
                state = {
                    'sample': sample,
                    'x_hat': x_hat,
                    'log_prob': log_prob,
                    'energy': energy,
                    'loss': loss,
                }
                torch.save(state, '{}_save/{}.sample'.format(
                    args.out_filename, step))

        if (args.out_filename and args.save_step
                and step % args.save_step == 0):
            state = {
                'net': net.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            if args.lr_schedule:
                state['scheduler'] = scheduler.state_dict()
            torch.save(state, '{}_save/{}.state'.format(
                args.out_filename, step))

        if (args.out_filename and args.visual_step
                and step % args.visual_step == 0):
            torchvision.utils.save_image(
                sample,
                '{}_img/{}.png'.format(args.out_filename, step),
                nrow=int(sqrt(sample.shape[0])),
                padding=0,
                normalize=True)

            if args.print_sample:
                x_hat_np = x_hat.view(x_hat.shape[0], -1).cpu().numpy()
                x_hat_std = np.std(x_hat_np, axis=0).reshape([args.L] * 2)

                x_hat_cov = np.cov(x_hat_np.T)
                x_hat_cov_diag = np.diag(x_hat_cov)
                x_hat_corr = x_hat_cov / (
                    sqrt(x_hat_cov_diag[:, None] * x_hat_cov_diag[None, :]) +
                    args.epsilon)
                x_hat_corr = np.tril(x_hat_corr, -1)
                x_hat_corr = np.max(np.abs(x_hat_corr), axis=1)
                x_hat_corr = x_hat_corr.reshape([args.L] * 2)

                energy_np = energy.cpu().numpy()
                energy_count = np.stack(
                    np.unique(energy_np, return_counts=True)).T

                my_log(
                    '\nsample\n{}\nx_hat\n{}\nlog_prob\n{}\nenergy\n{}\nloss\n{}\nx_hat_std\n{}\nx_hat_corr\n{}\nenergy_count\n{}\n'
                    .format(
                        sample[:args.print_sample, 0],
                        x_hat[:args.print_sample, 0],
                        log_prob[:args.print_sample],
                        energy[:args.print_sample],
                        loss[:args.print_sample],
                        x_hat_std,
                        x_hat_corr,
                        energy_count,
                    ))

            if args.print_grad:
                my_log('grad max_abs min_abs mean std')
                for name, param in named_params:
                    if param.grad is not None:
                        grad = param.grad
                        grad_abs = torch.abs(grad)
                        my_log('{} {:.3g} {:.3g} {:.3g} {:.3g}'.format(
                            name,
                            torch.max(grad_abs).item(),
                            torch.min(grad_abs).item(),
                            torch.mean(grad).item(),
                            torch.std(grad).item(),
                        ))
                    else:
                        my_log('{} None'.format(name))
                my_log('')
Example #21
def run_epoch(split, upto=None):
    torch.set_grad_enabled(split == 'train')
    model.train() if split == 'train' else model.eval()
    nsamples = 1 if split == 'train' else args.samples
    x = xtr if split == 'train' else xte
    N, D = x.size()
    B = 128
    n_steps = N // B if upto is None else min(N // B, upto)
    losses = []
    for step in range(n_steps):
        xb = x[step * B: step * B + B]
        xbhat = torch.zeros_like(xb)
        for s in range(nsamples):
            # resample masks for order/connectivity-agnostic training
            if step % args.resample_every == 0 or split == 'test':
                model.update_masks()
            xbhat += model(xb)
        xbhat /= nsamples

        loss = F.binary_cross_entropy_with_logits(xbhat, xb, reduction='sum') / B
        lossf = loss.data.item()
        losses.append(lossf)

        if split == 'train':
            opt.zero_grad()
            loss.backward()
            opt.step()

    print("%s epoch avg loss: %f" % (split, np.mean(losses)))

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--data-path', required=True, type=str, help="Path to binarized_mnist.npz")
    parser.add_argument('-q', '--hiddens', type=str, default='500', help="Comma separated sizes for hidden layers, e.g. 500, or 500,500")
    parser.add_argument('-n', '--num-masks', type=int, default=1, help="Number of orderings for order/connection-agnostic training")
    parser.add_argument('-r', '--resample-every', type=int, default=20, help="For efficiency we can choose to resample orders/masks only once every this many steps")
    parser.add_argument('-s', '--samples', type=int, default=1, help="How many samples of connectivity/masks to average logits over during inference")
    args = parser.parse_args()

    np.random.seed(42)
    torch.manual_seed(42)
    torch.cuda.manual_seed_all(42)

    print("loading binarized mnist from", args.data_path)
    mnist = np.load(args.data_path)
    xtr, xte = mnist['train_data'], mnist['valid_data']
    xtr = torch.from_numpy(xtr).cuda()
    xte = torch.from_numpy(xte).cuda()

    # construct model and ship to GPU
    hidden_list = list(map(int, args.hiddens.split(',')))
    model = MADE(xtr.size(1), hidden_list, xtr.size(1), num_masks=args.num_masks)
    print("number of model parameters:",sum([np.prod(p.size()) for p in model.parameters()]))
    model.cuda()

    # set up the optimizer
    opt = torch.optim.Adam(model.parameters(), 1e-3, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(opt, step_size=45, gamma=0.1)
    
    # start the training
    for epoch in range(100):
        print("epoch %d" % (epoch, ))
        scheduler.step(epoch)
        run_epoch('test', upto=5) # run only a few batches for approximate test accuracy
        run_epoch('train')
    
    print("optimization done. full test set eval:")
    run_epoch('test')