Example #1
File: main.py  Project: afcarl/autopaint
def run_cond_aevb(base_data, cond_data):
    start_time = time.time()

    # Create aevb function
    # Training parameters

    D_c = cond_data.shape[1]
    D_b = base_data.shape[1]
    N_data = cond_data.shape[0]
    assert cond_data.shape[0] == base_data.shape[0]
    enc_layers = [
        D_c, hidden_units, hidden_units, hidden_units, 2 * latent_dimensions
    ]
    dec_layers = [
        latent_dimensions + D_b, hidden_units, hidden_units, hidden_units, D_c
    ]

    N_weights_enc, encoder, encoder_log_like = make_gaussian_nn(enc_layers)
    N_weights_dec, decoder, decoder_log_like = make_binary_nn(dec_layers)

    # Optimize aevb
    batch_size = 100
    num_training_iters = 1600
    rs = npr.RandomState(0)

    parser = WeightsParser()
    parser.add_shape('encoding weights', (N_weights_enc, ))
    parser.add_shape('decoding weights', (N_weights_dec, ))
    initial_combined_weights = rs.randn(len(parser)) * param_scale

    batch_idxs = make_batches(N_data, batch_size)

    def batch_value_and_grad(weights, iter):
        iter = iter % len(batch_idxs)
        # cur_base = base_data[batch_idxs[iter]]
        cur_cond = cond_data[batch_idxs[iter]]
        cur_im = base_data[batch_idxs[iter]]
        cur_b = apply_mask(cur_im)
        return lower_bound(weights, encoder, decoder_log_like, N_weights_enc,
                           cur_b, cur_cond, samples_per_image,
                           latent_dimensions, rs)

    lb_grad = grad(batch_value_and_grad)

    base_test = np.repeat(apply_mask(base_data[0:10, :]), 10, axis=0)

    def callback(params, i, grad):
        ml = batch_value_and_grad(params, i)
        print "log marginal likelihood:", ml

        # Generate samples
        num_samples = 100
        images_per_row = 10
        zs = rs.randn(100, latent_dimensions)
        # zs = rs.randn(10,latent_dimensions)
        # zs = np.repeat(zs,10,axis = 0)
        # base_test = base_data[0:num_samples,:]
        # base_test = np.repeat(base_data[0:10,:],10,axis = 0)
        dec_in = np.concatenate((zs, base_test), axis=1)
        samples = decoder(parser.get(params, 'decoding weights'), dec_in)
        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        # plot_images(samples, ax, ims_per_row=images_per_row)
        plot_shape = (100, 784)
        im_samples = np.zeros(plot_shape)
        im_mean = np.zeros(plot_shape)
        im_map = np.zeros(plot_shape)
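        # Each plotted row of 10 images: the first is the masked conditioning input, and the
        # remaining nine are decoder outputs (binomial samples, means, or rounded/MAP values).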
        for k in xrange(plot_shape[0]):
            if k % 10 == 0:
                im_samples[k, :] = base_test[k, :]
                im_mean[k, :] = base_test[k, :]
                im_map[k, :] = base_test[k, :]
            else:
                im_mean[k, :] = samples[k - 1, :]
                im_samples[k, :] = np.random.binomial(1, samples[k - 1, :])
                im_map[k, :] = np.round(samples[k - 1, :])

        plot_images(im_samples, ax, ims_per_row=images_per_row)
        plt.savefig('samples.png')

        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(im_mean, ax, ims_per_row=images_per_row)
        plt.savefig('mean_samples.png')

        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(im_map, ax, ims_per_row=images_per_row)
        plt.savefig('map_samples.png')

        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(base_test, ax, ims_per_row=images_per_row)
        plt.savefig('blurred_samples.png')

    final_params = adam(lb_grad,
                        initial_combined_weights,
                        num_training_iters,
                        callback=callback)

    def decoder_with_weights(zs):
        return decoder(parser.get(final_params, 'decoding weights'), zs)

    finish_time = time.time()
    print "total runtime", finish_time - start_time

    return decoder_with_weights
Example #2
 def dropout_grad(weights, i):
     mask = npr.RandomState(seed * 10**6 + i).rand(
         len(weights)) > dropout_fraction
     masked_weights = weights * mask / (1 - dropout_fraction)
     return grad(masked_weights, i)
 def init_var_params(D, rs=npr.RandomState(0), **kwargs):
     log_weights = np.ones(k)
     component_weights = [
         init_component_var_params(D, rs=rs, **kwargs) for i in range(k)
     ]
     return np.concatenate([log_weights] + component_weights)
Example #4
        # Initialize the pseudo inputs for the first layer by sampling from the data;
        # set the pseudo outputs equal to the inputs.
        x0[0] = np.ndarray.flatten(
            np.array(X)[
                rs.choice(len(X), num_pseudo_params, replace=False), :])
        y0[0] = x0[0]

        # For every other layer, set the inducing outputs equal to the inducing inputs
        # (which are sampled from N(0, .01)) and use a large lengthscale.
        for layer in xrange(1, n_layers):
            y0[layer] = x0[layer]
            layer_params[layer][3] = np.log(1)

        return pack_all_params(layer_params, x0, y0)

    # Initialize covariance parameters and hiddens.
    rs = npr.RandomState(1234)
    init_params = .1 * rs.randn(total_num_params)

    print("Optimizing covariance parameters...")
    objective = lambda params: -log_likelihood(params)

    if smart_init == 1:
        init_params = smart_initialize_params(init_params)

    plot_xs = np.reshape(np.linspace(-5, 5, 300), (300, 1))
    plot_full_gp(ax_first, init_params, plot_xs)
    ax_first.set_title("Initial full predictions")
    print("Objective: ", objective(init_params))

    cov_params = minimize(value_and_grad(objective),
                          init_params,
Example #5
def build_parabola(D=1, n_data=20, noise_std=0.1):
    rs = npr.RandomState(0)
    inputs = np.linspace(-4, 4, num=n_data)
    targets = inputs**2 + rs.randn(n_data) * noise_std
    inputs = inputs.reshape((len(inputs), D))
    return inputs, targets
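
A minimal usage sketch for the function above, assuming autograd-style imports as in the other snippets here:

import autograd.numpy as np          # assumed module-level imports
import autograd.numpy.random as npr

inputs, targets = build_parabola(D=1, n_data=20, noise_std=0.1)
print(inputs.shape)   # (20, 1)
print(targets.shape)  # (20,)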
Example #6
 # Model hyper-parameters
 T = 10
 Dx = 5
 Dy = 3
 alpha = 0.42
 r = 0.1
 obs = 'sparse'
 
 # Training parameters
 param_scale = 0.5
 num_epochs = 1000
 step_size = 0.001
 
 N = 4
 
 data_seed = npr.RandomState(0)
 model_params = init_model_params(Dx, Dy, alpha, r, obs, data_seed)
 
 print("Generating data...")
 x_true, y_true = generate_data(model_params, T, data_seed)
 
 lml = log_marginal_likelihood(model_params, T, y_true)
 print("True log-marginal likelihood: "+str(lml))
 
 seed = npr.RandomState(0)
 
 # Initialize proposal parameters
 prop_params = init_prop_params(T, Dx, param_scale, seed)
 combined_init_params = (model_params, prop_params)
 
 lgss_smc_obj = lgss_smc(T, Dx, Dy, N)
def sample_data(n_data=80, noise_std=0.1, context_size=3):
    rs = npr.RandomState(0)
    inputs  = np.linspace(-6,6,n_data)
    targets = np.sin(inputs)**3 + rs.randn(n_data) * noise_std
    return inputs[:, None], targets[:, None]
Example #8
 def sample(var_param, n_samples, seed=None):
     my_rs = rs if seed is None else npr.RandomState(seed)
     mean, beta = unpack_params(var_param)
     L = beta_to_L(beta)
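     # Rows of my_rs.randn(n_samples, dim) are standard normal, so x = z L + mean has covariance L^T L.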
     return np.dot( my_rs.randn(n_samples, dim), L) + mean
Example #9
 def sample(var_param, n_samples, seed=None):
     my_rs = rs if seed is None else npr.RandomState(seed)
     mean, log_scale = unpack_params(var_param)
     return mean + np.exp(log_scale)*my_rs.standard_t(df, size=(n_samples, dim))
Example #10
File: main.py  Project: afcarl/autopaint
def run_aevb(train_images):

    # run_aevb(train_images)


    start_time = time.time()

    # Create aevb function
    # Training parameters

    D = train_images.shape[1]

    dec_layers = [latent_dimensions, hidden_units,hidden_units, D]

    init_mean = np.zeros(latent_dimensions)
    init_log_stddevs = np.log(1*np.ones(latent_dimensions))
    init_log_stepsize = np.log(.001)

    rs = npr.RandomState(0)
    sample_and_run_grad, parser = build_grad_sampler(latent_dimensions,100, approx=True)
    N_weights_dec, decoder, decoder_log_like = make_binary_nn(dec_layers)
    N_weights_enc = len(parser)
    encoder = sample_and_run_grad
    parser.add_shape('decoding weights', (N_weights_dec,))

    params = np.zeros(len(parser))
    parser.put(params, 'mean', init_mean)
    parser.put(params, 'log_stddev', init_log_stddevs)
    parser.put(params, 'log_stepsize', init_log_stepsize)
    dec_w = get_pretrained_dec_w()
    parser.put(params, 'decoding weights',dec_w)
    # parser.put(params, 'decoding weights',rs.randn(N_weights_dec) * param_scale)

    # Optimize aevb
    batch_size = 100
    num_training_iters = 1600
    rs = npr.RandomState(0)

    batch_idxs = make_batches(train_images.shape[0], batch_size)

    def batch_value_and_grad(weights, iter):
        iter = iter % len(batch_idxs)
        cur_data = train_images[batch_idxs[iter]]
        return lower_bound(weights, encoder, decoder_log_like, N_weights_enc,
                           cur_data, samples_per_image, latent_dimensions, rs)

    lb_grad = grad(batch_value_and_grad)

    def callback(params, i, grad):
        ml = batch_value_and_grad(params,i)
        print "log marginal likelihood:", ml

        # Print params
        print 'norm of stddev', np.linalg.norm(np.exp(parser.get(params, 'log_stddev')))
        print 'stepsize', np.exp(parser.get(params, 'log_stepsize'))


        #Generate samples
        num_samples = 100
        images_per_row = 10
        zs = rs.randn(num_samples,latent_dimensions)
        # samples = np.random.binomial(1,decoder(parser.get(params, 'decoding weights'), zs))
        samples = decoder(parser.get(params, 'decoding weights'), zs)
        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=images_per_row)
        plt.savefig('samples.png')

    final_params = adam(lb_grad, params, num_training_iters, callback=callback)

    def decoder_with_weights(zs):
        return decoder(parser.get(final_params, 'decoding weights'), zs)

    finish_time = time.time()
    print "total runtime", finish_time - start_time

    return decoder_with_weights
def train_nn(pred_fun,
             loss_fun,
             num_weights,
             train_smiles,
             train_raw_targets,
             train_params,
             seed=0,
             validation_smiles=None,
             validation_raw_targets=None):
    """loss_fun has inputs (weights, smiles, targets)"""
    print("Total number of weights in the network:", num_weights)
    init_weights = npr.RandomState(seed).randn(
        num_weights) * train_params['init_scale']

    num_print_examples = 100
    train_targets, undo_norm = normalize_array(train_raw_targets)
    training_curve = []

    def callback(weights, iter):
        if iter % 10 == 0:
            train_preds = undo_norm(
                pred_fun(weights, train_smiles[:num_print_examples]))
            cur_loss = loss_fun(weights, train_smiles[:num_print_examples],
                                train_targets[:num_print_examples])
            training_curve.append(cur_loss)
            print("Iteration", iter, "loss", cur_loss,\
                "train RMSE", rmse(train_preds, train_raw_targets[:num_print_examples]),\
                "max of weights", np.max(np.abs(weights)), end="")
            if validation_smiles is not None:
                validation_preds = undo_norm(
                    pred_fun(weights, validation_smiles))
                print("Validation RMSE",
                      iter,
                      ":",
                      rmse(validation_preds, validation_raw_targets),
                      end="")
            print("")

    # Build gradient using autograd.
    print("gradding")
    grad_fun = grad(loss_fun)

    grad_fun_with_data = build_batched_grad(
        grad=grad_fun,
        batch_size=train_params['batch_size'],
        inputs=train_smiles,
        targets=train_targets)
    # grad_fun_with_data computes the gradient of the loss given the weights and the
    # batch number (which selects the training inputs and targets for that batch).

    print("optimizing")
    # Optimize weights.
    trained_weights = adam(grad_fun_with_data,
                           init_weights,
                           callback=callback,
                           num_iters=train_params['num_iters'],
                           step_size=train_params['step_size'])

    print("optimized")

    def predict_func(new_smiles):
        """Returns to the original units that the raw targets were in."""
        return undo_norm(pred_fun(trained_weights, new_smiles))

    return predict_func, trained_weights, training_curve
Example #12
def init_net_params(scale, layer_sizes, rs=npr.RandomState(0)):
    """Build a (weights, biases) tuples for all layers."""
    return [(scale * rs.randn(m, n),   # weight matrix
             scale * rs.randn(n))      # bias vector
            for m, n in zip(layer_sizes[:-1], layer_sizes[1:])]
Example #13
File: mlbl.py  Project: yuviiii/7788
    def train(self, X, indX, XY, V, indV, VY, IM, VIM, count_dict, word_dict, embed_map, prog):
        """
        Trains the LBL
        """
        self.start = self.seed
        self.init_params(embed_map, count_dict)
        self.step = 0
        inds = np.arange(len(X))
        numbatches = len(inds) / self.batchsize
        tic = time.time()
        bleu = [0.0]*4
        best = 0.0
        scores = '/'.join([str(b) for b in bleu])
        patience = 10
        count = 0
        done = False
        # delete
        x=[]
        y=[]
        # Main loop
        lm_tools.display_phase(1)
        for epoch in range(self.maxepoch):
            if done:
                break
            self.epoch = epoch            
            prng = npr.RandomState(self.seed + epoch + 1)
            prng.shuffle(inds)
            for minibatch in range(numbatches):
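                # Strided minibatching: take every numbatches-th shuffled index, offset by minibatch.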

                batchX = X[inds[minibatch::numbatches]]
                batchY = XY[inds[minibatch::numbatches]].toarray()
                batchindX = indX[inds[minibatch::numbatches]].astype(int).flatten()
                batchindX = np.floor(batchindX/5).astype(int)
                batchIm = IM[batchindX]

                loss_val = self.compute_obj(self.params, batchX, batchIm, batchY)                
                self.backward(self.params, batchX, batchIm, batchY)
                self.update_params(batchX)
                
                if np.isnan(loss_val):
                    print 'NaNs... breaking out'
                    done = True
                    break

                # Print out progress
                if np.mod(minibatch * self.batchsize, prog['_details']) == 0 and minibatch > 0:
                    print "epoch/pts: %04d/%05d, cross-entropy loss: %.2f, time: %.2f" % (epoch+1, minibatch * self.batchsize, loss_val, (time.time()-tic)/60)
                if np.mod(minibatch * self.batchsize, prog['_samples']) == 0 and minibatch > 0:
                    print "best: %s" % (scores)
                    print '\nSamples:'
                    # lm_tools.generate_and_show(self, word_dict, count_dict, VIM, k=3)
                    VIM_example = VIM[prog['_val_example_idx'], :]
                    VIM_file_list = prog['_val_example_file']
                    #lm_tools.generate_and_show_html(self, word_dict, count_dict, VIM_example, VIM_file_list, show=prog['_show_browser'], k=3)
                    print ' '
                if np.mod(minibatch * self.batchsize, prog['_update']) == 0 and minibatch > 0:
                    self.update_hyperparams()
                    self.step += 1
                    print "learning rate: %.4f, momentum: %.4f" % (self.eta_t, self.p_t)

                # Compute BLEU
                if np.mod(minibatch * self.batchsize, prog['_bleu']) == 0 and minibatch > 0:
                    bleu = lm_tools.compute_bleu(self, word_dict, count_dict, VIM, prog, k=3)
                    x.append(minibatch * self.batchsize)
                    y.append(bleu[-1])
                    if bleu[-1] >= best:
                        count = 0
                        best = bleu[-1]
                        scores = '/'.join([str(b) for b in bleu])
                        print 'bleu score = {}'.format(bleu[-1])
                        lm_tools.save_model(self, self.loc)
                    else:
                        count += 1
                        if count == patience:
                            done = True
                            break

            self.update_hyperparams()
            self.step += 1
        pl.plot(x,y)
        pl.show()
        return best
Example #14
File: rnn.py  Project: zzc1996/autograd
def create_rnn_params(input_size, state_size, output_size,
                      param_scale=0.01, rs=npr.RandomState(0)):
    return {'init hiddens': rs.randn(1, state_size) * param_scale,
            'change':       rs.randn(input_size + state_size + 1, state_size) * param_scale,
            'predict':      rs.randn(state_size + 1, output_size) * param_scale}
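
For orientation, a hedged sketch of how parameters in this layout are typically consumed: the extra "+1" row in 'change' and 'predict' acts as a bias via an appended column of ones. The helper names below are illustrative, not taken from the source file.

import autograd.numpy as np
import autograd.numpy.random as npr

def concat_and_multiply(weights, *args):
    # Append a column of ones so the last row of `weights` acts as a bias.
    cat_state = np.hstack(args + (np.ones((args[0].shape[0], 1)),))
    return np.dot(cat_state, weights)

def rnn_step(params, inputs, hiddens):
    # inputs: (batch, input_size), hiddens: (batch, state_size)
    new_hiddens = np.tanh(concat_and_multiply(params['change'], inputs, hiddens))
    outputs = concat_and_multiply(params['predict'], new_hiddens)
    return new_hiddens, outputs

params = create_rnn_params(input_size=4, state_size=8, output_size=3)
hiddens = np.repeat(params['init hiddens'], 5, axis=0)   # batch of 5
hiddens, outputs = rnn_step(params, npr.randn(5, 4), hiddens)
print(hiddens.shape)   # (5, 8)
print(outputs.shape)   # (5, 3)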
Example #15
def init_random_params(scale, layer_sizes, rs=npr.RandomState(0)):
    """Build a list of (weights, biases) tuples, one for each layer."""
    return [(rs.randn(insize, outsize) * scale,  # weight matrix
             rs.randn(outsize) * scale)  # bias vector
            for insize, outsize in zip(layer_sizes[:-1], layer_sizes[1:])]
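
A small forward-pass sketch that consumes the (weights, biases) layout above; tanh is just a placeholder nonlinearity, and the imports mirror the other autograd snippets:

import autograd.numpy as np
import autograd.numpy.random as npr

def neural_net_predict(params, inputs):
    # Apply each (W, b) pair in turn: tanh on hidden layers, identity on the output layer.
    for W, b in params[:-1]:
        inputs = np.tanh(np.dot(inputs, W) + b)
    W, b = params[-1]
    return np.dot(inputs, W) + b

params = init_random_params(scale=0.1, layer_sizes=[2, 16, 1])
print(neural_net_predict(params, npr.randn(5, 2)).shape)   # (5, 1)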
Example #16
 def sample(var_param, n_samples, seed=None):
     my_rs = rs if seed is None else npr.RandomState(seed)
     mean, log_std = unpack_params(var_param)
     return my_rs.randn(n_samples, dim) * np.exp(log_std) + mean
    train_data = train_data.reshape(len(train_data), 1)

    # train_data = np.loadtxt('/cluster/mshen/prj/gans/out/2017-06-19/a_generate/X.csv', delimiter = ',')
    # train_data = train_data.reshape(len(train_data), 1)

    init_gen_params = init_random_params(param_scale, gen_layer_sizes)
    init_dsc_params = init_random_params(param_scale, dsc_layer_sizes)

    num_batches = int(np.ceil(len(train_data) / batch_size))

    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)

    # Define training objective
    seed = npr.RandomState(1)

    def objective(gen_params, dsc_params, iter):
        idx = batch_indices(iter)
        return gan_objective(gen_params, dsc_params, train_data[idx],
                             batch_size, noise_dim, seed)

    # Get gradients of objective using autograd.
    both_objective_grad = multigrad(objective, argnums=[0, 1])

    print(
        "     Epoch     |    Objective  |       Fake probability | Real Probability  "
    )

    def print_perf(gen_params, dsc_params, iter, gen_gradient, dsc_gradient):
        if iter % 10 == 0:
Example #18
# Load MNIST and Set Up Data
N_data, train_images, train_labels, test_images, test_labels = load_mnist()
train_images = np.round(train_images[0:10000])
train_labels = train_labels[0:10000]
test_images = np.round(test_images[0:10000])

# Starter Code for 4d
# A correct solution here only requires you to correctly write the neglogprob!
# Because this setup is numerically finicky,
# the default parameterization I've given should give results if neglogprob is correct.
K = 30
D = 784

# Random initialization, with set seed for easier debugging
# Try changing the weighting of the initial randomization, default 0.01
init_params = npr.RandomState(0).randn(K, D) * 0.01

# Implemented batching for you
batch_size = 10
num_batches = int(np.ceil(len(train_images) / batch_size))


def batch_indices(iter):
    idx = iter % num_batches
    return slice(idx * batch_size, (idx + 1) * batch_size)


# This is numerically stable code for the log of a Bernoulli density.
# In particular, notice that we're keeping everything in log space and using logaddexp;
# we never want to leave log space, for numerical stability.
def bernoulli_log_density(targets, unnormalized_logprobs):
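
The body of bernoulli_log_density is cut off above. For reference only, one numerically stable way to write such a density, staying in log space with logaddexp as the comments describe (a sketch, not necessarily the intended solution):

import autograd.numpy as np   # assumed import, matching the rest of the starter code

def bernoulli_log_density_sketch(targets, unnormalized_logprobs):
    # targets in {0, 1}; unnormalized_logprobs are logits of the same shape.
    # log p(t | theta) = t * theta - log(1 + exp(theta)) = t * theta - logaddexp(0, theta)
    return targets * unnormalized_logprobs - np.logaddexp(0.0, unnormalized_logprobs)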
Example #19
def init_prop_params(T, Dx, scale = 0.5, rs = npr.RandomState(0)):
    return [(scale * rs.randn(Dx), # Bias
             1. + scale * rs.randn(Dx), # Linear times A/mu0
             scale * rs.randn(Dx)) # Log-var
            for t in range(T)]
Example #20
File: main.py  Project: afcarl/autopaint
def run_variational_network(train_images, N_weights_dec, decoder,
                            decoder_log_like, trained_weights, all_mean):
    start_time = time.time()

    # Create aevb function
    # Training parameters

    D = train_images.shape[1]

    enc_layers = [D, hidden_units, hidden_units, 2 * latent_dimensions]

    N_weights_enc, encoder, encoder_log_like = make_gaussian_nn(enc_layers)

    # Optimize aevb
    batch_size = 10
    num_training_iters = 1600
    rs = npr.RandomState(0)

    parser = WeightsParser()
    parser.add_shape('encoding weights', (N_weights_enc, ))
    initial_enc_w = rs.randn(len(parser)) * param_scale

    batch_idxs = make_batches(train_images.shape[0], batch_size)
    banded_cov = create_banded_cov(all_cov.shape[0], 10)
    log_prior = build_logprob_mvn(all_mean, banded_cov)

    def batch_value_and_grad(enc_w, iter):
        iter = iter % len(batch_idxs)
        cur_data = train_images[batch_idxs[iter]]
        return enc_lower_bound(enc_w, trained_weights, encoder,
                               decoder_log_like, log_prior, N_weights_enc,
                               cur_data, samples_per_image, latent_dimensions,
                               rs)

    lb_grad = grad(batch_value_and_grad)

    def callback(params, i, grad):
        ml = batch_value_and_grad(params, i)
        print "log marginal likelihood:", ml
        #Generate samples
        num_samples = 100
        images_per_row = 10
        # zs = train_images[0:100,:]
        zs = np.zeros((100, 10))
        zs[:, 1] = .5
        zs[:, 5] = .5
        (mus, log_sigs) = encoder(params, zs)
        # sigs = np.exp(log_sigs)
        # noise = rs.randn(1,100,784)
        # samples = mus + sigs*noise
        # samples = np.reshape(samples,(100*1,784),order = 'F')
        samples = mus

        fig = plt.figure(1)
        fig.clf()
        ax = fig.add_subplot(111)
        plot_images(samples, ax, ims_per_row=images_per_row)
        plt.savefig('samples.png')

    final_params = adam(lb_grad,
                        initial_enc_w,
                        num_training_iters,
                        callback=callback)

    def decoder_with_weights(zs):
        return decoder(parser.get(final_params, 'decoding weights'), zs)

    finish_time = time.time()
    print "total runtime", finish_time - start_time

    return decoder_with_weights
Example #21
def main(num_petridish_iter=1, num_top_points=2, perf_thresh=0.97,
         layer_sizes=[2, 3, 1], L2_reg=0.00001, param_scale=0.15,
         batch_size=20, num_epochs=20, step_size=0.01,
         hyper_iter=600, hyper_step_size=2.0, hyper_decay=0.4,
         hyper_decay_after=200, hyper_decay_every=150, hyper_L2_reg=0.00001,
         rank_loss_scaling_factor=100.0, mse_tolerance=None,
         outputFname='/tmp/results',
         train_sorted_c_r_list=None, full_sorted_c_r_list=None,
         fake_train_images=None, fake_train_labels=None,
         fake_valid_images=None, fake_valid_labels=None):
    
    seed = None 
    random_state=npr.RandomState(seed=seed) #seed is supplied from outerloop_petridish
    
    #Generate weight initialization
    #Following two lines will Generate new init file from Macbook with petridish (seed=0) for each new layer_size config 
    #init_params_list = petridish.generate_init_params(full_sorted_c_r_list, param_scale, layer_sizes, random_state)
    dir_path = os.path.dirname(os.path.realpath(__file__))+'/'
    init_param_fname = dir_path+'init_param_files/'+str(layer_sizes)+'_'+str(param_scale)+'.pkl'
    #helper.save_pickle(init_param_fname, init_params_list)
    #init_params_list = helper.load_pickle(init_param_fname)
    
    if fake_valid_images is None or fake_valid_labels is None:
        fake_valid_images = np.copy(fake_train_images)
        fake_valid_labels = np.copy(fake_train_labels)

    fake_train_images = random_state.randn(batch_size, layer_sizes[0])
    fake_train_labels = np.round(random_state.rand(batch_size, layer_sizes[-1])) #np.random.randint(2, size=(batch_size, layer_sizes[-1]))
    #fake_valid_images = random_state.randn(batch_sexp_numze, layer_sizes[0])
    #fake_valid_labels = np.random.randint(2, size=(batch_size, layer_sizes[-1]))
    fake_valid_images = np.copy(fake_train_images)
    fake_valid_labels = np.copy(fake_train_labels)

    #1. Warm up petridish with K number of points 
    #   a. call petridish with specific weight initialization, and other hyper-parameters 
    #   b. returns learnt training data
    
    #Sample K=4 random points (for now 6 fixed points)
    best_scores = [] #List of ground-truth values of the best predicted scores at each iteration. Start with the first K scores.
    for c, r in train_sorted_c_r_list:
        best_scores.append(r)  #for the warm-up, best scores have ground-truth values of the initial training points
    
    #Warming up Petridish
    success_flag = False
    iteration = 0
    lines = []  #Contains all the logs to be returned
    while iteration < num_petridish_iter:
    
        
        lines.append('Outerloop: Iteration '+ str(iteration)+'\n')
        lines.append('train_sorted_c_r_list ' + str(train_sorted_c_r_list)+'\n') 
    
        outputFname_iter = outputFname#+str(iteration)+'.txt'
        #if os.path.exists(outputFname_iter):
        #    os.remove(outputFname_iter) #Remove file if it exists so that we do not keep appending to it
        
        combined_init_params, mask_params, init_params = helper.get_init_param_list(1, train_sorted_c_r_list, param_scale, layer_sizes, None, random_state)
        #combined_init_params, mask_params = petridish.create_combined_init_params(init_params_list[0:len(train_sorted_c_r_list)], layer_sizes)
        
        #Train
        results = petridish.main(layer_sizes= layer_sizes, L2_reg= L2_reg, param_scale = param_scale, batch_size = batch_size, num_epochs = num_epochs, step_size = step_size, hyper_iter =hyper_iter, hyper_step_size = hyper_step_size, hyper_decay = hyper_decay, hyper_decay_after = hyper_decay_after, hyper_decay_every = hyper_decay_every, hyper_L2_reg = hyper_L2_reg, rank_loss_scaling_factor = rank_loss_scaling_factor, mse_tolerance = mse_tolerance, outputFname = outputFname_iter, sorted_c_r_list = train_sorted_c_r_list, init_params_list=None, combined_init_params = combined_init_params, mask_params = mask_params, random_state = random_state, hyper_train = True, fake_train_images = fake_train_images, fake_train_labels = fake_train_labels, fake_valid_images = fake_valid_images, fake_valid_labels = fake_valid_labels)
      
        #Extract trained data (This currently returns True if rank order correct)
        (train_mse, success_flag, train_score_list, learnt_images, learnt_train_labels, learnt_valid_images, learnt_valid_labels) = results
        for idx, (c, r) in enumerate(train_sorted_c_r_list):
            lines.append('Outerloop: Train Point Scores '+ str(c)+' ' + str(-train_score_list[idx])+'\n')
        
        ##If results did not converge, then check for intermediate convergence (first MSE convergence and then rank convergence)
        #if success_flag == False:
        #    extracted_learnt_images = helper.read_images_from_file(fname=outputFname_iter, num_images=len(fake_train_images), checkStr="MSE Loss converged")
        #    if extracted_learnt_images is None: #No MSE convergence
        #       extracted_learnt_images  = helper.read_images_from_file(fname=outputFname_iter, num_images=len(fake_train_images), checkStr="Rank order correct")
        #    else:
        #       print ('Outerloop: MSE converged')
        #if extracted_learnt_images is not None:
        #    learnt_images = extracted_learnt_images
        #    print ('Outerloop: Rank correct')
    
    #2. Run full grid-search (architecture search/auto-tune) with petridish
    #   a. Evaluate hundreds of points cheaply by running petridish in test/evaluation mode (no hyper-training)
    #   b. Take Y (say two) points from petridish (two best? or best and worst?)
    
        #Reduce the number of points over which we will do petridish search 
        #Exclude the points that we have already evaluated ground truth for. No need predicting those
        #For now, predict all for debugging purposes. But for selecting top predicted points, select from test_sorted_c_r_list (TODO)
        test_sorted_c_r_list = sorted(list(set(full_sorted_c_r_list)-set(train_sorted_c_r_list)), key = lambda tup: tup[1], reverse=True) #TODO: Change this approach of set
        #if len(test_sorted_c_r_list) != (len(full_sorted_c_r_list) - len(train_sorted_c_r_list)):
        #    print ("Outerloop: Error in Computing test_sorted_c_r_list"); import sys; sys.exit()
        #combined_init_params, mask_params = petridish.create_combined_init_params(init_params_list[0:len(test_sorted_c_r_list)], layer_sizes)
        combined_init_params, mask_params, _ = helper.get_init_param_list(1, test_sorted_c_r_list, param_scale, layer_sizes, init_params, random_state)
        
        #Test (Setting hyper_train=False and fake_train_images = learn_images)
        results = petridish.main(layer_sizes= layer_sizes, L2_reg= L2_reg, param_scale = param_scale, batch_size = batch_size, num_epochs = num_epochs, step_size = step_size, hyper_iter =hyper_iter, hyper_step_size = hyper_step_size, hyper_decay = hyper_decay, hyper_decay_after = hyper_decay_after, hyper_decay_every = hyper_decay_every, hyper_L2_reg = hyper_L2_reg, rank_loss_scaling_factor = rank_loss_scaling_factor, sorted_c_r_list = test_sorted_c_r_list, init_params_list=None, combined_init_params = combined_init_params, mask_params = mask_params, random_state = random_state, hyper_train = False, fake_train_images = learnt_images, fake_train_labels = fake_train_labels, fake_valid_images = learnt_valid_images, fake_valid_labels = learnt_valid_labels)
    
        #Extract Results 
        (test_mse, success_flag, test_score_list, learnt_images, learnt_train_labels, learnt_valid_images, learnt_valid_labels) = results 
        for idx, (c, r) in enumerate(test_sorted_c_r_list):
            lines.append('Outerloop: Test Point Scores '+ str(c)+' '+ str(-test_score_list[idx])+'\n')
        top_c_r_list = helper.extract_top_arch(test_score_list, test_sorted_c_r_list, K=num_top_points-1) #Extract top num_top_points-1 points with best predicted performance
        
        #Select one point randomly (not the best). This is to prevent the model from greedily selecting low-performance points in a local region
        #Ideally, this should be done with some random probability i.e initially, when the model is poor, do random selection
        #Later, when the model is more accurate, do only greedy selection
        reduced_c_r_list = [(c,r) for (c,r) in test_sorted_c_r_list if (c,r) not in top_c_r_list]
        randomly_selected_c_r_list = helper.randomly_select_K(reduced_c_r_list, perf_thresh, K=1) 

        top_c_r_list = top_c_r_list + randomly_selected_c_r_list
        
        lines.append('Train and Test mse '+ str(train_mse)+' '+ str(test_mse)+'\n')
    
    #3.   Evaluate ground truth for the points with best predicted score
        for idx, (c, r) in enumerate(top_c_r_list):
            if idx == num_top_points: 
                break
            best_scores.append(r) #Evaluate ground-truth for the best points and append to this list
    
    #4. Stopping condition - if we reached the best
        lines.append('Outerloop: Scores '+ str(best_scores)+'\n')
        for score in best_scores:
            if (score >= perf_thresh):
                lines.append('Outerloop: Found best solution'+str(best_scores)+'\n')
                return (lines, best_scores, train_mse, test_mse)
        
        lines.append('Adding Top predicted points '+str(top_c_r_list)+'\n') 
        #Continue - Add Y top (one or two) points to the train_sorted_c_r_list for further evaluation
        train_sorted_c_r_list = helper.add_new_arch_ground_truth(train_sorted_c_r_list, top_c_r_list)
        
    #5. Run petridish again with additional points (Restart or resume from learnt images/learn images+learnt weights?)
    
    #6. Go Back to 2.
        
        iteration = iteration + 1
    
    lines.append('Outerloop: NOT Found best solution'+str(best_scores)+'\n')
    return (lines, best_scores, train_mse, test_mse) 
Example #22
test_images = np.round(test_images[0:10000])
# test_labels = np.round(test_images[0:10000]) a typo
test_labels = test_labels[0:10000]

K = 10
prior_std = 10.

# Choose two pixels and plot the K specific weights against each other
contourK = 2
px1 = 392 # Middle Pixel
# px2 = px1 + 28*5 +1 # Middle Pixel + 5 rows down
px2 = px1+14 # Middle left-most edge

# Random initialization, with set seed for easier debugging
# Try changing the weighting of the initial randomization, default 0.01
init_params = (npr.RandomState(0).randn(K, D) * 0.01, npr.RandomState(1).randn(K, D) * 0.01)



def logistic_logprob(params, images, labels):
    # params is a block of S x K x D params
    # images is N x D
    # labels is N x K one-hot
    # return S logprobs, summing over N
    mul = np.einsum('skd,nd->snk', params, images)
    normalized = mul - logsumexp(mul, axis=-1, keepdims=True)
    return np.einsum('snk,nk->s', normalized, labels)
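
A quick shape sanity check for logistic_logprob with small toy sizes (one-hot labels built by hand); it assumes autograd.numpy as np and numpy.random as npr, plus the logsumexp import that the function itself relies on:

S, N, K_toy, D_toy = 3, 5, 4, 7
toy_params = npr.randn(S, K_toy, D_toy) * 0.01
toy_images = npr.randn(N, D_toy)
toy_labels = np.eye(K_toy)[npr.randint(0, K_toy, size=N)]   # N x K one-hot
print(logistic_logprob(toy_params, toy_images, toy_labels).shape)   # (3,)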

def diag_gaussian_log_density(x, mu, log_std):
    # assumes that mu and log_std are (S x K x D),
    # so we sum out the last two dimensions.
Example #23
def build_step_function_dataset(D=1, n_data=40, noise_std=0.1):
    rs = npr.RandomState(0)
    inputs = np.linspace(-2, 2, num=n_data)
    targets = np.sign(inputs) + rs.randn(n_data) * noise_std
    inputs = inputs.reshape((len(inputs), D))
    return inputs, targets
Example #24
def objective(var_params, iter):
    return -elbo_estimate(var_params, logprob_given_data,
                          num_samples=50, rs=npr.RandomState(iter))
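
elbo_estimate and logprob_given_data are defined elsewhere in that source file. Purely as a sketch of an estimator with a compatible signature, assuming a mean-field Gaussian variational family packed as [mean, log_std] (the actual implementation may differ):

import autograd.numpy as np
import autograd.numpy.random as npr

def elbo_estimate_sketch(var_params, logprob, num_samples=50, rs=npr.RandomState(0)):
    # Diagonal-Gaussian q with var_params = concatenate([mean, log_std]).
    D = len(var_params) // 2
    mean, log_std = var_params[:D], var_params[D:]
    samples = mean + np.exp(log_std) * rs.randn(num_samples, D)   # reparameterized draws
    # The Gaussian entropy is available in closed form, so ELBO = E_q[log p(z)] + H[q].
    entropy = 0.5 * D * (1.0 + np.log(2 * np.pi)) + np.sum(log_std)
    return np.mean(logprob(samples)) + entropy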
Example #25
def build_mog_bbsvi(logprob, num_samples, k=10, rs=npr.RandomState(0)):
    init_component_var_params = init_gaussian_var_params
    component_log_density = variational_log_density_gaussian
    component_sample = sample_diag_gaussian

    def unpack_mixture_params(mixture_params):
        log_weights = log_normalize(mixture_params[:k])
        var_params = np.reshape(mixture_params[k:], (k, -1))
        return log_weights, var_params

    def init_var_params(D, rs=npr.RandomState(0), **kwargs):
        log_weights = np.ones(k)
        component_weights = [
            init_component_var_params(D, rs=rs, **kwargs) for i in range(k)
        ]
        return np.concatenate([log_weights] + component_weights)

    def sample(var_mixture_params, num_samples, rs):
        """Sample locations aren't a continuous function of parameters
        due to multinomial sampling."""
        log_weights, var_params = unpack_mixture_params(var_mixture_params)
        samples = np.concatenate([
            component_sample(params_k, num_samples, rs)[:, np.newaxis, :]
            for params_k in var_params
        ],
                                 axis=1)
        ixs = np.random.choice(k, size=num_samples, p=np.exp(log_weights))
        return np.array([samples[i, ix, :] for i, ix in enumerate(ixs)])

    def mixture_log_density(var_mixture_params, x):
        """Returns a weighted average over component densities."""
        log_weights, var_params = unpack_mixture_params(var_mixture_params)
        component_log_densities = np.vstack(
            [component_log_density(params_k, x) for params_k in var_params]).T

        # print ((component_log_densities).shape)
        # print ((component_log_densities + log_weights).shape)

        # print (logsumexp(component_log_densities + log_weights, axis=1, keepdims=False).shape)

        return logsumexp(component_log_densities + log_weights,
                         axis=1,
                         keepdims=False)  #over clusters

    def mixture_elbo(var_mixture_params, t):
        # We need to only sample the continuous component parameters,
        # and integrate over the discrete component choice

        def mixture_lower_bound(params):
            """Provides a stochastic estimate of the variational lower bound."""
            samples = component_sample(params, num_samples, rs)
            log_qs = mixture_log_density(var_mixture_params, samples)

            log_ps = logprob(samples, t)

            log_ps = np.reshape(log_ps, (num_samples, -1))
            log_qs = np.reshape(log_qs, (num_samples, -1))

            return np.mean(log_ps - log_qs)  #over samples

            # log_w = log_ps - log_qs
            # elbo = logmeanexp(log_w)

            # return elbo

        log_weights, var_params = unpack_mixture_params(var_mixture_params)
        component_elbos = np.stack(
            [mixture_lower_bound(params_k) for params_k in var_params])

        # print (component_elbos.shape)
        # print (log_weights.shape)

        # return np.sum(component_elbos + log_weights) #over clusters
        return np.sum(component_elbos * np.exp(log_weights))

    return init_var_params, mixture_elbo, mixture_log_density, sample
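
A hedged sketch of how the returned tuple could be wired into autograd's adam optimizer, assuming the helpers and module-level rs that build_mog_bbsvi references are importable from its source module; the standard-normal target below is a toy stand-in, not part of the source:

import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import adam

def toy_logprob(samples, t):
    # Unnormalized standard-normal target; one log density per sample row.
    return -0.5 * np.sum(samples**2, axis=1)

init_var_params, mixture_elbo, mixture_log_density, sample = \
    build_mog_bbsvi(toy_logprob, num_samples=100, k=3)

var_params = init_var_params(D=2)
neg_elbo_grad = grad(lambda params, t: -mixture_elbo(params, t))
var_params = adam(neg_elbo_grad, var_params, num_iters=200, step_size=0.05)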
def train_nn(pred_fun,
             loss_fun,
             num_weights,
             train_aa,
             train_raw_targets,
             train_params,
             seed=0,
             validation_aa=None,
             validation_raw_targets=None):
    """loss_fun has inputs (weights, smiles, targets)"""
    print "Total number of weights in the network:", num_weights
    init_weights = npr.RandomState(seed).randn(
        num_weights) * train_params['init_scale']

    num_print_examples = 32
    train_targets, undo_norm = normalize_array(train_raw_targets)
    training_curve = [[], [], []]  # training loss, train RMSE, validation RMSE

    def callback(weights, iter):
        if iter % 10 == 0:
            print "max of weights", np.max(np.abs(weights))
            selection = npr.choice(train_aa.size, size=num_print_examples)
            train_preds = undo_norm(pred_fun(weights, train_aa[selection]))
            cur_loss = loss_fun(weights, train_aa[selection],
                                train_targets[selection])
            #            train_preds = undo_norm(pred_fun(weights, train_aa[:num_print_examples]))
            #            cur_loss = loss_fun(weights, train_aa[:num_print_examples], train_targets[:num_print_examples])
            training_curve[0].append(cur_loss)
            train_RMSE = rmse(train_preds, train_raw_targets[selection])
            #            train_RMSE = rmse(train_preds, train_raw_targets[:num_print_examples])
            training_curve[1].append(train_RMSE)
            print "Iteration", iter, "loss", cur_loss,\
                  "train RMSE", train_RMSE,
            if validation_aa is not None:
                selection = npr.choice(validation_aa.size,
                                       size=num_print_examples)
                validation_preds = undo_norm(
                    pred_fun(weights, validation_aa[selection]))
                val_RMSE = rmse(validation_preds,
                                validation_raw_targets[selection])
                training_curve[2].append(val_RMSE)
                print "Validation RMSE", iter, ":", val_RMSE,

    # Build gradient using autograd.
    grad_fun = grad(loss_fun)
    grad_fun_with_data = build_batched_grad(grad_fun,
                                            train_params['batch_size'],
                                            train_aa, train_targets)

    # Optimize weights.
    trained_weights = adam(grad_fun_with_data,
                           init_weights,
                           callback=callback,
                           num_iters=train_params['num_iters'],
                           step_size=train_params['step_size'])

    def predict_func(new_aa):
        """Returns to the original units that the raw targets were in."""
        return undo_norm(pred_fun(trained_weights, new_aa))

    return predict_func, trained_weights, training_curve
Example #27
def init_net_params(layer_sizes, scale=0.1, rs=npr.RandomState(0)):
    return [(scale * rs.randn(m, n), scale * rs.randn(n))
            for m, n in zip(layer_sizes[:-1], layer_sizes[1:])]
    combined_init_params = (init_gen_params, init_rec_params)
    # print(np.array(init_gen_params).shape)
    # print(np.array(init_rec_params).shape)

    print("Loading training data...")
    X = bimodal_data()
    print(X.shape)

    num_batches = int(np.ceil(len(X) / batch_size))

    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx + 1) * batch_size)

    # Define training objective
    seed = npr.RandomState(0)

    def objective(combined_params, iter):
        data_idx = batch_indices(iter)
        gen_params, rec_params = combined_params
        return -vae_lower_bound(gen_params, rec_params, X[data_idx],
                                seed) / data_dim

    # Get gradients of objective using autograd.
    objective_grad = grad(objective)

    # To visualize:
    # plt.ion()
    # plt.show(block=False)

        plot_xs = np.reshape(np.linspace(-5, 5, 300), (300, 1))
        pred_mean, pred_cov = combined_predict_fun(params, X, y, plot_xs)
        plot_gp(ax_end_to_end, X, y, pred_mean, pred_cov, plot_xs)
        ax_end_to_end.set_title("X to y")

        layer1_params, layer2_params, hiddens = unpack_all_params(params)
        h_star_mean, h_star_cov = predict_layer_funcs[0](
            layer1_params, X, hiddens, plot_xs)
        y_star_mean, y_star_cov = predict_layer_funcs[0](
            layer2_params, np.atleast_2d(hiddens).T, y, plot_xs)

        plot_gp(ax_x_to_h, X, hiddens, h_star_mean, h_star_cov, plot_xs)
        ax_x_to_h.set_title("X to hiddens")

        plot_gp(ax_h_to_y, np.atleast_2d(hiddens).T,
                y, y_star_mean, y_star_cov, plot_xs)
        ax_h_to_y.set_title("hiddens to y")

        plt.draw()
        plt.pause(1.0 / 60.0)

    # Initialize covariance parameters and hiddens.
    rs = npr.RandomState(0)
    init_params = 0.1 * rs.randn(total_num_params)

    print("Optimizing covariance parameters...")
    def objective(params): return -log_marginal_likelihood(params)
    cov_params = minimize(value_and_grad(objective), init_params, jac=True,
                          method='CG', callback=callback)
    plt.pause(10.0)
Example #30
def fit_nn_reg(X,
               y,
               hidden_layer_sizes,
               batch_size,
               epochs,
               X_test,
               y_test,
               mean_y_train=0.0,
               std_y_train=1.0,
               nonln='relu',
               weight_prior_std=1.0,
               noise_var=0.1,
               plot_toy=False):

    layer_sizes = np.array([X.shape[1]] + hidden_layer_sizes + [1])
    if nonln == 'tanh':
        nonlinearity = np.tanh
    elif nonln == 'relu':
        nonlinearity = lambda x: np.maximum(x, 0.0)
    elif nonln == 'rbf':
        nonlinearity = lambda x: norm.pdf(x, 0, 1)
    elif nonln == 'sin':
        nonlinearity = lambda x: np.sin(x)
    elif nonln == 'sigmoid':
        nonlinearity = lambda x: 1 / (1 + np.exp(-x))
    else:
        raise ValueError('Unknown nonlinearity: ' + str(nonln))

    num_weights, predictions, logprob, get_error \
        = make_nn_funs(layer_sizes, nonlinearity=nonlinearity, weight_prior_std=weight_prior_std, noise_var=noise_var)
    logprob_grad = grad(logprob)
    Ntrain = X.shape[0]

    print("    Epoch      |   train RMSE   |   test RMSE")

    if plot_toy:
        # Set up figure.
        fig = plt.figure(figsize=(12, 8), facecolor='white')
        ax = fig.add_subplot(111, frameon=True)
        plt.show(block=False)

    def print_perf(epoch, w):
        rmse_train = get_error(w, X, y, location=0.0, scale=1.0)
        rmse_test = get_error(w, X_test, y_test, location=0.0, scale=1.0)
        print("{0:15}|{1:15}|{2:15}|".format(epoch, rmse_train, rmse_test))

        if plot_toy:
            # Plot data and functions.
            plt.cla()
            ax.plot(X.ravel(), y.ravel(), 'bx')
            plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300, 1))
            outputs = predictions(w, plot_inputs)
            ax.plot(plot_inputs, outputs)
            ax.set_ylim([-1, 1])
            plt.draw()
            plt.pause(1.0 / 60.0)

    # Train with adam
    batch_idxs = make_batches(X.shape[0], batch_size)

    # Initialize parameters
    rs = npr.RandomState(0)
    init_weights = 0.1 * rs.randn(num_weights)
    w = init_weights
    N_test = X_test.shape[0]

    m1 = 0
    m2 = 0
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    alpha = 1e-2
    t = 0
    log_prob_vec = []
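    # Hand-rolled Adam-style updates (ascent on logprob): m1 and m2 track biased first and
    # second moment estimates of the gradient, with the usual (1 - beta**t) bias correction.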
    for epoch in range(epochs):
        permutation = np.random.choice(range(X.shape[0]),
                                       X.shape[0],
                                       replace=False)
        print_perf(epoch, w)
        for idxs in batch_idxs:
            t += 1
            lp = logprob(w, X[permutation[idxs]], y[permutation[idxs]],
                         X.shape[0])
            log_prob_vec.append(lp)
            grad_w = logprob_grad(w, X[permutation[idxs]],
                                  y[permutation[idxs]], X.shape[0])
            m1 = beta1 * m1 + (1 - beta1) * grad_w
            m2 = beta2 * m2 + (1 - beta2) * grad_w**2
            m1_hat = m1 / (1 - beta1**t)
            m2_hat = m2 / (1 - beta2**t)
            w += alpha * m1_hat / (np.sqrt(m2_hat) + epsilon)

    return w, np.array(log_prob_vec)