Example #1
    def fit(self, x_train, y_train, params, reg_param=None):
        ''' Wrapper for MLE through gradient descent '''
        assert x_train.shape[0] == self.params['D_in']
        assert y_train.shape[0] == self.params['D_out']

        ### make objective function for training
        self.objective, self.gradient = self.make_objective(x_train, y_train, reg_param)

        ### set up optimization
        step_size = 0.01
        max_iteration = 5000
        check_point = 100
        weights_init = self.weights.reshape((1, -1))
        mass = None
        optimizer = 'adam'
        random_restarts = 5

        if 'step_size' in params.keys():
            step_size = params['step_size']
        if 'max_iteration' in params.keys():
            max_iteration = params['max_iteration']
        if 'check_point' in params.keys():
            check_point = params['check_point']
        if 'init' in params.keys():
            weights_init = params['init']
        if 'call_back' in params.keys():
            call_back = params['call_back']
        if 'mass' in params.keys():
            mass = params['mass']
        if 'optimizer' in params.keys():
            optimizer = params['optimizer']
        if 'random_restarts' in params.keys():
            random_restarts = params['random_restarts']

        if 'call_back' not in params.keys():
            def call_back(weights, iteration, g):
                ''' Default actions per optimization step: record traces and report progress '''
                objective = self.objective(weights, iteration)
                self.objective_trace = np.vstack((self.objective_trace, objective))
                self.weight_trace = np.vstack((self.weight_trace, weights))
                if iteration % check_point == 0:
                    mag = np.linalg.norm(self.gradient(weights, iteration))
                    print("Iteration {} objective {}; gradient mag: {}".format(iteration, objective, mag))

        ### train with random restarts
        optimal_obj = 1e16
        optimal_weights = self.weights

        for i in range(random_restarts):
            if optimizer == 'adam':
                adam(self.gradient, weights_init, step_size=step_size, num_iters=max_iteration,
                     callback=call_back)
            local_opt = np.min(self.objective_trace[-100:])

            if local_opt < optimal_obj:
                optimal_obj = local_opt
                opt_index = np.argmin(self.objective_trace[-100:])
                self.weights = self.weight_trace[-100:][opt_index].reshape((1, -1))
            weights_init = self.random.normal(0, 1, size=(1, self.D))

        self.objective_trace = self.objective_trace[1:]
        self.weight_trace = self.weight_trace[1:]
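
All of these examples share the same calling convention: an objective and gradient with signature (params, iteration), an initial parameter array, and optional step_size, num_iters and a per-iteration callback(params, iteration, gradient). A minimal self-contained sketch of that convention, assuming the optimizer is imported from autograd.misc.optimizers (older autograd releases expose the same functions as autograd.optimizers):

import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import adam

def objective(params, t):
    # Simple quadratic bowl; t is the iteration index adam passes along.
    return np.sum((params - 3.0) ** 2)

def callback(params, t, g):
    if t % 50 == 0:
        print("iter {:4d} | objective {:.4f}".format(t, objective(params, t)))

opt_params = adam(grad(objective), np.zeros(2),
                  step_size=0.1, num_iters=200, callback=callback)
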
Example #2
    def fit(self, step_size=1e-2, max_iteration=5000, check_point=None, params_init=None, call_back=None, verbose=True, optimizer='adam', mass=None, reset=True):
        ''' Optimization of the variational objective '''
        if check_point is not None:
            self.check_point = check_point

        if params_init is None:
            mean_init = self.random.normal(0, 0.1, size=self.D)
            parametrized_var_init = self.random.normal(0, 0.1, size=self.D)
            params_init = np.concatenate([mean_init, parametrized_var_init])

        assert len(params_init) == 2 * self.D

        self.verbose = verbose

        if call_back is None:
            call_back = self.call_back

        if reset:
            self.ELBO = np.empty((1, 1))
            self.variational_params = np.empty((1, 2 * self.D))

        if optimizer == 'adam':
            adam(self.gradient, params_init, step_size=step_size, num_iters=max_iteration, callback=call_back)
        elif optimizer == 'sgd':
            if mass is None:
                mass = 1e-16
            sgd(self.gradient, params_init, step_size=step_size, num_iters=max_iteration, callback=call_back, mass=mass)
        elif optimizer == 'debug':
            params = params_init
            for i in range(max_iteration):
                params -= step_size * self.gradient(params, i)
                self.debug_call_back(params, i)

        self.variational_params = self.variational_params[1:]
        self.ELBO = self.ELBO[1:]
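
For reference, a small sketch of the 'sgd' branch above under the same (params, iteration) convention, again assuming autograd.misc.optimizers; in autograd's sgd the mass argument plays the role of the momentum coefficient, which is why the near-zero value used above effectively disables momentum:

import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import sgd

def objective(params, t):
    return np.sum(params ** 2)

opt_params = sgd(grad(objective), np.ones(3),
                 step_size=0.05, num_iters=100, mass=0.9)
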
Example #3
    def find_minimum(self, current_x, constant, target, initial_guess, n_timesteps, n_steps, mode = 'MPC'):
        self.current_x = current_x
        self.target_x = target
        self.time = n_timesteps
        if mode == 'MPC':
            param_vec = constant
            new_Cin = adam(self.MPC_grad_wrapper, param_vec, num_iters=n_steps, step_size=0.01)
            return new_Cin

        elif mode == 'param_est':
            self.Cin = constant
            param_vec = initial_guess
            new_param_vec = adam(self.param_est_grad_wrapper, param_vec, num_iters = n_steps)
            return new_param_vec
Example #4
    def train(self, n_iters=100, n_mc_samples=200, callback=None):
        def discriminator_loss(params, x_p, x_q):
            logit_p = sigmoid(self.discriminator.predict(params, x_p))
            logit_q = sigmoid(self.discriminator.predict(params, x_q))
            loss = agnp.mean(agnp.log(logit_q)) + agnp.mean(
                agnp.log(1 - logit_p))
            return loss

        grad_discriminator_loss = autograd.elementwise_grad(discriminator_loss)

        # Train the generator, fixing the discriminator
        def generator_loss(params, z):
            og = self.generator.predict(params, z)[0, :, :]
            ratio = self.discriminator.predict(self.discriminator.get_params(),
                                               og)
            preds = sigmoid(ratio)
            op_preds = 1 - preds
            ll = agnp.mean(ratio) - agnp.mean(self.model.log_prob(og))
            return ll

        grad_generator_loss = autograd.elementwise_grad(generator_loss)

        for i in range(n_iters):
            print("Iteration %d " % (i + 1))
            # Fix the generator, train the discriminator
            # Sample random generator samples
            z = agnp.random.uniform(-10, 10, size=(n_mc_samples, 20))

            # Samples from the prior
            prior_samples = agnp.random.uniform(-10,
                                                10,
                                                size=(n_mc_samples,
                                                      self.n_params))
            var_dist_samples = self.generator.predict(
                self.generator.get_params(), z)[0, :, :]

            # Requires a differentiable Discriminator
            ret = adam(
                lambda x, i: -grad_discriminator_loss(x, prior_samples,
                                                      var_dist_samples),
                self.discriminator.get_params())
            self.discriminator.set_params(ret)

            # Requires a differentiable Generator
            ret = adam(lambda x, i: grad_generator_loss(x, z),
                       self.generator.get_params(),
                       callback=callback)
            self.generator.set_params(ret)
Example #5
def train_bnn(data='expx', n_data=50, n_samples=20, arch=[1,20,1],
              prior_params=None, prior_type=None, act='rbf',
              iters=300, lr=0.01, plot=True, save=False):

    if type(data) == str:
        inputs, targets = build_toy_dataset(data=data, n_data=n_data)
    else:
        inputs, targets = data

    if plot: fig, ax = p.setup_plot()

    init_params = init_var_params(arch)

    def loss(params, t):
        return vlb_objective(params, inputs, targets, arch, n_samples, act=act,
                             prior_params=prior_params, prior_type=prior_type)



    def callback(params, t, g):
        plot_inputs = np.linspace(-10, 10, num=500)[:, None]

        f_bnn = sample_bnn(params, plot_inputs, 5, arch, act)
        #print(params[1])
        # Plot data and functions.
        p.plot_iter(ax, inputs, plot_inputs, targets, f_bnn)
        print("ITER {} | LOSS {}".format(t, -loss(params, t)))

    var_params = adam(grad(loss), init_params,
                      step_size=lr, num_iters=iters, callback=callback)

    return var_params

def train_SBLVbnn(inputs, targets, dimz=1, dimx=1, dimy=1,
                arch = [20, 20], lr=0.01, iters=500, n_samples=10, act=rbf):

    arch = [dimx+dimz] + arch + [dimy]
    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    plt.ion()
    plt.show(block=False)

    def objective(params, t):
        return vlb_objective(params, inputs, targets, arch, n_samples, act)

    def callback(params, t, g):
        N_samples, nd = 5, 80
        plot_inputs = np.linspace(-8, 8, num=80)
        f_bnn = sample_bnn(params, plot_inputs[:, None], N_samples, arch, act)
        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'k.')
        ax.plot(plot_inputs, f_bnn.T, color='r')
        ax.set_ylim([-5, 5])
        plt.draw()
        plt.pause(1.0 / 60.0)

        print("ITER {} | OBJ {}".format(t, -objective(params, t)))

    var_params = adam(grad(objective), init_var_params(arch, dimz),
                      step_size=lr, num_iters=iters, callback=callback)

    return var_params
Example #7
    def train(self, num_iters=100):

        trainable_params = self.getTrainableParamsFromCheckpoint(
            'saved_params.p')

        # Callback to run the test set and update the next graph
        def callback(full_params, i, g):

            if (i and i % 100 == 0):
                self.saveParamsToCheckpoint(full_params)

            # Every 500 steps, run the algorithm on the test set
            if (i % 500 == 0):
                true_labels, predicted_labels = [], []
                for graph_and_fbs in self.test_set:
                    if (np.random.random() < 0.3):
                        continue
                    true_labels.append(graph_and_fbs[0].inheritancePattern)
                    predicted_labels.append(
                        self.predict(full_params, graph_and_fbs, 1))

                # Print the confusion matrix and kappa score on the test set
                self.printMetrics(true_labels, predicted_labels)

            # Swap out the current graph and update the current label
            self.updateCurrentGraphAndLabel(training=True)

        # Optimize
        grads = grad(self.fullLoss)
        final_params = adam(grads,
                            trainable_params,
                            num_iters=num_iters,
                            callback=callback)
        return final_params
Example #8
    def train(self,
              n_mc_samples,
              n_elbo_samples=20,
              step_size=0.01,
              num_iters=1000,
              verbose=False,
              callback=None):
        def variational_objective(params, var_it, n_mc_samples=n_mc_samples):
            samples = self.v_dist.sample(params, n_mc_samples)
            elbo = self.v_dist.entropy(params) + agnp.mean(
                self.model.log_prob(samples))
            return -elbo

        if verbose:

            def cb(params, i, g):
                print("Negative ELBO: %f" % variational_objective(
                    params, i, n_mc_samples=n_elbo_samples))
                if callback is not None:
                    callback(params, i, g)
        else:
            cb = callback

        grad_elbo = autograd.elementwise_grad(variational_objective)
        ret = adam(lambda x, i: grad_elbo(x, i),
                   self.v_dist.get_params(),
                   step_size=step_size,
                   num_iters=num_iters,
                   callback=cb)
        self.v_dist.set_params(ret)
        return ret
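
The training loop above only assumes that self.v_dist exposes sample, entropy, get_params and set_params, and that gradients can flow through sample (the reparameterization trick). A hypothetical diagonal-Gaussian v_dist satisfying that interface, shown purely for illustration:

import autograd.numpy as agnp

class DiagGaussian:
    ''' Hypothetical variational distribution: params = [mean, log_std] concatenated. '''
    def __init__(self, dim):
        self.dim = dim
        self._params = agnp.zeros(2 * dim)

    def get_params(self):
        return self._params

    def set_params(self, params):
        self._params = params

    def sample(self, params, n):
        mean, log_std = params[:self.dim], params[self.dim:]
        # Reparameterized samples so gradients flow back to mean and log_std.
        return mean + agnp.exp(log_std) * agnp.random.randn(n, self.dim)

    def entropy(self, params):
        log_std = params[self.dim:]
        # Entropy of a diagonal Gaussian: 0.5 * D * (1 + ln(2 pi)) + sum(log sigma).
        return 0.5 * self.dim * (1.0 + agnp.log(2 * agnp.pi)) + agnp.sum(log_std)
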
Example #9
File: nn_ag.py Project: pnickl/reg
    def fit(self,
            target,
            input,
            nb_epochs=500,
            batch_size=16,
            lr=1e-3,
            verbose=True):

        nb_batches = int(np.ceil(len(input) / batch_size))

        def batch_indices(iter):
            idx = iter % nb_batches
            return slice(idx * batch_size, (idx + 1) * batch_size)

        def _objective(params, iter):
            self.params = params
            idx = batch_indices(iter)
            return self.cost(target[idx], input[idx])

        def _callback(params, iter, grad):
            if iter % (nb_batches * 10) == 0:
                self.params = params
                if verbose:
                    print('Epoch: {}/{}.............'.format(
                        iter // nb_batches, nb_epochs),
                          end=' ')
                    print("Loss: {:.4f}".format(self.cost(target, input)))

        _gradient = grad(_objective)

        self.params = adam(_gradient,
                           self.params,
                           step_size=lr,
                           num_iters=nb_epochs * nb_batches,
                           callback=_callback)
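
The batching scheme here appears in several of these examples: adam sees a single global iteration counter, and batch_indices reduces it modulo the number of batches, so num_iters = nb_epochs * nb_batches sweeps the data nb_epochs times. A tiny standalone illustration with made-up sizes:

import numpy as np

n_points, batch_size = 103, 16
nb_batches = int(np.ceil(n_points / batch_size))   # 7 slices per epoch

for it in range(2 * nb_batches):                   # two epochs of iterations
    idx = it % nb_batches
    batch = slice(idx * batch_size, (idx + 1) * batch_size)
    # the objective for iteration it would be evaluated on data[batch]
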
Example #10
    def train(self, train_x, train_y, iters):

        self.train_x = train_x
        self.train_y = train_y

        self.train_loss = []
        self.pbar = tqdm(total=iters, desc='Optimising parameters')

        init_params = self.params
        # Optimisation via Autograd's implementation of Adam
        optimised_params = adam(grad(self.objective_train),
                                init_params,
                                step_size=0.01,
                                num_iters=iters,
                                callback=self.callback)

        self.params = optimised_params
        self.pbar.close()

        # Plot evolution of training loss
        means = []
        for i in range(iters):
            if i == 0:
                means.append(self.train_loss[i])
            else:
                mean = ((means[(i - 1)] * i) + self.train_loss[i]) / (i + 1)
                means.append(mean)

        plt.plot(self.train_loss, label='SE Loss')
        plt.plot(means, c='r', linewidth=3, label='Average SE Loss')
        plt.title("Training Error")
        plt.legend()
        plt.show()

        return
Example #11
    def trainMarginal(self, num_iters=100):

        params = {}
        for group, dist in self.params.emission_dists.items():
            params[group] = dist.recognizer_params

        emission_grads = grad(self.marginalLoss)

        def callback(x, i, g):
            if (i % 25 == 0):
                print('i', i)

        gs = emission_grads(params)

        opt_params = adam(emission_grads,
                          params,
                          num_iters=num_iters,
                          callback=callback)

        # Update the model parameters
        for group in self.params.emission_dists.keys():
            self.params.emission_dists[group].recognizer_params = opt_params[
                group]

        return opt_params
Example #12
    def trainSVAE(self, num_iters=100):

        svae_params = ({}, {})
        for group, dist in self.params.emission_dists.items():
            svae_params[0][group] = dist.recognizer_params
            svae_params[1][group] = dist.generative_hyper_params

        emission_grads = grad(self.svaeLoss)

        def callback(x, i, g):
            if (i % 25 == 0):
                print('i', i)

        opt_params = adam(emission_grads,
                          svae_params,
                          num_iters=num_iters,
                          callback=callback)

        # Update the model parameters
        for group in self.params.emission_dists.keys():
            self.params.emission_dists[group].recognizer_params = opt_params[
                0][group]
            self.params.emission_dists[
                group].generative_hyper_params = opt_params[1][group]

        return opt_params
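
Note that svae_params is a tuple of dicts of arrays; autograd's optimizers flatten and unflatten arbitrary nested containers of parameters (a later example states that "The optimizers provided can optimize lists, tuples, or dicts of parameters"), so passing a structured object works the same as passing a flat vector. A small sketch of that behaviour, assuming autograd.misc.optimizers and with made-up parameter names:

import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import adam

def loss(params, t):
    # params is a dict of arrays; autograd flattens/unflattens it transparently.
    return np.sum(params['w'] ** 2) + np.sum((params['b'] - 1.0) ** 2)

init = {'w': np.ones(3), 'b': np.zeros(2)}
opt = adam(grad(loss), init, step_size=0.1, num_iters=200)
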
def train_bnn(inputs, targets, arch=[1, 20, 20, 1], lr=0.01, iters=50, n_samples=10, act=np.tanh):

    fig = plt.figure(facecolor='white')
    ax = fig.add_subplot(111)
    plt.ion()
    plt.show(block=False)


    def objective(params, t):
        return vlb_objective(params, inputs, targets, arch, n_samples, act)

    def callback(params, t, g):
        # Sample functions from posterior f ~ p(f|phi) or p(f|varphi)
        N_samples, nd = 5, 400
        plot_inputs = np.linspace(-8, 8, num=400)
        f_bnn = sample_bnn(params, plot_inputs[:,None], N_samples, arch, act)

        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'k.')
        ax.plot(plot_inputs, f_bnn.T, color='r')
        ax.set_ylim([-5, 5])
        plt.draw()
        plt.pause(1.0 / 60.0)

        print("ITER {} | OBJ {}".format(t, -objective(params, t)))

    var_params = adam(grad(objective), init_var_params(arch),
                      step_size=lr, num_iters=iters, callback=callback)

    return var_params
    def fit(self,
            X,
            y,
            batch_size=5,
            n_iter=10000,
            lr=0.001,
            lr_type='constant'):
        X = np.array(X).astype(np.float32)
        y = np.array(y).reshape(-1, 1).astype(np.float32)

        m, n = X.shape

        epochs = ceil(n_iter / floor(m / batch_size))

        # print(epochs)

        objective_grad = grad(self.objective)

        for i in range(epochs):
            for j in range(0, m, batch_size):

                self.X_batch = X[j:j + batch_size]
                self.y_batch = y[j:j + batch_size]

                step_size = lr

                self.params = adam(objective_grad,
                                   self.params,
                                   step_size=step_size,
                                   num_iters=1)

            self.X_batch = X
            self.y_batch = y
Example #15
    def fit(self, X, y):
        def objective(weights, iteration):
            # The sum of squared errors
            squared_error = (y - self.predict(X, weights))**2
            return np.sum(squared_error)

        def callback(weights, iteration, g):
            it = iteration + 1
            if it % self.checkpoint == 0 or it in {1, self.num_iters}:
                obj = objective(weights, iteration)
                padding = int(np.log10(self.num_iters) + 1)
                print(
                    f"[Iteration {it:{padding}d}] Sum of squared errors: {obj:.6f}"
                )

        # Ensure that X is two-dimensional
        X = np.asarray(X).reshape(-1, 1)
        y = np.asarray(y)

        # Reinitialize the weights vector
        weights_init = self.random.normal(size=self.n_weights)

        # Run optimization
        self.weights = adam(
            grad(objective),
            weights_init,
            num_iters=self.num_iters,
            step_size=self.step_size,
            callback=callback,
        )
def adam_solve(lambda_flows,
               grad_energy_bound,
               samples,
               u_func,
               h,
               m=1000,
               step_size=0.001):
    '''
        Uses adam solver to optimize the energy bound
    '''
    output = np.copy(
        lambda_flows)  # Copies so original parameters are not modified
    print("BEFORE LEARNING:\n{}".format(output))
    grad_energy_bound = autograd.grad(
        energy_bound)  # Autograd gradient of energy bound
    g_eb = lambda lambda_flows, i: grad_energy_bound(
        lambda_flows,
        samples,
        h,
        u_func,
        #beta= (0.1 + i/1000))
        beta=min(1, 0.01 + i / 10000))  # Annealing
    output = adam(g_eb,
                  output,
                  num_iters=m,
                  callback=callback,
                  step_size=step_size)
    print("AFTER LEARNING:\n{}".format(output))

    # Resample and flow a larger number of samples to better show fit
    samples = np.random.randn(20000)[:, np.newaxis]
    samples_flowed = flow_samples(output, samples, h)
    np.savetxt("./linear_plots/flow_params.txt", output)
    return samples_flowed
Example #17
def fit_nn(x, y, arch):
    def nll(weights, t):
        return map_objective(weights, x, y)

    return adam(grad(nll),
                init_random_params(arch),
                step_size=0.05,
                num_iters=500)
Example #18
    def pack(self):
        print("   Iter  |    Ball radius        |     Density  ")
        self.logits = adam(
            grad(lambda logits, i: -1 * self.ball_radius(self.box_warp(logits), i)),
            self.logits,
            num_iters=self.n_iters,
            callback=self.print_status)
        # one more print at final iteration
        self.print_status(i=self.n_iters)
    def fit(self, X, method="exact"):
        '''
        function: fit
        Description: Fit the model to the data in X. method can either be  "exact" 
        for standard maximum likelihood learning using the exact marginal log likelihood,
        or "bbsvl" for black-box stochastic variational learning using diagonal
        Gaussian variational posteriors. The optimized W and Psi parameters should be stored
        in member variables W and Psi after learning. 
            
        Inputs:
            X -    (np.array) Data matrix. Shape (N,D)
        Outputs:   None
        '''
        K = self.K
        D = self.D

        gamma = np.log(np.diag(np.cov(X.T)))
        #gamma = np.random.randn(D)*1e-5
        N, _ = X.shape
        W = np.random.randn(K, D) * 1e-5

        if method == "exact":
            #gamma = np.log(np.diag(np.cov(X.T)))
            init_params = np.concatenate((gamma.flatten(), W.flatten()))
            fprime = (self.marginal_likelihood_wrapper(init_params, X))
            learnt_params = adam(fprime, init_params)
            self.W = learnt_params[D:].reshape(K, D)
            self.Psi = np.diag(np.exp(learnt_params[:self.D]))
        elif method == "bbsvl":
            #gamma = np.log(np.diag(np.cov(X.T)))
            mus = np.random.randn(X.shape[0], X.shape[1]) / 100
            stds = np.random.randn(X.shape[0], X.shape[1]) * 1e-5
            init_var_params = np.concatenate(
                (gamma.flatten(), W.flatten(), mus.flatten(), stds.flatten()))
            fprime = (self.svl_wrapper(X, init_var_params))
            learnt_params = adam(fprime, init_var_params)
            self.W = learnt_params[D:(D * K + D)].reshape((K, D))
            self.Psi = np.diag(np.exp(learnt_params[:D].reshape(D)))
            self.mus = learnt_params[D * (K + 1):D * (K + 1) + N * K].reshape(
                (N, K))
            self.stds = np.exp(learnt_params[-(N * K):].reshape((N, K)))
        else:
            print('invalid method')
        pass
def variational_inference(Sigma_W, y_train, x_train, S, max_iteration,
                          step_size, verbose):
    '''implements wrapper for variational inference via bbb for bayesian regression'''
    D = Sigma_W.shape[0]
    Sigma_W_inv = np.linalg.inv(Sigma_W)
    Sigma_W_det = np.linalg.det(Sigma_W)
    variational_dim = D

    # define the log prior on the model parameters
    def log_prior(W):
        constant_W = -0.5 * (D * np.log(2 * np.pi) + np.log(Sigma_W_det))
        exponential_W = -0.5 * np.diag(np.dot(np.dot(W, Sigma_W_inv), W.T))
        log_p_W = constant_W + exponential_W
        return log_p_W

    # define the log likelihood
    def log_lklhd(W):
        log_odds = np.matmul(W, x_train) + 10
        p = 1 / (1 + np.exp(-log_odds))
        log_likelihood = y_train * np.log(p)
        return log_likelihood

    # define the log joint density
    log_density = lambda w, t: log_lklhd(w) + log_prior(w)

    # build variational objective.
    objective, gradient, unpack_params = black_box_variational_inference(
        log_density, D, num_samples=S)

    def callback(params, t, g):
        if verbose and t % 10 == 0:
            var_means = params[:D]
            var_variance = np.diag(np.exp(params[D:])**2)
            print(
                "Iteration {} lower bound {}; gradient mag: {}".format(
                    t, -objective(params, t),
                    np.linalg.norm(gradient(params, t))))
            print('Variational Mean: ', var_means)
            print('Variational Variances: ', var_variance)

    print("Optimizing variational parameters...")
    # initialize variational parameters
    init_mean = 0 * np.ones(D)
    init_log_std = -1 * np.ones(D)
    init_var_params = np.concatenate([init_mean, init_log_std])

    # perform gradient descent using adam (a type of gradient-based optimizer)
    variational_params = adam(gradient,
                              init_var_params,
                              step_size=step_size,
                              num_iters=max_iteration,
                              callback=callback)

    return variational_params
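
A hypothetical call to the wrapper above, assuming black_box_variational_inference (from autograd's examples) and adam are importable as in the snippet. The shapes follow the code's own conventions (x_train is (D, N) so that each sampled weight row can be multiplied against it), and all names and data here are synthetic:

import autograd.numpy as np   # same alias the surrounding examples assume

D, N = 2, 100
Sigma_W = np.eye(D)                                 # prior covariance over weights
x_train = np.random.randn(D, N)
y_train = (np.random.rand(N) > 0.5).astype(float)

var_params = variational_inference(Sigma_W, y_train, x_train, S=100,
                                   max_iteration=2000, step_size=0.05,
                                   verbose=True)
# layout matches init_var_params: first D entries are means, last D are log-stds
var_mean, var_log_std = var_params[:D], var_params[D:]
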
def adam_solve(lambda_flows, grad_energy_bound, samples, u_func, h, m=1000, step_size=0.001,
               bnn=False):
    '''
        Uses adam solver to optimize the energy bound
    '''
    output = np.copy(lambda_flows) # Copies to avoid changing initial conditions
    print("BEFORE LEARNING:\n{}".format(output))
    grad_energy_bound = autograd.grad(energy_bound)  # Autograd gradient of energy
    g_eb = lambda lambda_flows, i: grad_energy_bound(lambda_flows, samples, h, u_func, 
                                                     #beta= (0.1 + i/1000))
                                                     beta=min(2, i/1000), # Annealing
                                                     #beta=min(1, 0.01+i/10000),
                                                     bnn=bnn) # Annealing
    output = adam(g_eb, output, num_iters=m, callback=callback, step_size=step_size)
    print("\nAFTER LEARNING:\n{}".format(output))

    #samples = np.random.randn(30000)[:,np.newaxis] # Plot with more samples for better clarity
    q_0_mu = np.array([0,0])
    q_0_sigma = 1
    D = q_0_mu.shape[0]
    #samples = np.random.multivariate_normal(q_0_mu, q_0_sigma*np.eye(D), 20000)

    samples_flowed = flow_samples(output, samples, h)
    #np.savetxt("./data_fit_1d/flow_params.txt", output)
    np.savetxt("./nn_fit/flow_params.txt", output)
    if(bnn):
        np.savetxt("./nn_fit/energy_bound.txt", e_bound)
        fig, ax = plt.subplots()
        ax.plot(e_bound)
        ax.set(title="Energy Bound")
        plt.savefig("./nn_fit/energy_bound.png")
        plt.close()

        np.savetxt("./nn_fit/joint_probs.txt", joint_probs)
        fig, ax = plt.subplots()
        ax.plot(joint_probs)
        ax.set(title="Joint Probability")
        plt.savefig("./nn_fit/joint_probs.png")
        plt.close()

        np.savetxt("./nn_fit/flow_probs.txt", flow_probs)
        fig, ax = plt.subplots()
        ax.plot(flow_probs)
        ax.set(title="Flow Probs")
        plt.savefig("./nn_fit/flow_probs.png")
        plt.close()

        np.savetxt("./nn_fit/grad_norms.txt", grad_norms)
        fig, ax = plt.subplots()
        ax.plot(grad_norms)
        ax.set(title="Gradient Norms")
        plt.savefig("./nn_fit/grad_norms.png")
        plt.close()


    return samples_flowed
Example #22
def run():
    # train and save the neural network
    global inputs, targets, training_error

    training_error = []

    # max number of iterations in optimization
    num_iters = 100

    N = 100 # Number of uniformly sampled trajectories in training data set.

    # sample training data
    # x_traj, y_traj, index = randomsample(N)
    x_traj, y_traj, index = shufflesample(N*10, sampling_rate = 0.1)

    # normalize the training data
    x_scaler = MinMaxScaler((-1,1))
    x_scaler.fit(x_traj)
    y_scaler = MinMaxScaler((-1,1))
    y_scaler.fit(y_traj)

    x_traj_scale = x_scaler.transform(x_traj)
    y_traj_scale = y_scaler.transform(y_traj)

    inputs = x_traj_scale
    targets = y_traj_scale

    # Decide NN architecture
    D = x_traj.shape[1]
    G = 20

    init_weights = initialize_weights(G, D)

    print('----------  Optimizing KOOPMAN NEURAL NET for {} iterations ..... \n'.format(num_iters))
    # use adam to optimize
    opt_weights = adam(grad(objective), init_weights, step_size=0.01, num_iters=num_iters, callback=callback)

    # use sgd to optimize
    # opt_weights = sgd(grad(objective), init_weights, step_size=0.1, num_iters = num_iters, callback=callback)

    print('done')

    # save the optimal weights and related parameters
    np.savez('data/sample_1/optweights_tanh_minmax_random1000shuffle_G20_layer2_sgd_2.npz', optweights = opt_weights, x_scaler = x_scaler, y_scaler = y_scaler, index = index, training_error = training_error)

    # Pick a trajectory and check the prediction of the nn on this trajectory

    x_traj_test, y_traj_test = sample_multitraj(6350, 6351)
    inputs = x_scaler.transform(x_traj_test)
    targets = y_scaler.transform(y_traj_test)
    outputs = nn_encode_foward_decode(opt_weights, inputs)
    re = np.mean([np.linalg.norm(targets[i] - outputs[i]) / np.linalg.norm(targets[i]) for i in range(len(targets))])
    print('Relative training norm error {:+1.4e}'.format(re))

    figplot(outputs, url=None)
Example #23
    def partial_fit_base(self, X, y):
        check_is_fitted(self, "base_model_")

        batch_indices = generate_batch(
            X, self.autograd_config.get("batch_size", 32))

        esp = 1e-11  # where should this live?
        step_size = self.autograd_config.get("step_size", 0.05)
        callback = (None if self.autograd_config.get("verbose", False) else
                    simple_callback)
        num_iters = self.autograd_config.get("num_iters", 1000)

        nclass = self.n_classes_
        model_dump = self.base_model_.booster_.dump_model()
        trees_ = [m["tree_structure"] for m in model_dump["tree_info"]]

        trees_params = multi_tree_to_param(X, y, trees_)
        model_ = gbm_gen(trees_params[0], X, trees_params[2], trees_params[1],
                         False, 2)

        def training_loss(weights, idx=0):
            # Training loss is the negative log-likelihood of the training labels.
            t_idx_ = batch_indices(idx)
            preds = sigmoid(model_(weights, X[t_idx_, :]))
            label_probabilities = preds * y[t_idx_] + (1 - preds) * (1 - y[t_idx_])
            # print(label_probabilities)
            loglik = -np.sum(np.log(label_probabilities))

            num_unpack = 3
            reg = 0
            # reg_l1 = np.sum(np.abs(flattened)) * 1.
            for idx_ in range(0, len(weights), num_unpack):
                param_temp_ = weights[idx_:idx_ + num_unpack]
                flattened, _ = weights_flatten(param_temp_[:2])
                reg_l1 = np.sum(np.abs(flattened)) * 1.0
                reg += reg_l1
            return loglik + reg

        training_gradient_fun = grad(training_loss)
        param_ = adam(
            training_gradient_fun,
            trees_params[0],
            callback=callback,
            step_size=step_size,
            num_iters=num_iters,
        )

        self.base_param_ = copy.deepcopy(trees_params)
        self.partial_param_ = param_
        self.is_partial = True
        return self
Example #24
    def train(self,
              step_size=0.01,
              num_iters=1000,
              verbose=False,
              callback=None):
        init = self.model.get_params()
        final_params = adam(lambda x, _: -self.grad(x),
                            init,
                            step_size=step_size,
                            num_iters=num_iters,
                            callback=callback)
        self.model.set_params(final_params.reshape(init.shape))
        return self.model
    def run(self):
        L2_reg = self.L2_reg
        activations = self.activations
        step_size = self.step_size
        y_type = self.y_type
        loss_type = self.loss_type

        # Initial neural net parameters
        init_params = initialize_parameters(self.layer_sizes, var=self.w_var)

        print("Loading training data...")
        X_train, X_test, y_train, y_test = load_data(self.y_type)
        self.store(X_train, X_test, y_train, y_test)
        self.Coordinates = Coordinates(
            np.concatenate((y_train, y_test), axis=0))
        num_batches = int(ceil(X_train.shape[0] / BATCH_SIZE))

        def batch_indices(iter):
            if iter % num_batches == 0:
                # Shuffle the data
                X_train, X_test, y_train, y_test = load_data(self.y_type)
                self.store(X_train, X_test, y_train, y_test)
            idx = iter % num_batches
            return slice(idx * BATCH_SIZE, (idx + 1) * BATCH_SIZE)

        def objective(parameters, iter):
            idx = batch_indices(iter)
            return loss(parameters, X_train[idx], y_train[idx], L2_reg,
                        activations, y_type, loss_type)

        objective_grad = grad(objective)

        def print_perf(parameters, iter, gradient):
            if iter % num_batches == 0:
                train_acc = error(parameters, X_train, y_train, activations,
                                  y_type, loss_type)
                test_acc = error(parameters, X_test, y_test, activations,
                                 y_type, loss_type)
                reg = reg_loss(parameters, L2_reg)
                print("{:15}|{:20}|{:20}|{:20}".format(iter // num_batches,
                                                       train_acc, test_acc,
                                                       reg))

        print("Training the neural network ...")
        self.optimized_params = adam(objective_grad,
                                     init_params,
                                     step_size=step_size,
                                     num_iters=EPOCHS * num_batches,
                                     callback=print_perf)
        return self.results(self.optimized_params, activations, L2_reg,
                            X_train, X_test, y_train, y_test)
Example #26
    def run(self):
        self.objectPoints = self.sph.get_sphere_points()
        self.init_params = flatten_points(self.objectPoints, type='object')

        self.objective1 = lambda params: matrix_condition_number_autograd(
            params, self.cam.P, normalize=False)
        self.objective2 = lambda params, iter: matrix_condition_number_autograd(
            params, self.cam.P, normalize=True)

        print("Optimizing condition number...")
        objective_grad = grad(self.objective2)
        self.optimized_params = adam(objective_grad,
                                     self.init_params,
                                     step_size=0.001,
                                     num_iters=200,
                                     callback=self.plot_points)
Example #27
    def learn(self, **kwargs): 

        params = self.tet.get_params()
        optimizer = kwargs["optimizer"]

        objective_grad = grad(self.calculate_loss, argnum=0)

        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(self.X, self.y, test_size=0.01, random_state=42)
        print("DATASET SIZE\tTrain set: {}\tValidation set: {}".format(len(self.X_train), len(self.X_val)))

        if optimizer == "adam":
            num_iters = kwargs["num_iters"]
            step_size = kwargs["step_size"]
            optimized_params = adam(objective_grad, params, step_size=step_size, num_iters=num_iters, callback=self.print_perf)
            print("BEST VALIDATION ERROR: ", self.best_v_err)
            print("BEST PARAMS: ", self.best_params)
            return optimized_params
Example #28
def run():

    global inputs, targets, hyper

    num_iters = 150

    # inputs, targets = build_tvb_dataset()
    inputs, targets = build_wc_dataset()

    D = inputs.shape[1]
    G = 20

    init_weights = initialize_weights(G, D)

    print('----------  Optimizing KOOPMAN NEURAL NET for {} iterations ..... \n'.format(num_iters))

    opt_weights = adam(grad(objective), init_weights, step_size=0.01, num_iters=num_iters, callback=callback)

    decoded = nn_encode_decode(opt_weights, inputs)
    outputs = nn_encode_foward_decode(opt_weights, inputs)

    plt.figure()
    _ = plt.scatter(targets, outputs, marker='D', c='g', alpha=0.1)
    plt.xlabel('targets')
    plt.ylabel('outputs')
    plt.title('Dynamic Scatter')
    plt.grid()

    plt.figure()
    _ = plt.scatter(inputs, decoded, marker='D', c='b', alpha=0.1)
    plt.xlabel('inputs')
    plt.ylabel('decoded')
    plt.title('Encoding-decoding Scatter')
    plt.grid()

    plt.figure()
    _ = plt.plot(outputs[:, 0:3], marker='x')
    _ = plt.plot(targets[:, 0:3], marker='+')

    plt.show()

    re = np.mean([np.linalg.norm(targets[i] - outputs[i]) / np.linalg.norm(targets[i]) for i in range(len(targets))])

    print('Relative norm error {:+1.4e}'.format(re))
    print('--- Finish ---')
    def infer(self, x, W=None, Psi=None, method="exact"):
        '''
        function: infer
        Description: Run inference to obtain the posterior distribution for 
        a single data case x. method can either be  "exact" for  exact
        inference, or "bbsvi" for black-box stochastic variational inference.
        Output is a tuple consisting of the posterior mean and  the posterior
        covariance matrix.
            
        Inputs:
            x -    (np.array) Data matrix. Shape (1,D)
            W -    (np.array) Factor loading matrix. Shape (K,D).
            Psi -  (np.array) Output covariance matrix. Shape (D,D). Positive, diagonal.
            method-(string) Either  "exact" or "bbsvi" 
        Outputs:  
            mu    - (np.array) Value of the exact or approximate posterior mean. Shape (1,D)
            Sigma - (np.array) Value of the exact or approximate posterior 
                    covariance matrix. Shape (D,D)
        '''
        if (W is None): W = self.W
        if (Psi is None): Psi = self.Psi
        K = self.K
        D = self.D
        if method == "exact":
            #print 'exact'
            inter = np.linalg.inv(np.dot(W.T, W) + Psi)
            mean_conditional = (np.dot(W, np.dot(inter, x.T))).T
            cov_conditional = np.identity(K) - np.dot(W, np.dot(inter, W.T))
            return mean_conditional, cov_conditional

        elif method == "bbsvi":
            #print 'bbsvi'
            init_mean = np.random.randn(1, K) / 100
            init_log_std = 1e-5 * np.ones((1, K))
            init_var_params = np.concatenate(
                (init_mean.flatten(), init_log_std.flatten()))
            gradient = self.svi_wrapper(x, init_var_params, W, Psi)
            variational_params = adam(gradient,
                                      init_var_params,
                                      num_iters=1000)
            return variational_params[:K], np.diag(
                (np.exp(variational_params[K:])**2))
        else:
            print('invalid method')
        pass
Example #30
    def learn(self, **kwargs): 

        params = self.tet.get_params()
        optimizer = kwargs["optimizer"]

        objective_grad = grad(self.calculate_loss, argnum=0)

        self.X_train, self.X_val = self.create_triplets([3,5,7])

        print("DATASET SIZE - \t TRAIN: {} ex \t VALIDATION: {} ex".format(len(self.X_train), len(self.X_val)))

        print("Itr\t|\tTr Error\t|\tVal Error\t|\tParams\t|\tGradient\t")
        if optimizer == "adam":
            num_iters = kwargs["num_iters"]
            step_size = kwargs["step_size"]
            optimized_params = adam(objective_grad, params, step_size=step_size, num_iters=num_iters, callback=self.print_perf)
            print("\nBEST VALIDATION ERROR: ", self.best_v_err)
            print("BEST PARAMS: ", self.best_params)
            return optimized_params
Example #31
    inputs, targets = build_toy_dataset()

    def objective(weights, t):
        return -logprob(weights, inputs, targets)\
               -log_gaussian(weights, weight_prior_variance)

    print(grad(objective)(init_params, 0))

    # Set up figure.
    fig = plt.figure(figsize=(12,8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.show(block=False)

    def callback(params, t, g):
        print("Iteration {} log likelihood {}".format(t, -objective(params, t)))

        # Plot data and functions.
        plt.cla()
        ax.plot(inputs.ravel(), targets.ravel(), 'bx', ms=12)
        plot_inputs = np.reshape(np.linspace(-7, 7, num=300), (300,1))
        outputs = nn_predict(params, plot_inputs)
        ax.plot(plot_inputs, outputs, 'r', lw=3)
        ax.set_ylim([-1, 1])
        plt.draw()
        plt.pause(1.0/60.0)

    print("Optimizing network parameters...")
    optimized_params = adam(grad(objective), init_params,
                            step_size=0.01, num_iters=1000, callback=callback)
Example #32
        ax_vecfield.cla()
        ax_vecfield.set_title('Learned Vector Field')
        ax_vecfield.set_xlabel('x')
        ax_vecfield.set_ylabel('y')
        ax_vecfield.xaxis.set_ticklabels([])
        ax_vecfield.yaxis.set_ticklabels([])

        # vector field plot
        y, x = npo.mgrid[-2:2:21j, -2:2:21j]
        dydt = nn_predict(np.stack([x, y], -1).reshape(21 * 21, 2), 0,
            params).reshape(-1, 2)
        mag = np.sqrt(dydt[:, 0]**2 + dydt[:, 1]**2).reshape(-1, 1)
        dydt = (dydt / mag)
        dydt = dydt.reshape(21, 21, 2)

        ax_vecfield.streamplot(x, y, dydt[:, :, 0], dydt[:, :, 1], color="black")
        ax_vecfield.set_xlim(-2, 2)
        ax_vecfield.set_ylim(-2, 2)

        fig.tight_layout()
        plt.draw()
        plt.pause(0.001)


    # Train neural net dynamics to match data.
    init_params = init_nn_params(0.1, layer_sizes=[D, 150, D])
    optimized_params = adam(grad(train_loss), init_params,
                            num_iters=1000, callback=callback)

Example #33
        zs = func(np.concatenate([np.atleast_2d(X.ravel()), np.atleast_2d(Y.ravel())]).T)
        Z = zs.reshape(X.shape)
        plt.contour(X, Y, Z)
        ax.set_yticks([])
        ax.set_xticks([])

    # Set up figure.
    fig = plt.figure(figsize=(8,8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.ion()
    plt.show(block=False)

    def callback(params, t, g):
        print("Iteration {} lower bound {}".format(t, -objective(params, t)))

        plt.cla()
        target_distribution = lambda x : np.exp(log_density(x, t))
        plot_isocontours(ax, target_distribution)

        mean, log_std = unpack_params(params)
        variational_contour = lambda x: mvn.pdf(x, mean, np.diag(np.exp(2*log_std)))
        plot_isocontours(ax, variational_contour)
        plt.draw()
        plt.pause(1.0/30.0)

    print("Optimizing variational parameters...")
    init_mean    = -1 * np.ones(D)
    init_log_std = -5 * np.ones(D)
    init_var_params = np.concatenate([init_mean, init_log_std])
    variational_params = adam(gradient, init_var_params, step_size=0.1, num_iters=2000, callback=callback)
Example #34
    step_size = 0.001

    print("Loading training data...")
    N, train_images, train_labels, test_images,  test_labels = load_mnist()

    init_params = init_random_params(param_scale, layer_sizes)

    num_batches = int(np.ceil(len(train_images) / batch_size))
    def batch_indices(iter):
        idx = iter % num_batches
        return slice(idx * batch_size, (idx+1) * batch_size)

    # Define training objective
    def objective(params, iter):
        idx = batch_indices(iter)
        return -log_posterior(params, train_images[idx], train_labels[idx], L2_reg)

    # Get gradient of objective using autograd.
    objective_grad = grad(objective)

    print("     Epoch     |    Train accuracy  |       Test accuracy  ")
    def print_perf(params, iter, gradient):
        if iter % num_batches == 0:
            train_acc = accuracy(params, train_images, train_labels)
            test_acc  = accuracy(params, test_images, test_labels)
            print("{:15}|{:20}|{:20}".format(iter//num_batches, train_acc, test_acc))

    # The optimizers provided can optimize lists, tuples, or dicts of parameters.
    optimized_params = adam(objective_grad, init_params, step_size=step_size,
                            num_iters=num_epochs * num_batches, callback=print_perf)
            elbos.append(elbo_val)
            if t % 50 == 0:
                print("Iteration {} lower bound {}".format(t, elbo_val))

        init_mean    = -1 * np.ones(D)
        init_log_std = -5 * np.ones(D)
        init_var_params = np.concatenate([init_mean, init_log_std])
        variational_params = optfun(num_iters, init_var_params, callback)
        return np.array(elbos)

    # let's optimize this with a few different step sizes
    elbo_lists = []
    step_sizes = [.1, .25, .5]
    for step_size in step_sizes:
        # optimize with standard gradient + adam
        optfun = lambda n, init, cb: adam(gradient, init, step_size=step_size,
                                                    num_iters=n, callback=cb)
        standard_lls = optimize_and_lls(optfun)

        # optimize with natural gradient + sgd, no momentum
        optnat = lambda n, init, cb: sgd(natural_gradient, init, step_size=step_size,
                                         num_iters=n, callback=cb, mass=.001)
        natural_lls = optimize_and_lls(optnat)
        elbo_lists.append((standard_lls, natural_lls))

    # visually compare the ELBO
    plt.figure(figsize=(12,8))
    colors = ['b', 'k', 'g']
    for col, ss, (stand_lls, nat_lls) in zip(colors, step_sizes, elbo_lists):
        plt.plot(np.arange(len(stand_lls)), stand_lls,
                 '--', label="standard (adam, step-size = %2.2f)"%ss, alpha=.5, c=col)
        plt.plot(np.arange(len(nat_lls)), nat_lls, '-',
                 label="natural (sgd, step-size = %2.2f)"%ss, alpha=.5, c=col)
        ax.set_yticks([])
        ax.set_xticks([])

    fig = plt.figure(figsize=(8,8), facecolor='white')
    ax = fig.add_subplot(111, frameon=False)
    plt.ion()
    plt.show(block=False)

    num_plotting_samples = 51

    def callback(params, t, g):
        print("Iteration {} lower bound {}".format(t, -objective(params, t)))

        plt.cla()
        target_distribution = lambda x: np.exp(log_density(x, t))
        var_distribution    = lambda x: np.exp(variational_log_density(params, x))
        plot_isocontours(ax, target_distribution)
        plot_isocontours(ax, var_distribution, cmap=plt.cm.bone)
        ax.set_autoscale_on(False)

        rs = npr.RandomState(0)
        samples = variational_sampler(params, num_plotting_samples, rs)
        plt.plot(samples[:, 0], samples[:, 1], 'x')

        plt.draw()
        plt.pause(1.0/30.0)

    print("Optimizing variational parameters...")
    variational_params = adam(grad(objective), init_var_params(D), step_size=0.1,
                              num_iters=2000, callback=callback)
Example #37
File: rnn.py Project: HIPS/autograd
            training_text  = one_hot_to_string(train_inputs[:,t,:])
            predicted_text = one_hot_to_string(logprobs[:,t,:])
            print(training_text.replace('\n', ' ') + "|" +
                  predicted_text.replace('\n', ' '))

    def training_loss(params, iter):
        return -rnn_log_likelihood(params, train_inputs, train_inputs)

    def callback(weights, iter, gradient):
        if iter % 10 == 0:
            print("Iteration", iter, "Train loss:", training_loss(weights, 0))
            print_training_prediction(weights)

    # Build gradient of loss function using autograd.
    training_loss_grad = grad(training_loss)

    print("Training RNN...")
    trained_params = adam(training_loss_grad, init_params, step_size=0.1,
                          num_iters=1000, callback=callback)

    print()
    print("Generating text from RNN...")
    num_letters = 30
    for t in range(20):
        text = ""
        for i in range(num_letters):
            seqs = string_to_one_hot(text, num_chars)[:, np.newaxis, :]
            logprobs = rnn_predict(trained_params, seqs)[-1].ravel()
            text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
        print(text)