Example #1
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            rs = RandomState(
                (seed, i_hyper,
                 i_iter))  # Deterministic seed needed for backwards pass.
            idxs = rs.randint(N_train, size=batch_size)
            return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs],
                            L2_vect)

        learning_curve_dict = defaultdict(list)

        def callback(x, v, g, i_iter):
            if i_iter % thin == 0:
                learning_curve_dict['learning_curve'].append(
                    loss_fun(x, **train_data))
                learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
                learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
                learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        rs = RandomState((seed, i_hyper))
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= rs.randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = np.exp(cur_hyperparams['log_L2_reg'])
        W_opt = sgd_parsed(grad(indexed_loss_fun),
                           kylist(W0, alphas, betas, L2_reg),
                           parser,
                           callback=callback)
        cur_primal_results['weights'] = getval(W_opt).copy()
        cur_primal_results['learning_curve'] = getval(learning_curve_dict)
        return W_opt, learning_curve_dict
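The "Deterministic seed needed for backwards pass" comment is the key detail here: the minibatch RNG is rebuilt from the tuple (seed, i_hyper, i_iter), so when training is replayed in reverse the very same minibatch indices can be regenerated instead of being stored. The RandomState used above is a helper from the surrounding codebase (it accepts tuples, and elsewhere even strings, so it is not numpy's RandomState directly). A minimal stand-in sketch of the idea, assuming a simple hash-the-keys-to-a-32-bit-seed scheme:

    import hashlib
    import numpy as np

    def make_rng(*keys):
        # Hash the loop indices down to a 32-bit seed so the exact same stream
        # (and hence the exact same minibatch indices) can be rebuilt later.
        digest = hashlib.md5(repr(keys).encode()).hexdigest()
        return np.random.RandomState(int(digest[:8], 16))

    rs = make_rng(0, 3, 17)              # e.g. (seed, i_hyper, i_iter)
    idxs = rs.randint(1000, size=32)     # identical idxs every time for these keys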
Example #2
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, meta_vect, i_iter):
            # Unpack the (fake data, labels, L2) bundle actually passed in,
            # rather than reaching for the enclosing-scope `meta` variable.
            (train_data, train_labels, L2_vect) = meta_vect
            return loss_fun(w, train_data, train_labels, L2_vect)
            #return loss_fun(w, train_data['X'], train_data['T'], L2_vect + np.sum(fake_data.ravel()))

        learning_curve_dict = defaultdict(list)

        def callback(x, v, g, i_iter):
            if i_iter % thin == 0:
                #        learning_curve_dict['learning_curve'].append(loss_fun(x, getval(cur_hyperparams['fake_data']), fake_labels))
                learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
                learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
                learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        fake_data = cur_hyperparams['fake_data']
        rs = RandomState((seed, i_hyper))
        W0 = fill_parser(parser, np.exp(fixed_hyperparams['log_param_scale']))
        W0 *= rs.randn(W0.size)
        alphas = np.exp(fixed_hyperparams['log_alphas'])
        betas = logit(fixed_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
        meta = kylist(fake_data, fake_labels, L2_reg)
        W_opt = sgd_parsed(grad(indexed_loss_fun),
                           kylist(W0, alphas, betas, meta),
                           parser,
                           callback=callback)
        cur_primal_results['weights'] = getval(W_opt).copy()
        cur_primal_results['learning_curve'] = getval(learning_curve_dict)
        return W_opt, learning_curve_dict
Example #3
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            rs = RandomState((seed, i_hyper, i_iter))  # Deterministic seed needed for backwards pass.
            idxs = rs.randint(N_train, size=batch_size)
            return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

        learning_curve_dict = defaultdict(list)
        def callback(x, v, g, i_iter):
            if i_iter % thin == 0:
                learning_curve_dict['learning_curve'].append(loss_fun(x, **train_data))
                learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
                learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
                learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        rs = RandomState((seed, i_hyper))
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= rs.randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas  = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
        W_opt = sgd_parsed(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg),
                           parser, callback=callback)
        cur_primal_results['weights'] = getval(W_opt).copy()
        cur_primal_results['learning_curve'] = getval(learning_curve_dict)
        return W_opt, learning_curve_dict
Example #4
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, meta_vect, i_iter):
            # Unpack the (fake data, labels, L2) bundle actually passed in,
            # rather than reaching for the enclosing-scope `meta` variable.
            (train_data, train_labels, L2_vect) = meta_vect
            return loss_fun(w, train_data, train_labels, L2_vect)
            #return loss_fun(w, train_data['X'], train_data['T'], L2_vect + np.sum(fake_data.ravel()))

        learning_curve_dict = defaultdict(list)
        def callback(x, v, g, i_iter):
            if i_iter % thin == 0:
                #        learning_curve_dict['learning_curve'].append(loss_fun(x, getval(cur_hyperparams['fake_data']), fake_labels))
                learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
                learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
                learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))


        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        fake_data = cur_hyperparams['fake_data']
        rs = RandomState((seed, i_hyper))
        W0 = fill_parser(parser, np.exp(fixed_hyperparams['log_param_scale']))
        W0 *= rs.randn(W0.size)
        alphas = np.exp(fixed_hyperparams['log_alphas'])
        betas  = logit(fixed_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
        meta = kylist(fake_data, fake_labels, L2_reg)
        W_opt = sgd_parsed(grad(indexed_loss_fun), kylist(W0, alphas, betas, meta),
                           parser, callback=callback)
        cur_primal_results['weights'] = getval(W_opt).copy()
        cur_primal_results['learning_curve'] = getval(learning_curve_dict)
        return W_opt, learning_curve_dict
Example #5
 def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
     w_vect = transform_weights(z_vect, transform_vect)
     loss = total_loss(w_vect, train_data)
     reg = regularization(z_vect)
     if VERBOSE and record_results and i_primal % N_thin == 0:
         print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format(
             i_primal, getval(loss) / N_scripts, total_loss(getval(w_vect), valid_data) / N_scripts, getval(reg)
         )
     return loss + reg
Example #6
 def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
     w_vect = transform_weights(z_vect, transform_vect)
     loss = total_loss(w_vect, train_data)
     reg = regularization(z_vect)
     if VERBOSE and record_results and i_primal % N_thin == 0:
         print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format(
             i_primal,
             getval(loss) / N_scripts,
             total_loss(getval(w_vect), valid_data) / N_scripts,
             getval(reg))
     return loss + reg
Example #7
 def sub_primal_stochastic_loss(z_vect, transform_vect, i_primal, i_script):
     RS = RandomState((seed, i_hyper, i_primal, i_script))
     N_train = train_data[i_script]['X'].shape[0]
     idxs = RS.permutation(N_train)[:batch_size]
     minibatch = dictslice(train_data[i_script], idxs)
     loss = loss_from_latents(z_vect, transform_vect, i_script, minibatch)
     reg = regularization(z_vect) if i_script == 0 else 0.0
     if i_primal % N_thin == 0 and i_script == 0:
         print "Iter {0}, full losses: train: {1}, valid: {2}, reg: {3}".format(
             i_primal,
             total_loss(train_data, getval(z_vect)),
             total_loss(valid_data, getval(z_vect)),
             getval(reg) / N_scripts_per_iter)
     return loss + reg
Example #8
 def sub_primal_stochastic_loss(z_vect, transform_vect, i_primal,
                                i_script):
     RS = RandomState((seed, i_hyper, i_primal, i_script))
     N_train = train_data[i_script]['X'].shape[0]
     idxs = RS.permutation(N_train)[:batch_size]
     minibatch = dictslice(train_data[i_script], idxs)
     loss = loss_from_latents(z_vect, transform_vect, i_script,
                              minibatch)
     reg = regularization(z_vect) if i_script == 0 else 0.0
     if i_primal % N_thin == 0 and i_script == 0:
         print "Iter {0}, full losses: train: {1}, valid: {2}, reg: {3}".format(
             i_primal, total_loss(train_data, getval(z_vect)),
             total_loss(valid_data, getval(z_vect)),
             getval(reg) / N_scripts_per_iter)
     return loss + reg
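Examples #7 and #8 draw each script's minibatch by permuting the row indices and slicing every array in that script's data dict with dictslice. dictslice itself is defined elsewhere in the codebase; a minimal stand-in with the behaviour the call sites imply:

    import numpy as np

    def dictslice(d, idxs):
        # Slice every equal-length array in the dict by the same row indices.
        return {k: v[idxs] for k, v in d.items()}

    data = {'X': np.random.randn(100, 5), 'T': np.random.randn(100, 3)}
    rs = np.random.RandomState(0)
    idxs = rs.permutation(100)[:10]      # a random 10-row minibatch
    minibatch = dictslice(data, idxs)    # {'X': (10, 5) array, 'T': (10, 3) array}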
Example #9
def primal_optimizer(hyperparams_vect, meta_epoch):
    def indexed_loss_fun(w, L2_vect, i_iter):
        rs = RandomState(
            (seed, meta_epoch,
             i_iter))  # Deterministic seed needed for backwards pass.
        idxs = rs.randint(N_train, size=batch_size)
        return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs],
                        L2_vect)

    cur_hyperparams = hyperparams.new_vect(hyperparams_vect)

    rs = RandomState((seed, meta_epoch))

    # Randomly initialize weights
    W0 = fill_parser(parser, np.exp(fixed_hyperparams['log_param_scale']))
    W0 *= rs.randn(W0.size)
    # Init regularization term
    L2_reg = fill_parser(parser, np.exp(fixed_hyperparams['log_L2_reg']))
    # Set step sizes
    alphas = np.exp(cur_hyperparams['log_alphas'])
    # Momentum terms
    betas = logit(cur_hyperparams['invlogit_betas'])

    # Train model
    W_opt = sgd_parsed(grad(indexed_loss_fun), kylist(W0, alphas, betas,
                                                      L2_reg), parser)

    cur_primal_results['weights'] = getval(W_opt).copy()
    return W_opt
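Example #9's comments spell out the recurring reparameterization: hyperparameters are stored in unconstrained form and mapped to their natural ranges at the start of every run. Step sizes, initial scales and L2 strengths are kept as logs and recovered with np.exp; the momentum terms are kept pre-squashing and pushed through what the invlogit_ naming suggests is a logistic squashing function (called logit in these snippets). A self-contained sketch with made-up values:

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    # Unconstrained storage (hypothetical values) ...
    log_alphas     = np.array([-2.3, -2.3])   # step sizes, stored as logs
    invlogit_betas = np.array([ 2.0,  2.0])   # momentum, stored pre-squashing
    log_L2_reg     = np.array([-4.0])         # L2 strength, stored as a log

    # ... mapped to their natural ranges before the inner SGD run.
    alphas = np.exp(log_alphas)        # always positive
    betas  = sigmoid(invlogit_betas)   # always in (0, 1)
    L2_reg = np.exp(log_L2_reg)        # always positive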
Example #10
    def hyperloss(hyperparam_vect, i):
        learning_curve = []

        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(
                    loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [
            np.full(parser[name].size,
                    np.exp(cur_hyperparams['log_param_scale'][i]))
            for i, name in enumerate(parser.names)
        ]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        W_opt = sgd5(grad(indexed_loss_fun),
                     kylist(W0, alphas, betas, log_L2_reg), callback)
        all_x.append(getval(W_opt))
        all_learning_curves.append(learning_curve)
        return valid_loss_fun(W_opt)
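Example #10 builds the initial weight vector by giving each parameter block its own scale: np.full expands the block's scalar scale to the block's size, the pieces are concatenated in parser order, and the result multiplies a standard-normal draw. fill_parser in the other examples appears to serve the same purpose. A toy sketch with the parser replaced by a list of (name, size) pairs:

    import numpy as np

    blocks = [("weights_1", 6), ("biases_1", 2), ("weights_2", 4)]   # toy layout
    log_param_scale = np.array([-1.0, -2.0, -1.5])                   # one scale per block

    scale_vect = np.concatenate([
        np.full(size, np.exp(log_param_scale[i]))   # broadcast the block's scale
        for i, (name, size) in enumerate(blocks)])

    W0 = np.random.randn(scale_vect.size) * scale_vect   # per-block scaled init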
Example #11
 def primal_stochastic_loss(z_vect, transform_vect, i_primal):
     RS = RandomState((seed, i_hyper, i_primal))
     loss = 0.0
     for _ in range(N_scripts_per_iter):
         i_script = RS.randint(N_scripts)
         N_train = train_data[i_script]['X'].shape[0]
         idxs = RS.permutation(N_train)[:batch_size]
         minibatch = dictslice(train_data[i_script], idxs)
         loss += loss_from_latents(z_vect, transform_vect, i_script, minibatch)
     reg  = regularization(z_vect)
     if i_primal % 20 == 0:
         print "Iter {0}, loss {1}, reg {2}".format(i_primal, getval(loss), getval(reg))
         print "Full losses: train: {0}, valid: {1}".format(
             total_loss(train_data, getval(z_vect)),
             total_loss(valid_data, getval(z_vect)))
     return loss + reg
Example #12
 def primal_stochastic_loss(z_vect, transform_vect, i_primal):
     RS = RandomState((seed, i_hyper, i_primal))
     loss = 0.0
     for _ in range(N_scripts_per_iter):
         i_script = RS.randint(N_scripts)
         N_train = train_data[i_script]['X'].shape[0]
         idxs = RS.permutation(N_train)[:batch_size]
         minibatch = dictslice(train_data[i_script], idxs)
         loss += loss_from_latents(z_vect, transform_vect, i_script, minibatch)
     reg  = regularization(z_vect)
     if i_primal % 1 == 0:
         print "Iter {0}, loss {1}, reg {2}".format(i_primal, getval(loss), getval(reg))
         print "Full losses: train: {0}, valid: {1}".format(
             total_loss(train_data, getval(z_vect)),
             total_loss(valid_data, getval(z_vect)))
     return loss + reg
Example #13
 def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
     w_vect = transform_weights(z_vect, transform_vect)
     loss = likelihood_loss(w_vect, data)
     reg = regularization(z_vect)
     if record_results and i_primal % N_thin == 0:
         print "Iter {0}: train: {1}".format(i_primal, getval(loss) / N_scripts)
     return loss + reg
Example #14
 def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
     w_vect = transform_weights(z_vect, transform_vect)
     loss = likelihood_loss(w_vect, data)
     reg = regularization(z_vect)
     if record_results and i_primal % N_thin == 0:
         print "Iter {0}: train: {1}".format(i_primal, getval(loss) / N_scripts)
     return loss + reg
Example #15
 def primal_loss(W, hyperparam_vect, i_primal, reg_penalty=True):
     RS = RandomState((seed, i_hyper, i_primal))
     idxs = RS.permutation(N_train)[:batch_size]
     minibatch = dictslice(train_data, idxs)
     loss = reg_loss_fun(W, minibatch, hyperparam_vect, reg_penalty)
     if verbose and i_primal % 10 == 0:
         print "Iter {0}, loss, {1}".format(i_primal, getval(loss))
     return loss
Example #16
 def primal_loss(w_vect, reg, i_primal, record_results=False):
     RS = RandomState((seed, i_primal, "primal"))
     idxs = RS.randint(N_data, size=batch_size)
     minibatch = dictslice(data, idxs)
     loss = loss_fun(w_vect, **minibatch)
     reg = regularization(w_vect, reg)
     if record_results and i_primal % N_thin == 0:
         print "Iter {0}: train: {1}".format(i_primal, getval(loss))
     return loss + reg
Example #17
 def primal_loss(W, hyperparam_vect, i_primal, reg_penalty=True):
     RS = RandomState((seed, i_hyper, i_primal))
     idxs = RS.permutation(N_train)[:batch_size]
     minibatch = dictslice(train_data, idxs)
     loss = reg_loss_fun(W, minibatch, hyperparam_vect, reg_penalty)
     if verbose and i_primal % 30 == 0:
         print "Iter {0}, loss, {1}".format(i_primal, getval(loss))
         
     return loss
Example #18
 def primal_loss(w_vect, reg, i_primal, record_results=False):
     RS = RandomState((seed, i_primal, "primal"))
     idxs = RS.randint(N_data, size=batch_size)
     minibatch = dictslice(data, idxs)
     loss = loss_fun(w_vect, **minibatch)
     reg = regularization(w_vect, reg)
     if record_results and i_primal % N_thin == 0:
         print "Iter {0}: train: {1}".format(i_primal, getval(loss))
     return loss + reg
Example #19
    def hyperloss(transform_vect, i_hyper, record_results=True):
        RS = RandomState((seed, i_hyper, "hyperloss"))

        def primal_loss(z_vect, transform_vect, i_primal, record_results=False):
            w_vect = transform_weights(z_vect, transform_vect)
            loss = total_loss(w_vect, train_data)
            reg = regularization(z_vect)
            if VERBOSE and record_results and i_primal % N_thin == 0:
                print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format(
                    i_primal, getval(loss) / N_scripts, total_loss(getval(w_vect), valid_data) / N_scripts, getval(reg)
                )
            return loss + reg

        z_vect_0 = RS.randn(script_parser.vect.size) * np.exp(log_initialization_scale)
        z_vect_final = sgd(grad(primal_loss), transform_vect, z_vect_0, alpha, beta, N_iters, callback=None)
        w_vect_final = transform_weights(z_vect_final, transform_vect)
        valid_loss = total_loss(w_vect_final, valid_data)
        if record_results:
            results["valid_loss"].append(getval(valid_loss) / N_scripts)
            results["train_loss"].append(total_loss(getval(w_vect_final), train_data) / N_scripts)
            results["tests_loss"].append(total_loss(getval(w_vect_final), tests_data) / N_scripts)
        return valid_loss
Example #20
    def hyperloss(transform_vect, i_hyper, record_results=True):
        RS = RandomState((seed, i_hyper, "hyperloss"))

        def primal_loss(z_vect,
                        transform_vect,
                        i_primal,
                        record_results=False):
            w_vect = transform_weights(z_vect, transform_vect)
            loss = total_loss(w_vect, train_data)
            reg = regularization(z_vect)
            if VERBOSE and record_results and i_primal % N_thin == 0:
                print "Iter {0}: train: {1}, valid: {2}, reg: {3}".format(
                    i_primal,
                    getval(loss) / N_scripts,
                    total_loss(getval(w_vect), valid_data) / N_scripts,
                    getval(reg))
            return loss + reg

        z_vect_0 = RS.randn(
            script_parser.vect.size) * np.exp(log_initialization_scale)
        z_vect_final = sgd(grad(primal_loss),
                           transform_vect,
                           z_vect_0,
                           alpha,
                           beta,
                           N_iters,
                           callback=None)
        w_vect_final = transform_weights(z_vect_final, transform_vect)
        valid_loss = total_loss(w_vect_final, valid_data)
        if record_results:
            results['valid_loss'].append(getval(valid_loss) / N_scripts)
            results['train_loss'].append(
                total_loss(getval(w_vect_final), train_data) / N_scripts)
            results['tests_loss'].append(
                total_loss(getval(w_vect_final), tests_data) / N_scripts)
        return valid_loss
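All of the hyperloss variants above share one structure: run (momentum) SGD on the primal training loss from a random initialization, then return the validation loss of the resulting weights, so that grad(hyperloss) differentiates through the entire training run. A minimal self-contained sketch of that pattern with autograd, using a toy quadratic problem and naive unrolling (the examples above instead call the codebase's own sgd/sgd_parsed optimizers, which handle the reverse pass internally):

    import autograd.numpy as np
    from autograd import grad

    def train_loss(w, log_l2):
        return np.sum((w - 1.0) ** 2) + np.exp(log_l2) * np.sum(w ** 2)

    def valid_loss(w):
        return np.sum((w - 0.5) ** 2)

    def hyperloss(log_l2, n_steps=50, alpha=0.1):
        w = np.zeros(3)
        dloss_dw = grad(train_loss)          # gradient w.r.t. the weights only
        for _ in range(n_steps):             # unrolled SGD on the primal loss
            w = w - alpha * dloss_dw(w, log_l2)
        return valid_loss(w)                 # still a function of log_l2

    print(grad(hyperloss)(-2.0))             # hypergradient of the validation loss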
Example #21
 def hyperloss(transform, i_hyper, cur_train_data, cur_valid_data, cur_tests_data, exact_metagrad):
     RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
     z_vect_0 = RS.randn(N_weights) * np.exp(log_init_scale)
     z_vect_final = train_z(cur_train_data, z_vect_0, transform, exact_metagrad)
     w_vect_final = transform_weights(z_vect_final, transform)
     #TODO: print/store losses and error rates here
     print "Training loss (unregularized) = " +str(getval(loss_fun(w_vect_final, **cur_train_data)))
     print "Validation loss = " +str(getval(loss_fun(w_vect_final, **cur_valid_data)))
     print "Test loss = " +str(getval(loss_fun(w_vect_final, **tests_data)))
     print "Training error = "+ str(getval(frac_err(w_vect_final, **cur_train_data)))
     print "Validation error = "+ str(getval(frac_err(w_vect_final, **cur_valid_data)))
     print "Test error = "+ str(getval(frac_err(w_vect_final, **tests_data)))
     return loss_fun(w_vect_final, **cur_valid_data)
Example #22
        def hyperloss(transform, i_hyper, cur_train_data, cur_valid_data, cur_tests_data):
            RS = RandomState((seed, i_top, i_hyper, "hyperloss"))
            z_vect_0 = RS.randn(N_weights) * np.exp(log_init_scale)
            z_vect_final = train_z(cur_train_data, z_vect_0, transform)
            w_vect_final = transform_weights(z_vect_final, transform) #TODO: initial scale AND regularization
            
         
            train_loss = getval(loss_fun(w_vect_final, **cur_train_data))
            print "Training loss (unregularized) = " +str(train_loss)
            all_train_loss.append(train_loss)
            valid_loss = getval(loss_fun(w_vect_final, **cur_valid_data))
            print "Validation loss = " +str(valid_loss)
            all_valid_loss.append(valid_loss)
            tests_loss = getval(loss_fun(w_vect_final, **cur_tests_data))
            print "Test loss = " +str(tests_loss)
            all_tests_loss.append(tests_loss)
            
            plt.plot(all_train_loss, label="training loss (unregularized)")
            plt.plot(all_valid_loss, label="validation loss")
            plt.plot(all_tests_loss, label="test loss")
            plt.title("loss vs meta iteration")
            plt.xlabel("meta iteration")
            plt.ylabel("loss")
            plt.legend()
            plt.savefig("loss2000_corrected.png")
            plt.clf()
            
            
            train_rate = getval(frac_err(w_vect_final, **cur_train_data))
            print "Training error rate = " +str(train_rate)
            all_train_rates.append(train_rate)
            valid_rate = getval(frac_err(w_vect_final, **cur_valid_data))
            print "Validation error rate = " +str(valid_rate)
            all_valid_rates.append(valid_rate)
            tests_rate = getval(frac_err(w_vect_final, **cur_tests_data))
            print "Test error rate = " +str(tests_rate)
            all_tests_rates.append(tests_rate)
            
            plt.plot(all_train_rates, label="training error rate")
            plt.plot(all_valid_rates, label="validation error rate")
            plt.plot(all_tests_rates, label="test error rate")
            plt.title("error rate vs meta iteration")
            plt.xlabel("meta iteration")
            plt.ylabel("error rate")
            plt.legend()
            plt.savefig("error2000_corrected.png")
            plt.clf()

            
            return loss_fun(w_vect_final, **cur_valid_data)
Example #23
    def hyperloss(hyperparam_vect, i):
        learning_curve = []
        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size, 
                                     np.exp(cur_hyperparams['log_param_scale'][i]))
                             for i, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas     = np.exp(cur_hyperparams['log_alphas'])
        betas      =  logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg =        cur_hyperparams['log_L2_reg']
        W_opt = sgd5(grad(indexed_loss_fun), kylist(W0, alphas, betas, log_L2_reg), callback)
        all_x.append(getval(W_opt))
        all_learning_curves.append(learning_curve)
        return valid_loss_fun(W_opt)
Example #24
 def frac_err(W_vect, X, T):
     # argmax has no useful derivative, so detach the predictions with getval
     # before taking it; the direct version below fails under the gradient tape:
     #preds = np.argmax(predictions(W_vect, X), axis=1)
     preds = np.argmax(getval(predictions(W_vect, X)), axis=1)
     return 100.0 * np.mean(np.argmax(T, axis=1) != preds)
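frac_err reports the classification error as a percentage: the argmax of the network outputs is compared against the argmax of the one-hot targets, with getval keeping the non-differentiable argmax off the gradient tape. A plain-numpy illustration with made-up arrays:

    import numpy as np

    outputs = np.array([[2.0, 0.1, -1.0],
                        [0.3, 1.2,  0.0]])   # stand-in for predictions(W_vect, X)
    T = np.array([[1, 0, 0],
                  [0, 0, 1]])                # one-hot targets

    preds = np.argmax(outputs, axis=1)       # predicted class per example
    err = 100.0 * np.mean(np.argmax(T, axis=1) != preds)
    print(err)                               # 50.0: one of the two rows is wrong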