Example #1
def test_rms_prop():
    N_weights = 5
    W0 = 0.1 * npr.randn(N_weights)
    (loss_fun, true_argmin) = make_optimization_problem(N_weights)
    x_min = rms_prop(grad(loss_fun), W0)
    assert np.allclose(x_min, true_argmin, rtol=1e-3, atol=1e-4), \
        "Diffs are: {0}".format(x_min - true_argmin)
Example #2
def run():
    train_data, valid_data, tests_data = load_data_dicts(
        N_train, N_valid, N_tests)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg'] = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types,
                                             init_log_param_scale)
    hyperparams['log_alphas'] = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas'] = np.full(N_iters, init_invlogit_betas)

    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            seed = i_hyper * 10**6 + i_iter
            idxs = npr.RandomState(seed).randint(N_train, size=batch_size)
            return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs],
                            L2_vect)

        learning_curve = []

        def callback(x, v, g, i_iter):
            if i_iter % N_batches == 0:
                learning_curve.append(loss_fun(x, **train_data))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= npr.RandomState(i_hyper).randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
        V0 = np.zeros(W0.size)  # note: unused; sgd4 is not passed a velocity here
        W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg),
                     callback)
        return W_opt, learning_curve

    def hyperloss(hyperparam_vect, i_hyper):
        W_opt, _ = primal_optimizer(hyperparam_vect, i_hyper)
        return loss_fun(W_opt, **valid_data)

    hyperloss_grad = grad(hyperloss)

    meta_results = defaultdict(list)

    def meta_callback(hyperparam_vect, i_hyper, g):
        print "Epoch {0}".format(i_hyper)
        x, learning_curve = primal_optimizer(hyperparam_vect, i_hyper)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])
        meta_results['train_loss'].append(loss_fun(x, **train_data))
        meta_results['valid_loss'].append(loss_fun(x, **valid_data))
        meta_results['tests_loss'].append(loss_fun(x, **tests_data))
        meta_results['learning_curves'].append(learning_curve)

    final_result = rms_prop(hyperloss_grad, hyperparams.vect, meta_callback,
                            N_meta_iter, meta_alpha)
    parser.vect = None  # No need to pickle zeros
    return meta_results, parser
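Throughout these examples the hyperparameters are stored in unconstrained form and mapped to their natural domains before each training run: step sizes via exp, momentum decays via a sigmoid. A sketch of the transforms, assuming logit here is the logistic sigmoid (the field name invlogit_betas and the (0, 1) momentum constraint both point that way) and d_logit its derivative:

import numpy as np

def logit(x):
    # Logistic sigmoid: maps the real line to (0, 1), keeping momenta valid.
    return 1.0 / (1.0 + np.exp(-x))

def d_logit(x):
    # Sigmoid derivative; chains hypergradients back to the unconstrained
    # parameterization, e.g. dMd_betas * d_logit(invlogit_betas) below.
    s = logit(x)
    return s * (1.0 - s)

The matching chain rule for step sizes appears in Example #4 as results['dMd_alphas'] * alphas, since alphas = exp(log_alphas) implies d(alphas)/d(log_alphas) = alphas.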
Example #3
def run():
    train_data, valid_data, tests_data = load_data_dicts(N_train, N_valid, N_tests)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg']      = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas']      = np.full(N_iters, init_log_alphas)

    hyperparams['invlogit_betas']  = np.full(N_iters, init_invlogit_betas)
    #fixed_hyperparams = VectorParser()
    #fixed_hyperparams['invlogit_betas']  = np.full(N_iters, init_invlogit_betas)

    # TODO: memoize
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            rs = npr.RandomState(npr.RandomState(global_seed + i_hyper).randint(1000))
            seed = i_hyper * 10**6 + i_iter   # Deterministic seed needed for backwards pass (note: unused below).
            idxs = rs.randint(N_train, size=batch_size)
            return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs], L2_vect)

        learning_curve = []
        def callback(x, i_iter):
            if i_iter % N_batches == 0:
                learning_curve.append(loss_fun(x, **train_data))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= npr.RandomState(global_seed + i_hyper).randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas  = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
        V0 = np.zeros(W0.size)  # note: unused; sgd4 is not passed a velocity here
        W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
        return W_opt, learning_curve

    def hyperloss(hyperparam_vect, i_hyper):
        W_opt, _ = primal_optimizer(hyperparam_vect, i_hyper)
        # return loss_fun(W_opt, **valid_data)
        return loss_fun(W_opt, **train_data)

    hyperloss_grad = grad(hyperloss)

    meta_results = defaultdict(list)
    def meta_callback(hyperparam_vect, i_hyper):
        print "Meta Epoch {0}".format(i_hyper)
        x, learning_curve = primal_optimizer(hyperparam_vect, i_hyper)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])
        meta_results['train_loss'].append(loss_fun(x, **train_data))
        meta_results['valid_loss'].append(loss_fun(x, **valid_data))
        meta_results['tests_loss'].append(loss_fun(x, **tests_data))
        meta_results['learning_curves'].append(learning_curve)

    final_result = rms_prop(hyperloss_grad, hyperparams.vect,
                            meta_callback, N_meta_iter, meta_alpha, gamma=0.0)
    parser.vect = None # No need to pickle zeros
    return meta_results, parser
Example #4
def run():
    N_iters = N_epochs
    parser, loss_fun = make_toy_funs()
    N_weight_types = len(parser.names)
    N_weights = parser.vect.size
    hyperparams = VectorParser()
    hyperparams['log_alphas']      = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas']  = np.full(N_iters, init_invlogit_betas)
    hyperparams['V0']  = np.full(N_weights, init_V0)

    all_learning_curves = []
    all_param_curves = []
    all_x = []
    def hyperloss_grad(hyperparam_vect, ii):
        learning_curve = []
        params_curve = []
        def callback(x, i):
            params_curve.append(x)
            learning_curve.append(loss_fun(x))

        def indexed_loss_fun(w, log_L2_reg, j):
            return loss_fun(w)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = np.ones(N_weights) * init_param_scale
        V0 = cur_hyperparams['V0']
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = 0.0
        results = sgd3(indexed_loss_fun, loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['V0']              = results['dMd_v']
        hypergrads['log_alphas']      = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas']  = (results['dMd_betas'] *
                                         d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        all_param_curves.append(params_curve)
        return hypergrads.vect

    add_fields = ['train_loss', 'valid_loss', 'tests_loss', 'iter_num']
    meta_results = {field : [] for field in add_fields + hyperparams.names}
    def meta_callback(hyperparam_vect, i):
        if i % N_meta_thin == 0:
            print "Meta iter {0}".format(i)
            x = all_x[-1]
            cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
            for field in cur_hyperparams.names:
                meta_results[field].append(cur_hyperparams[field])
            meta_results['train_loss'].append(loss_fun(x))
            meta_results['iter_num'].append(i)

    final_result = rms_prop(hyperloss_grad, hyperparams.vect,
                            meta_callback, N_meta_iter, meta_alpha, meta_gamma)
    meta_results['all_learning_curves'] = all_learning_curves
    meta_results['all_param_curves'] = all_param_curves
    parser.vect = None # No need to pickle zeros
    return meta_results, parser
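Each run() packs its named hyperparameter arrays into one flat vector through a VectorParser, so the meta-optimizer sees a single point while the code reads and writes fields by name. Below is a minimal stand-in for the interface used above (names, vect, item access, new_vect, copy); the real hypergrad class is assumed to be richer, e.g. preserving multi-dimensional shapes.

import copy
from collections import OrderedDict
import numpy as np

class VectorParser(object):
    # Stand-in: stores each named field as a slice of one flat vector.
    def __init__(self):
        self.idxs = OrderedDict()
        self.vect = np.zeros(0)

    @property
    def names(self):
        return list(self.idxs.keys())

    def __setitem__(self, name, value):
        value = np.ravel(np.asarray(value, dtype=float))
        if name in self.idxs:                 # overwrite an existing field
            self.vect[self.idxs[name]] = value
        else:                                 # append a new field
            start = self.vect.size
            self.idxs[name] = slice(start, start + value.size)
            self.vect = np.concatenate([self.vect, value])

    def __getitem__(self, name):
        return self.vect[self.idxs[name]]

    def new_vect(self, vect):
        # Same layout, different underlying flat vector; used to interpret
        # the vector handed back by the optimizer.
        new = copy.copy(self)
        new.vect = vect
        return new

    def copy(self):
        return self.new_vect(self.vect.copy())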
Example #6
def run():
    parser, loss_fun = make_parabola(dimension)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg']      = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas']      = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas']  = np.full(N_iters, init_invlogit_betas)

    def primal_optimizer(hyperparam_vect, i_hyper):
        learning_curve = []
        def callback(x, i_iter):
            learning_curve.append(loss_fun(x))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = fill_parser(parser, np.exp(cur_hyperparams['log_param_scale']))
        W0 *= npr.RandomState(hash(i_hyper)).randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas  = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
        W_opt = sgd4(grad(loss_fun), kylist(W0, alphas, betas, L2_reg), callback)
        callback(W_opt, N_iters)
        return W_opt, learning_curve

    def hyperloss(hyperparam_vect, i_hyper):
        W_opt, _ = primal_optimizer(hyperparam_vect, i_hyper)
        return loss_fun(W_opt)
    hyperloss_grad = grad(hyperloss)

    meta_results = defaultdict(list)
    def meta_callback(hyperparam_vect, i_hyper):
        print "Meta Epoch {0}".format(i_hyper)
        x, learning_curve = primal_optimizer(hyperparam_vect, i_hyper)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])
        meta_results['train_loss'].append(loss_fun(x))
        meta_results['learning_curves'].append(learning_curve)

    final_result = rms_prop(hyperloss_grad, hyperparams.vect,
                            meta_callback, N_meta_iter, meta_alpha, gamma=0.0)
    meta_callback(final_result, N_meta_iter)
    parser.vect = None # No need to pickle zeros
    return meta_results, parser
Example #7
def run():
    train_data, valid_data, tests_data = load_data_dicts(
        N_train, N_valid, N_tests)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg'] = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types,
                                             init_log_param_scale)
    hyperparams['log_alphas'] = np.full(N_iters, init_log_alphas)

    hyperparams['invlogit_betas'] = np.full(N_iters, init_invlogit_betas)
    fixed_hyperparams = VectorParser()
    fixed_hyperparams['log_param_scale'] = np.full(N_iters,
                                                   init_log_param_scale)

    # TODO: memoize
    def primal_optimizer(hyperparam_vect, i_hyper):
        def indexed_loss_fun(w, L2_vect, i_iter):
            rs = npr.RandomState(
                npr.RandomState(global_seed + i_hyper).randint(1000))
            seed = i_hyper * 10**6 + i_iter  # Deterministic seed needed for backwards pass (note: unused below).
            idxs = rs.randint(N_train, size=batch_size)
            return loss_fun(w, train_data['X'][idxs], train_data['T'][idxs],
                            L2_vect)

        learning_curve_dict = defaultdict(list)

        def callback(x, v, g, i_iter):
            if i_iter % N_batches == 0:
                learning_curve_dict['learning_curve'].append(
                    loss_fun(x, **train_data))
                learning_curve_dict['grad_norm'].append(np.linalg.norm(g))
                learning_curve_dict['weight_norm'].append(np.linalg.norm(x))
                learning_curve_dict['velocity_norm'].append(np.linalg.norm(v))

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = fill_parser(parser, np.exp(fixed_hyperparams['log_param_scale']))
        W0 *= npr.RandomState(global_seed + i_hyper).randn(W0.size)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        L2_reg = fill_parser(parser, np.exp(cur_hyperparams['log_L2_reg']))
        W_opt = sgd4(grad(indexed_loss_fun), kylist(W0, alphas, betas, L2_reg),
                     callback)
        #callback(W_opt, N_iters)
        return W_opt, learning_curve_dict

    def hyperloss(hyperparam_vect, i_hyper):
        W_opt, _ = primal_optimizer(hyperparam_vect, i_hyper)
        return loss_fun(W_opt, **valid_data)

    hyperloss_grad = grad(hyperloss)

    meta_results = defaultdict(list)

    def meta_callback(hyperparam_vect, i_hyper):
        print "Meta Epoch {0}".format(i_hyper)
        x, learning_curve_dict = primal_optimizer(hyperparam_vect, i_hyper)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])
        meta_results['train_loss'].append(loss_fun(x, **train_data))
        meta_results['valid_loss'].append(loss_fun(x, **valid_data))
        meta_results['tests_loss'].append(loss_fun(x, **tests_data))
        meta_results['learning_curves'].append(learning_curve_dict)

    final_result = rms_prop(hyperloss_grad,
                            hyperparams.vect,
                            meta_callback,
                            N_meta_iter,
                            meta_alpha,
                            gamma=0.0)
    meta_callback(final_result, N_meta_iter)
    parser.vect = None  # No need to pickle zeros
    return meta_results, parser
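fill_parser expands one value per weight group (here, one L2 strength or parameter scale per layer) into a full-length weight vector. Examples #8 and #10 build the same thing by hand as partial_vects, so its behavior is presumably:

import numpy as np

def fill_parser(parser, values):
    # One scalar per named weight group, repeated over that group's entries.
    return np.concatenate([np.full(parser[name].size, values[i])
                           for i, name in enumerate(parser.names)])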
Example #8
def run():
    (train_images, train_labels),\
    (valid_images, valid_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg'] = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types,
                                             init_log_param_scale)
    hyperparams['log_alphas'] = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas'] = np.full(N_iters, init_invlogit_betas)

    def indexed_loss_fun(w, log_L2_reg, i):
        idxs = batch_idxs[i % len(batch_idxs)]
        partial_vects = [
            np.full(parser[name].size, np.exp(log_L2_reg[j]))
            for j, name in enumerate(parser.names)  # j avoids shadowing the batch index i
        ]
        L2_reg_vect = np.concatenate(partial_vects, axis=0)
        return loss_fun(w,
                        X=train_images[idxs],
                        T=train_labels[idxs],
                        L2_reg=L2_reg_vect)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []

    def hyperloss_grad(hyperparam_vect, i):
        learning_curve = []

        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(
                    loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [
            np.full(parser[name].size,
                    np.exp(cur_hyperparams['log_param_scale'][j]))
            for j, name in enumerate(parser.names)  # j avoids shadowing the meta-iteration i
        ]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        results = sgd3(indexed_loss_fun,
                       valid_loss_fun,
                       W0,
                       V0,
                       alphas,
                       betas,
                       log_L2_reg,
                       callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['log_L2_reg'] = results['dMd_meta']
        weights_grad = parser.new_vect(W0 * results['dMd_x'])
        hypergrads['log_param_scale'] = [
            np.sum(weights_grad[name]) for name in parser.names
        ]
        hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas'] = (
            results['dMd_betas'] * d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        return hypergrads.vect

    add_fields = ['train_loss', 'valid_loss', 'tests_loss']
    meta_results = {field: [] for field in add_fields + hyperparams.names}

    def meta_callback(hyperparam_vect, i):
        print "Meta iter {0}".format(i)
        x = all_x[-1]
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        log_L2_reg = cur_hyperparams['log_L2_reg']
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])

        meta_results['train_loss'].append(train_loss_fun(x))
        meta_results['valid_loss'].append(valid_loss_fun(x))
        meta_results['tests_loss'].append(tests_loss_fun(x))

    final_result = rms_prop(hyperloss_grad, hyperparams.vect, meta_callback,
                            N_meta_iter, meta_alpha)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None  # No need to pickle zeros
    return meta_results, parser
Example #9
def run():
    RS = RandomState((seed, "top_rs"))
    all_alphabets = omniglot.load_data()
    RS.shuffle(all_alphabets)
    train_alphabets = all_alphabets[:-N_test_alphabets]
    tests_alphabets = all_alphabets[-N_test_alphabets:]
    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size
    hyperparams_0 = VectorParser()
    hyperparams_0['log_scale']  = log_scale_init * np.ones(N_weights)
    hyperparams_0['offset'] = offset_init_std * RS.randn(N_weights)

    def reg_loss_fun(W, data, hyperparam_vect, reg_penalty):
        hyperparams = hyperparams_0.new_vect(hyperparam_vect)
        Z = np.exp(hyperparams['log_scale']) * W + hyperparams['offset']
        return loss_fun(Z, **data) + np.dot(W, W) * reg_penalty

    def hyperloss(hyperparam_vect, i_hyper, alphabets, verbose=True, report_train_loss=False):
        RS = RandomState((seed, i_hyper, "hyperloss"))        
        alphabet = shuffle_alphabet(RS.choice(alphabets), RS)
        N_train = alphabet['X'].shape[0] - N_valid_dpts
        train_data = dictslice(alphabet, slice(None, N_train))
        if report_train_loss:
            valid_data = dictslice(alphabet, slice(None, N_valid_dpts))
        else:
            valid_data = dictslice(alphabet, slice(N_train, None))
        def primal_loss(W, hyperparam_vect, i_primal, reg_penalty=True):
            RS = RandomState((seed, i_hyper, i_primal))
            idxs = RS.permutation(N_train)[:batch_size]
            minibatch = dictslice(train_data, idxs)
            loss = reg_loss_fun(W, minibatch, hyperparam_vect, reg_penalty)
            if verbose and i_primal % 10 == 0:
                print("Iter {0}, loss {1}".format(i_primal, getval(loss)))
            return loss

        W0 = RS.randn(N_weights) * initialization_scale
        W_final = sgd(grad(primal_loss), hyperparam_vect, W0, alpha, beta, N_iters, callback=None)
        return reg_loss_fun(W_final, valid_data, hyperparam_vect, reg_penalty=False)

    results = defaultdict(list)
    def record_results(hyperparam_vect, i_hyper, g):
        print "Meta iter {0}. Recording results".format(i_hyper)
        # RS = RandomState((seed, i_hyper, "evaluation"))
        def loss_fun(alphabets, report_train_loss):
            RS = RandomState((seed, "evaluation"))  # Fixed seed: evaluate the same alphabets at every meta-iteration
            return np.mean([hyperloss(hyperparam_vect, RS.int32(), alphabets=alphabets,
                                      verbose=False, report_train_loss=report_train_loss)
                            for i in range(N_alphabets_eval)])
        cur_hyperparams = hyperparams_0.new_vect(hyperparam_vect.copy())
        if i_hyper % N_hyper_thin == 0:
            # Storing O(N_weights) is a bit expensive so we thin it out and store in low precision
            for field in cur_hyperparams.names:
                results[field].append(cur_hyperparams[field].astype(np.float16))
        results['train_loss'].append(loss_fun(train_alphabets, report_train_loss=True))
        results['valid_loss'].append(loss_fun(train_alphabets, report_train_loss=False))
        results['tests_loss'].append(loss_fun(tests_alphabets, report_train_loss=False))
        print "Train:", results['train_loss']
        print "Valid:", results['valid_loss']
        print "Tests:", results['tests_loss']

    train_hyperloss = partial(hyperloss, alphabets=train_alphabets)
    rms_prop(grad(train_hyperloss), hyperparams_0.vect, record_results, N_meta_iter, meta_alpha, gamma=0)
    return results
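Examples #9 and #12 seed randomness with tuples such as (seed, i_hyper, "hyperloss"), so each (meta-iteration, purpose) pair gets its own reproducible stream. The RandomState wrapper that accepts tuples is not shown in this listing; a hypothetical sketch of the idea hashes the tuple down to a 32-bit numpy seed:

import hashlib
import numpy.random as npr

def tuple_rs(seed_tuple):
    # Hash an arbitrary printable tuple to a 32-bit seed so distinct tuples
    # give independent, reproducible streams.
    digest = hashlib.md5(repr(seed_tuple).encode()).hexdigest()
    return npr.RandomState(int(digest, 16) % (2**32))

rs = tuple_rs((0, 3, "hyperloss"))
idxs = rs.permutation(20)[:5]   # same tuple -> same minibatch every time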
Example #10
def run():
    (train_images, train_labels),\
    (valid_images, valid_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg']      = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas']      = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas']  = np.full(N_iters, init_invlogit_betas)

    def indexed_loss_fun(w, log_L2_reg, i):
        idxs = batch_idxs[i % len(batch_idxs)]
        partial_vects = [np.full(parser[name].size, np.exp(log_L2_reg[j]))
                         for j, name in enumerate(parser.names)]  # j avoids shadowing the batch index i
        L2_reg_vect = np.concatenate(partial_vects, axis=0)
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg_vect)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []

    def hyperloss(hyperparam_vect, i):
        learning_curve = []
        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size,
                                     np.exp(cur_hyperparams['log_param_scale'][j]))
                             for j, name in enumerate(parser.names)]  # j avoids shadowing the meta-iteration i
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas     = np.exp(cur_hyperparams['log_alphas'])
        betas      =  logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg =        cur_hyperparams['log_L2_reg']
        W_opt = sgd5(grad(indexed_loss_fun), kylist(W0, alphas, betas, log_L2_reg), callback)
        all_x.append(getval(W_opt))
        all_learning_curves.append(learning_curve)
        return valid_loss_fun(W_opt)

    hyperloss_grad = grad(hyperloss)

    add_fields = ['train_loss', 'valid_loss', 'tests_loss']
    meta_results = {field : [] for field in add_fields + hyperparams.names}
    def meta_callback(hyperparam_vect, i):
        x = all_x[-1]
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        log_L2_reg = cur_hyperparams['log_L2_reg']
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])

        meta_results['train_loss'].append(train_loss_fun(x))
        meta_results['valid_loss'].append(valid_loss_fun(x))
        meta_results['tests_loss'].append(tests_loss_fun(x))

    final_result = rms_prop(hyperloss_grad, hyperparams.vect, meta_callback, N_meta_iter, meta_alpha)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None # No need to pickle zeros
    return meta_results, parser
Example #11
def run():
    (train_images, train_labels),\
    (valid_images, valid_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg']      = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas']      = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas']  = np.full(N_iters, init_invlogit_betas)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []
    def hyperloss_grad(hyperparam_vect, ii):
        learning_curve = []
        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        def indexed_loss_fun(w, log_L2_reg, j):
            # idxs = batch_idxs[i % len(batch_idxs)]
            npr.seed(1000 * ii + j)
            idxs = npr.randint(N_train, size=len(batch_idxs))  # NOTE: size here is the number of batches; batch_size was presumably intended
            partial_vects = [np.full(parser[name].size, np.exp(log_L2_reg[i]))
                             for i, name in enumerate(parser.names)]
            L2_reg_vect = np.concatenate(partial_vects, axis=0)
            return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg_vect)

        npr.seed(ii)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size, 
                                     np.exp(cur_hyperparams['log_param_scale'][i]))
                             for i, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        results = sgd3(indexed_loss_fun, valid_loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['log_L2_reg']      = results['dMd_meta']
        weights_grad = parser.new_vect(W0 * results['dMd_x'])
        hypergrads['log_param_scale'] = [np.sum(weights_grad[name])
                                         for name in parser.names]
        hypergrads['log_alphas']      = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas']  = (results['dMd_betas'] *
                                         d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        return hypergrads.vect

    add_fields = ['train_loss', 'valid_loss', 'tests_loss', 'iter_num']
    meta_results = {field : [] for field in add_fields + hyperparams.names}
    def meta_callback(hyperparam_vect, i):
        if i % N_meta_thin == 0:
            print "Meta iter {0}".format(i)
            x = all_x[-1]
            cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
            log_L2_reg = cur_hyperparams['log_L2_reg']
            for field in cur_hyperparams.names:
                meta_results[field].append(cur_hyperparams[field])

            meta_results['train_loss'].append(train_loss_fun(x))
            meta_results['valid_loss'].append(valid_loss_fun(x))
            meta_results['tests_loss'].append(tests_loss_fun(x))
            meta_results['iter_num'].append(i)

    final_result = rms_prop(hyperloss_grad, hyperparams.vect,
                            meta_callback, N_meta_iter, meta_alpha, meta_gamma)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None # No need to pickle zeros
    return meta_results, parser
Example #12
def run():
    RS = RandomState((seed, "top_rs"))
    all_alphabets = omniglot.load_data()
    RS.shuffle(all_alphabets)
    train_alphabets = all_alphabets[:-N_test_alphabets]
    tests_alphabets = all_alphabets[-N_test_alphabets:]
    w_parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = w_parser.vect.size
    hyperparams_0 = VectorParser()
    hyperparams_0['log_scale']  = log_scale_init * np.ones(N_weights)
    hyperparams_0['offset'] = offset_init_std * RS.randn(N_weights)

    def reg_loss_fun(W, data, hyperparam_vect, reg_penalty):
        hyperparams = hyperparams_0.new_vect(hyperparam_vect)
        Z = np.exp(hyperparams['log_scale']) * W + hyperparams['offset']
        return loss_fun(Z, **data) + np.dot(W, W) * reg_penalty

    def hyperloss(hyperparam_vect, i_hyper, alphabets, verbose=True, report_train_loss=False):
        RS = RandomState((seed, i_hyper, "hyperloss"))        
        alphabet = shuffle_alphabet(RS.choice(alphabets), RS)
        N_train = alphabet['X'].shape[0] - N_valid_dpts
        train_data = dictslice(alphabet, slice(None, N_train))
        if report_train_loss:
            valid_data = dictslice(alphabet, slice(None, N_valid_dpts))
        else:
            valid_data = dictslice(alphabet, slice(N_train, None))
        def primal_loss(W, hyperparam_vect, i_primal, reg_penalty=True):
            RS = RandomState((seed, i_hyper, i_primal))
            idxs = RS.permutation(N_train)[:batch_size]
            minibatch = dictslice(train_data, idxs)
            loss = reg_loss_fun(W, minibatch, hyperparam_vect, reg_penalty)
            if verbose and i_primal % 10 == 0:
                print("Iter {0}, loss {1}".format(i_primal, getval(loss)))
            return loss

        W0 = np.zeros(N_weights)
        W_final = sgd(grad(primal_loss), hyperparam_vect, W0, alpha, beta, N_iters, callback=None)
        return reg_loss_fun(W_final, valid_data, hyperparam_vect, reg_penalty=False)

    results = defaultdict(list)
    def record_results(hyperparam_vect, i_hyper, g):
        print "Meta iter {0}. Recording results".format(i_hyper)
        RS = RandomState((seed, i_hyper, "evaluation"))
        def loss_fun(alphabets, report_train_loss):
            return np.mean([hyperloss(hyperparam_vect, RS.int32(), alphabets=alphabets,
                                      verbose=False, report_train_loss=report_train_loss)
                            for i in range(N_alphabets_eval)])
        cur_hyperparams = hyperparams_0.new_vect(hyperparam_vect.copy())
        if i_hyper % N_hyper_thin == 0:
            # Storing O(N_weights) is a bit expensive so we thin it out and store in low precision
            for field in cur_hyperparams.names:
                results[field].append(cur_hyperparams[field].astype(np.float16))
        results['train_loss'].append(loss_fun(train_alphabets, report_train_loss=True))
        results['valid_loss'].append(loss_fun(train_alphabets, report_train_loss=False))
        results['tests_loss'].append(loss_fun(tests_alphabets, report_train_loss=False))
        print "Train:", results['train_loss']
        print "Valid:", results['valid_loss']
        print "Tests:", results['tests_loss']

    train_hyperloss = partial(hyperloss, alphabets=train_alphabets)
    rms_prop(grad(train_hyperloss), hyperparams_0.vect, record_results, N_meta_iter, meta_alpha, gamma=0)
    return results
Example #13
def run():
    N_iters = N_epochs
    parser, loss_fun = make_toy_funs()
    N_weight_types = len(parser.names)
    N_weights = parser.vect.size
    hyperparams = VectorParser()
    hyperparams['log_alphas'] = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas'] = np.full(N_iters, init_invlogit_betas)
    hyperparams['V0'] = np.full(N_weights, init_V0)

    all_learning_curves = []
    all_param_curves = []
    all_x = []

    def hyperloss_grad(hyperparam_vect, ii):
        learning_curve = []
        params_curve = []

        def callback(x, i):
            params_curve.append(x)
            learning_curve.append(loss_fun(x))

        def indexed_loss_fun(w, log_L2_reg, j):
            return loss_fun(w)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = np.ones(N_weights) * init_param_scale
        V0 = cur_hyperparams['V0']
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = 0.0
        results = sgd3(indexed_loss_fun,
                       loss_fun,
                       W0,
                       V0,
                       alphas,
                       betas,
                       log_L2_reg,
                       callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['V0'] = results['dMd_v']
        hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas'] = (
            results['dMd_betas'] * d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        all_param_curves.append(params_curve)
        return hypergrads.vect

    add_fields = ['train_loss', 'valid_loss', 'tests_loss', 'iter_num']
    meta_results = {field: [] for field in add_fields + hyperparams.names}

    def meta_callback(hyperparam_vect, i):
        if i % N_meta_thin == 0:
            print "Meta iter {0}".format(i)
            x = all_x[-1]
            cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
            for field in cur_hyperparams.names:
                meta_results[field].append(cur_hyperparams[field])
            meta_results['train_loss'].append(loss_fun(x))
            meta_results['iter_num'].append(i)

    final_result = rms_prop(hyperloss_grad, hyperparams.vect, meta_callback,
                            N_meta_iter, meta_alpha, meta_gamma)
    meta_results['all_learning_curves'] = all_learning_curves
    meta_results['all_param_curves'] = all_param_curves
    parser.vect = None  # No need to pickle zeros
    return meta_results, parser
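All of these run() functions tune the same primal procedure: momentum SGD in which every iteration has its own step size alpha_i and decay beta_i. The sgd3/sgd4/sgd5 routines run this loop forward and then replay it exactly in reverse to recover gradients with respect to alphas, betas, the initialization, and the regularizer. Only the forward half is sketched here, under one common form of the update; the exact form inside sgd3/sgd4 may differ.

import numpy as np

def sgd_forward(grad_w, w0, v0, alphas, betas, callback=None):
    # Momentum SGD with per-iteration hyperparameters; the hypergradient
    # methods above differentiate the output of this loop.
    w, v = w0.copy(), v0.copy()
    for i, (alpha, beta) in enumerate(zip(alphas, betas)):
        g = grad_w(w, i)                 # minibatch gradient at iteration i
        v = beta * v - (1.0 - beta) * g  # decay velocity, accumulate gradient
        w = w + alpha * v                # take the step
        if callback:
            callback(w, i)
    return w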