def preproc_space(
    sup_min_epochs=300,
    sup_max_epochs=2000,
    max_seconds=60 * 60,
):
    """
    Return a hyperopt-compatible pyll expression for a trained neural network.

    The trained neural network will have 0, 1, 2, or 3 hidden layers, and may
    have an affine first layer that performs ZCA pre-processing of the input
    data.

    Each layer of the network will be pre-trained by some amount of
    contrastive divergence before being fine-tuned by SGD.

    The training program is built using stub literals `pyll_stubs.train_task`
    and `pyll_stubs.valid_task`.  When evaluating the pyll program, these
    literals must be replaced with skdata Task objects with
    `vector_classification` semantics.  See `skdata_learning_algo.py` for how
    to use the `use_obj_for_literal_in_memo` function to swap live Task
    objects in for these stubs.

    The search space described by this function corresponds to the DBN model
    used in [1] and [2].

    """

    train_task_x = scope.getattr(pyll_stubs.train_task, 'x')
    nnet0 = scope.NNet([], n_out=scope.getattr(train_task_x, 'shape')[1])
    nnet1 = hp.choice(
        'preproc',
        [
            nnet0,  # -- raw data
            scope.nnet_add_layers(  # -- ZCA of data
                nnet0,
                scope.zca_layer(
                    train_task_x,
                    energy=hp.uniform('pca_energy', .5, 1),
                    eps=1e-14,
                )),
        ])

    param_seed = hp.choice('iseed', [5, 6, 7, 8])

    time_limit = scope.time() + max_seconds

    nnets = [nnet1]
    nnet_i_pt = nnet1
    for ii, cd_epochs_max in enumerate([3000, 2000, 1500]):
        layer = scope.random_sigmoid_layer(
            # -- hack to get different seeds for dif't layers
            seed=param_seed + cd_epochs_max,
            n_in=scope.getattr(nnet_i_pt, 'n_out'),
            n_out=hp.qloguniform('n_hid_%i' % ii,
                                 np.log(2**7),
                                 np.log(2**12),
                                 q=16),
            dist=hp.choice('W_idist_%i' % ii, ['uniform', 'normal']),
            scale_heuristic=hp.choice(
                'W_ialgo_%i' % ii,
                [('old', hp.lognormal('W_imult_%i' % ii, 0, 1)),
                 ('Glorot', )]),
            squash='logistic',
        )
        nnet_i_raw = scope.nnet_add_layer(nnet_i_pt, layer)
        # -- repeatedly recomputing the lower layers wastes some CPU, but it
        #    keeps memory usage much more stable across jobs (good for a
        #    cluster), and the extra CPU cost is small overall.
        nnet_i_pt = scope.nnet_pretrain_top_layer_cd(
            nnet_i_raw,
            train_task_x,
            lr=hp.lognormal('cd_lr_%i' % ii, np.log(.01), 2),
            seed=1 + hp.randint('cd_seed_%i' % ii, 10),
            n_epochs=hp.qloguniform('cd_epochs_%i' % ii,
                                    np.log(1),
                                    np.log(cd_epochs_max),
                                    q=1),
            # -- for whatever reason (?), this was fixed at 100
            batchsize=100,
            sample_v0s=hp.choice('sample_v0s_%i' % ii, [False, True]),
            lr_anneal_start=hp.qloguniform('lr_anneal_%i' % ii,
                                           np.log(10),
                                           np.log(10000),
                                           q=1),
            time_limit=time_limit,
        )
        nnets.append(nnet_i_pt)

    # this prior is not what I would do now, but it is what I did then...
    nnet_features = hp.pchoice('depth', [(.5, nnets[0]), (.25, nnets[1]),
                                         (.125, nnets[2]), (.125, nnets[3])])

    sup_nnet = scope.nnet_add_layer(
        nnet_features,
        scope.zero_softmax_layer(n_in=scope.getattr(nnet_features, 'n_out'),
                                 n_out=scope.getattr(pyll_stubs.train_task,
                                                     'n_classes')))

    nnet4, report = scope.nnet_sgd_finetune_classifier(
        sup_nnet,
        pyll_stubs.train_task,
        pyll_stubs.valid_task,
        fixed_nnet=nnet1,
        max_epochs=sup_max_epochs,
        min_epochs=sup_min_epochs,
        batch_size=hp.choice('batch_size', [20, 100]),
        lr=hp.lognormal('lr', np.log(.01), 3.),
        lr_anneal_start=hp.qloguniform('lr_anneal_start',
                                       np.log(100),
                                       np.log(10000),
                                       q=1),
        l2_penalty=hp.choice(
            'l2_penalty',
            [0, hp.lognormal('l2_penalty_nz', np.log(1.0e-6), 2.)]),
        time_limit=time_limit,
    )

    return nnet4, report
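

# -- Usage sketch (illustrative, not part of the original source): how the
#    stub literals described in the docstring above might be swapped for live
#    skdata Task objects before the pyll program is evaluated.  The helper
#    name, the import location of `use_obj_for_literal_in_memo`, its
#    (expr, obj, literal, memo) argument order, and the idea of passing
#    optimizer-chosen hyperparameter bindings in as a memo are assumptions;
#    see `skdata_learning_algo.py` for the actual wrapper.
def _example_eval_preproc_space(train_task, valid_task, hyperparam_memo):
    """Hypothetical helper: evaluate `preproc_space` on live Task objects."""
    from hyperopt import pyll
    from hyperopt.base import use_obj_for_literal_in_memo

    expr = pyll.as_apply(preproc_space())
    memo = dict(hyperparam_memo)  # -- bindings for the hp.* nodes
    use_obj_for_literal_in_memo(expr, train_task, pyll_stubs.train_task, memo)
    use_obj_for_literal_in_memo(expr, valid_task, pyll_stubs.valid_task, memo)
    # -- rec_eval runs the whole training program: pre-processing, CD
    #    pre-training of each layer, then supervised fine-tuning.
    trained_nnet, report = pyll.rec_eval(expr, memo=memo)
    return trained_nnet, report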


def nnet1_preproc_space(sup_min_epochs=300, sup_max_epochs=2000,
                        max_seconds=60 * 60):
    """
    Return a hyperopt-compatible pyll expression for a trained neural network.

    The trained neural network will have one hidden layer, and may
    have an affine first layer that does column normalization or PCA
    pre-processing.

    The training program is built using stub literals `pyll_stubs.train_task`
    and `pyll_stubs.valid_task`.  When evaluating the pyll program, these
    literals must be replaced with skdata Task objects with
    `vector_classification` semantics.  See `skdata_learning_algo.py` for how
    to use the `use_obj_for_literal_in_memo` function to swap live Task
    objects in for these stubs.

    The search space described by this function corresponds to the one-layer
    neural network with pre-processing used in [1] and [2].

    """
    time_limit = scope.time() + max_seconds

    train_task_x = scope.getattr(pyll_stubs.train_task, 'x')
    nnet0 = scope.NNet([], n_out=scope.getattr(train_task_x, 'shape')[1])
    nnet1 = hp.choice('preproc',
        [
            # -- raw XXX set up something for n_in arg of hidden layer
            nnet0,
            # -- normalize
            scope.nnet_add_layer(
                nnet0,
                scope.column_normalize_layer(
                    train_task_x,
                    std_thresh=hp.loguniform('colnorm_thresh',
                                             np.log(1e-9),
                                             np.log(1e-3)))),
            # -- pca (with bias to throw away a lot)
            scope.nnet_add_layer(
                nnet0,
                scope.pca_layer(
                    train_task_x,
                    energy=hp.uniform('pca_energy', .5, 1),
                    eps=1e-14)),
        ])
    hidden_layer = scope.random_sigmoid_layer(
        n_in=scope.getattr(nnet1, 'n_out'),
        n_out=hp.qloguniform('nhid1', np.log(16), np.log(1024), q=16),
        dist=hp.choice('dist1', ['uniform', 'normal']),
        scale_heuristic=hp.choice('scale_heur1', [
            ('old', hp.uniform('scale_mult1', .2, 2)),
            ('Glorot',)]),
        seed=hp.choice('iseed', [5, 6, 7, 8]),
        squash=hp.choice('squash', ['tanh', 'logistic']),
    )
    nnet2 = scope.nnet_add_layer(nnet1, hidden_layer)
    nnet3 = scope.nnet_add_layer(
        nnet2,
        scope.zero_softmax_layer(
            n_in=scope.getattr(nnet2, 'n_out'),
            n_out=scope.getattr(pyll_stubs.train_task, 'n_classes')))

    nnet4 = scope.nnet_sgd_finetune_classifier(
        nnet3,
        pyll_stubs.train_task,
        pyll_stubs.valid_task,
        fixed_nnet=nnet1,   # -- don't fine-tune this first part of nnet3
        max_epochs=sup_max_epochs,
        min_epochs=sup_min_epochs,
        batch_size=hp.choice('batch_size', [20, 100]),
        lr=hp.lognormal('lr', np.log(.01), 3.),
        lr_anneal_start=hp.qloguniform(
            'lr_anneal_start', np.log(100), np.log(10000), q=1),
        l2_penalty=hp.choice('l2_penalty', [
            0,
            hp.lognormal('l2_penalty_nz', np.log(1.0e-6), 2.)]),
        time_limit=time_limit,
        )

    return nnet4
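

# -- Illustrative sketch (not part of the original source) of the column
#    normalization that the 'preproc' choice above can select.  This is plain
#    NumPy, not the actual `scope.column_normalize_layer` implementation; the
#    helper name, the default threshold, and the use of `std_thresh` to skip
#    near-constant columns are assumptions.
def _example_column_normalize(X, std_thresh=1e-6):
    """Center each column and scale by its std, skipping near-constant ones."""
    X = np.asarray(X, dtype=float)
    mean = X.mean(axis=0)
    std = X.std(axis=0)
    # -- columns whose std falls at or below std_thresh are left unscaled so
    #    that near-constant features do not blow up after division
    safe_std = np.where(std > std_thresh, std, 1.0)
    return (X - mean) / safe_std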