def preproc_space(
        sup_min_epochs=300,
        sup_max_epochs=2000,
        max_seconds=60 * 60,
        ):
    """
    Return a hyperopt-compatible pyll expression for a trained neural network.

    The trained neural network will have 0, 1, 2, or 3 hidden layers, and may
    have an affine first layer that does column normalization or PCA
    pre-processing.

    Each layer of the network will be pre-trained by some amount of
    contrastive divergence before being fine-tuned by SGD.

    The training program is built using stub literals `pyll_stubs.train_task`
    and `pyll_stubs.valid_task`. When evaluating the pyll program, these
    literals must be replaced with skdata Task objects with
    `vector_classification` semantics. See `skdata_learning_algo.py` for how
    to use the `use_obj_for_literal_in_memo` function to swap live Task
    objects in for these stubs.

    The search space described by this function corresponds to the DBN model
    used in [1] and [2].
    """
    train_task_x = scope.getattr(pyll_stubs.train_task, 'x')
    nnet0 = scope.NNet([], n_out=scope.getattr(train_task_x, 'shape')[1])
    nnet1 = hp.choice('preproc', [
        nnet0,                  # -- raw data
        scope.nnet_add_layers(  # -- ZCA of data
            nnet0,
            scope.zca_layer(
                train_task_x,
                energy=hp.uniform('pca_energy', .5, 1),
                eps=1e-14,
                )),
        ])
    param_seed = hp.choice('iseed', [5, 6, 7, 8])

    time_limit = scope.time() + max_seconds

    nnets = [nnet1]
    nnet_i_pt = nnet1
    for ii, cd_epochs_max in enumerate([3000, 2000, 1500]):
        layer = scope.random_sigmoid_layer(
            # -- hack to get different seeds for different layers
            seed=param_seed + cd_epochs_max,
            n_in=scope.getattr(nnet_i_pt, 'n_out'),
            n_out=hp.qloguniform('n_hid_%i' % ii,
                                 np.log(2**7),
                                 np.log(2**12),
                                 q=16),
            dist=hp.choice('W_idist_%i' % ii, ['uniform', 'normal']),
            scale_heuristic=hp.choice(
                'W_ialgo_%i' % ii, [
                    ('old', hp.lognormal('W_imult_%i' % ii, 0, 1)),
                    ('Glorot',)]),
            squash='logistic',
            )
        nnet_i_raw = scope.nnet_add_layer(nnet_i_pt, layer)
        # -- repeatedly calculating the lower layers wastes some CPU, but it
        #    keeps memory usage much more stable across jobs (good for a
        #    cluster), and the wasted CPU is not significant overall.
        nnet_i_pt = scope.nnet_pretrain_top_layer_cd(
            nnet_i_raw,
            train_task_x,
            lr=hp.lognormal('cd_lr_%i' % ii, np.log(.01), 2),
            seed=1 + hp.randint('cd_seed_%i' % ii, 10),
            n_epochs=hp.qloguniform('cd_epochs_%i' % ii,
                                    np.log(1),
                                    np.log(cd_epochs_max),
                                    q=1),
            # -- for whatever reason (?), this was fixed at 100
            batchsize=100,
            sample_v0s=hp.choice('sample_v0s_%i' % ii, [False, True]),
            lr_anneal_start=hp.qloguniform('lr_anneal_%i' % ii,
                                           np.log(10),
                                           np.log(10000),
                                           q=1),
            time_limit=time_limit,
            )
        nnets.append(nnet_i_pt)

    # -- this prior is not what I would do now, but it is what I did then...
    nnet_features = hp.pchoice('depth', [
        (.5, nnets[0]),
        (.25, nnets[1]),
        (.125, nnets[2]),
        (.125, nnets[3])])

    sup_nnet = scope.nnet_add_layer(
        nnet_features,
        scope.zero_softmax_layer(
            n_in=scope.getattr(nnet_features, 'n_out'),
            n_out=scope.getattr(pyll_stubs.train_task, 'n_classes')))

    nnet4, report = scope.nnet_sgd_finetune_classifier(
        sup_nnet,
        pyll_stubs.train_task,
        pyll_stubs.valid_task,
        fixed_nnet=nnet1,
        max_epochs=sup_max_epochs,
        min_epochs=sup_min_epochs,
        batch_size=hp.choice('batch_size', [20, 100]),
        lr=hp.lognormal('lr', np.log(.01), 3.),
        lr_anneal_start=hp.qloguniform('lr_anneal_start',
                                       np.log(100),
                                       np.log(10000),
                                       q=1),
        l2_penalty=hp.choice('l2_penalty', [
            0,
            hp.lognormal('l2_penalty_nz', np.log(1.0e-6), 2.)]),
        time_limit=time_limit,
        )

    return nnet4, report
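

# -- A minimal, self-contained sketch of the stub-literal mechanism the
#    docstring above relies on: `pyll.rec_eval` treats any node already
#    present in its `memo` dict as given rather than recomputing it, so a
#    placeholder Literal can be swapped for a live object at evaluation time.
#    `_example_literal_swap` and `_FakeTask` are hypothetical names used only
#    for illustration; only hyperopt's public pyll API is called.
def _example_literal_swap():
    from hyperopt import pyll

    stub = pyll.Literal('train_task_stub')       # -- placeholder literal
    expr = scope.getattr(stub, 'n_classes') + 1  # -- a graph built on the stub

    class _FakeTask(object):
        # -- stand-in for a real skdata Task
        n_classes = 9

    memo = {stub: _FakeTask()}                   # -- swap in the live object
    return pyll.rec_eval(expr, memo=memo)        # -- evaluates to 10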


def nnet1_preproc_space(sup_min_epochs=300,
                        sup_max_epochs=2000,
                        max_seconds=60 * 60):
    """
    Return a hyperopt-compatible pyll expression for a trained neural network.

    The trained neural network will have one hidden layer, and may have an
    affine first layer that does column normalization or PCA pre-processing.

    The training program is built using stub literals `pyll_stubs.train_task`
    and `pyll_stubs.valid_task`. When evaluating the pyll program, these
    literals must be replaced with skdata Task objects with
    `vector_classification` semantics. See `skdata_learning_algo.py` for how
    to use the `use_obj_for_literal_in_memo` function to swap live Task
    objects in for these stubs.

    The search space described by this function corresponds to the one-layer
    neural network with pre-processing used in [1] and [2].
    """
    time_limit = scope.time() + max_seconds

    train_task_x = scope.getattr(pyll_stubs.train_task, 'x')
    nnet0 = scope.NNet([], n_out=scope.getattr(train_task_x, 'shape')[1])
    nnet1 = hp.choice('preproc', [
        # -- raw data (XXX: set up something for the n_in arg of the hidden
        #    layer)
        nnet0,
        # -- column normalization
        scope.nnet_add_layer(
            nnet0,
            scope.column_normalize_layer(
                train_task_x,
                std_thresh=hp.loguniform('colnorm_thresh',
                                         np.log(1e-9),
                                         np.log(1e-3)))),
        # -- PCA (with a bias toward throwing away a lot of components)
        scope.nnet_add_layer(
            nnet0,
            scope.pca_layer(
                train_task_x,
                energy=hp.uniform('pca_energy', .5, 1),
                eps=1e-14)),
        ])
    hidden_layer = scope.random_sigmoid_layer(
        n_in=scope.getattr(nnet1, 'n_out'),
        n_out=hp.qloguniform('nhid1', np.log(16), np.log(1024), q=16),
        dist=hp.choice('dist1', ['uniform', 'normal']),
        scale_heuristic=hp.choice('scale_heur1', [
            ('old', hp.uniform('scale_mult1', .2, 2)),
            ('Glorot',)]),
        seed=hp.choice('iseed', [5, 6, 7, 8]),
        squash=hp.choice('squash', ['tanh', 'logistic']),
        )
    nnet2 = scope.nnet_add_layer(nnet1, hidden_layer)
    nnet3 = scope.nnet_add_layer(
        nnet2,
        scope.zero_softmax_layer(
            n_in=scope.getattr(nnet2, 'n_out'),
            n_out=scope.getattr(pyll_stubs.train_task, 'n_classes')))
    nnet4 = scope.nnet_sgd_finetune_classifier(
        nnet3,
        pyll_stubs.train_task,
        pyll_stubs.valid_task,
        fixed_nnet=nnet1,  # -- don't fine-tune this first part of nnet3
        max_epochs=sup_max_epochs,
        min_epochs=sup_min_epochs,
        batch_size=hp.choice('batch_size', [20, 100]),
        lr=hp.lognormal('lr', np.log(.01), 3.),
        lr_anneal_start=hp.qloguniform('lr_anneal_start',
                                       np.log(100),
                                       np.log(10000),
                                       q=1),
        l2_penalty=hp.choice('l2_penalty', [
            0,
            hp.lognormal('l2_penalty_nz', np.log(1.0e-6), 2.)]),
        time_limit=time_limit,
        )
    return nnet4
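

# -- A hedged sketch of how the spaces above are meant to be consumed.  It
#    assumes a signature of `use_obj_for_literal_in_memo(expr, obj, literal,
#    memo)` for the helper in `skdata_learning_algo.py`, a relative import
#    from the same package, and that hyperopt has already bound the hp.*
#    nodes of `expr` in `memo` before the trial runs; check that module for
#    the authoritative version.  `_example_eval_space` is a hypothetical
#    helper; `train_task` and `valid_task` are skdata Task objects with
#    `vector_classification` semantics.
def _example_eval_space(expr, memo, train_task, valid_task):
    from hyperopt import pyll
    from .skdata_learning_algo import use_obj_for_literal_in_memo

    # -- point the stub literals at live Task objects ...
    use_obj_for_literal_in_memo(expr, train_task, pyll_stubs.train_task, memo)
    use_obj_for_literal_in_memo(expr, valid_task, pyll_stubs.valid_task, memo)
    # -- ... then evaluate the pyll program: nnet1_preproc_space yields the
    #    fine-tuned network, preproc_space yields (network, report).
    return pyll.rec_eval(expr, memo=memo)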