irange=ir, dim=dim, max_col_norm=1.) output = Softmax(layer_name='y', n_classes=9, irange=ir, max_col_norm=mcn_out) mdl = MLP([l1, l2, l3, output], nvis=X2.shape[1]) trainer = sgd.SGD(learning_rate=lr, batch_size=bs, learning_rule=learning_rule.Momentum(mm), cost=Dropout(default_input_include_prob=ip, default_input_scale=1 / ip), termination_criterion=EpochCounter(epochs), seed=seed) decay = sgd.LinearDecayOverEpoch(start=2, saturate=20, decay_factor=.1) experiment = Train(dataset=training, model=mdl, algorithm=trainer, extensions=[decay]) experiment.main_loop() epochs_current = epochs for s in range(n_add): trainer = sgd.SGD(learning_rate=lr * .1, batch_size=bs, learning_rule=learning_rule.Momentum(mm), cost=Dropout(default_input_include_prob=ip, default_input_scale=1 / ip), termination_criterion=EpochCounter(epochs_add), seed=seed) experiment = Train(dataset=training, model=mdl, algorithm=trainer)
# Training cost: the model's own `cost_from_X` objective plus an L1 penalty
# on the weights, with one coefficient per entry in `coeffs`.
L1_cost = PL.costs.cost.SumOfCosts([
    PL.costs.cost.MethodCost(method='cost_from_X'),
    PL.costs.mlp.L1WeightDecay(coeffs=[0.1, 0.01]),
])

# Momentum schedule: begin at `initial_momentum` and let the adjustor move
# towards `final_momentum` between epochs `start` and `saturate`.
initial_momentum = .5
final_momentum = .99
start = 1
saturate = 20
momentum_adjustor = learning_rule.MomentumAdjustor(
    final_momentum=final_momentum,
    start=start,
    saturate=saturate,
)
momentum_rule = learning_rule.Momentum(initial_momentum)

# Learning-rate schedule.  `start` and `saturate` are deliberately rebound
# here, exactly as in the original script.
# NOTE(review): `start` is passed as 0.1 although the other schedules in this
# code base use whole epoch numbers -- confirm this is intended.
start = .1
saturate = 20
decay_factor = .00001
learning_rate_adjustor = sgd.LinearDecayOverEpoch(
    start=start,
    saturate=saturate,
    decay_factor=decay_factor,
)

# Termination criterion watching the 'objective' monitor channel with a
# window of N=20 epochs and prop_decrease=0.0.
termination_criterion = MonitorBased(
    channel_name='objective',
    N=20,
    prop_decrease=0.0,
)

# Stochastic Gradient Descent trainer.  `momentum_rule` is not passed as
# `learning_rule` (that argument was commented out in the original).
trainer = sgd.SGD(
    learning_rate=.001,
    batch_size=10,
    monitoring_dataset=ds_valid,
    termination_criterion=termination_criterion,
    cost=L1_cost,
)
trainer.setup(ann, ds_train)

# add monitor for saving the model with best score
def main():
    """Train a sigmoid MLP with pylearn2, score it, and write predictions.

    Steps, in order:

    1. Load the train/validation/test splits via ``load_training_data`` and
       the Kaggle test features via ``load_test_data`` (which receives the
       ``std_scale`` object returned by the first call).
    2. Train a 93-input MLP with two sigmoid hidden layers (200 and 100
       units) and a 9-way softmax output using SGD with momentum.
    3. Reload the checkpoint written by ``MonitorBasedSaveBest``, print the
       multiclass log loss on the test split, and save the Kaggle
       predictions as a CSV file named after that loss.

    The ``pylearn2_results/`` directory is used for both the checkpoint and
    the CSV; this function does not create it.
    """
    results_dir = 'pylearn2_results/'
    # Single source of truth for the checkpoint path: it is written by the
    # watcher extension and read back after training.
    best_model_path = results_dir + 'pylearn2_test.pkl'

    training_data, validation_data, test_data, std_scale = load_training_data()
    kaggle_test_features = load_test_data(std_scale)

    # Hidden layers.
    hl1 = mlp.Sigmoid(layer_name='hl1', dim=200, irange=.1, init_bias=1.)
    hl2 = mlp.Sigmoid(layer_name='hl2', dim=100, irange=.1, init_bias=1.)

    # Softmax output layer with 9 classes.
    output_layer = mlp.Softmax(9, 'output', irange=.1)

    # SGD trainer.  There is no fixed epoch budget: termination is driven by
    # the 'valid_objective' monitor channel (MonitorBased, N=10,
    # prop_decrease=0).
    trainer = sgd.SGD(
        learning_rate=.05,
        batch_size=300,
        learning_rule=learning_rule.Momentum(.5),
        termination_criterion=MonitorBased(
            channel_name='valid_objective', prop_decrease=0., N=10),
        monitoring_dataset={
            'valid': validation_data,
            'train': training_data,
        })

    layers = [hl1, hl2, output_layer]

    # Neural net over 93 input features.
    model = mlp.MLP(layers, nvis=93)

    # Extensions: checkpoint the best model by validation objective, and
    # adjust momentum and learning rate between epochs 1 and 250.
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_objective',
        save_path=best_model_path)
    velocity = learning_rule.MomentumAdjustor(
        final_momentum=.6, start=1, saturate=250)
    decay = sgd.LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)

    experiment = Train(dataset=training_data,
                       model=model,
                       algorithm=trainer,
                       extensions=[watcher, velocity, decay])
    experiment.main_loop()

    # Evaluate the checkpointed model rather than the final-epoch weights.
    model = serial.load(best_model_path)

    # test_data[0] is fed to the network and test_data[1] is scored against
    # its output (presumably features and labels -- confirm in
    # load_training_data).
    test_results = model.fprop(
        theano.shared(test_data[0], name='test_data')).eval()
    # Single-argument print calls produce the same output on Python 2 and 3.
    print(test_results.shape)
    loss = multiclass_log_loss(test_data[1], test_results)
    print('Test multiclass log loss: %s' % (loss,))

    out_file = results_dir + str(loss) + 'ann'

    # Save the Kaggle predictions.
    results = model.fprop(
        theano.shared(kaggle_test_features, name='kaggle_test_data')).eval()
    save_results(out_file + '.csv', kaggle_test_features, results)
lr = 0.1
epochs = 400

# SGD with momentum, stopping after a fixed number of epochs and monitoring
# both the training set and the held-out set.
# The dropout cost below is left disabled, as in the original script:
#     cost=Dropout(input_include_probs={'l1': .5},
#                  input_scales={'l1': 1.}),
# (remember, default dropout is .5)
trainer = sgd.SGD(
    learning_rate=lr,
    batch_size=100,
    learning_rule=learning_rule.Momentum(.05),
    termination_criterion=EpochCounter(epochs),
    monitoring_dataset={'train': ds, 'valid': ds_test},
)

# Extension that publishes the ROC AUC monitor channel.
rocauc = roc_auc.RocAucChannel()

# Checkpoint whenever 'valid_roc_auc' improves.
watcher = best_params.MonitorBasedSaveBest(
    channel_name='valid_roc_auc',
    save_path='saved_clf.pkl',
)

# Momentum moves towards .9 between epochs 1 and 250.
velocity = learning_rule.MomentumAdjustor(
    final_momentum=.9,
    start=1,
    saturate=250,
)

# NOTE(review): pylearn2 documents `decay_factor` as a multiplier on the
# initial learning rate, so `lr * .05` may shrink the rate further than
# intended -- confirm.
decay = sgd.LinearDecayOverEpoch(
    start=1,
    saturate=250,
    decay_factor=lr * .05,
)

experiment = Train(
    dataset=ds,
    model=ann,
    algorithm=trainer,
    extensions=[watcher, velocity, decay, rocauc],
)
experiment.main_loop()
input_space = Conv2DSpace(shape=(central_window_shape, central_window_shape), axes = axes, num_channels = 1) ann = mlp.MLP(layers, input_space=input_space) velocity = learning_rule.MomentumAdjustor(final_momentum=momentum_end, start=1, saturate=momentum_saturate) watcher = best_params.MonitorBasedSaveBest(channel_name='valid_y_nll', save_path=save_path) decay = sgd.LinearDecayOverEpoch(start=1, saturate=decay_saturate, decay_factor=decay_factor) ra = RealtimeAugment(window_shape=[img_dim, img_dim], randomize=[X_train, X_test], scale_diff=scale_diff, translation=translation, center_shape=center_shape, center=[X_train, X_test], preprocess=preprocess) train = Train(dataset=X_train, model=ann, algorithm=trainer, extensions=[watcher, velocity, decay, ra]) train.main_loop() print "using model", save_path model = serial.load(save_path) print "loading test set" for f_name_dir in os.walk("test"):
'train': trn}) preprocessor = Pipeline([GlobalContrastNormalization(scale=55.), ZCA()]) trn.apply_preprocessor(preprocessor=preprocessor, can_fit=True) tst.apply_preprocessor(preprocessor=preprocessor, can_fit=False) serial.save('kaggle_cifar10_preprocessor.pkl', preprocessor) watcher = best_params.MonitorBasedSaveBest( channel_name='valid_y_misclass', save_path='kaggle_cifar10_maxout_zca.pkl') velocity = learning_rule.MomentumAdjustor(final_momentum=.65, start=1, saturate=250) decay = sgd.LinearDecayOverEpoch(start=1, saturate=500, decay_factor=.01) win = window_flip.WindowAndFlipC01B(pad_randomized=8, window_shape=(32, 32), randomize=[trn], center=[tst]) experiment = Train(dataset=trn, model=mdl, algorithm=trainer, extensions=[watcher, velocity, decay, win]) experiment.main_loop()
def _prl_conv_layer(layer_name):
    """Return a fresh conv + pooling layer with the shared hyper-parameters."""
    return mlp.ConvRectifiedLinear(layer_name=layer_name,
                                   output_channels=64,
                                   irange=0.05,
                                   kernel_shape=[4, 4],
                                   pool_shape=[4, 4],
                                   pool_stride=[2, 2],
                                   max_kernel_norm=1.9365)


def _prl_head(n_classes):
    """Return a fresh [fully connected, softmax] pair named 'fc' and 'y'."""
    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=n_classes,
                         irange=.005,
                         max_col_norm=1.9365)
    return [fc, output]


def _prl_swap_head(mdl, new_layers):
    """Drop the trailing 'fc' and 'y' layers of mdl, then append new_layers."""
    for name in ('y', 'fc'):
        del mdl.layers[-1]
        mdl.layer_names.remove(name)
    mdl.add_layers(new_layers)


def _prl_run_stage(mdl, trn, monitoring, batch_size, save_path):
    """Train mdl for one stage, checkpointing the best model to save_path."""
    trainer = sgd.SGD(
        learning_rate=0.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(costs=[
            Default(),
            # One weight-decay coefficient per layer currently in the model.
            WeightDecay(coeffs=[0.0005] * len(mdl.layers)),
        ]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        # Copy so that every stage gets its own dict, as in the original.
        monitoring_dataset=dict(monitoring))
    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path=save_path)
    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)
    experiment = Train(dataset=trn,
                       model=mdl,
                       algorithm=trainer,
                       extensions=[watcher, decay])
    experiment.main_loop()


def supervisedLayerwisePRL(trainset, testset, validset=None):
    '''
    The supervised layerwise training as used in the PRL Paper.

    The network is grown in three stages.  Each stage trains the whole model
    for 15 epochs, reloads the checkpoint with the best 'valid_y_misclass',
    strips the fully connected and softmax layers, and appends one more
    convolution + pooling layer followed by a fresh fully connected and
    softmax layer.

    Input
    ------
    trainset : A path to an hdf5 file created through h5py.
    testset : A path to an hdf5 file created through h5py.
    validset : Optional path to an hdf5 file created through h5py, used for
        the 'valid' monitoring channels.  When omitted, the module-level
        name `vld` is used, which is what the original code referenced; a
        NameError is raised if neither is available.

    All h5py files are expected to contain a 'topo_view' dataset and a 'y'
    dataset: 'topo_view' in the 'b01c' format as used in pylearn2, and 'y'
    holding the one hot encoded labels.
    '''
    batch_size = 100
    # Every stage predicts the same label set, so the softmax width must be
    # identical across stages (the third stage previously used 10).
    n_classes = 171
    save_template = './Saved Models/conv_supervised_layerwise_best%d.pkl'

    trn = HDF5Dataset(filename=trainset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    tst = HDF5Dataset(filename=testset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    if validset is not None:
        valid = HDF5Dataset(filename=validset,
                            topo_view='topo_view',
                            y='y',
                            load_all=False)
    else:
        # Backward compatibility: the original body read a `vld` that it
        # never defined, so it can only have come from module scope.
        valid = vld
    monitoring = {'test': tst, 'valid': valid}

    # Stage 1: first convolution and pooling layer.
    layers = [_prl_conv_layer('h1')] + _prl_head(n_classes)
    mdl = mlp.MLP(layers,
                  input_space=Conv2DSpace(shape=(70, 70), num_channels=1))
    _prl_run_stage(mdl, trn, monitoring, batch_size, save_template % 1)
    # Drop the in-memory model before loading the best checkpoint.
    del mdl
    mdl = serial.load(save_template % 1)
    mdl = push_monitor(mdl, 'k')

    # Stage 2: second convolution and pooling layer.
    _prl_swap_head(mdl, [_prl_conv_layer('h2')] + _prl_head(n_classes))
    _prl_run_stage(mdl, trn, monitoring, batch_size, save_template % 2)
    del mdl
    mdl = serial.load(save_template % 2)
    mdl = push_monitor(mdl, 'l')

    # Stage 3: third convolution and pooling layer.  The new layer must be
    # named 'h3' (reusing 'h2' collides with the existing layer), and the
    # fully connected layer has to be re-added so that the model has the
    # five layers the weight-decay coefficients are sized for.
    _prl_swap_head(mdl, [_prl_conv_layer('h3')] + _prl_head(n_classes))
    _prl_run_stage(mdl, trn, monitoring, batch_size, save_template % 3)