Example #1
    def _create_trainer(self, dataset):
        sgd.log.setLevel(logging.WARNING)

        # Aggregate all the dropout parameters into shared dictionaries.
        probs, scales = {}, {}
        for l in [l for l in self.layers if l.dropout is not None]:
            incl = 1.0 - l.dropout
            probs[l.name] = incl
            scales[l.name] = 1.0 / incl

        if self.cost == "Dropout" or len(probs) > 0:
            # Use the globally specified dropout rate as the default for layers without their own.
            incl = 1.0 - self.dropout
            default_prob, default_scale = incl, 1.0 / incl

            # Pass all the parameters to pylearn2 as a custom cost function.
            self.cost = Dropout(default_input_include_prob=default_prob,
                                default_input_scale=default_scale,
                                input_include_probs=probs,
                                input_scales=scales)

        logging.getLogger('pylearn2.monitor').setLevel(logging.WARNING)
        if dataset is not None:
            termination_criterion = MonitorBased(channel_name='objective',
                                                 N=self.n_stable,
                                                 prop_decrease=self.f_stable)
        else:
            termination_criterion = None

        return sgd.SGD(cost=self.cost,
                       batch_size=self.batch_size,
                       learning_rule=self._learning_rule,
                       learning_rate=self.learning_rate,
                       termination_criterion=termination_criterion,
                       monitoring_dataset=dataset)
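
The loop above converts each layer's dropout rate into pylearn2's "include probability" plus the inverse scale that keeps expected activations unchanged. A minimal standalone sketch of that mapping (illustration only, not library code):

def dropout_to_include_prob(dropout_rate):
    # probability of keeping an input, as stored in probs[l.name] above
    include_prob = 1.0 - dropout_rate
    # rescale surviving inputs so their expected value is unchanged
    scale = 1.0 / include_prob
    return include_prob, scale

# e.g. a dropout rate of 0.25 maps to include_prob=0.75 and scale of about 1.33
print(dropout_to_include_prob(0.25))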
Example #2
def get_trainer(model, trainset, validset, epochs=20, batch_size=200):
    monitoring_batches = None if validset is None else 20
    train_algo = SGD(batch_size=batch_size,
                     init_momentum=0.5,
                     learning_rate=0.1,
                     monitoring_batches=monitoring_batches,
                     monitoring_dataset=validset,
                     cost=Dropout(input_include_probs={
                         'h0': 0.8,
                         'h1': 0.8,
                         'h2': 0.8,
                         'h3': 0.8,
                         'y': 0.5
                     },
                                  input_scales={
                                      'h0': 1. / 0.8,
                                      'h1': 1. / 0.8,
                                      'h2': 1. / 0.8,
                                      'h3': 1. / 0.8,
                                      'y': 1. / 0.5
                                  },
                                  default_input_include_prob=0.5,
                                  default_input_scale=1. / 0.5),
                     termination_criterion=EpochCounter(epochs),
                     update_callbacks=ExponentialDecay(decay_factor=1.0001,
                                                       min_lr=0.001))
    return Train(model=model, algorithm=train_algo, dataset=trainset, save_freq=0, save_path='epoch', \
            extensions=[MomentumAdjustor(final_momentum=0.9, start=0, saturate=int(epochs*0.8)), ])
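
The input_scales in this example follow the usual inverted-dropout convention: each retained input is multiplied by 1/include_prob so the expected pre-activation matches the no-dropout case. A quick numpy check of that identity (a standalone sketch, independent of pylearn2):

import numpy as np

rng = np.random.RandomState(0)
x = rng.randn(100000)
p = 0.8                            # include probability used for 'h0'..'h3' above
mask = rng.binomial(1, p, size=x.shape)
scaled = x * mask / p              # drop with probability 1-p, rescale survivors by 1/p
# the mean of the rescaled signal matches the original up to sampling noise
print(abs(scaled.mean() - x.mean()))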
Example #3
    def __init__(self,
                 layers,
                 dropout=False,
                 input_scaler=None,
                 output_scaler=None,
                 learning_rate=0.01,
                 verbose=0):
        """

        :param layers: List of tuples of types of layers alongside the number of neurons
        :param learning_rate: The learning rate for all layers
        :param verbose: Verbosity level
        :return:
        """
        self.layers = layers
        self.ds = None
        self.f = None
        self.verbose = verbose
        cost = None
        if dropout:
            cost = Dropout()
        self.trainer = sgd.SGD(learning_rate=learning_rate,
                               cost=cost,
                               batch_size=100)

        self.input_normaliser = input_scaler
        self.output_normaliser = output_scaler
Example #4
def get_trainer(model, trainset, validset, epochs=20, batch_size=100):
    monitoring_batches = None if validset is None else 20
    train_algo = SGD(
        batch_size=batch_size,
        init_momentum=0.5,
        learning_rate=0.05,
        monitoring_batches=monitoring_batches,
        monitoring_dataset=validset,
        cost=Dropout(input_include_probs={'h0': 0.8},
                     input_scales={'h0': 1.},
                     default_input_include_prob=0.5,
                     default_input_scale=1. / 0.5),
        #termination_criterion = MonitorBased(channel_name='y_misclass', prop_decrease=0., N=50),
        termination_criterion=EpochCounter(epochs),
        update_callbacks=ExponentialDecay(decay_factor=1.00002, min_lr=0.0001))
    return Train(model=model, algorithm=train_algo, dataset=trainset, save_freq=0, save_path='epoch', \
            extensions=[MomentumAdjustor(final_momentum=0.7, start=0, saturate=int(0.8*epochs))])
Example #5
def get_layer_trainer_sgd(model, trainset):
    drop_cost = Dropout(input_include_probs={'h0': .4},
                        input_scales={'h0': 1.})

    # configs on sgd
    train_algo = SGD(train_iteration_mode='batchwise_shuffled_sequential',
                     learning_rate=0.2,
                     cost=drop_cost,
                     monitoring_dataset=trainset,
                     termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS),
                     update_callbacks=None)

    extensions = [
        MonitorBasedSaveBest(channel_name="y_kl",
                             save_path="./convnet_test_best.pkl")
    ]

    return Train(model=model,
                 algorithm=train_algo,
                 extensions=extensions,
                 dataset=trainset)
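
MonitorBasedSaveBest in the extensions list checkpoints the model each time the monitored channel ('y_kl' here) reaches a new best value. A minimal version of that bookkeeping, assuming a hypothetical save(model, path) serialization helper:

best_value = float('inf')

def maybe_save_best(channel_value, model, path="./convnet_test_best.pkl"):
    # keep the model only when the monitored channel hits a new minimum
    global best_value
    if channel_value < best_value:
        best_value = channel_value
        save(model, path)  # hypothetical helper standing in for pylearn2's serialization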
Example #6
def get_finetuner(model, trainset, batch_size=100, epochs=100):
    train_algo = SGD(batch_size=batch_size,
                     learning_rule=Momentum(init_momentum=0.5),
                     learning_rate=0.5,
                     monitoring_batches=batch_size,
                     monitoring_dataset=trainset,
                     cost=Dropout(input_include_probs={'h0': .5},
                                  input_scales={'h0': 2.}),
                     termination_criterion=EpochCounter(epochs))
    path = DATA_DIR + 'model' + str(SUBMODEL) + 'saved_daex.pkl'
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_path=path,
                 save_freq=10,
                 extensions=[
                     MomentumAdjustor(final_momentum=0.9,
                                      start=0,
                                      saturate=int(epochs * 0.8)),
                     LinearDecayOverEpoch(start=1,
                                          saturate=int(epochs * 0.7),
                                          decay_factor=.02)
                 ])
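
The two extensions above implement simple linear schedules: as I read pylearn2, MomentumAdjustor ramps the momentum from its initial value to final_momentum between the start and saturate epochs, and LinearDecayOverEpoch shrinks the learning rate until it reaches decay_factor times its initial value at the saturate epoch. A rough standalone sketch of those schedules with this example's settings (my reading, not the library code):

def linear_schedule(epoch, start, saturate, begin_value, end_value):
    # interpolate linearly between begin_value and end_value over [start, saturate]
    if epoch <= start:
        return begin_value
    if epoch >= saturate:
        return end_value
    frac = float(epoch - start) / (saturate - start)
    return begin_value + frac * (end_value - begin_value)

# with epochs=100: momentum ramps 0.5 -> 0.9 by epoch 80,
# and the 0.5 learning rate decays to 2% of its initial value by epoch 70
for epoch in range(0, 101, 20):
    momentum = linear_schedule(epoch, 0, 80, 0.5, 0.9)
    lr = 0.5 * linear_schedule(epoch, 1, 70, 1.0, 0.02)
    print("epoch %3d: momentum=%.2f lr=%.4f" % (epoch, momentum, lr))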
Example #7
def get_trainer2(model, trainset, epochs=50):
    train_algo = SGD(
        batch_size=bsize,
        learning_rate=0.5,
        learning_rule=Momentum(init_momentum=0.5),
        monitoring_batches=bsize,
        monitoring_dataset=trainset,
        cost=Dropout(input_include_probs={'h0': .8}, input_scales={'h0': 1.}),
        termination_criterion=EpochCounter(epochs),
    )
    path = DATA_DIR + 'model2saved_conv.pkl'
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_path=path,
                 save_freq=1,
                 extensions=[
                     MomentumAdjustor(final_momentum=0.7,
                                      start=0,
                                      saturate=int(epochs * 0.5)),
                     LinearDecayOverEpoch(start=1,
                                          saturate=int(epochs * 0.8),
                                          decay_factor=.01)
                 ])
Example #8
    
    images_train = images[train_index]
    y_train = y[train_index]
    images_train, y_train = shuffle(images_train, y_train, random_state=7)
    X_train = DenseDesignMatrix(X=images_train, y=y_train,view_converter=view_converter)
    
    images_test = images[test_index]
    y_test = y[test_index]
    X_test = DenseDesignMatrix(X=images_test, y=y_test,view_converter=view_converter)
            
    if retrain:
        print "training on", X_train.X.shape, 'testing on', X_test.X.shape
        trainer = sgd.SGD(learning_rate=learn_rate, batch_size=batch_size,
                          learning_rule=learning_rule.Momentum(momentum_start),
                          cost=Dropout(
                                       input_include_probs={'l1':1., 'l2':1., 'l3':1., 'l4':1., 'l5':1., 'l6':1.},
                                       input_scales={'l1':1., 'l2':1., 'l3':1., 'l4':1., 'l5':1., 'l6':1.}
                                       ),
                          termination_criterion=EpochCounter(max_epochs=max_epochs),
                          monitoring_dataset={'train':X_train, 'valid':X_test},
                          )
        
        
        input_space = Conv2DSpace(shape=(central_window_shape, central_window_shape),
                    axes = axes,
                    num_channels = 1)
                    
        ann = mlp.MLP(layers, input_space=input_space)

        velocity = learning_rule.MomentumAdjustor(final_momentum=momentum_end,
                                          start=1,
                                          saturate=momentum_saturate)
Example #9
output = mlp.Softmax(layer_name='y',
                     n_classes=10,
                     irange=.005,
                     max_col_norm=1.9365)

layers = [l1, l2, l3, l4, output]

mdl = mlp.MLP(layers,
              input_space=in_space)

trainer = sgd.SGD(learning_rate=.17,
                  batch_size=128,
                  learning_rule=learning_rule.Momentum(.5),
                  # Remember, default dropout is .5
                  cost=Dropout(input_include_probs={'l1': .8},
                               input_scales={'l1': 1.}),
                  termination_criterion=EpochCounter(max_epochs=475),
                  monitoring_dataset={'valid': tst,
                                      'train': trn})

preprocessor = Pipeline([GlobalContrastNormalization(scale=55.), ZCA()])
trn.apply_preprocessor(preprocessor=preprocessor, can_fit=True)
tst.apply_preprocessor(preprocessor=preprocessor, can_fit=False)
serial.save('kaggle_cifar10_preprocessor.pkl', preprocessor)

watcher = best_params.MonitorBasedSaveBest(
    channel_name='valid_y_misclass',
    save_path='kaggle_cifar10_maxout_zca.pkl')

velocity = learning_rule.MomentumAdjustor(final_momentum=.65,
                                          start=1,
Example #10
def main( x ):

	l1_dim = x[0]
	l2_dim = x[1]
	learning_rate = x[2]
	momentum = x[3]
	l1_dropout = x[4]
	decay_factor = x[5]
	
	min_lr = 1e-7

	#

	train = np.loadtxt( train_file, delimiter = ',' )
	x_train = train[:,0:-1]
	y_train = train[:,-1]
	y_train.shape = ( y_train.shape[0], 1 )

	# 

	validation = np.loadtxt( validation_file, delimiter = ',' )
	x_valid = validation[:,0:-1]
	y_valid = validation[:,-1]
	y_valid.shape = ( y_valid.shape[0], 1 )

	#

	#input_space = VectorSpace( dim = x.shape[1] )
	full = DenseDesignMatrix( X = x_train, y = y_train )
	valid = DenseDesignMatrix( X = x_valid, y = y_valid )

	l1 = mlp.RectifiedLinear( 
		layer_name='l1',
		irange=.001,
		dim = l1_dim,
		# "Rather than using weight decay, we constrain the norms of the weight vectors"
		max_col_norm=1.
	)

	l2 = mlp.RectifiedLinear(
		layer_name='l2',
		irange=.001,
		dim = l2_dim,
		max_col_norm=1.
	)

	output = mlp.Linear( dim = 1, layer_name='y', irange=.0001 )

	layers = [l1, l2, output]
	nvis = x_train.shape[1]

	mdl = mlp.MLP( layers, nvis = nvis )	# input_space = input_space

	#lr = .001
	#epochs = 100
	
	decay = sgd.ExponentialDecay( decay_factor = decay_factor, min_lr = min_lr )

	trainer = sgd.SGD(
		learning_rate = learning_rate,
		batch_size=128,
		learning_rule=learning_rule.Momentum( momentum ),
		
		update_callbacks = [ decay ],

		# Remember, default dropout is .5
		cost = Dropout( input_include_probs = {'l1': l1_dropout},
				   input_scales={'l1': 1.}),

		#termination_criterion = EpochCounter(epochs),
		termination_criterion = MonitorBased(
			channel_name = "valid_objective",
			prop_decrease = 0.001,				# 0.1% of objective
			N = 10	
		),

		# valid_objective is MSE

		monitoring_dataset = { 'train': full, 'valid': valid }
	)

	watcher = best_params.MonitorBasedSaveBest( channel_name = 'valid_objective', save_path = output_model_file )
	
	experiment = Train( dataset = full, model = mdl, algorithm = trainer, extensions = [ watcher ] )
	experiment.main_loop()

	###

	error = get_error_from_model( output_model_file )
	print "*** error: {} ***".format( error )
	return error
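
Because main(x) unpacks a six-element hyperparameter vector and returns the validation error, it can be dropped into any black-box hyperparameter search. A hypothetical driver loop (the candidate values below are invented for illustration):

import numpy as np

# each candidate is [l1_dim, l2_dim, learning_rate, momentum, l1_dropout, decay_factor]
candidates = [
    [512, 256, 0.01, 0.9, 0.8, 1.0001],
    [1024, 512, 0.001, 0.95, 0.5, 1.00005],
]
errors = [main(c) for c in candidates]
best = candidates[int(np.argmin(errors))]
print("best hyperparameters: {}".format(best))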
	
	
Example #11
 def get_cost_fn(self):
     if self.model_params.get('dropout'):
         return Dropout()
     return None
Example #12
 def get_cost_fn(self):
     return Dropout(input_include_probs={'h0': .8}, input_scales={'h0': 1.})
Example #13
 def get_cost_fn(self):
     return Dropout()
Example #14
          nvis=26);
          
print "[MESSAGE] The model is built";

### build algorithm

algorithm=SGD(batch_size=100,
              learning_rate=0.05,
              monitoring_dataset={'train':valid_data,
                                  'valid':valid_data,
                                  'test':test_data},
              termination_criterion=Or(criteria=[MonitorBased(channel_name="valid_objective",
                                                              prop_decrease=0.00001,
                                                              N=40),
                                                 EpochCounter(max_epochs=200)]),
              cost = Dropout(input_include_probs={'hidden_0':1., 'hidden_1':1., 'y':0.5},
                             input_scales={ 'hidden_0': 1., 'hidden_1':1., 'y':2.}),
              update_callbacks=ExponentialDecay(decay_factor=1.0000003, 
                                                min_lr=.000001));
                                                
print "[MESSAGE] Training algorithm is built";
                              
### build training

idpath = os.path.splitext(os.path.abspath(__file__))[0]; # ID for output files.
save_path = idpath + '.pkl';

train=Train(dataset=train_data,
            model=model,
            algorithm=algorithm,
            save_path=save_path,
            save_freq=100);
Example #15
def get_layer_MLP():
    
    extraset = BlackBoxDataset( which_set = 'extra')
    
    processor = Standardize();
    
    processor.apply(extraset,can_fit=True)
    
    trainset = BlackBoxDataset( which_set = 'train',
                                start = 0,
                                stop = 900,
                                preprocessor = processor,
                                fit_preprocessor = True,
                                fit_test_preprocessor = True,
                                )
    
    validset = BlackBoxDataset( which_set = 'train',
                                start = 900,
                                stop = 1000 ,
                                preprocessor = processor,
                                fit_preprocessor = True,
                                fit_test_preprocessor = False,
                                )
    
    dropCfg = { 'input_include_probs': { 'h0' : .8 } ,
                'input_scales': { 'h0': 1.}
              }
    
    config = { 'learning_rate': .05,
                'init_momentum': .00,
                'cost' : Dropout(**dropCfg), 
                'monitoring_dataset':  { 'train' : trainset,
                                         'valid' : validset
                                        },
                'termination_criterion': MonitorBased(channel_name='valid_y_misclass',N=100,prop_decrease=0),
                'update_callbacks': None
              }
     
    config0 = {
                'layer_name': 'h0',
                'num_units': 1875,
                'num_pieces': 2,
                'irange': .05,
                # Rather than using weight decay, we constrain the norms of the weight vectors
                'max_col_norm': 2.
    }
    
    config1 = {
                'layer_name': 'h1',
                'num_units': 700,
                'num_pieces': 2,
                'irange': .05,
                # Rather than using weight decay, we constrain the norms of the weight vectors
                'max_col_norm': 2.
    }
    
    sftmaxCfg = {
                'layer_name': 'y',
                'init_bias_target_marginals': trainset,
                # Initialize the weights to all 0s
                'irange': .0,
                'n_classes': 9
            }
    
    l1 = Maxout(**config0)
    l2 = Maxout(**config1)
    l3 = Softmax(**sftmaxCfg)

    train_algo = SGD(**config)
    model = MLP(batch_size=75,layers=[l1,l2,l3],nvis=1875)
    return Train(model = model,
            dataset = trainset,
            algorithm = train_algo,
            extensions = None, 
            save_path = "maxout_best_model.pkl",
            save_freq = 1)
Example #16
                      irange=ir,
                      dim=dim,
                      max_col_norm=1.)
 l3 = RectifiedLinear(layer_name='l3',
                      irange=ir,
                      dim=dim,
                      max_col_norm=1.)
 output = Softmax(layer_name='y',
                  n_classes=9,
                  irange=ir,
                  max_col_norm=mcn_out)
 mdl = MLP([l1, l2, l3, output], nvis=X2.shape[1])
 trainer = sgd.SGD(learning_rate=lr,
                   batch_size=bs,
                   learning_rule=learning_rule.Momentum(mm),
                   cost=Dropout(default_input_include_prob=ip,
                                default_input_scale=1 / ip),
                   termination_criterion=EpochCounter(epochs),
                   seed=seed)
 decay = sgd.LinearDecayOverEpoch(start=2, saturate=20, decay_factor=.1)
 experiment = Train(dataset=training,
                    model=mdl,
                    algorithm=trainer,
                    extensions=[decay])
 experiment.main_loop()
 epochs_current = epochs
 for s in range(n_add):
     trainer = sgd.SGD(learning_rate=lr * .1,
                       batch_size=bs,
                       learning_rule=learning_rule.Momentum(mm),
                       cost=Dropout(default_input_include_prob=ip,
                                    default_input_scale=1 / ip),
Example #17
 seed = i + 3819
 R = RImatrix(X.shape[1], m, k, rm_dup_cols = True, seed = seed)
 R = np.abs(R.todense().astype(np.float32))
 dim1 = R.shape[1]
 l1 = RectifiedLinear(layer_name='l1', irange = ir1, dim = dim1, mask_weights = R)
 l2 = RectifiedLinear(layer_name='l2', irange = ir2, dim = dim2, max_col_norm = 1.)
 l3 = RectifiedLinear(layer_name='l3', irange = ir2, dim = dim2, max_col_norm = 1.)
 l4 = RectifiedLinear(layer_name='l4', irange = ir2, dim = dim2, max_col_norm = 1.)
 output = Softmax(layer_name='y', n_classes = 9, irange = ir_out,
                  max_col_norm = mcn_out)
 mdl = MLP([l1, l2, l3, l4, output], nvis = X2.shape[1])
 trainer = sgd.SGD(learning_rate=lr,
                   batch_size=bs,
                   learning_rule=learning_rule.Momentum(mm),
                   cost=Dropout(input_include_probs = {'l1':1.},
                                input_scales = {'l1':1.},
                                default_input_include_prob=ip,
                                default_input_scale=1/ip),
                   termination_criterion=EpochCounter(epochs),seed = seed)
 decay = sgd.LinearDecayOverEpoch(start=2, saturate=20, decay_factor= .1)
 experiment = Train(dataset = training, model=mdl, algorithm=trainer, extensions=[decay])
 experiment.main_loop()
 epochs_current = epochs
 for s in range(n_add):
     del mdl.monitor
     trainer = sgd.SGD(learning_rate=lr * .1,
                       batch_size=bs,
                       learning_rule=learning_rule.Momentum(mm),
                       cost=Dropout(input_include_probs = {'l1':1.},
                                    input_scales = {'l1':1.},
                                    default_input_include_prob=ip,
                                    default_input_scale=1/ip),
Example #18
def get_layer_MLP(layers,trainset,validset):
    
    #processor = Standardize();
    
#    trainset = BlackBoxDataset( which_set = 'train',
#                                start = 0,
#                                stop = 900,
#                                preprocessor = Standardize(),
#                                fit_preprocessor = True,
#                                fit_test_preprocessor = True,
#                                )
#    
#    validset = BlackBoxDataset( which_set = 'train',
#                                start = 900,
#                                stop = 1000 ,
#                                preprocessor = Standardize(),
#                                fit_preprocessor = True,
#                                fit_test_preprocessor = False,
#                                )
    
    dropCfg = { 'input_include_probs': { 'h0' : .8 } ,
                'input_scales': { 'h0': 1.}
              }
    
    config = { 'learning_rate': .05,
                'init_momentum': .00,
                'cost' :  Dropout(**dropCfg), #Default()
                'monitoring_dataset':  { 'train' : trainset,
                                         'valid' : validset
                                        },
                'termination_criterion': MonitorBased(channel_name='valid_y_misclass',N=50,prop_decrease=0),
                'update_callbacks': None
              }
     
#    configCfg0 = {'layer_name' : 'h0',
#                'dim' : 1875,
#                'irange' : .05,
#                # Rather than using weight decay, we constrain the norms of the weight vectors
#                 'max_col_norm' : 1.}
#    
#    configCfg1 = {'layer_name' : 'h1',
#                'dim' : 1875,
#                'irange' : .05,
#                # Rather than using weight decay, we constrain the norms of the weight vectors
#                 'max_col_norm' : 1.}
    
    sftmaxCfg = {
                'layer_name': 'y',
                'init_bias_target_marginals': trainset,
                # Initialize the weights to all 0s
                'irange': .0,
                'n_classes': 9
            }
    
    layers.append(Softmax(**sftmaxCfg)) 

    train_algo = SGD(**config)
    model = MLP(batch_size=50,layers=layers,nvis=1875)
    return Train(model = model,
            dataset = trainset,
            algorithm = train_algo,
            extensions = None, #[LinearDecayOverEpoch(start= 5, saturate= 100, decay_factor= .01)], 
            save_path = "sae_2_best_model.pkl",
            save_freq = 100)
Example #19
 
y = Softmax(n_classes = 2,
            layer_name = "y",
            irange = 0.1)

inputSpace = Conv2DSpace(shape = [cropSize,cropSize],
                         num_channels = 3)
  
model = MLP(layers = [h0, h1, y], 
            batch_size = batchSize, 
            input_space = inputSpace)
 
algorithm = SGD(learning_rate = 1E-3, 
                cost = SumOfCosts([
                                   MethodCost("cost_from_X"),
                                   Dropout(default_input_include_prob = 0.25,
                                           default_input_scale = 1.3333)
                                  ]), 
                batch_size = batchSize, 
                monitoring_batch_size = batchSize,
                monitoring_dataset = {'train': train,
                                      'valid':valid}, 
                monitor_iteration_mode = "even_batchwise_shuffled_sequential", 
                termination_criterion = EpochCounter(max_epochs = 200),
                learning_rule = Momentum(init_momentum = 0.0),
                train_iteration_mode = "even_batchwise_shuffled_sequential")
     
train = Train(dataset = train, 
              model = model, 
              algorithm = algorithm, 
              save_path = "ConvNet8.pkl", 
              save_freq = 1, 
Example #20
                      irange=ir,
                      dim=dim,
                      max_col_norm=1.)
 l3 = RectifiedLinear(layer_name='l3',
                      irange=ir,
                      dim=dim,
                      max_col_norm=1.)
 output = Softmax(layer_name='y',
                  n_classes=9,
                  irange=ir,
                  max_col_norm=mcn_out)
 mdl = MLP([l1, l2, l3, output], nvis=X2.shape[1])
 trainer = sgd.SGD(learning_rate=lr,
                   batch_size=bs,
                   learning_rule=learning_rule.Momentum(mm),
                   cost=Dropout(default_input_include_prob=ip,
                                default_input_scale=1 / ip),
                   termination_criterion=EpochCounter(epochs),
                   seed=seed)
 decay = sgd.LinearDecayOverEpoch(start=2, saturate=20, decay_factor=.1)
 #fname = path + 'model/TRI_' + 'kmax_'+ str(k_max) + '_seed_' + str(seed) + '.pkl'
 experiment = Train(dataset=training,
                    model=mdl,
                    algorithm=trainer,
                    extensions=[decay])
 #                   save_path = fname, save_freq = epochs)
 experiment.main_loop()
 pred_train = predict(mdl, X2[:num_train].astype(np.float32))
 pred_test = predict(mdl, X2[num_train:].astype(np.float32))
 predAll_train += pred_train
 predAll_test += pred_test
 sc1 = log_loss(yMat, pred_train)
Example #21
h1 = mlp.Softplus(layer_name='h1', dim=60, sparse_init=0)
y0 = mlp.Softmax(layer_name='y0', n_classes=5, irange=0)
layers = [h0, h1, y0]

model = mlp.MLP(layers, nvis=train.X.shape[1])

monitoring = dict(valid=valid)
termination = MonitorBased(channel_name="valid_y0_misclass", N=5)
extensions = [
    best_params.MonitorBasedSaveBest(channel_name="valid_y0_misclass",
                                     save_path="train_best.pkl")
]

algorithm = sgd.SGD(0.1,
                    batch_size=100,
                    cost=Dropout(),
                    monitoring_dataset=monitoring,
                    termination_criterion=termination)

print 'Running training'
train_job = Train(train,
                  model,
                  algorithm,
                  extensions=extensions,
                  save_path="train.pkl",
                  save_freq=1)
train_job.main_loop()

# Rectified Linear with Momentum

from pylearn2.training_algorithms import sgd, learning_rule