def get_layer_maxout(self, layer_id, layer_name):
    row = self.db.executeSQL("""
        SELECT num_units, num_pieces, pool_stride, randomize_pools, irange,
               sparse_init, sparse_stdev, include_prob, init_bias, W_lr_scale,
               b_lr_scale, max_col_norm, max_row_norm
        FROM hps3.layer_maxout
        WHERE layer_id = %s
        """, (layer_id,), self.db.FETCH_ONE)
    if row is None:
        raise HPSData("No maxout layer for layer_id=" + str(layer_id))
    (num_units, num_pieces, pool_stride, randomize_pools, irange,
     sparse_init, sparse_stdev, include_prob, init_bias, W_lr_scale,
     b_lr_scale, max_col_norm, max_row_norm) = row
    return Maxout(num_units=num_units,
                  num_pieces=num_pieces,
                  pool_stride=pool_stride,
                  layer_name=layer_name,
                  randomize_pools=randomize_pools,
                  irange=irange,
                  sparse_init=sparse_init,
                  sparse_stdev=sparse_stdev,
                  include_prob=include_prob,
                  init_bias=init_bias,
                  W_lr_scale=W_lr_scale,
                  b_lr_scale=b_lr_scale,
                  max_col_norm=max_col_norm,
                  max_row_norm=max_row_norm)
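# Usage sketch (not from the original source): get_layer_maxout is a method of
# a hyperparameter-search class holding a live `db` connection to the hps3
# schema. The instance name `hps` and the layer_id below are illustrative
# assumptions only.
#
#     maxout_layer = hps.get_layer_maxout(layer_id=42, layer_name='h1')
#     assert maxout_layer.layer_name == 'h1'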
def test_convnet():
    layers = []
    dataset = get_dataset()
    input_space = Conv2DSpace(shape=[256, 256], num_channels=1)
    conv_layer = ConvRectifiedLinear(output_channels=12,
                                     irange=.005,
                                     layer_name="h0",
                                     kernel_shape=[88, 88],
                                     kernel_stride=[8, 8],
                                     pool_shape=[1, 1],
                                     pool_stride=[1, 1],
                                     max_kernel_norm=1.932)
    layers.append(conv_layer)
    maxout_layer = Maxout(layer_name="h1",
                          irange=.005,
                          num_units=600,
                          num_pieces=4,
                          max_col_norm=1.932)
    layers.append(maxout_layer)
    sigmoid_layer = Sigmoid(layer_name="y",
                            dim=484,
                            monitor_style="detection",
                            irange=.005)
    layers.append(sigmoid_layer)
    model = MLP(batch_size=100, layers=layers, input_space=input_space)
    trainer = get_layer_trainer_sgd(model, dataset)
    trainer.main_loop()
def test_min_zero():
    """
    This test guards against a bug where the size of the zero buffer used
    with the min_zero flag was specified to have the wrong size. The bug
    only manifested when compiled with optimizations off, because the
    optimizations discard information about the size of the zero buffer.
    """
    mlp = MLP(input_space=VectorSpace(1),
              layers=[Maxout(layer_name="test_layer",
                             num_units=1,
                             num_pieces=2,
                             irange=.05,
                             min_zero=True)])
    X = T.matrix()
    output = mlp.fprop(X)
    # Compile in debug mode so we don't optimize out the size of the buffer
    # of zeros
    f = function([X], output, mode="DEBUG_MODE")
    f(np.zeros((1, 1)).astype(X.dtype))
def generateNonConvRegressor(teacher_hintlayer, student_output_space):
    dim = teacher_hintlayer.output_space.get_total_dimension()
    layer_name = 'hint_regressor'
    irng = 0.05
    mcn = 0.9
    if isinstance(teacher_hintlayer, MaxoutConvC01B):
        hint_reg_layer = Maxout(layer_name=layer_name,
                                num_units=dim,
                                num_pieces=teacher_hintlayer.num_pieces,
                                irange=irng,
                                max_col_norm=mcn)
    elif isinstance(teacher_hintlayer, ConvRectifiedLinear):
        hint_reg_layer = RectifiedLinear(dim=dim,
                                         layer_name=layer_name,
                                         irange=irng,
                                         max_col_norm=mcn)
    elif isinstance(teacher_hintlayer, (ConvElemwise, ConvElemwisePL2)):
        if isinstance(teacher_hintlayer.nonlinearity,
                      RectifierConvNonlinearity):
            hint_reg_layer = RectifiedLinear(dim=dim,
                                             layer_name=layer_name,
                                             irange=irng,
                                             max_col_norm=mcn)
        elif isinstance(teacher_hintlayer.nonlinearity,
                        SigmoidConvNonlinearity):
            hint_reg_layer = Sigmoid(dim=dim,
                                     layer_name=layer_name,
                                     irange=irng,
                                     max_col_norm=mcn)
        elif isinstance(teacher_hintlayer.nonlinearity,
                        TanhConvNonlinearity):
            hint_reg_layer = Tanh(dim=dim,
                                  layer_name=layer_name,
                                  irange=irng,
                                  max_col_norm=mcn)
        else:
            raise AssertionError("Unknown convolutional nonlinearity type")
    else:
        raise AssertionError("Unknown teacher layer type")
    return hint_reg_layer
def get_convnet(img_shape=[256, 256], output_channels=16,
                kernel_shape=[88, 88], kernel_stride=[8, 8]):
    layers = []
    dataset = get_dataset()
    input_space = Conv2DSpace(shape=img_shape, num_channels=1)
    conv_layer = ConvRectifiedLinear(output_channels=output_channels,
                                     irange=.005,
                                     layer_name="h0",
                                     kernel_shape=kernel_shape,
                                     kernel_stride=kernel_stride,
                                     pool_shape=[1, 1],
                                     pool_stride=[1, 1],
                                     max_kernel_norm=1.932)
    layers.append(conv_layer)
    maxout_layer = Maxout(layer_name="h1",
                          irange=.005,
                          num_units=600,
                          num_pieces=4,
                          max_col_norm=1.932)
    layers.append(maxout_layer)
    # One sigmoid output unit per spatial position of the conv layer's
    # output map: ((img - kernel) // stride + 1) ** 2
    conv_out_dim = ((img_shape[0] - kernel_shape[0]) // kernel_stride[0] + 1) ** 2
    sigmoid_layer = Sigmoid(layer_name="y",
                            dim=conv_out_dim,
                            monitor_style="detection",
                            irange=.005)
    layers.append(sigmoid_layer)
    model = MLP(batch_size=100, layers=layers, input_space=input_space)
    return model
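# A minimal sketch of exercising get_convnet, assuming the pylearn2/theano
# imports used above are in scope. It only compiles the forward pass; the
# batch layout follows Conv2DSpace's default axes ('b', 0, 1, 'c'), and with
# the default arguments the output dim is ((256 - 88) // 8 + 1) ** 2 = 484.
import numpy as np
import theano

def _check_convnet_fprop():
    model = get_convnet(img_shape=[256, 256])
    X = model.get_input_space().make_theano_batch()
    fprop = theano.function([X], model.fprop(X))
    batch = np.zeros((100, 256, 256, 1), dtype=X.dtype)
    print fprop(batch).shape  # (100, 484)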
def main():
    # Creating layers:
    # 2 convolutional rectified layers, border mode valid
    batch_size = 48
    lr = 1.0  # 0.1/4
    finMomentum = 0.9
    maxout_units = 2000
    num_pcs = 4
    lay1_reg = lay2_reg = maxout_reg = None
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    #save_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'.joblib'
    #best_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'best.joblib'
    save_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb.joblib'
    best_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb_best.joblib'
    #numBatches = 400000 // batch_size

    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop=40000)
    ddmValid = EmotiwKeypoints(start=40000, stop=44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name='train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name='val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name='test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size=1000)
    GCN.apply(ddmTrain, can_fit=True, name='train')
    GCN.apply(ddmValid, can_fit=False, name='val')
    GCN.apply(ddmTest, can_fit=False, name='test')
    return
    '''

    ddmTrain = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='train')
    ddmValid = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='valid')
    #ddmSmallTrain = ComboDatasetPyTable('/Tmp/zumerjer/all_', which_set='small_train')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim=3000, sparse_init=15, layer_name='RectLin3')

    # Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    # Multisoftmax output
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    # Setting up the MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96],
                                         num_channels=3,
                                         axes=('b', 0, 1, 'c')),
                 layers=[layer1, layer2, maxout, layerMS])

    # MLP cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})
    #dropout_cost = Dropout(input_include_probs={'convRect1': .8},
    #                       input_scales={'convRect1': 1.})

    # Algorithm
    monitoring_dataset = {'validation': ddmValid}  #, 'mini-train': ddmSmallTrain}
    term_crit = MonitorBased(prop_decrease=1e-7, N=100,
                             channel_name='validation_objective')
    kp_ada = KeypointADADELTA(decay_factor=0.95,
                              #init_momentum=0.5,
                              monitoring_dataset=monitoring_dataset,
                              batch_size=batch_size,
                              termination_criterion=term_crit,
                              cost=mlp_cost)

    # Train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.001)
    #train_ext = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)
    #train_ext = ADADELTA(0.95)

    # Train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kp_ada,
                  extensions=[#train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path=best_path),
                              #MomentumAdjustor(start=1,
                              #                 saturate=25,
                              #                 final_momentum=finMomentum),
                              ])
    train.main_loop()
    train.save()
def main():
    # Creating layers:
    # 2 convolutional rectified layers, border mode valid
    batch_size = params.batch_size
    lr = params.lr
    finMomentum = params.momentum
    maxout_units = params.units
    num_pcs = params.pieces
    lay1_reg = lay2_reg = maxout_reg = params.norm_reg
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    save_path = ('./models/' + params.host + '_' + params.device + '_' +
                 sys.argv[1] + '.joblib')
    best_path = ('./models/' + params.host + '_' + params.device + '_' +
                 sys.argv[1] + 'best.joblib')
    numBatches = 400000 // batch_size

    from emotiw.common.datasets.faces.EmotiwKeypoints import EmotiwKeypoints
    '''
    print 'Applying preprocessing'
    ddmTrain = EmotiwKeypoints(start=0, stop=40000)
    ddmValid = EmotiwKeypoints(start=40000, stop=44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name='train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name='val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name='test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size=1000)
    GCN.apply(ddmTrain, can_fit=True, name='train')
    GCN.apply(ddmValid, can_fit=False, name='val')
    GCN.apply(ddmTest, can_fit=False, name='test')
    return
    '''

    ddmTrain = EmotiwKeypoints(hack='train', preproc='STD')
    ddmValid = EmotiwKeypoints(hack='val', preproc='STD')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim=3000, sparse_init=15, layer_name='RectLin3')

    # Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    # Multisoftmax output
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    # Setting up the MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=3),
                 layers=[layer1, layer2, maxout, layerMS])

    # MLP cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})
    #dropout_cost = Dropout(input_include_probs={'convRect1': .8},
    #                       input_scales={'convRect1': 1.})

    # Algorithm
    monitoring_dataset = {'validation': ddmValid}
    term_crit = MonitorBased(prop_decrease=1e-7, N=100,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=lr,
                        init_momentum=0.5,
                        monitoring_dataset=monitoring_dataset,
                        batch_size=batch_size,
                        termination_criterion=term_crit,
                        cost=mlp_cost)

    # Train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.001)
    train_ext = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)

    # Train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path=best_path),
                              MomentumAdjustor(start=1,
                                               saturate=25,
                                               final_momentum=finMomentum)])
    train.main_loop()
    train.save()
                    kernel_shape=[3, 3],
                    pool_shape=[2, 2],
                    pool_stride=[2, 2],
                    max_kernel_norm=1.9365,
                    irange=.025)
l5 = MaxoutConvC01B(layer_name='l5',
                    tied_b=1,
                    num_channels=256,
                    num_pieces=2,
                    pad=2,
                    kernel_shape=[3, 3],
                    pool_shape=[2, 2],
                    pool_stride=[2, 2],
                    max_kernel_norm=1.9365,
                    irange=.025)
l6 = MaxoutConvC01B(layer_name='l6',
                    tied_b=1,
                    num_channels=256,
                    num_pieces=2,
                    pad=2,
                    kernel_shape=[3, 3],
                    pool_shape=[2, 2],
                    pool_stride=[2, 2],
                    max_kernel_norm=1.9365,
                    irange=.025)

# Dense layers
l7 = Maxout(layer_name='l7', num_units=1024, num_pieces=2, irange=.025)
l8 = Maxout(layer_name='l8', num_units=2048, num_pieces=2, irange=.025)

output_layer = mlp.Softmax(layer_name='y', n_classes=121, irange=.01)

layers = [l1, l2, l3, l4, l5, l6, l7, l8, output_layer]

images = []
y = []
file_names = []
dimensions = []

train_labels = [x for x in os.listdir("train")
                if os.path.isdir("{0}{1}{2}".format("train", os.sep, x))]
train_directories = ["{0}{1}{2}".format("train", os.sep, x)
                     for x in train_labels]
train_labels, train_directories = zip(*sorted(zip(train_labels,
                                                  train_directories),
                                              key=lambda x: x[0]))

for idx, folder in enumerate(train_directories):
def get_maxout(dim_input):
    config = {
        'batch_size': bsize,
        'input_space': Conv2DSpace(shape=dim_input[:2],
                                   num_channels=dim_input[2],
                                   axes=['c', 0, 1, 'b']),
        'layers': [
            MaxoutConvC01B(layer_name='h0',
                           num_channels=96,
                           num_pieces=2,
                           irange=.005,
                           tied_b=1,
                           max_kernel_norm=.9,
                           kernel_shape=[8, 8],
                           pool_shape=[4, 4],
                           pool_stride=[2, 2],
                           W_lr_scale=.05,
                           b_lr_scale=.05),
            MaxoutConvC01B(layer_name='h1',
                           num_channels=128,
                           num_pieces=2,
                           irange=.005,
                           tied_b=1,
                           max_kernel_norm=0.9,
                           kernel_shape=[7, 7],
                           pad=3,
                           pool_shape=[4, 4],
                           pool_stride=[2, 2],
                           W_lr_scale=.05,
                           b_lr_scale=.05),
            MaxoutConvC01B(layer_name='h2',
                           num_channels=160,
                           num_pieces=3,
                           irange=.005,
                           tied_b=1,
                           max_kernel_norm=0.9,
                           kernel_shape=[6, 6],
                           pad=2,
                           pool_shape=[2, 2],
                           pool_stride=[2, 2],
                           W_lr_scale=.05,
                           b_lr_scale=.05),
            MaxoutConvC01B(layer_name='h3',
                           num_channels=192,
                           num_pieces=4,
                           irange=.005,
                           tied_b=1,
                           max_kernel_norm=0.9,
                           kernel_shape=[5, 5],
                           pad=1,
                           pool_shape=[2, 2],
                           pool_stride=[2, 2],
                           W_lr_scale=.05,
                           b_lr_scale=.05),
            Maxout(layer_name='h4',
                   irange=.005,
                   num_units=500,
                   num_pieces=5,
                   max_col_norm=1.9),
            Softmax(layer_name='y',
                    n_classes=nclass,
                    irange=.005,
                    max_col_norm=1.9)
        ]
    }
    return MLP(**config)
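# Usage sketch: get_maxout reads the module-level globals `bsize` and `nclass`,
# which do not appear in this snippet; the values below are assumptions for
# illustration. Note that MaxoutConvC01B wraps cuda-convnet kernels, so it
# requires a CUDA-capable GPU and the C01B axis order the config already sets.
#
#     bsize, nclass = 128, 10
#     model = get_maxout((32, 32, 3))  # 32x32 RGB input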
def test_works():
    load = True
    if not load:
        ddmTrain = FacialKeypoint(which_set='train', start=0, stop=6000)
        ddmValid = FacialKeypoint(which_set='train', start=6000, stop=7049)
        ddmTest = FacialKeypoint(which_set='test')

        stndrdz = preprocessing.Standardize()
        stndrdz.apply(ddmTrain, can_fit=True)
        # Statistics are fit on the training set only
        stndrdz.apply(ddmValid, can_fit=False)
        stndrdz.apply(ddmTest, can_fit=False)

        GCN = preprocessing.GlobalContrastNormalization()
        GCN.apply(ddmTrain, can_fit=True)
        GCN.apply(ddmValid, can_fit=False)
        GCN.apply(ddmTest, can_fit=False)

        pcklFile = open('kpd.pkl', 'wb')
        obj = (ddmTrain, ddmValid, ddmTest, GCN, stndrdz)
        pickle.dump(obj, pcklFile)
        pcklFile.close()
        return
    else:
        pcklFile = open('kpd.pkl', 'rb')
        (ddmTrain, ddmValid, ddmTest, GCN, stndrdz) = pickle.load(pcklFile)
        pcklFile.close()

    batch_size = 8

    print 'going to compute test error'
    generateTest(ddmTrain, 'kpd_maxout_best.pkl', 'output_maxout2pcs.csv')
    # Everything below is skipped by this early return; it is the training
    # configuration that produced the pickled model used above.
    return

    # Creating layers:
    # 2 convolutional rectified layers, border mode valid
    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim=3000, sparse_init=15, layer_name='RectLin3')

    # Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=2000,
                    num_pieces=2,
                    max_col_norm=1.9)

    # Multisoftmax output
    n_groups = 30
    n_classes = 98
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    # Setting up the MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=1),
                 layers=[layer1, layer2, maxout, layerMS])

    # MLP cost with dropout on the input layer
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': .8},
                           input_scales={'convRect1': 1.})
    #dropout_cost = Dropout(input_include_probs={'convRect1': .8},
    #                       input_scales={'convRect1': 1.})

    # Algorithm: learning rate, momentum, batch size, monitoring dataset,
    # cost, termination criterion
    term_crit = MonitorBased(prop_decrease=0.00001, N=30,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=0.001,
                        init_momentum=0.5,
                        monitoring_dataset={'validation': ddmValid,
                                            'training': ddmTrain},
                        batch_size=batch_size,
                        batches_per_iter=750,
                        termination_criterion=term_crit,
                        cost=mlp_cost)

    # Train extension
    train_ext = ExponentialDecayOverEpoch(decay_factor=0.998,
                                          min_lr_scale=0.01)

    # Train object
    train = Train(dataset=ddmTrain,
                  save_path='kpd_model2pcs_maxout.pkl',
                  save_freq=3,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path='kpd_maxout2pcs_best.pkl'),
                              MomentumAdjustor(start=1,
                                               saturate=20,
                                               final_momentum=.9)])
    train.main_loop()
    train.save()
                   max_kernel_norm=.9,
                   W_lr_scale=0.5,
                   b_lr_scale=0.5),
    MaxoutConvC01B(layer_name='h1',
                   pad=0,
                   num_channels=64,
                   num_pieces=2,
                   kernel_shape=[8, 8],
                   pool_shape=[3, 3],
                   pool_stride=[2, 2],
                   irange=.005,
                   max_kernel_norm=.9,
                   W_lr_scale=0.5,
                   b_lr_scale=0.5),
    Maxout(layer_name='h2',
           num_units=last_ndim,
           num_pieces=2,
           irange=.005),
    Softmax(max_col_norm=1.9365,
            layer_name='y',
            n_classes=n_classes,
            sparse_init=23)
])

train = Train(dataset=train_ds,
              model=model,
              algorithm=algorithm,
              extensions=extensions,
              save_path=save_path,
              save_freq=save_freq)
train.main_loop()
def get_layer_MLP():
    extraset = BlackBoxDataset(which_set='extra')

    processor = Standardize()
    processor.apply(extraset, can_fit=True)

    trainset = BlackBoxDataset(which_set='train',
                               start=0,
                               stop=900,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=True)
    validset = BlackBoxDataset(which_set='train',
                               start=900,
                               stop=1000,
                               preprocessor=processor,
                               fit_preprocessor=True,
                               fit_test_preprocessor=False)

    dropCfg = {
        'input_include_probs': {'h0': .8},
        'input_scales': {'h0': 1.}
    }
    config = {
        'learning_rate': .05,
        'init_momentum': .00,
        'cost': Dropout(**dropCfg),
        'monitoring_dataset': {'train': trainset, 'valid': validset},
        'termination_criterion': MonitorBased(channel_name='valid_y_misclass',
                                              N=100,
                                              prop_decrease=0),
        'update_callbacks': None
    }

    config0 = {
        'layer_name': 'h0',
        'num_units': 1875,
        'num_pieces': 2,
        'irange': .05,
        # Rather than using weight decay, we constrain the norms of the
        # weight vectors
        'max_col_norm': 2.
    }
    config1 = {
        'layer_name': 'h1',
        'num_units': 700,
        'num_pieces': 2,
        'irange': .05,
        # Rather than using weight decay, we constrain the norms of the
        # weight vectors
        'max_col_norm': 2.
    }
    sftmaxCfg = {
        'layer_name': 'y',
        'init_bias_target_marginals': trainset,
        # Initialize the weights to all 0s
        'irange': .0,
        'n_classes': 9
    }

    l1 = Maxout(**config0)
    l2 = Maxout(**config1)
    l3 = Softmax(**sftmaxCfg)

    train_algo = SGD(**config)
    model = MLP(batch_size=75, layers=[l1, l2, l3], nvis=1875)
    return Train(model=model,
                 dataset=trainset,
                 algorithm=train_algo,
                 extensions=None,
                 save_path="maxout_best_model.pkl",
                 save_freq=1)
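# A minimal sketch of driving get_layer_MLP end to end, assuming the
# BlackBoxDataset files are available locally. Train.main_loop() runs until
# the MonitorBased criterion configured above fires, checkpointing to
# maxout_best_model.pkl each epoch.
if __name__ == '__main__':
    train = get_layer_MLP()
    train.main_loop()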