def setup(self, trainers):
    """
    Add tracking to all trainers.

    Parameters
    ----------
    trainers : list
        List of Train objects belonging to the parent TrainCV object.
    """
    for k, trainer in enumerate(trainers):
        if self.save_path is not None and self.save_folds:
            path, ext = os.path.splitext(self.save_path)
            save_path = path + '-{}'.format(k) + ext
        else:
            save_path = None
        if self.tag_key is not None:
            tag_key = '{}-{}'.format(self.tag_key, k)
        else:
            tag_key = None
        extension = MonitorBasedSaveBest(
            self.channel_name, save_path=save_path, store_best_model=True,
            higher_is_better=self.higher_is_better, tag_key=tag_key)
        trainer.extensions.append(extension)
def get_extensions(self, ext_array, config_id):
    if ext_array is None:
        return []
    extensions = []
    for ext_id in ext_array:
        row = self.db.executeSQL("""
            SELECT ext_class
            FROM hps3.extension
            WHERE ext_id = %s
            """, (ext_id,), self.db.FETCH_ONE)
        if not row:
            raise HPSData("No extension for ext_id=" + str(ext_id))
        ext_class = row[0]
        fn = getattr(self, 'get_ext_' + ext_class)
        extensions.append(fn(ext_id))
    # monitor based save best
    if self.mbsb_channel_name is not None:
        save_path = self.save_prefix + str(config_id) + "_optimum.pkl"
        extensions.append(
            MonitorBasedSaveBest(channel_name=self.mbsb_channel_name,
                                 save_path=save_path))
    # HPS logger
    extensions.append(HPSLog(self.log_channel_names, self.db, config_id))
    return extensions
def create_training_problem(self, save_best_path):
    ext1 = MonitorBasedSaveBest(channel_name=self.optimize_for,
                                save_path=save_best_path)
    trainer = Train(dataset=self.alg_datasets['train'],
                    model=self.model,
                    algorithm=self.algorithm,
                    extensions=[ext1])
    self.trainer = trainer
def main(job_id, requested_params, cache):
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could
    # potentially be tuned.
    params = additional_args
    params.update(requested_params)

    if params.get('rate', None) is not None:
        params['log_init_learning_rate'][0] = numpy.array([params['rate']])

    train_params = {
        'train_start': params['start'],
        'train_stop': params['stop'],
        'valid_start': 20000,
        'valid_stop': 24000,
        'test_stop': 4000,
        'batch_size': 100,
        'max_epochs': 20,
        'max_batches': 10,
        'sgd_seed': sgd_seed_str,
        'mlp_seed': mlp_seed_str,
        'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
        'max_col_norm_y': params['max_norm_y'][0],
        'irange_y': math.pow(10, params['l_ir_y'][0]),
        'init_momentum': 0.5,
        'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
    }

    with open('slp_fooddata.yaml', 'r') as f:
        trainer = f.read()

    yaml_string = trainer % train_params
    train_obj = yaml_parse.load(yaml_string)

    pretrained_model_path = params.get('model', None)
    if pretrained_model_path is not None:
        print 'loading pre trained model'
        pretrained_model = serial.load(pretrained_model_path)
        print 'loading done'
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    if 'converge' in params:
        train_obj.algorithm.termination_criterion._criteria[0]._max_epochs = 100

    train_obj.extensions.append(
        MonitorBasedSaveBest('valid_y_misclass', params['save']))
    train_obj.setup()
    train_obj.model.monitor.on_channel_conflict = 'ignore'

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(
            train_obj.model)

    train_obj.main_loop(do_setup=False)

    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass)
def test_tagging():
    """Test the tagging functionality of this extension."""
    try:
        # TODO: serial.save should be able to take an open file-like object
        # so we can direct its output to a StringIO or something and not need
        # to screw around like this in tests that don't actually need to
        # touch the filesystem. /dev/null would work but the test would fail
        # on Windows.
        fd, fn = tempfile.mkstemp(suffix='.pkl')
        os.close(fd)

        # Test that the default key gets created.
        def_model = MockModel()
        def_model.monitor = MockMonitor()
        def_ext = MonitorBasedSaveBest(channel_name='foobar', save_path=fn)
        def_ext.setup(def_model, None, None)
        assert 'MonitorBasedSaveBest' in def_model.tag

        # Test with a custom key.
        model = MockModel()
        model.monitor = MockMonitor()
        model.monitor.channels['foobar'] = MockChannel()
        ext = MonitorBasedSaveBest(channel_name='foobar', tag_key='test123',
                                   save_path=fn)

        # Best cost is initially infinity.
        ext.setup(model, None, None)
        assert model.tag['test123']['best_cost'] == float("inf")

        # Best cost after one iteration.
        model.monitor.channels['foobar'].val_record.append(5.0)
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0

        # Best cost after a second, worse iteration.
        model.monitor.channels['foobar'].val_record.append(7.0)
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0

        # Best cost after a third iteration, better than the second but
        # worse than the first.
        model.monitor.channels['foobar'].val_record.append(6.0)
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0

        # Best cost after a fourth, better iteration.
        model.monitor.channels['foobar'].val_record.append(3.0)
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 3.0
    finally:
        os.remove(fn)
def create_algorithm(self, data, save_best_path=None):
    self.set_dataset(data)
    self.create_adjustors()
    term = EpochCounter(max_epochs=self.max_epochs)
    if self.valid_stop:
        cost_crit = MonitorBased(channel_name='valid_objective',
                                 prop_decrease=.0, N=3)
        term = And(criteria=[cost_crit, term])

    # (layers, A_weight_decay)
    coeffs = None
    if self.reg_factors:
        rf = self.reg_factors
        lhdims = len(self.tagger.hdims)
        l_inputlayer = len(self.tagger.layers[0].layers)
        coeffs = ([[rf] * l_inputlayer] + ([rf] * lhdims) + [rf], rf)
    cost = SeqTaggerCost(coeffs, self.dropout)
    self.cost = cost

    self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                     save_path=save_best_path)

    mon_dataset = dict(self.dataset)
    if not self.monitor_train:
        del mon_dataset['train']

    _learning_rule = (self.momentum_rule if self.use_momentum else None)
    self.algorithm = SGD(batch_size=1,
                         learning_rate=self.lr,
                         termination_criterion=term,
                         monitoring_dataset=mon_dataset,
                         cost=cost,
                         learning_rule=_learning_rule)
    self.algorithm.setup(self, self.dataset['train'])

    if self.plot_monitor:
        cn = ["valid_objective", "test_objective"]
        if self.monitor_train:
            cn.append("train_objective")
        plots = Plots(channel_names=cn, save_path=self.plot_monitor)
        self.pm = PlotManager([plots], freq=1)
        self.pm.setup(self, None, self.algorithm)
def get_extensions(self):
    if 'ext_array' not in self.state:
        return []
    extensions = []
    for ext_obj in self.state.ext_array.values():
        fn = getattr(self, 'get_ext_' + ext_obj.ext_class)
        extensions.append(fn(ext_obj))
    # monitor based save best
    print 'save best channel', self.mbsb_channel_name
    if self.mbsb_channel_name is not None:
        self.save_path = (self.save_prefix + str(self.state.config_id)
                          + "_optimum.pkl")
        extensions.append(
            MonitorBasedSaveBest(channel_name=self.mbsb_channel_name,
                                 save_path=self.save_path))
    return extensions
def get_layer_trainer_sgd(model, trainset):
    drop_cost = Dropout(input_include_probs={'h0': .4},
                        input_scales={'h0': 1.})

    # configs on sgd
    train_algo = SGD(train_iteration_mode='batchwise_shuffled_sequential',
                     learning_rate=0.2,
                     cost=drop_cost,
                     monitoring_dataset=trainset,
                     termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS),
                     update_callbacks=None)

    extensions = [MonitorBasedSaveBest(channel_name="y_kl",
                                       save_path="./convnet_test_best.pkl")]

    return Train(model=model, algorithm=train_algo,
                 extensions=extensions, dataset=trainset)
def get_trainer(model, trainset, validset, save_path):
    monitoring = dict(valid=validset, train=trainset)
    termination = MonitorBased(channel_name='valid_y_misclass',
                               prop_decrease=.001, N=100)
    extensions = [MonitorBasedSaveBest(channel_name='valid_y_misclass',
                                       save_path=save_path),
                  #MomentumAdjustor(start=1, saturate=100, final_momentum=.9),
                  LinearDecayOverEpoch(start=1, saturate=200,
                                       decay_factor=0.01)]

    config = {
        'learning_rate': .01,
        #'learning_rule': Momentum(0.5),
        'learning_rule': RMSProp(),
        'train_iteration_mode': 'shuffled_sequential',
        'batch_size': 1200,  # 250,
        #'batches_per_iter': 100,
        'monitoring_dataset': monitoring,
        'monitor_iteration_mode': 'shuffled_sequential',
        'termination_criterion': termination,
    }

    return Train(model=model,
                 algorithm=SGD(**config),
                 dataset=trainset,
                 extensions=extensions)
def main():
    # creating layers
    # 2 convolutional rectified layers, border mode valid
    batch_size = 48
    lr = 1.0  # 0.1/4
    finMomentum = 0.9
    maxout_units = 2000
    num_pcs = 4
    lay1_reg = lay2_reg = maxout_reg = None
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    #save_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'.joblib'
    #best_path = './models/'+params.host+'_'+params.device+'_'+sys.argv[1]+'best.joblib'
    save_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb.joblib'
    best_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb_best.joblib'
    #numBatches = 400000/batch_size

    '''
    print 'Applying preprocessing'

    ddmTrain = EmotiwKeypoints(start=0, stop=40000)
    ddmValid = EmotiwKeypoints(start=40000, stop=44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name='train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name='val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name='test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size=1000)
    GCN.apply(ddmTrain, can_fit=True, name='train')
    GCN.apply(ddmValid, can_fit=False, name='val')
    GCN.apply(ddmTest, can_fit=False, name='test')
    return
    '''

    ddmTrain = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='train')
    ddmValid = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_', which_set='valid')
    #ddmSmallTrain = ComboDatasetPyTable('/Tmp/zumerjer/all_', which_set='small_train')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim=3000,
    #                         sparse_init=15,
    #                         layer_name='RectLin3')

    # Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    # multisoftmax
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups, irange=0.05,
                           n_classes=n_classes, layer_name=layer_name)

    # setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96],
                                         num_channels=3,
                                         axes=('b', 0, 1, 'c')),
                 layers=[layer1, layer2, maxout, layerMS])

    # mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})
    #dropout_cost = Dropout(input_include_probs={'convRect1': .8},
    #                       input_scales={'convRect1': 1.})
    # algorithm
    monitoring_dataset = {'validation': ddmValid}  # , 'mini-train': ddmSmallTrain}

    term_crit = MonitorBased(prop_decrease=1e-7, N=100,
                             channel_name='validation_objective')
    kp_ada = KeypointADADELTA(decay_factor=0.95,
                              #init_momentum=0.5,
                              monitoring_dataset=monitoring_dataset,
                              batch_size=batch_size,
                              termination_criterion=term_crit,
                              cost=mlp_cost)

    # train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.001)
    #train_ext = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)
    #train_ext = ADADELTA(0.95)

    # train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kp_ada,
                  extensions=[#train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path=best_path),
                              #MomentumAdjustor(start=1,
                              #                 saturate=25,
                              #                 final_momentum=finMomentum)
                              ])

    train.main_loop()
    train.save()
def set_extensions(self, extensions=None):
    self.extensions = [MonitorBasedSaveBest(
        channel_name='test_objective',
        save_path='./training/training_monitor_best.pkl')]
algorithm = SGD(learning_rate=0.01,
                cost=MethodCost("cost_from_X"),
                batch_size=batchSize,
                monitoring_batch_size=batchSize,
                monitoring_dataset={'train': train, 'valid': valid},
                monitor_iteration_mode="even_batchwise_shuffled_sequential",
                termination_criterion=EpochCounter(max_epochs=200),
                learning_rule=Momentum(init_momentum=0.99),
                train_iteration_mode="even_batchwise_shuffled_sequential")

train = Train(dataset=train,
              model=model,
              algorithm=algorithm,
              save_path="ConvNet4.pkl",
              save_freq=1,
              extensions=[
                  MonitorBasedSaveBest(channel_name="valid_y_misclass",
                                       save_path="ConvNet4_best.pkl"),
                  MomentumAdjustor(final_momentum=0, start=0, saturate=100)
              ])

print("Starting training session")
train.main_loop()
print("Done!")
def main():
    # creating layers
    # 2 convolutional rectified layers, border mode valid
    batch_size = params.batch_size
    lr = params.lr
    finMomentum = params.momentum
    maxout_units = params.units
    num_pcs = params.pieces
    lay1_reg = lay2_reg = maxout_reg = params.norm_reg
    #save_path = './models/no_maxout/titan_lr_0.1_btch_64_momFinal_0.9_maxout_2000_4.joblib'
    #best_path = '/models/no_maxout/titan_bart10_gpu2_best.joblib'
    save_path = ('./models/' + params.host + '_' + params.device + '_'
                 + sys.argv[1] + '.joblib')
    best_path = ('./models/' + params.host + '_' + params.device + '_'
                 + sys.argv[1] + 'best.joblib')
    numBatches = 400000 / batch_size

    from emotiw.common.datasets.faces.EmotiwKeypoints import EmotiwKeypoints
    '''
    print 'Applying preprocessing'

    ddmTrain = EmotiwKeypoints(start=0, stop=40000)
    ddmValid = EmotiwKeypoints(start=40000, stop=44000)
    ddmTest = EmotiwKeypoints(start=44000)

    stndrdz = preprocessing.Standardize()
    stndrdz.applyLazily(ddmTrain, can_fit=True, name='train')
    stndrdz.applyLazily(ddmValid, can_fit=False, name='val')
    stndrdz.applyLazily(ddmTest, can_fit=False, name='test')

    GCN = preprocessing.GlobalContrastNormalization(batch_size=1000)
    GCN.apply(ddmTrain, can_fit=True, name='train')
    GCN.apply(ddmValid, can_fit=False, name='val')
    GCN.apply(ddmTest, can_fit=False, name='test')
    return
    '''

    ddmTrain = EmotiwKeypoints(hack='train', preproc='STD')
    ddmValid = EmotiwKeypoints(hack='val', preproc='STD')

    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay1_reg)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=128,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 W_lr_scale=0.1,
                                 max_kernel_norm=lay2_reg)

    # Rectified linear units
    #layer3 = RectifiedLinear(dim=3000,
    #                         sparse_init=15,
    #                         layer_name='RectLin3')

    # Maxout layer
    maxout = Maxout(layer_name='maxout',
                    irange=.005,
                    num_units=maxout_units,
                    num_pieces=num_pcs,
                    W_lr_scale=0.1,
                    max_col_norm=maxout_reg)

    # multisoftmax
    n_groups = 196
    n_classes = 96
    irange = 0
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups, irange=0.05,
                           n_classes=n_classes, layer_name=layer_name)

    # setting up MLP
    MLPerc = MLP(batch_size=batch_size,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=3),
                 layers=[layer1, layer2, maxout, layerMS])

    # mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})
    #dropout_cost = Dropout(input_include_probs={'convRect1': .8},
    #                       input_scales={'convRect1': 1.})

    # algorithm
    monitoring_dataset = {'validation': ddmValid}

    term_crit = MonitorBased(prop_decrease=1e-7, N=100,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=lr,
                        init_momentum=0.5,
                        monitoring_dataset=monitoring_dataset,
                        batch_size=batch_size,
                        termination_criterion=term_crit,
                        cost=mlp_cost)

    # train extension
    #train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.001)
    train_ext = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)

    # train object
    train = Train(dataset=ddmTrain,
                  save_path=save_path,
                  save_freq=10,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path=best_path),
                              MomentumAdjustor(start=1,
                                               saturate=25,
                                               final_momentum=finMomentum)])

    train.main_loop()
    train.save()
def main(job_id, params, cache):
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could
    # potentially be tuned.
    params.update(additional_args)

    fixed_params = (params['kernel_size_h2'][0], params['kernel_size_h3'][0])
    if 'cached_trainer' + str(fixed_params) not in cache:
        train_params = {
            'train_stop': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': 1,
            'max_batches': 10,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,
            'save_file': 'result',

            'kernel_size_h2': int(params['kernel_size_h2'][0]),
            'output_channels_h2': 1 * k,
            'irange_h2': math.pow(10, params['l_ir_h2'][0]),
            'max_kernel_norm_h2': params['max_norm_h2'][0],

            'kernel_size_h3': int(params['kernel_size_h3'][0]),
            'output_channels_h3': int(1.7 * k),
            'irange_h3': math.pow(10, params['l_ir_h3'][0]),
            'max_kernel_norm_h3': params['max_norm_h3'][0],

            'kernel_size_h4': int(params['kernel_size_h4'][0]),
            'output_channels_h4': int(2.5 * k),
            'irange_h4': math.pow(10, params['l_ir_h4'][0]),
            'max_kernel_norm_h4': params['max_norm_h4'][0],

            'weight_decay_h2': math.pow(10, params['l_wdecay_h2'][0]),
            'weight_decay_h3': math.pow(10, params['l_wdecay_h3'][0]),
            'weight_decay_h4': math.pow(10, params['l_wdecay_h4'][0]),
            'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
            'max_col_norm_y': params['max_norm_y'][0],
            'irange_y': math.pow(10, params['l_ir_y'][0]),
            'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
            'init_momentum': params['init_momentum'][0],
            'rectifier_left_slope': 0.2
        }

        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            trainer = f.read()

        yaml_string = trainer % train_params
        train_obj = yaml_parse.load(yaml_string)

        if 'converge' in params:
            del train_obj.algorithm.termination_criterion._criteria[:]

        train_obj.extensions.append(
            MonitorBasedSaveBest('valid_y_misclass', 'best_model.pkl'))
        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        cache['cached_trainer' + str(fixed_params)] = train_obj
    else:
        train_obj = cache['cached_trainer' + str(fixed_params)]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()

        model = train_obj.model
        model_params = dict([(param.name, param) for param in model.get_params()])
        rng = model.rng
        update_conv_layer(model.layers[0], params['l_ir_h2'][0],
                          params['max_norm_h2'][0], model_params, rng)
        update_conv_layer(model.layers[1], params['l_ir_h3'][0],
                          params['max_norm_h3'][0], model_params, rng)
        update_conv_layer(model.layers[2], params['l_ir_h4'][0],
                          params['max_norm_h4'][0], model_params, rng)
        update_softmax_layer(model.layers[3], params['l_ir_y'][0],
                             params['max_norm_y'][0], model_params, rng)

        train_obj.algorithm.learning_rate.set_value(
            math.pow(10, params['log_init_learning_rate'][0].astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(
            params['init_momentum'][0].astype(numpy.float32))

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)

    train_obj.main_loop(do_setup=False)

    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50
model = MLP(layers=[h0, h1, y],
            batch_size=batchSize,
            input_space=inputSpace)

algorithm = SGD(learning_rate=1e-3,
                cost=MethodCost("cost_from_X"),
                batch_size=batchSize,
                monitoring_batch_size=batchSize,
                monitoring_dataset={'train': train, 'valid': valid},
                monitor_iteration_mode="even_batchwise_shuffled_sequential",
                termination_criterion=EpochCounter(max_epochs=100),
                learning_rule=Momentum(init_momentum=0.0),
                train_iteration_mode="even_batchwise_shuffled_sequential")

train = Train(dataset=train,
              model=model,
              algorithm=algorithm,
              save_path="2_layer_conv.pkl",
              save_freq=1,
              extensions=[
                  MonitorBasedSaveBest(channel_name="valid_y_misclass",
                                       save_path="2_layer_conv_best.pkl")
              ])

print("Starting training session")
train.main_loop()
print("Done!")
pdb.set_trace()
algorithm = SGD(batch_size=batch_size,
                learning_rate=learning_rate,
                init_momentum=.5,
                monitoring_dataset={'valid': val_ds},
                cost=Dropout(input_include_probs={'h0': .8},
                             input_scales={'h0': 1.}),
                termination_criterion=MonitorBased(channel_name="valid_y_misclass",
                                                   prop_decrease=0., N=100),
                #termination_criterion: !obj:pylearn2.termination_criteria.EpochCounter {max_epochs: 1},
                update_callbacks=ExponentialDecay(decay_factor=1.00004,
                                                  min_lr=.000001))

extensions = [MonitorBasedSaveBest(channel_name='valid_y_misclass',
                                   save_path=save_best_path),
              MomentumAdjustor(start=1, saturate=250, final_momentum=.7)]

model = MLP(batch_size=batch_size,
            input_space=Conv2DSpace(shape=[48, 48],
                                    num_channels=num_chan,
                                    axes=['c', 0, 1, 'b']),
            layers=[MaxoutConvC01B(layer_name='h0',
                                   pad=0,
                                   num_channels=64,
                                   num_pieces=2,
                                   kernel_shape=[8, 8],
                                   pool_shape=[4, 4],
                                   pool_stride=[2, 2],
def test_works():
    load = True
    if load == False:
        ddmTrain = FacialKeypoint(which_set='train', start=0, stop=6000)
        ddmValid = FacialKeypoint(which_set='train', start=6000, stop=7049)
        # valid can_fit = false
        pipeline = preprocessing.Pipeline()
        stndrdz = preprocessing.Standardize()
        stndrdz.apply(ddmTrain, can_fit=True)
        # doubt, how about can_fit = False?
        stndrdz.apply(ddmValid, can_fit=False)
        GCN = preprocessing.GlobalContrastNormalization()
        GCN.apply(ddmTrain, can_fit=True)
        GCN.apply(ddmValid, can_fit=False)

        pcklFile = open('kpd.pkl', 'wb')
        obj = (ddmTrain, ddmValid)
        pickle.dump(obj, pcklFile)
        pcklFile.close()
        return
    else:
        pcklFile = open('kpd.pkl', 'rb')
        (ddmTrain, ddmValid) = pickle.load(pcklFile)
        pcklFile.close()

    # creating layers
    # 2 convolutional rectified layers, border mode valid
    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    # Rectified linear units
    layer3 = RectifiedLinear(dim=3000,
                             sparse_init=15,
                             layer_name='RectLin3')

    # multisoftmax
    n_groups = 30
    n_classes = 98
    irange = 0
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups, irange=0.05,
                           n_classes=n_classes, layer_name=layer_name)

    # setting up MLP
    MLPerc = MLP(batch_size=8,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=1),
                 layers=[layer1, layer2, layer3, layerMS])

    # mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)

    # algorithm
    # learning rate, momentum, batch size, monitoring dataset, cost,
    # termination criteria
    term_crit = MonitorBased(prop_decrease=0.00001, N=30,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=0.001,
                        init_momentum=0.5,
                        monitoring_dataset={'validation': ddmValid,
                                            'training': ddmTrain},
                        batch_size=8,
                        batches_per_iter=750,
                        termination_criterion=term_crit,
                        train_iteration_mode='random_uniform',
                        cost=mlp_cost)

    # train extension
    train_ext = ExponentialDecayOverEpoch(decay_factor=0.998, min_lr_scale=0.01)

    # train object
    train = Train(dataset=ddmTrain,
                  save_path='kpd_model2.pkl',
                  save_freq=1,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[train_ext,
                              MonitorBasedSaveBest(channel_name='validation_objective',
                                                   save_path='kpd_best.pkl'),
                              MomentumAdjustor(start=1,
                                               saturate=20,
                                               final_momentum=.9)])

    train.main_loop()
    train.save()
def main(job_id, requested_params, cache):
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could
    # potentially be tuned.
    params = additional_args
    params.update(requested_params)

    output_channels_h2 = int(1.00 * 50)
    output_channels_h3 = int(3.42 * 50)
    output_channels_h4 = int(11.67 * 50)
    dropout_h2 = float(params['dropout_h2'][0]) / 10
    dropout_h3 = float(params['dropout_h3'][0]) / 10
    dropout_h4 = float(params['dropout_h4'][0]) / 10
    dropout_y = float(params['dropout_y'][0]) / 10

    if params.get('rate', None) is not None:
        params['log_init_learning_rate'][0] += numpy.array([params['rate']])

    fixed_params = (params['kernel_size_h2'][0], params['kernel_size_h3'][0],
                    params['dropout_h2'][0], params['dropout_h3'][0],
                    params['dropout_h4'][0], params['dropout_y'][0])
    if 'cached_trainer' + str(fixed_params) not in cache:
        train_params = {
            'train_start': params['start'],
            'train_stop': params['stop'],
            'valid_start': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': params.get('epochs', 1),
            'max_batches': 50,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,

            'kernel_size_h2': int(params['kernel_size_h2'][0]),
            'output_channels_h2': output_channels_h2,
            'irange_h2': math.pow(10, params['l_ir_h2'][0]),
            'max_kernel_norm_h2': params['max_norm_h2'][0],
            'dropout_h2': dropout_h2,
            'dscale_h2': params['dfac_h2'][0] * 1.0 / dropout_h2,
            'w_lr_sc_h2': math.pow(dropout_h2, 2),
            'weight_decay_h2': math.pow(10, params['l_wdecay_h2'][0]),

            'kernel_size_h3': int(params['kernel_size_h3'][0]),
            'output_channels_h3': output_channels_h3,
            'irange_h3': math.pow(10, params['l_ir_h3'][0]),
            'max_kernel_norm_h3': params['max_norm_h3'][0],
            'dropout_h3': dropout_h3,
            'dscale_h3': params['dfac_h3'][0] * 1.0 / dropout_h3,
            'w_lr_sc_h3': math.pow(dropout_h3, 2),
            'weight_decay_h3': math.pow(10, params['l_wdecay_h3'][0]),

            'kernel_size_h4': int(params['kernel_size_h4'][0]),
            'output_channels_h4': output_channels_h4,
            'irange_h4': math.pow(10, params['l_ir_h4'][0]),
            'max_kernel_norm_h4': params['max_norm_h4'][0],
            'dropout_h4': dropout_h4,
            'dscale_h4': params['dfac_h4'][0] * 1.0 / dropout_h4,
            'w_lr_sc_h4': math.pow(dropout_h4, 2),
            'weight_decay_h4': math.pow(10, params['l_wdecay_h4'][0]),

            'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
            'max_col_norm_y': params['max_norm_y'][0],
            'irange_y': math.pow(10, params['l_ir_y'][0]),
            'dropout_y': dropout_y,
            'dscale_y': 1.0 / dropout_y,
            'w_lr_sc_y': math.pow(dropout_y, 2),
            'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
            'init_momentum': params['init_momentum'][0],
            'rectifier_left_slope': 0.2
        }

        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            trainer = f.read()

        yaml_string = trainer % train_params
        train_obj = yaml_parse.load(yaml_string)

        if 'converge' in params:
            train_obj.algorithm.termination_criterion._criteria[0]._max_epochs = \
                params.get('epochs', 100)

        train_obj.extensions.append(
            MonitorBasedSaveBest('valid_y_misclass', params['save']))
        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        # cache['cached_trainer' + str(fixed_params)] = train_obj
    else:
        train_obj = cache['cached_trainer' + str(fixed_params)]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()

        model = train_obj.model
        model_params = dict([(param.name, param) for param in model.get_params()])
        rng = model.rng
        update_conv_layer(model.layers[0], params['l_ir_h2'][0],
                          params['max_norm_h2'][0], model_params, rng)
        update_conv_layer(model.layers[1], params['l_ir_h3'][0],
                          params['max_norm_h3'][0], model_params, rng)
        update_conv_layer(model.layers[2], params['l_ir_h4'][0],
                          params['max_norm_h4'][0], model_params, rng)
        update_softmax_layer(model.layers[3], params['l_ir_y'][0],
                             params['max_norm_y'][0], model_params, rng)

        train_obj.algorithm.learning_rate.set_value(
            math.pow(10, params['log_init_learning_rate'][0].astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(
            params['init_momentum'][0].astype(numpy.float32))

    pretrained_model_path = params.get('model', None)
    if pretrained_model_path is not None:
        print 'loading pre trained model'
        pretrained_model = serial.load(pretrained_model_path)
        print 'loading done'
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)

    train_obj.main_loop(do_setup=False)

    if params.get('savelast', False):
        serial.save(params['save'] + 'f', train_obj.model, on_overwrite='backup')

    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50
train = Train(dataset,
              mlp,
              SGD(0.1,
                  batch_size=128,
                  monitoring_dataset={'train': dataset,
                                      'valid': dataset_valid,
                                      'test': dataset_test},
                  termination_criterion=EpochCounter(100),
                  train_iteration_mode='even_shuffled_sequential',
                  monitor_iteration_mode='even_sequential'),
              save_path="pkl/multicolumn.pkl",
              save_freq=5,
              extensions=[
                  MonitorBasedSaveBest(channel_name='test_y_misclass',
                                       save_path="pkl/multicolumn_best.pkl")
              ])

# # Load the saved model
# model = serial.load(saved_model_path)
#
# # Remove last layer
# del model.layers[-1]
#
# # Add new layer
# new_output_layer = <make your new layer here>
# model.add_layers([new_output_layer])

# mlp.layers.extend(pretrained_layers[start_layer:])
# , cost=Dropout(input_include_probs={'composite': 1.})))