예제 #1
0
    def setup(self, trainers):
        """
        Add tracking to all trainers.

        Parameters
        ----------
        trainers : list
            List of Train objects belonging to the parent TrainCV object.
        """
        for k, trainer in enumerate(trainers):
            if self.save_path is not None and self.save_folds:
                path, ext = os.path.splitext(self.save_path)
                save_path = path + '-{}'.format(k) + ext
            else:
                save_path = None
            if self.tag_key is not None:
                tag_key = '{}-{}'.format(self.tag_key, k)
            else:
                tag_key = None
            extension = MonitorBasedSaveBest(
                self.channel_name,
                save_path=save_path,
                store_best_model=True,
                higher_is_better=self.higher_is_better,
                tag_key=tag_key)
            trainer.extensions.append(extension)
예제 #2
0
    def get_extensions(self, ext_array, config_id):
        """
        Build the list of training extensions for one configuration.

        Parameters
        ----------
        ext_array : iterable or None
            Extension ids to look up in `hps3.extension`. When None, an
            empty list is returned and nothing else is added.
        config_id : object
            Configuration identifier; used in the best-model file name and
            passed to HPSLog.

        Returns
        -------
        list
            One extension per id (built by the matching `get_ext_<class>`
            method), then an optional MonitorBasedSaveBest, then an HPSLog.

        Raises
        ------
        HPSData
            If the lookup for an extension id returns no row.
        """
        if ext_array is None:
            return []

        lookup_sql = """
            SELECT ext_class
            FROM hps3.extension
            WHERE ext_id = %s
            """

        built = []
        for ext_id in ext_array:
            row = self.db.executeSQL(lookup_sql, (ext_id, ),
                                     self.db.FETCH_ONE)
            # None and empty results are both falsy.
            if not row:
                raise HPSData("No extension for ext_id=" + str(ext_id))
            # Dispatch on the stored class name: get_ext_<ext_class>(ext_id)
            factory = getattr(self, 'get_ext_' + row[0])
            built.append(factory(ext_id))

        # monitor based save best
        if self.mbsb_channel_name is not None:
            best_path = self.save_prefix + str(config_id) + "_optimum.pkl"
            built.append(
                MonitorBasedSaveBest(channel_name=self.mbsb_channel_name,
                                     save_path=best_path))

        # HPS Logger
        built.append(HPSLog(self.log_channel_names, self.db, config_id))
        return built
예제 #3
0
 def create_training_problem(self, save_best_path):
     """
     Create the Train object for this instance and store it on `self.trainer`.

     The trainer uses `self.alg_datasets['train']`, `self.model` and
     `self.algorithm`, plus a single MonitorBasedSaveBest extension that
     watches the `self.optimize_for` channel.

     Parameters
     ----------
     save_best_path : object
         Forwarded as `save_path` to MonitorBasedSaveBest.
     """
     best_saver = MonitorBasedSaveBest(
         channel_name=self.optimize_for, save_path=save_best_path)
     self.trainer = Train(
         dataset=self.alg_datasets['train'],
         model=self.model,
         algorithm=self.algorithm,
         extensions=[best_saver],
     )
def main(job_id, requested_params, cache):
    """Train the model described by ``slp_fooddata.yaml`` for one job.

    The YAML template is filled in with values derived from the merged
    parameters, loaded with ``yaml_parse.load`` and trained with
    ``main_loop``.

    Parameters
    ----------
    job_id : object
        Not used by this function.
    requested_params : mapping
        Per-job parameter values. They override the module-level
        ``additional_args`` defaults for this call only.
    cache : object
        Not used by this function.

    Returns
    -------
    float
        The value ``read_channel`` reports for ``misclass_channel`` on the
        trained model.
    """
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could potentially
    # be tuned.  Work on a copy: updating ``additional_args`` itself would
    # carry one job's keys (e.g. 'converge', 'rate', 'model') over into every
    # later call made in the same process.
    params = dict(additional_args)
    params.update(requested_params)

    if params.get('rate', None) is not None:
        # NOTE: this writes into the list object stored under
        # 'log_init_learning_rate', whichever mapping supplied it.
        params['log_init_learning_rate'][0] = numpy.array([params['rate']])

    train_params = {
        'train_start': params['start'],
        'train_stop': params['stop'],
        'valid_start': 20000,
        'valid_stop': 24000,
        'test_stop': 4000,
        'batch_size': 100,
        'max_epochs': 20,
        'max_batches': 10,
        'sgd_seed': sgd_seed_str,
        'mlp_seed': mlp_seed_str,
        'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
        'max_col_norm_y': params['max_norm_y'][0],
        'irange_y': math.pow(10, params['l_ir_y'][0]),
        'init_momentum': 0.5,
        'init_learning_rate': math.pow(10,
                                       params['log_init_learning_rate'][0]),
    }

    with open('slp_fooddata.yaml', 'r') as f:
        trainer = f.read()

    # The template uses %(name)s placeholders filled from train_params.
    yaml_string = trainer % train_params
    train_obj = yaml_parse.load(yaml_string)

    # Optionally start from the parameter values of a previously saved model.
    pretrained_model_path = params.get('model', None)
    if pretrained_model_path is not None:
        print('loading pre trained model')
        pretrained_model = serial.load(pretrained_model_path)
        print('loading done')
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    if 'converge' in params:
        # Raise the epoch limit of the first termination criterion and keep
        # the best model (by 'valid_y_misclass') at params['save'].
        train_obj.algorithm.termination_criterion._criteria[
            0]._max_epochs = 100
        train_obj.extensions.append(
            MonitorBasedSaveBest('valid_y_misclass', params['save']))

    # Set up explicitly so that main_loop can be called with do_setup=False.
    train_obj.setup()
    train_obj.model.monitor.on_channel_conflict = 'ignore'
    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(
            train_obj.model)
    train_obj.main_loop(do_setup=False)
    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass)
def test_tagging():
    """Test the tagging functionality of this extension.

    Checks that the default tag key is created on setup, and that with a
    custom key the stored ``best_cost`` starts at infinity and only changes
    when a lower channel value is recorded.
    """
    # TODO: serial.save should be able to take an open file-like object so
    # we can direct its output to a StringIO or something and not need to
    # screw around like this in tests that don't actually need to touch
    # the filesystem. /dev/null would work but the test would fail on
    # Windows.
    #
    # The temporary file is created *before* the try block: if mkstemp itself
    # fails there is nothing to clean up, and the finally clause must not
    # reference an unbound ``fn`` (that would mask the real error).
    fd, fn = tempfile.mkstemp(suffix='.pkl')
    try:
        os.close(fd)

        # Test that the default key gets created.
        def_model = MockModel()
        def_model.monitor = MockMonitor()
        def_ext = MonitorBasedSaveBest(channel_name='foobar', save_path=fn)
        def_ext.setup(def_model, None, None)
        assert 'MonitorBasedSaveBest' in def_model.tag

        # Test with a custom key.
        model = MockModel()
        model.monitor = MockMonitor()
        model.monitor.channels['foobar'] = MockChannel()
        ext = MonitorBasedSaveBest(channel_name='foobar', tag_key='test123',
                                   save_path=fn)
        # Best cost is initially infinity.
        ext.setup(model, None, None)
        assert model.tag['test123']['best_cost'] == float("inf")
        # Best cost after one iteration.
        model.monitor.channels['foobar'].val_record.append(5.0)
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0
        # Best cost after a second, worse iteration.
        model.monitor.channels['foobar'].val_record.append(7.0)
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0
        # Best cost after a third iteration better than 2 but worse than 1.
        model.monitor.channels['foobar'].val_record.append(6.0)
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 5.0
        # Best cost after a fourth, better iteration.
        model.monitor.channels['foobar'].val_record.append(3.0)
        ext.on_monitor(model, None, None)
        assert model.tag['test123']['best_cost'] == 3.0
    finally:
        os.remove(fn)
예제 #6
0
    def create_algorithm(self, data, save_best_path=None):
        """
        Configure the cost, the SGD algorithm and the optional plotting.

        Sets `self.cost`, `self.mbsb` and `self.algorithm`, and `self.pm`
        when `self.plot_monitor` is truthy.

        Parameters
        ----------
        data : object
            Passed to `self.set_dataset`.
        save_best_path : object, optional
            Forwarded as `save_path` to MonitorBasedSaveBest.
        """
        self.set_dataset(data)
        self.create_adjustors()

        # Termination: always an epoch limit; when `valid_stop` is set it is
        # combined (via And) with a MonitorBased criterion on the
        # validation objective.
        stop_rule = EpochCounter(max_epochs=self.max_epochs)
        if self.valid_stop:
            stalled = MonitorBased(channel_name='valid_objective',
                                   prop_decrease=.0,
                                   N=3)
            stop_rule = And(criteria=[stalled, stop_rule])

        # Coefficients are laid out as (layers, A_weight_decay): one nested
        # list for the sub-layers of the first layer, one entry per hidden
        # dimension, one trailing entry, and the same factor as second item.
        coeffs = None
        if self.reg_factors:
            factor = self.reg_factors
            n_hidden = len(self.tagger.hdims)
            n_input = len(self.tagger.layers[0].layers)
            per_layer = [[factor] * n_input]
            per_layer.extend([factor] * n_hidden)
            per_layer.append(factor)
            coeffs = (per_layer, factor)
        tagger_cost = SeqTaggerCost(coeffs, self.dropout)
        self.cost = tagger_cost

        self.mbsb = MonitorBasedSaveBest(channel_name='valid_objective',
                                         save_path=save_best_path)

        # Monitor a copy of the dataset mapping, without the training set
        # unless training monitoring was requested.
        monitored = dict(self.dataset)
        if not self.monitor_train:
            monitored.pop('train')

        if self.use_momentum:
            rule = self.momentum_rule
        else:
            rule = None
        self.algorithm = SGD(
            batch_size=1,
            learning_rate=self.lr,
            termination_criterion=stop_rule,
            monitoring_dataset=monitored,
            cost=tagger_cost,
            learning_rule=rule,
        )

        self.algorithm.setup(self, self.dataset['train'])
        if self.plot_monitor:
            channels = ["valid_objective", "test_objective"]
            if self.monitor_train:
                channels += ["train_objective"]
            plots = Plots(channel_names=channels, save_path=self.plot_monitor)
            self.pm = PlotManager([plots], freq=1)
            self.pm.setup(self, None, self.algorithm)
예제 #7
0
    def get_extensions(self):
        if 'ext_array' not in self.state:
            return []
        extensions = []

        for ext_obj in self.state.ext_array.values():
            fn = getattr(self, 'get_ext_' + ext_obj.ext_class)
            extensions.append(fn(ext_obj))

        # monitor based save best
        print 'save best channel', self.mbsb_channel_name
        if self.mbsb_channel_name is not None:
            self.save_path = self.save_prefix + str(self.state.config_id) + "_optimum.pkl"
            extensions.append(MonitorBasedSaveBest(
                    channel_name = self.mbsb_channel_name,
                    save_path = self.save_path
                )
            )

        return extensions
예제 #8
0
def get_layer_trainer_sgd(model, trainset):
    """Build a Train object that fits `model` on `trainset` with SGD.

    Parameters
    ----------
    model : object
        Model handed to Train. The dropout cost refers to a layer named
        'h0'.
    trainset : object
        Used both as the training dataset and as the monitoring dataset.

    Returns
    -------
    Train
        Trainer using a Dropout cost, an epoch limit of MAX_EPOCHS and a
        MonitorBasedSaveBest extension on the "y_kl" channel that writes to
        "./convnet_test_best.pkl".
    """
    drop_cost = Dropout(input_include_probs={'h0': .4},
                        input_scales={'h0': 1.})

    # configs on sgd
    # The iteration mode has to be a name pylearn2 knows. The earlier
    # spelling 'batchwise_shuffled_equential' (missing "s") is not one.
    train_algo = SGD(train_iteration_mode='batchwise_shuffled_sequential',
                     learning_rate=0.2,
                     cost=drop_cost,
                     monitoring_dataset=trainset,
                     termination_criterion=EpochCounter(max_epochs=MAX_EPOCHS),
                     update_callbacks=None)

    extensions = [
        MonitorBasedSaveBest(channel_name="y_kl",
                             save_path="./convnet_test_best.pkl")
    ]

    return Train(model=model,
                 algorithm=train_algo,
                 extensions=extensions,
                 dataset=trainset)
예제 #9
0
def get_trainer(model, trainset, validset, save_path):
    """Assemble a Train object using SGD with the RMSProp learning rule.

    Parameters
    ----------
    model : object
        Model to train.
    trainset : object
        Training dataset; also monitored under the name 'train'.
    validset : object
        Dataset monitored under the name 'valid'.
    save_path : object
        Forwarded as `save_path` to MonitorBasedSaveBest.

    Returns
    -------
    Train
        Trainer that stops via a MonitorBased criterion on
        'valid_y_misclass' and carries two extensions: a
        MonitorBasedSaveBest on the same channel and a LinearDecayOverEpoch.
    """
    monitored = {'valid': validset, 'train': trainset}
    stop_rule = MonitorBased(channel_name='valid_y_misclass',
                             prop_decrease=.001,
                             N=100)

    best_saver = MonitorBasedSaveBest(channel_name='valid_y_misclass',
                                      save_path=save_path)
    lr_decay = LinearDecayOverEpoch(start=1, saturate=200, decay_factor=0.01)

    # A Momentum(0.5) learning rule, a MomentumAdjustor extension, a batch
    # size of 250 and a `batches_per_iter` setting were tried earlier and
    # are not active.
    sgd = SGD(
        learning_rate=.01,
        learning_rule=RMSProp(),
        train_iteration_mode='shuffled_sequential',
        batch_size=1200,
        monitoring_dataset=monitored,
        monitor_iteration_mode='shuffled_sequential',
        termination_criterion=stop_rule,
    )

    return Train(model=model,
                 algorithm=sgd,
                 dataset=trainset,
                 extensions=[best_saver, lr_decay])
def main():
    """Train the conv/conv/maxout/multisoftmax keypoint network.

    Every setting is hard-coded below. The function builds the datasets,
    the four-layer MLP, the cost and the KeypointADADELTA algorithm, runs
    the training loop and saves the result. It takes no arguments and
    returns nothing.
    """
    # ---- hyper-parameters -------------------------------------------------
    batch_size = 48
    lr = 1.0  # 0.1/4 -- not referenced by the ADADELTA setup below
    finMomentum = 0.9  # not referenced below; no momentum extension is active
    n_maxout_units = 2000
    n_pieces = 4
    conv1_norm = conv2_norm = maxout_norm = None

    # ---- checkpoint locations ---------------------------------------------
    # Earlier runs derived these from params.host / params.device /
    # sys.argv[1] under ./models/; they are fixed paths now.
    save_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb.joblib'
    best_path = '/Tmp/zumerjer/bart10_sumcost_adadelta_drop_perturb_best.joblib'

    # ---- data ---------------------------------------------------------------
    # A disabled preprocessing pass (Standardize followed by
    # GlobalContrastNormalization over EmotiwKeypoints splits) used to be
    # sketched here; the datasets below are loaded as they are.
    train_set = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_',
                                    which_set='train')
    valid_set = ComboDatasetPyTable('/Tmp/zumerjer/perturbed_',
                                    which_set='valid')

    # ---- layers: 2 convolutional rectified layers, border mode valid ------
    conv1 = ConvRectifiedLinear(
        layer_name='convRect1',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        W_lr_scale=0.1,
        max_kernel_norm=conv1_norm)
    conv2 = ConvRectifiedLinear(
        layer_name='convRect2',
        output_channels=128,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[3, 3],
        pool_stride=[2, 2],
        W_lr_scale=0.1,
        max_kernel_norm=conv2_norm)

    # Maxout layer (a RectifiedLinear layer was used here at some point).
    maxout_layer = Maxout(
        layer_name='maxout',
        irange=.005,
        num_units=n_maxout_units,
        num_pieces=n_pieces,
        W_lr_scale=0.1,
        max_col_norm=maxout_norm)

    # multisoftmax
    n_groups = 196
    n_classes = 96
    layer_name = 'multisoftmax'
    output_layer = MultiSoftmax(n_groups=n_groups,
                                irange=0.05,
                                n_classes=n_classes,
                                layer_name=layer_name)

    # ---- model --------------------------------------------------------------
    input_space = Conv2DSpace(shape=[96, 96],
                              num_channels=3,
                              axes=('b', 0, 1, 'c'))
    network = MLP(batch_size=batch_size,
                  input_space=input_space,
                  layers=[conv1, conv2, maxout_layer, output_layer])

    # ---- cost ---------------------------------------------------------------
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})

    # ---- algorithm ----------------------------------------------------------
    monitoring_dataset = {'validation': valid_set}
    stop_rule = MonitorBased(prop_decrease=1e-7,
                             N=100,
                             channel_name='validation_objective')
    optimiser = KeypointADADELTA(decay_factor=0.95,
                                 monitoring_dataset=monitoring_dataset,
                                 batch_size=batch_size,
                                 termination_criterion=stop_rule,
                                 cost=mlp_cost)

    # ---- train object -------------------------------------------------------
    # Only the best-model saver is active; learning-rate decay and momentum
    # adjustor extensions were tried earlier and are disabled.
    best_saver = MonitorBasedSaveBest(channel_name='validation_objective',
                                      save_path=best_path)
    train = Train(dataset=train_set,
                  save_path=save_path,
                  save_freq=10,
                  model=network,
                  algorithm=optimiser,
                  extensions=[best_saver])
    train.main_loop()
    train.save()
예제 #11
0
	def set_extensions(self, extensions=None):
		"""
		Set `self.extensions` to a single MonitorBasedSaveBest extension.

		The `extensions` argument is accepted but not used: the list is
		always rebuilt with one saver watching 'test_objective' and writing
		to './training/training_monitor_best.pkl'.
		"""
		best_saver = MonitorBasedSaveBest(
			channel_name='test_objective',
			save_path='./training/training_monitor_best.pkl')
		self.extensions = [best_saver]
예제 #12
0
# SGD driven by the model's "cost_from_X" method. Training and monitoring
# use the same batch size and the same iteration mode; the run is capped at
# 200 epochs.
algorithm = SGD(
    learning_rate=0.01,
    batch_size=batchSize,
    monitoring_batch_size=batchSize,
    cost=MethodCost("cost_from_X"),
    monitoring_dataset={'train': train, 'valid': valid},
    train_iteration_mode="even_batchwise_shuffled_sequential",
    monitor_iteration_mode="even_batchwise_shuffled_sequential",
    termination_criterion=EpochCounter(max_epochs=200),
    learning_rule=Momentum(init_momentum=0.99),
)

# From here on `train` names the Train driver instead of the dataset.
# Two extensions are attached: a best-model saver on "valid_y_misclass" and
# a momentum adjustor.
train = Train(
    dataset=train,
    model=model,
    algorithm=algorithm,
    save_path="ConvNet4.pkl",
    save_freq=1,
    extensions=[
        MonitorBasedSaveBest(channel_name="valid_y_misclass",
                             save_path="ConvNet4_best.pkl"),
        MomentumAdjustor(final_momentum=0, start=0, saturate=100),
    ],
)

print("Starting training session")
train.main_loop()
print("Done!")
예제 #13
0
def main():
    """Train the conv/conv/maxout/multisoftmax keypoint network with SGD.

    Hyper-parameters come from the module-level `params` object and the
    checkpoint names additionally from `sys.argv[1]`. The function builds
    the datasets, the four-layer MLP, the cost and the KeypointSGD
    algorithm, runs the training loop and saves the result. It takes no
    arguments and returns nothing.
    """
    # ---- hyper-parameters (2 convolutional rectified layers follow) -------
    batch_size = params.batch_size
    lr = params.lr
    finMomentum = params.momentum
    n_maxout_units = params.units
    n_pieces = params.pieces
    conv1_norm = conv2_norm = maxout_norm = params.norm_reg

    # ---- checkpoint locations: ./models/<host>_<device>_<argv[1]>... ------
    run_prefix = ('./models/' + params.host + '_' + params.device + '_' +
                  sys.argv[1])
    save_path = run_prefix + '.joblib'
    best_path = run_prefix + 'best.joblib'
    numBatches = 400000 / batch_size  # not referenced below

    from emotiw.common.datasets.faces.EmotiwKeypoints import EmotiwKeypoints

    # ---- data ---------------------------------------------------------------
    # A disabled preprocessing pass (Standardize followed by
    # GlobalContrastNormalization over EmotiwKeypoints splits) used to be
    # sketched here; the datasets below are requested with preproc='STD'.
    train_set = EmotiwKeypoints(hack='train', preproc='STD')
    valid_set = EmotiwKeypoints(hack='val', preproc='STD')

    # ---- layers -------------------------------------------------------------
    conv1 = ConvRectifiedLinear(
        layer_name='convRect1',
        output_channels=64,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[4, 4],
        pool_stride=[2, 2],
        W_lr_scale=0.1,
        max_kernel_norm=conv1_norm)
    conv2 = ConvRectifiedLinear(
        layer_name='convRect2',
        output_channels=128,
        irange=.05,
        kernel_shape=[5, 5],
        pool_shape=[3, 3],
        pool_stride=[2, 2],
        W_lr_scale=0.1,
        max_kernel_norm=conv2_norm)

    # Maxout layer (a RectifiedLinear layer was used here at some point).
    maxout_layer = Maxout(
        layer_name='maxout',
        irange=.005,
        num_units=n_maxout_units,
        num_pieces=n_pieces,
        W_lr_scale=0.1,
        max_col_norm=maxout_norm)

    # multisoftmax
    n_groups = 196
    n_classes = 96
    irange = 0  # not referenced below; the layer is given irange=0.05
    layer_name = 'multisoftmax'
    output_layer = MultiSoftmax(n_groups=n_groups,
                                irange=0.05,
                                n_classes=n_classes,
                                layer_name=layer_name)

    # ---- model --------------------------------------------------------------
    input_space = Conv2DSpace(shape=[96, 96], num_channels=3)
    network = MLP(batch_size=batch_size,
                  input_space=input_space,
                  layers=[conv1, conv2, maxout_layer, output_layer])

    # ---- cost ---------------------------------------------------------------
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)
    mlp_cost.setup_dropout(input_include_probs={'convRect1': 1.0},
                           input_scales={'convRect1': 1.})

    # ---- algorithm ----------------------------------------------------------
    monitoring_dataset = {'validation': valid_set}
    stop_rule = MonitorBased(prop_decrease=1e-7,
                             N=100,
                             channel_name='validation_objective')
    optimiser = KeypointSGD(learning_rate=lr,
                            init_momentum=0.5,
                            monitoring_dataset=monitoring_dataset,
                            batch_size=batch_size,
                            termination_criterion=stop_rule,
                            cost=mlp_cost)

    # ---- extensions ---------------------------------------------------------
    # Linear learning-rate decay (an exponential decay was tried earlier),
    # best-model saving on the validation objective, and a momentum
    # adjustor ending at params.momentum.
    lr_decay = LinearDecayOverEpoch(start=1, saturate=250, decay_factor=.01)
    best_saver = MonitorBasedSaveBest(channel_name='validation_objective',
                                      save_path=best_path)
    momentum_ramp = MomentumAdjustor(start=1,
                                     saturate=25,
                                     final_momentum=finMomentum)

    # ---- train object -------------------------------------------------------
    train = Train(dataset=train_set,
                  save_path=save_path,
                  save_freq=10,
                  model=network,
                  algorithm=optimiser,
                  extensions=[lr_decay, best_saver, momentum_ramp])
    train.main_loop()
    train.save()
예제 #14
0
def main(job_id, params, cache):
    """Train (or re-train a cached) conv net for one hyper-parameter job.

    A trainer is built from ``conv_fooddata_spearmint.yaml`` the first time
    a given pair of h2/h3 kernel sizes is seen and stored in `cache`. Later
    calls with the same pair reuse that trainer: the monitor is reset, the
    layers are updated through ``update_conv_layer`` /
    ``update_softmax_layer``, and the learning rate and momentum are reset.

    Parameters
    ----------
    job_id : object
        Not used by this function.
    params : dict
        Job parameters. It is updated in place with the module-level
        ``additional_args``.
    cache : mapping
        Holds trainers between calls, keyed by
        ``'cached_trainer' + str(fixed_params)``.

    Returns
    -------
    float
        The ``misclass_channel`` value of the trained model, times 50.
    """
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could potentially be tuned.
    params.update(additional_args)

    def first(name):
        # Every tuned parameter is read through its first element.
        return params[name][0]

    def pow10(name):
        # Parameters given as base-10 exponents.
        return math.pow(10, params[name][0])

    # NOTE(review): the key only covers kernel_size_h2 / kernel_size_h3.
    # kernel_size_h4 goes into the YAML template below but is neither in the
    # key nor refreshed on reuse -- confirm it never varies between jobs.
    fixed_params = (first('kernel_size_h2'), first('kernel_size_h3'))
    cache_key = 'cached_trainer' + str(fixed_params)

    if cache_key in cache:
        # Reuse the cached trainer and reset its state for a fresh run.
        train_obj = cache[cache_key]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()

        model = train_obj.model
        model_params = {param.name: param for param in model.get_params()}

        rng = model.rng

        update_conv_layer(model.layers[0], first('l_ir_h2'), first('max_norm_h2'), model_params, rng)
        update_conv_layer(model.layers[1], first('l_ir_h3'), first('max_norm_h3'), model_params, rng)
        update_conv_layer(model.layers[2], first('l_ir_h4'), first('max_norm_h4'), model_params, rng)
        update_softmax_layer(model.layers[3], first('l_ir_y'), first('max_norm_y'), model_params, rng)

        train_obj.algorithm.learning_rate.set_value(
            math.pow(10, first('log_init_learning_rate').astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(
            first('init_momentum').astype(numpy.float32))
    else:
        train_params = {
            'train_stop': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': 1,
            'max_batches': 10,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,
            'save_file': 'result',

            'kernel_size_h2': int(first('kernel_size_h2')),
            'output_channels_h2': 1 * k,
            'irange_h2': pow10('l_ir_h2'),
            'max_kernel_norm_h2': first('max_norm_h2'),

            'kernel_size_h3': int(first('kernel_size_h3')),
            'output_channels_h3': int(1.7 * k),
            'irange_h3': pow10('l_ir_h3'),
            'max_kernel_norm_h3': first('max_norm_h3'),

            'kernel_size_h4': int(first('kernel_size_h4')),
            'output_channels_h4': int(2.5 * k),
            'irange_h4': pow10('l_ir_h4'),
            'max_kernel_norm_h4': first('max_norm_h4'),

            'weight_decay_h2': pow10('l_wdecay_h2'),
            'weight_decay_h3': pow10('l_wdecay_h3'),
            'weight_decay_h4': pow10('l_wdecay_h4'),
            'weight_decay_y': pow10('l_wdecay_y'),
            'max_col_norm_y': first('max_norm_y'),
            'irange_y': pow10('l_ir_y'),
            'init_learning_rate': pow10('log_init_learning_rate'),
            'init_momentum': first('init_momentum'),
            'rectifier_left_slope': 0.2,
        }

        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            template = f.read()

        # The template uses %(name)s placeholders filled from train_params.
        train_obj = yaml_parse.load(template % train_params)

        if 'converge' in params:
            # Drop every sub-criterion of the termination criterion and keep
            # the best model (by 'valid_y_misclass') in 'best_model.pkl'.
            del train_obj.algorithm.termination_criterion._criteria[:]
            train_obj.extensions.append(
                MonitorBasedSaveBest('valid_y_misclass', 'best_model.pkl'))

        # Set up explicitly so that main_loop can run with do_setup=False.
        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        cache[cache_key] = train_obj

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)
    train_obj.main_loop(do_setup=False)
    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50
예제 #15
0
  
# Three-layer network assembled from the previously built h0, h1 and y.
model = MLP(
    layers=[h0, h1, y],
    batch_size=batchSize,
    input_space=inputSpace,
)

# SGD driven by the model's "cost_from_X" method. Training and monitoring
# use the same batch size and the same iteration mode; the run is capped at
# 100 epochs and the momentum rule starts at 0.0.
algorithm = SGD(
    learning_rate=1e-3,
    batch_size=batchSize,
    monitoring_batch_size=batchSize,
    cost=MethodCost("cost_from_X"),
    monitoring_dataset={'train': train, 'valid': valid},
    train_iteration_mode="even_batchwise_shuffled_sequential",
    monitor_iteration_mode="even_batchwise_shuffled_sequential",
    termination_criterion=EpochCounter(max_epochs=100),
    learning_rule=Momentum(init_momentum=0.0),
)

# From here on `train` names the Train driver instead of the dataset.
# The only extension is a best-model saver on "valid_y_misclass".
train = Train(
    dataset=train,
    model=model,
    algorithm=algorithm,
    save_path="2_layer_conv.pkl",
    save_freq=1,
    extensions=[
        MonitorBasedSaveBest(channel_name="valid_y_misclass",
                             save_path="2_layer_conv_best.pkl"),
    ],
)

print("Starting training session")
train.main_loop()
print("Done!")
예제 #16
0
# No debugger breakpoint at module level: a pdb.set_trace() here would stop
# every run at an interactive prompt, so the script could not run unattended.

# SGD with a Dropout cost on layer 'h0'. Training stops through a
# MonitorBased criterion on "valid_y_misclass". An EpochCounter with
# max_epochs=1 was used as an alternative criterion for quick runs.
algorithm = SGD(
    batch_size=batch_size,
    learning_rate=learning_rate,
    init_momentum=.5,
    monitoring_dataset={'valid': val_ds},
    cost=Dropout(input_include_probs={'h0': .8}, input_scales={'h0': 1.}),
    termination_criterion=MonitorBased(channel_name="valid_y_misclass",
                                       prop_decrease=0.,
                                       N=100),
    update_callbacks=ExponentialDecay(decay_factor=1.00004, min_lr=.000001))

# Keep the best model (by "valid_y_misclass") at save_best_path, and adjust
# the momentum towards .7 between epochs 1 and 250.
extensions = [
    MonitorBasedSaveBest(channel_name='valid_y_misclass',
                         save_path=save_best_path),
    MomentumAdjustor(start=1, saturate=250, final_momentum=.7)
]

model = MLP(batch_size=batch_size,
            input_space=Conv2DSpace(shape=[48, 48],
                                    num_channels=num_chan,
                                    axes=['c', 0, 1, 'b']),
            layers=[
                MaxoutConvC01B(layer_name='h0',
                               pad=0,
                               num_channels=64,
                               num_pieces=2,
                               kernel_shape=[8, 8],
                               pool_shape=[4, 4],
                               pool_stride=[2, 2],
예제 #17
0
def test_works():
    """Train the facial-keypoint network end to end.

    With ``load`` set to False the function only prepares the data: it
    builds the train/validation splits, applies Standardize and
    GlobalContrastNormalization, pickles both datasets to 'kpd.pkl' and
    returns. With ``load`` set to True (the hard-coded default) it reads
    'kpd.pkl', builds the model and runs the training loop.
    """
    load = True
    if not load:
        ddmTrain = FacialKeypoint(which_set='train', start=0, stop=6000)
        ddmValid = FacialKeypoint(which_set='train', start=6000, stop=7049)
        # Fit the preprocessors on the training split only and reuse the
        # fitted state on the validation split (can_fit=False).
        stndrdz = preprocessing.Standardize()
        stndrdz.apply(ddmTrain, can_fit=True)
        stndrdz.apply(ddmValid, can_fit=False)
        GCN = preprocessing.GlobalContrastNormalization()
        GCN.apply(ddmTrain, can_fit=True)
        GCN.apply(ddmValid, can_fit=False)

        # The with-block closes the file even if pickling fails.
        with open('kpd.pkl', 'wb') as pcklFile:
            pickle.dump((ddmTrain, ddmValid), pcklFile)
        return

    # NOTE: pickle.load executes code embedded in the file; 'kpd.pkl' is
    # expected to be the cache written by the branch above.
    with open('kpd.pkl', 'rb') as pcklFile:
        (ddmTrain, ddmValid) = pickle.load(pcklFile)

    #creating layers
    #2 convolutional rectified layers, border mode valid
    layer1 = ConvRectifiedLinear(layer_name='convRect1',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)
    layer2 = ConvRectifiedLinear(layer_name='convRect2',
                                 output_channels=64,
                                 irange=.05,
                                 kernel_shape=[5, 5],
                                 pool_shape=[3, 3],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    # Rectified linear units
    layer3 = RectifiedLinear(dim=3000, sparse_init=15, layer_name='RectLin3')

    #multisoftmax
    n_groups = 30
    n_classes = 98
    layer_name = 'multisoftmax'
    layerMS = MultiSoftmax(n_groups=n_groups,
                           irange=0.05,
                           n_classes=n_classes,
                           layer_name=layer_name)

    #setting up MLP
    MLPerc = MLP(batch_size=8,
                 input_space=Conv2DSpace(shape=[96, 96], num_channels=1),
                 layers=[layer1, layer2, layer3, layerMS])

    #mlp_cost
    missing_target_value = -1
    mlp_cost = MLPCost(cost_type='default',
                       missing_target_value=missing_target_value)

    #algorithm

    # learning rate, momentum, batch size, monitoring dataset, cost, termination criteria

    term_crit = MonitorBased(prop_decrease=0.00001,
                             N=30,
                             channel_name='validation_objective')
    kpSGD = KeypointSGD(learning_rate=0.001,
                        init_momentum=0.5,
                        monitoring_dataset={
                            'validation': ddmValid,
                            'training': ddmTrain
                        },
                        batch_size=8,
                        batches_per_iter=750,
                        termination_criterion=term_crit,
                        train_iteration_mode='random_uniform',
                        cost=mlp_cost)

    #train extension
    train_ext = ExponentialDecayOverEpoch(decay_factor=0.998,
                                          min_lr_scale=0.01)
    #train object
    train = Train(dataset=ddmTrain,
                  save_path='kpd_model2.pkl',
                  save_freq=1,
                  model=MLPerc,
                  algorithm=kpSGD,
                  extensions=[
                      train_ext,
                      MonitorBasedSaveBest(channel_name='validation_objective',
                                           save_path='kpd_best.pkl'),
                      MomentumAdjustor(start=1, saturate=20, final_momentum=.9)
                  ])
    train.main_loop()
    train.save()
예제 #18
0
def main(job_id, requested_params, cache):
    """Train one hyper-parameter configuration and return its scaled error.

    A trainer is built from ``conv_fooddata_spearmint.yaml`` (or taken from
    ``cache`` when an entry for the same fixed parameters exists), the main
    loop is run, and the monitor channel named by the module-level
    ``misclass_channel`` is read back.

    Parameters
    ----------
    job_id : object
        Not used by this function.
    requested_params : mapping
        Overrides applied on top of the module-level ``additional_args``
        defaults.  Most entries are read as ``params[name][0]``.
    cache : mapping
        Looked up under ``'cached_trainer' + str(fixed_params)``.  This
        function does not store anything in it (the store is disabled
        below), so the re-use branch only runs when the caller filled it.

    Returns
    -------
    float
        ``float(read_channel(train_obj.model, misclass_channel)) * 50``.
    """
    # Fix sub directory problems
    sys.path.append(os.path.dirname(os.getcwd()))
    os.chdir(os.path.dirname(os.path.realpath(__file__)))

    # Add parameters that are not currently being tuned but could potentially
    # be tuned.  Work on a copy: updating ``additional_args`` itself would
    # leak one job's keys ('converge', 'model', 'rate', ...) into every later
    # call made from the same process.
    params = dict(additional_args)
    params.update(requested_params)

    output_channels_h2 = int(1.00 * 50)
    output_channels_h3 = int(3.42 * 50)
    output_channels_h4 = int(11.67 * 50)

    dropout_h2 = float(params['dropout_h2'][0]) / 10
    dropout_h3 = float(params['dropout_h3'][0]) / 10
    dropout_h4 = float(params['dropout_h4'][0]) / 10
    dropout_y = float(params['dropout_y'][0]) / 10

    if params.get('rate') is not None:
        # NOTE: the copy above is shallow, so this in-place add still writes
        # into the container object supplied by the caller / the defaults.
        params['log_init_learning_rate'][0] += numpy.array([params['rate']])

    # Parameters that identify a cached trainer.
    # NOTE(review): 'kernel_size_h4' is used in train_params but is not part
    # of this key -- confirm whether that omission is intended.
    fixed_params = (params['kernel_size_h2'][0], params['kernel_size_h3'][0], params['dropout_h2'][0],
                    params['dropout_h3'][0], params['dropout_h4'][0], params['dropout_y'][0])
    cache_key = 'cached_trainer' + str(fixed_params)

    if cache_key not in cache:
        # Values substituted into the YAML template via %-formatting.
        train_params = {
            'train_start': params['start'],
            'train_stop': params['stop'],
            'valid_start': 20000,
            'valid_stop': 24000,
            'test_stop': 4000,
            'batch_size': 100,
            'max_epochs': params.get('epochs', 1),
            'max_batches': 50,
            'sgd_seed': sgd_seed_str,
            'mlp_seed': mlp_seed_str,

            'kernel_size_h2': int(params['kernel_size_h2'][0]),
            'output_channels_h2': output_channels_h2,
            'irange_h2': math.pow(10, params['l_ir_h2'][0]),
            'max_kernel_norm_h2': params['max_norm_h2'][0],
            'dropout_h2': dropout_h2,
            'dscale_h2': params['dfac_h2'][0] * 1.0 / dropout_h2,
            'w_lr_sc_h2': math.pow(dropout_h2, 2),
            'weight_decay_h2': math.pow(10, params['l_wdecay_h2'][0]),

            'kernel_size_h3': int(params['kernel_size_h3'][0]),
            'output_channels_h3': output_channels_h3,
            'irange_h3': math.pow(10, params['l_ir_h3'][0]),
            'max_kernel_norm_h3': params['max_norm_h3'][0],
            'dropout_h3': dropout_h3,
            'dscale_h3': params['dfac_h3'][0] * 1.0 / dropout_h3,
            'w_lr_sc_h3': math.pow(dropout_h3, 2),
            'weight_decay_h3': math.pow(10, params['l_wdecay_h3'][0]),

            'kernel_size_h4': int(params['kernel_size_h4'][0]),
            'output_channels_h4': output_channels_h4,
            'irange_h4': math.pow(10, params['l_ir_h4'][0]),
            'max_kernel_norm_h4': params['max_norm_h4'][0],
            'dropout_h4': dropout_h4,
            'dscale_h4': params['dfac_h4'][0] * 1.0 / dropout_h4,
            'w_lr_sc_h4': math.pow(dropout_h4, 2),
            'weight_decay_h4': math.pow(10, params['l_wdecay_h4'][0]),

            'weight_decay_y': math.pow(10, params['l_wdecay_y'][0]),
            'max_col_norm_y': params['max_norm_y'][0],
            'irange_y': math.pow(10, params['l_ir_y'][0]),
            'dropout_y': dropout_y,
            'dscale_y': 1.0 / dropout_y,
            'w_lr_sc_y': math.pow(dropout_y, 2),
            'init_learning_rate': math.pow(10, params['log_init_learning_rate'][0]),
            'init_momentum': params['init_momentum'][0],
            'rectifier_left_slope': 0.2,
        }

        with open('conv_fooddata_spearmint.yaml', 'r') as f:
            yaml_template = f.read()

        train_obj = yaml_parse.load(yaml_template % train_params)

        if 'converge' in params:
            # Override the epoch limit of the first termination criterion and
            # keep the best model according to 'valid_y_misclass'.
            train_obj.algorithm.termination_criterion._criteria[0]._max_epochs = params.get('epochs', 100)
            train_obj.extensions.append(MonitorBasedSaveBest('valid_y_misclass', params['save']))

        train_obj.setup()
        train_obj.model.monitor.on_channel_conflict = 'ignore'
        # Storing the trainer for re-use is currently disabled:
        # cache[cache_key] = train_obj

    else:
        # Re-use the cached trainer: reset its monitor and success flag, then
        # push the new tunable values into the existing layers and algorithm.
        train_obj = cache[cache_key]
        train_obj.model.monitor.set_state([0, 0, 0])
        train_obj.model.training_succeeded = False
        # train_obj.algorithm.update_callbacks[0].reinit_from_monitor()

        model = train_obj.model
        model_params = {param.name: param for param in model.get_params()}

        rng = model.rng

        update_conv_layer(model.layers[0], params['l_ir_h2'][0], params['max_norm_h2'][0], model_params, rng)
        update_conv_layer(model.layers[1], params['l_ir_h3'][0], params['max_norm_h3'][0], model_params, rng)
        update_conv_layer(model.layers[2], params['l_ir_h4'][0], params['max_norm_h4'][0], model_params, rng)
        update_softmax_layer(model.layers[3], params['l_ir_y'][0], params['max_norm_y'][0], model_params, rng)

        train_obj.algorithm.learning_rate.set_value(
                math.pow(10, params['log_init_learning_rate'][0].astype(numpy.float32)))
        train_obj.algorithm.learning_rule.momentum.set_value(params['init_momentum'][0].astype(numpy.float32))

    # Optionally start from the parameter values of a previously saved model.
    pretrained_model_path = params.get('model')
    if pretrained_model_path is not None:
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print('loading pre trained model')
        pretrained_model = serial.load(pretrained_model_path)
        print('loading done')
        train_obj.model.set_param_values(pretrained_model.get_param_values())

    if 'converge' not in params:
        train_obj.algorithm.termination_criterion._criteria[0].initialize(train_obj.model)
    # Setup was already done above (or on the cached trainer).
    train_obj.main_loop(do_setup=False)
    if params.get('savelast', False):
        serial.save(params['save'] + 'f', train_obj.model, on_overwrite='backup')
    original_misclass = read_channel(train_obj.model, misclass_channel)
    return float(original_misclass) * 50
예제 #19
0
# Datasets handed to the SGD algorithm for monitoring.
monitored_sets = {
    'train': dataset,
    'valid': dataset_valid,
    'test': dataset_test,
}

# SGD with learning rate 0.1, stopping after the EpochCounter(100) criterion.
sgd_algorithm = SGD(0.1,
                    batch_size=128,
                    monitoring_dataset=monitored_sets,
                    termination_criterion=EpochCounter(100),
                    train_iteration_mode='even_shuffled_sequential',
                    monitor_iteration_mode='even_sequential')

# Extension that writes the best model according to 'test_y_misclass'.
# NOTE(review): a 'valid' set is monitored as well -- confirm that selecting
# the best model on the test channel is intended.
best_model_saver = MonitorBasedSaveBest(channel_name='test_y_misclass',
                                        save_path="pkl/multicolumn_best.pkl")

# Trainer: regular saves go to multicolumn.pkl with save_freq=5.
train = Train(dataset,
              mlp,
              sgd_algorithm,
              save_path="pkl/multicolumn.pkl",
              save_freq=5,
              extensions=[best_model_saver])

# # Load the saved model
# model = serial.load(saved_model_path)
#
# # Remove last layer
# del model.layers[-1]
#
# # Add new layer
# new_output_layer = <make your new layer here>
# model.add_layers([new_output_layer])

# mlp.layers.extend(pretrained_layers[start_layer:])

# , cost=Dropout(input_include_probs={'composite':1.})))