Example 1
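# Trains a ladder model with Blocks: builds the Model from the total cost, optimizes
# it with Adam, applies the clean-path batch-normalization updates alongside the
# gradient updates, monitors the cost/error channels on the train and test streams
# each epoch, saves parameters and the log, and schedules LRDecay between epochs
# num_epochs * lrate_decay and num_epochs.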
def train(ladder,
          batch_size=100,
          num_train_examples=60000,
          num_epochs=150,
          lrate_decay=0.67):
    # Set up the logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/mnist_100_standard_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    model = Model(ladder.costs.total)
    all_params = model.parameters
    print len(all_params)
    print all_params

    training_algorithm = GradientDescent(
        cost=ladder.costs.total,
        parameters=all_params,
        step_rule=Adam(learning_rate=ladder.lr))

    # Fetch all batch normalization updates. They are in the clean path.
    # In addition to actual training, also do BN variable approximations
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    training_algorithm.add_updates(bn_updates)

    monitored_variables = [
        ladder.costs.class_corr, ladder.costs.class_clean, ladder.error,
        training_algorithm.total_gradient_norm, ladder.costs.total
    ] + ladder.costs.denois.values()

    train_data_stream, test_data_stream = get_mixed_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=test_data_stream,
                                            prefix="test",
                                            after_epoch=True)

    main_loop = MainLoop(algorithm=training_algorithm,
                         data_stream=train_data_stream,
                         model=model,
                         extensions=[
                             train_monitoring, valid_monitoring,
                             FinishAfter(after_n_epochs=num_epochs),
                             SaveParams('test_CE_corr', model, save_path),
                             SaveLog(save_path, after_epoch=True),
                             LRDecay(lr=ladder.lr,
                                     decay_first=num_epochs * lrate_decay,
                                     decay_last=num_epochs,
                                     after_epoch=True),
                             Printing()
                         ])
    main_loop.run()
Example 2
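# Trains a ladder model from command-line parameters: prepares the save directory and
# file logging, loads and logs the hyperparameters, builds the (optionally whitened)
# data streams, runs Adam-based gradient descent with the clean-path batch-norm
# updates attached, monitors approximate and final validation metrics, and returns
# the training log as a dataframe (or None if training was interrupted).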
def train(cli_params):
    cli_params['save_dir'] = prepare_dir(cli_params['save_to'])
    logfile = os.path.join(cli_params['save_dir'], 'log.txt')

    # Also log DEBUG messages to a file
    fh = logging.FileHandler(filename=logfile)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    logger.info('Logging into %s' % logfile)

    p, loaded = load_and_log_params(cli_params)
    in_dim, data, whiten, cnorm = setup_data(p, test_set=False)
    if not loaded:
        # Set the zero layer to match input dimensions
        p.encoder_layers = (in_dim,) + p.encoder_layers

    ladder = setup_model(p)

    # Training
    all_params = ComputationGraph([ladder.costs.total]).parameters
    logger.info('Found the following parameters: %s' % str(all_params))

    # Fetch all batch normalization updates. They are in the clean path.
    bn_updates = ComputationGraph([ladder.costs.class_clean]).updates
    assert 'counter' in [u.name for u in bn_updates.keys()], \
        'No batch norm params in graph - the graph has been cut?'

    training_algorithm = GradientDescent(
        cost=ladder.costs.total, params=all_params,
        step_rule=Adam(learning_rate=ladder.lr))
    # In addition to actual training, also do BN variable approximations
    training_algorithm.add_updates(bn_updates)

    short_prints = {
        "train": {
            'T_C_class': ladder.costs.class_corr,
            'T_C_de': ladder.costs.denois.values(),
        },
        "valid_approx": OrderedDict([
            ('V_C_class', ladder.costs.class_clean),
            ('V_E', ladder.error.clean),
            ('V_C_de', ladder.costs.denois.values()),
        ]),
        "valid_final": OrderedDict([
            ('VF_C_class', ladder.costs.class_clean),
            ('VF_E', ladder.error.clean),
            ('VF_C_de', ladder.costs.denois.values()),
        ]),
    }

    main_loop = MainLoop(
        training_algorithm,
        # Datastream used for training
        make_datastream(data.train, data.train_ind,
                        p.batch_size,
                        n_labeled=p.labeled_samples,
                        n_unlabeled=p.unlabeled_samples,
                        whiten=whiten,
                        cnorm=cnorm),
        model=Model(ladder.costs.total),
        extensions=[
            FinishAfter(after_n_epochs=p.num_epochs),

            # This will estimate the validation error using
            # running average estimates of the batch normalization
            # parameters, mean and variance
            ApproxTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size, whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="valid_approx"),

            # This Monitor is slower, but more accurate since it will first
            # estimate batch normalization parameters from training data and
            # then do another pass to calculate the validation error.
            FinalTestMonitoring(
                [ladder.costs.class_clean, ladder.error.clean]
                + ladder.costs.denois.values(),
                make_datastream(data.train, data.train_ind,
                                p.batch_size,
                                n_labeled=p.labeled_samples,
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                make_datastream(data.valid, data.valid_ind,
                                p.valid_batch_size,
                                n_labeled=len(data.valid_ind),
                                whiten=whiten, cnorm=cnorm,
                                scheme=ShuffledScheme),
                prefix="valid_final",
                after_n_epochs=p.num_epochs),

            TrainingDataMonitoring(
                [ladder.costs.total, ladder.costs.class_corr,
                 training_algorithm.total_gradient_norm]
                + ladder.costs.denois.values(),
                prefix="train", after_epoch=True),

            SaveParams(None, all_params, p.save_dir, after_epoch=True),
            SaveExpParams(p, p.save_dir, before_training=True),
            SaveLog(p.save_dir, after_training=True),
            ShortPrinting(short_prints),
            LRDecay(ladder.lr, p.num_epochs * p.lrate_decay, p.num_epochs,
                    after_epoch=True),
        ])
    main_loop.run()

    # Get results
    df = main_loop.log.to_dataframe()
    col = 'valid_final_error_rate_clean'
    logger.info('%s %g' % (col, df[col].iloc[-1]))

    if main_loop.log.status['epoch_interrupt_received']:
        return None
    return df
Example 3
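    # Trains the tagger: Adam-based gradient descent on the total cost, per-epoch
    # train/valid monitoring, a final test pass (including the AMI score) after
    # training, periodic parameter and experiment-parameter saving, and short-form
    # printing of the tracked graph nodes.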
    def train(self):
        """ Setup and train the model """
        to_train = ComputationGraph([self.tagger.total_cost]).parameters
        logger.info('Found the following parameters: %s' % str(to_train))

        step_rule = Adam(learning_rate=self.p.lr)
        training_algorithm = GradientDescent(
            cost=self.tagger.total_cost, parameters=to_train, step_rule=step_rule,
            on_unused_sources='warn',
            theano_func_kwargs={'on_unused_input': 'warn'}
        )

        # TRACKED GRAPH NODES
        train_params = {
            'Train_Denoising_Cost': self.tagger.corr.denoising_cost,
        }
        if self.p.class_cost_x > 0:
            train_params['Train_Classification_Cost'] = self.tagger.corr.class_cost
            train_params['Train_Classification_Error'] = self.tagger.clean.class_error

        valid_params = {
            'Validation_Denoising_Cost': self.tagger.corr.denoising_cost,
        }
        if self.p.class_cost_x > 0:
            valid_params['Validation_Classification_Cost'] = self.tagger.corr.class_cost
            valid_params['Validation_Classification_Error'] = self.tagger.clean.class_error

        test_params = {
            'Test_AMI_Score': self.tagger.clean.ami_score,
            'Test_Denoising_Cost': self.tagger.corr.denoising_cost,
        }
        if self.p.class_cost_x > 0:
            test_params['Test_Classification_Cost'] = self.tagger.corr.class_cost
            test_params['Test_Classification_Error'] = self.tagger.clean.class_error

        short_prints = {
            "train": train_params,
            "valid": valid_params,
            "test": test_params,
        }

        main_loop = MainLoop(
            training_algorithm,
            # Datastream used for training
            self.streams['train'],
            model=Model(self.tagger.total_cost),
            extensions=[
                FinishAfter(after_n_epochs=self.p.num_epochs),
                SaveParams(self.p.get('save_freq', 0), self.tagger, self.save_dir, before_epoch=True),
                DataStreamMonitoring(
                    valid_params.values(),
                    self.streams['valid'],
                    prefix="valid"
                ),
                FinalTestMonitoring(
                    test_params.values(),
                    self.streams['train'],
                    {'valid': self.streams['valid'], 'test': self.streams['test']},
                    after_training=True
                ),
                TrainingDataMonitoring(
                    train_params.values(),
                    prefix="train", after_epoch=True
                ),
                SaveExpParams(self.p, self.save_dir, before_training=True),
                Timing(after_epoch=True),
                ShortPrinting(short_prints, after_epoch=True),
            ])
        logger.info('Running the main loop')
        main_loop.run()
Example 4
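# Trains a Blocks model described by a configuration dict: optionally adds weight
# noise and/or an L2 penalty to the cost, optimizes with Adam plus step clipping,
# monitors per-parameter weight and gradient norms, saves parameters and the log
# each epoch, and decays the learning rate on the schedule given by lrs and
# until_which_epoch.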
def train(model, configs):
    get_streams = configs['get_streams']
    save_path = configs['save_path']
    num_epochs = configs['num_epochs']
    batch_size = configs['batch_size']
    lrs = configs['lrs']
    until_which_epoch = configs['until_which_epoch']
    grad_clipping = configs['grad_clipping']
    monitorings = model.monitorings

    # Training
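    # Optionally regularize the cost graph: inject noise into the WEIGHT-role
    # variables and/or add an L2 penalty on the weights.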
    if configs['weight_noise'] > 0:
        cg = ComputationGraph(model.cost)
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        cg = apply_noise(cg, weights, configs['weight_noise'])
        model.cost = cg.outputs[0].copy(name='CE')

    if configs['l2_reg'] > 0:
        cg = ComputationGraph(model.cost)
        weights = VariableFilter(roles=[WEIGHT])(cg.variables)
        new_cost = model.cost + configs['l2_reg'] * sum(
            [(weight**2).sum() for weight in weights])
        model.cost = new_cost.copy(name='CE')

    blocks_model = Model(model.cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    default_lr = np.float32(configs['lrs'][0])
    lr_var = theano.shared(default_lr, name="learning_rate")

    clipping = StepClipping(threshold=np.cast[floatX](grad_clipping))
    # sgd_momentum = Momentum(
    #     learning_rate=0.0001,
    #     momentum=0.95)
    # step_rule = CompositeRule([clipping, sgd_momentum])
    adam = Adam(learning_rate=lr_var)
    step_rule = CompositeRule([clipping, adam])
    training_algorithm = GradientDescent(cost=model.cost,
                                         parameters=all_params,
                                         step_rule=step_rule,
                                         on_unused_sources='warn')

    monitored_variables = [
        lr_var,
        aggregation.mean(training_algorithm.total_gradient_norm)
    ] + monitorings

    # Also track the L2 norm of every parameter and of its gradient.
    for param in all_params:
        name = param.tag.annotations[0].name + "." + param.name
        to_monitor = training_algorithm.gradients[param].norm(2)
        to_monitor.name = name + "_grad_norm"
        monitored_variables.append(to_monitor)
        to_monitor = param.norm(2)
        to_monitor.name = name + "_norm"
        monitored_variables.append(to_monitor)

    train_data_stream, valid_data_stream = get_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=valid_data_stream,
                                            prefix="valid",
                                            after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring,
            valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_CE', blocks_model, save_path, after_epoch=True),
            SaveLog(after_epoch=True),
            ProgressBar(),
            # ErrorPerVideo(model, after_epoch=True, on_interrupt=True),
            LRDecay(lr_var, lrs, until_which_epoch, after_epoch=True),
            Printing(after_epoch=True)
        ])
    main_loop.run()
Example 5
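# Trains a model on the CMV_V2 data streams: Adam with step clipping, per-parameter
# gradient/weight norm monitoring, per-epoch train/valid monitoring, parameter saving
# keyed to the validation error-rate channel, and a fixed stepwise learning-rate
# decay schedule.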
def train(model, batch_size=100, num_epochs=1000):
    cost = model.cost
    monitorings = model.monitorings
    # Set up the logger
    timestr = time.strftime("%Y_%m_%d_at_%H_%M")
    save_path = 'results/CMV_V2_' + timestr
    log_path = os.path.join(save_path, 'log.txt')
    os.makedirs(save_path)
    fh = logging.FileHandler(filename=log_path)
    fh.setLevel(logging.DEBUG)
    logger.addHandler(fh)

    # Training
    blocks_model = Model(cost)
    all_params = blocks_model.parameters
    print "Number of found parameters:" + str(len(all_params))
    print all_params

    clipping = StepClipping(threshold=np.cast[floatX](10))

    adam = Adam(learning_rate=model.lr_var)
    step_rule = CompositeRule([adam, clipping])
    training_algorithm = GradientDescent(cost=cost,
                                         parameters=all_params,
                                         step_rule=step_rule)

    monitored_variables = [
        model.lr_var, cost,
        aggregation.mean(training_algorithm.total_gradient_norm)
    ] + monitorings

    # Also track the L2 norm of every named parameter and of its gradient.
    params_dicts = blocks_model.get_parameter_dict()
    for name, param in params_dicts.iteritems():
        to_monitor = training_algorithm.gradients[param].norm(2)
        to_monitor.name = name + "_grad_norm"
        monitored_variables.append(to_monitor)
        to_monitor = param.norm(2)
        to_monitor.name = name + "_norm"
        monitored_variables.append(to_monitor)

    train_data_stream, valid_data_stream = get_cmv_v2_streams(batch_size)

    train_monitoring = TrainingDataMonitoring(variables=monitored_variables,
                                              prefix="train",
                                              after_epoch=True)

    valid_monitoring = DataStreamMonitoring(variables=monitored_variables,
                                            data_stream=valid_data_stream,
                                            prefix="valid",
                                            after_epoch=True)

    main_loop = MainLoop(
        algorithm=training_algorithm,
        data_stream=train_data_stream,
        model=blocks_model,
        extensions=[
            train_monitoring, valid_monitoring,
            FinishAfter(after_n_epochs=num_epochs),
            SaveParams('valid_misclassificationrate_apply_error_rate',
                       blocks_model, save_path),
            SaveLog(save_path, after_epoch=True),
            ProgressBar(),
            LRDecay(model.lr_var, [0.001, 0.0001, 0.00001, 0.000001],
                    [8, 15, 30, 1000],
                    after_epoch=True),
            Printing()
        ])
    main_loop.run()