Example #1
    def __init__(self,
                 runner,
                 model_params,
                 resume=False,
                 resume_data=None,
                 s3_data=None,
                 **kwargs):
        dataset = create_dense_design_matrix(x=runner.dp.train_set_x)

        if resume:
            model, model_params = self.resume_model(model_params, resume_data)
        else:
            model = self.new_model(model_params, dataset=dataset)

        termination_criterion = MaxEpochNumber(model_params['maxnum_iter'])
        algorithm = SGD(learning_rate=model_params['learning_rate']['init'],
                        monitoring_dataset=dataset,
                        cost=MeanSquaredReconstructionError(),
                        termination_criterion=termination_criterion,
                        batch_size=model_params['batch_size'])
        ext = AutoEncoderStatReporter(runner,
                                      resume=resume,
                                      resume_data=resume_data,
                                      save_freq=model_params['save_freq'])
        self.train_obj = Train(dataset=dataset,
                               model=model,
                               algorithm=algorithm,
                               extensions=[ext])
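
Note: most snippets in this collection omit their import blocks. The sketch below lists the pylearn2 import paths the shared classes typically resolve to; helpers such as create_dense_design_matrix, MaxEpochNumber, and AutoEncoderStatReporter in Example #1 come from the surrounding project rather than pylearn2, so they are left out here.

# Typical pylearn2 imports assumed by the examples in this collection (a sketch, not a complete list).
from pylearn2.train import Train
from pylearn2.training_algorithms.sgd import SGD
from pylearn2.training_algorithms.learning_rule import Momentum, AdaDelta, AdaGrad, RMSProp
from pylearn2.termination_criteria import EpochCounter, MonitorBased
from pylearn2.costs.autoencoder import MeanSquaredReconstructionError
from pylearn2.datasets.dense_design_matrix import DenseDesignMatrix
from pylearn2.space import VectorSpace, CompositeSpace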
Example #2
        def run_algorithm():
            unsupported_modes = ['random_slice', 'random_uniform']
            algorithm = SGD(learning_rate,
                            cost,
                            batch_size=5,
                            train_iteration_mode=mode,
                            monitoring_dataset=None,
                            termination_criterion=termination_criterion,
                            update_callbacks=None,
                            init_momentum=None,
                            set_batch_size=False)

            algorithm.setup(dataset=dataset, model=model)

            raised = False
            try:
                algorithm.train(dataset)
            except ValueError:
                print mode
                assert mode in unsupported_modes
                raised = True
            if mode in unsupported_modes:
                assert raised
                return True
            return False
Example #3
def test_multiple_inputs():
    """
    Create a VectorSpacesDataset with two inputs (features0 and features1)
    and train an MLP which takes both inputs for 1 epoch.
    """
    mlp = MLP(layers=[
        FlattenerLayer(
            CompositeLayer('composite',
                           [Linear(10, 'h0', 0.1),
                            Linear(10, 'h1', 0.1)], {
                                0: [1],
                                1: [0]
                            })),
        Softmax(5, 'softmax', 0.1)
    ],
              input_space=CompositeSpace([VectorSpace(15),
                                          VectorSpace(20)]),
              input_source=('features0', 'features1'))
    dataset = VectorSpacesDataset(
        (np.random.rand(20, 20).astype(theano.config.floatX),
         np.random.rand(20, 15).astype(theano.config.floatX),
         np.random.rand(20, 5).astype(theano.config.floatX)),
        (CompositeSpace(
            [VectorSpace(20), VectorSpace(15),
             VectorSpace(5)]), ('features1', 'features0', 'targets')))
    train = Train(dataset, mlp, SGD(0.1, batch_size=5))
    train.algorithm.termination_criterion = EpochCounter(1)
    train.main_loop()
Example #4
def get_layer_trainer_sgd_autoencoder(layer,
                                      trainset,
                                      batch_size=10,
                                      learning_rate=0.1,
                                      max_epochs=100,
                                      name=''):
    # configs on sgd
    train_algo = SGD(
        learning_rate=learning_rate,
        #             learning_rule = AdaDelta(),
        learning_rule=Momentum(init_momentum=0.5),
        cost=MeanSquaredReconstructionError(),
        batch_size=batch_size,
        monitoring_dataset=trainset,
        termination_criterion=EpochCounter(max_epochs=max_epochs),
        update_callbacks=None)

    log_callback = LoggingCallback(name)

    return Train(model=layer,
                 algorithm=train_algo,
                 extensions=[
                     log_callback,
                     OneOverEpoch(start=1, half_life=5),
                     MomentumAdjustor(final_momentum=0.7,
                                      start=10,
                                      saturate=100)
                 ],
                 dataset=trainset)
Example #5
    def train_with_monitoring_datasets(train_dataset,
                                       monitoring_datasets,
                                       model_force_batch_size,
                                       train_iteration_mode,
                                       monitor_iteration_mode):

        model = SoftmaxModel(dim)
        if model_force_batch_size:
            model.force_batch_size = model_force_batch_size

        cost = DummyCost()

        algorithm = SGD(learning_rate, cost,
                        batch_size=batch_size,
                        train_iteration_mode=train_iteration_mode,
                        monitor_iteration_mode=monitor_iteration_mode,
                        monitoring_dataset=monitoring_datasets,
                        termination_criterion=EpochCounter(2))

        train = Train(train_dataset,
                      model,
                      algorithm,
                      save_path=None,
                      save_freq=0,
                      extensions=None)

        train.main_loop()
Example #6
def test_pylearn2_training():
    # Construct the model
    mlp = MLP(activations=[Sigmoid(), Sigmoid()],
              dims=[784, 100, 784],
              weights_init=IsotropicGaussian(),
              biases_init=Constant(0.01))
    mlp.initialize()
    cost = SquaredError()

    block_cost = BlocksCost(cost)
    block_model = BlocksModel(mlp, (VectorSpace(dim=784), 'features'))

    # Load the data
    rng = numpy.random.RandomState(14)
    train_dataset = random_dense_design_matrix(rng, 1024, 784, 10)
    valid_dataset = random_dense_design_matrix(rng, 1024, 784, 10)

    # Silence Pylearn2's logger
    logger = logging.getLogger(pylearn2.__name__)
    logger.setLevel(logging.ERROR)

    # Training algorithm
    sgd = SGD(learning_rate=0.01,
              cost=block_cost,
              batch_size=128,
              monitoring_dataset=valid_dataset)
    train = Train(train_dataset, block_model, algorithm=sgd)
    train.main_loop(time_budget=3)
Example #7
def test_train_ae():
    GC = GaussianCorruptor

    gsn = GSN.new(layer_sizes=[ds.X.shape[1], 1000],
                  activation_funcs=["sigmoid", "tanh"],
                  pre_corruptors=[None, GC(1.0)],
                  post_corruptors=[SaltPepperCorruptor(0.5),
                                   GC(1.0)],
                  layer_samplers=[BinomialSampler(), None],
                  tied=False)

    # average MBCE over example rather than sum it
    _mbce = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _mbce.cost(a, b) / ds.X.shape[1]

    c = GSNCost([(0, 1.0, reconstruction_cost)], walkback=WALKBACK)

    alg = SGD(LEARNING_RATE,
              init_momentum=MOMENTUM,
              cost=c,
              termination_criterion=EpochCounter(MAX_EPOCHS),
              batches_per_iter=BATCHES_PER_EPOCH,
              batch_size=BATCH_SIZE,
              monitoring_dataset=ds,
              monitoring_batches=10)

    trainer = Train(ds,
                    gsn,
                    algorithm=alg,
                    save_path="gsn_ae_example.pkl",
                    save_freq=5)
    trainer.main_loop()
    print "done training"
Example #8
def test_execution_order():

    # ensure save is called directly after monitoring by checking
    # parameter values in `on_monitor` and `on_save`.

    model = MLP(layers=[Softmax(layer_name='y', n_classes=2, irange=0.)],
                nvis=3)

    dataset = DenseDesignMatrix(X=np.random.normal(size=(6, 3)),
                                y=np.random.normal(size=(6, 2)))

    epoch_counter = EpochCounter(max_epochs=1)

    algorithm = SGD(batch_size=2,
                    learning_rate=0.1,
                    termination_criterion=epoch_counter)

    extension = ParamMonitor()

    train = Train(dataset=dataset,
                  model=model,
                  algorithm=algorithm,
                  extensions=[extension],
                  save_freq=1,
                  save_path="save.pkl")

    # mock save
    train.save = MethodType(only_run_extensions, train)

    train.main_loop()
Example #9
def get_ae_pretrainer(layer, data, batch_size, epochs=30):
    init_lr = 0.05

    train_algo = SGD(
        batch_size=batch_size,
        learning_rate=init_lr,
        learning_rule=Momentum(init_momentum=0.5),
        monitoring_batches=batch_size,
        monitoring_dataset=data,
        # for ContractiveAutoencoder:
        # cost=cost.SumOfCosts(costs=[[1., MeanSquaredReconstructionError()],
        #                             [0.5, cost.MethodCost(method='contraction_penalty')]]),
        # for HigherOrderContractiveAutoencoder:
        # cost=cost.SumOfCosts(costs=[[1., MeanSquaredReconstructionError()],
        #                             [0.5, cost.MethodCost(method='contraction_penalty')],
        #                             [0.5, cost.MethodCost(method='higher_order_penalty')]]),
        # for DenoisingAutoencoder:
        cost=MeanSquaredReconstructionError(),
        termination_criterion=EpochCounter(epochs))
    return Train(model=layer,
                 algorithm=train_algo,
                 dataset=data,
                 extensions=[
                     MomentumAdjustor(final_momentum=0.9, start=0,
                                      saturate=25),
                     LinearDecayOverEpoch(start=1,
                                          saturate=25,
                                          decay_factor=.02)
                 ])
Example #10
    def get_train_sgd(self, config_id):
        row = self.db.executeSQL(
            """
        SELECT  learning_rate,batch_size,init_momentum,
                train_iteration_mode,cost_array,term_array
        FROM hps3.train_sgd
        WHERE config_id = %s
        """, (config_id, ), self.db.FETCH_ONE)
        if not row:
            raise HPSData("No stochasticGradientDescent for config_id="\
                +str(config_id))
        (learning_rate, batch_size, init_momentum, train_iteration_mode,
         cost_array, term_array) = row
        # cost
        cost = self.get_costs(cost_array)

        num_train_batch = (self.ntrain / self.batch_size)
        print "num training batches:", num_train_batch
        termination_criterion \
            = self.get_terminations(config_id, term_array)
        return SGD(learning_rate=learning_rate,
                   cost=cost,
                   batch_size=batch_size,
                   batches_per_iter=num_train_batch,
                   monitoring_dataset=self.monitoring_dataset,
                   termination_criterion=termination_criterion,
                   init_momentum=init_momentum,
                   train_iteration_mode=train_iteration_mode)
Example #11
def test_sgd_sup():

    # tests that we can run the sgd algorithm
    # on a supervised cost.
    # does not test for correctness at all, just
    # that the algorithm runs without dying

    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    idx = rng.randint(0, dim, (m, ))
    Y = np.zeros((m, dim))
    for i in xrange(m):
        Y[i, idx[i]] = 1

    dataset = DenseDesignMatrix(X=X, y=Y)

    m = 15
    X = rng.randn(m, dim)

    idx = rng.randint(0, dim, (m,))
    Y = np.zeros((m, dim))
    for i in xrange(m):
        Y[i, idx[i]] = 1

    # Including a monitoring dataset lets us test that
    # the monitor works with supervised data
    monitoring_dataset = DenseDesignMatrix(X=X, y=Y)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    cost = SupervisedDummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate, cost,
                    batch_size=batch_size,
                    monitoring_batches=3,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
Example #12
def train_model():
    global ninput, noutput
    simdata = SimulationData(
        sim_path="../../javaDataCenter/generarDadesV1/CA_SDN_topo1/")
    simdata.load_data()
    simdata.preprocessor()
    dataset = simdata.get_matrix()

    structure = get_structure()
    layers = []
    for pair in structure:
        layers.append(get_autoencoder(pair))

    model = DeepComposedAutoencoder(layers)
    training_alg = SGD(learning_rate=1e-3,
                       cost=MeanSquaredReconstructionError(),
                       batch_size=1296,
                       monitoring_dataset=dataset,
                       termination_criterion=EpochCounter(max_epochs=50))
    extensions = [MonitorBasedLRAdjuster()]
    experiment = Train(dataset=dataset,
                       model=model,
                       algorithm=training_alg,
                       save_path='training2.pkl',
                       save_freq=10,
                       allow_overwrite=True,
                       extensions=extensions)
    experiment.main_loop()
Example #13
def get_trainer(model, trainset, validset, epochs=20, batch_size=200):
    monitoring_batches = None if validset is None else 20
    train_algo = SGD(batch_size=batch_size,
                     init_momentum=0.5,
                     learning_rate=0.1,
                     monitoring_batches=monitoring_batches,
                     monitoring_dataset=validset,
                     cost=Dropout(input_include_probs={
                         'h0': 0.8,
                         'h1': 0.8,
                         'h2': 0.8,
                         'h3': 0.8,
                         'y': 0.5
                     },
                                  input_scales={
                                      'h0': 1. / 0.8,
                                      'h1': 1. / 0.8,
                                      'h2': 1. / 0.8,
                                      'h3': 1. / 0.8,
                                      'y': 1. / 0.5
                                  },
                                  default_input_include_prob=0.5,
                                  default_input_scale=1. / 0.5),
                     termination_criterion=EpochCounter(epochs),
                     update_callbacks=ExponentialDecay(decay_factor=1.0001,
                                                       min_lr=0.001))
    return Train(model=model,
                 algorithm=train_algo,
                 dataset=trainset,
                 save_freq=0,
                 save_path='epoch',
                 extensions=[MomentumAdjustor(final_momentum=0.9,
                                              start=0,
                                              saturate=int(epochs * 0.8))])
Example #14
    def get_train_sgd(self):

        cost = MethodCost('cost_from_X')
        #cost = self.get_costs()
        num_train_batch = (self.ntrain/self.batch_size)
        print "num training batches:", num_train_batch

        termination_criterion = self.get_terminations()

        monitoring_dataset = {}
        for dataset_id in self.state.monitoring_dataset:
            if dataset_id == 'test' and self.test_ddm is not None:
                monitoring_dataset['test'] = self.test_ddm
            elif dataset_id == 'valid' and self.valid_ddm is not None:
                monitoring_dataset['valid'] = self.valid_ddm
            else:
                monitoring_dataset = None
            
        return SGD(learning_rate=self.state.learning_rate,
                   batch_size=self.state.batch_size,
                   cost=cost,
                   batches_per_iter=num_train_batch,
                   monitoring_dataset=monitoring_dataset,
                   termination_criterion=termination_criterion,
                   init_momentum=self.state.init_momentum,
                   train_iteration_mode=self.state.train_iteration_mode)
Example #15
def train_example(dataset=None):
    model = GaussianBinaryRBM(nvis=1296,
                              nhid=61,
                              irange=0.5,
                              energy_function_class=grbm_type_1(),
                              learn_sigma=True,
                              init_sigma=.4,
                              init_bias_hid=2.,
                              mean_vis=False,
                              sigma_lr_scale=1e-3)
    cost = SMD(corruptor=GaussianCorruptor(stdev=0.4))
    algorithm = SGD(learning_rate=.1,
                    batch_size=5,
                    monitoring_batches=20,
                    monitoring_dataset=dataset,
                    cost=cost,
                    termination_criterion=MonitorBased(prop_decrease=0.01,
                                                       N=1))
    train = Train(dataset=dataset,
                  model=model,
                  save_path="./experiment/training.pkl",
                  save_freq=10,
                  algorithm=algorithm,
                  extensions=[])
    train.main_loop()
Example #16
def test_lr_scalers():
    """
    Tests that SGD respects Model.get_lr_scalers
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            super(ModelWithScalers, self).__init__()
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def __call__(self, X):
            # Implemented only so that DummyCost would work
            return X

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()

    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(.0),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    manual = [
        param - learning_rate * scale for param, scale in zip(manual, scales)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale for param, scale in zip(manual, scales)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
Example #17
def test_adadelta():
    """
    Make sure that learning_rule.AdaDelta obtains the same parameter values as
    with a hand-crafted AdaDelta implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "AdaDelta: An Adaptive Learning Rate Method", Matthew D. Zeiler.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    decay = 0.95

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaDelta(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)
        state[param]['dx2'] = np.zeros(param_shape)

    def adadelta_manual(model, state):
        inc = []
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adadelta
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val**2
            rms_g_t = np.sqrt(pstate['g2'] + scale * learning_rate)
            rms_dx_tm1 = np.sqrt(pstate['dx2'] + scale * learning_rate)
            dx_t = -rms_dx_tm1 / rms_g_t * param_val
            pstate['dx2'] = decay * pstate['dx2'] + (1 - decay) * dx_t**2
            rval += [param_val + dx_t]
        return rval

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
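
The AdaDelta test above and the AdaGrad test in Example #23 reference module-level fixtures (shapes, scales, learning_rate, and a DummyModel) that the snippets do not show. Example #16 and the RMSProp test in Example #29 use the same values inline, so the shared fixtures presumably look roughly like this; DummyModel is assumed to be a minimal pylearn2 Model whose get_lr_scalers returns these scales for parameters of these shapes.

# Presumed shared test fixtures, mirroring the values shown in Examples #16 and #29.
scales = [.01, .02, .05, 1., 5.]
shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]
learning_rate = .001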
Example #18
def model1():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    valid_set = MNIST(which_set='test', one_hot=True)
    test_set = MNIST(which_set='test', one_hot=True)

    #import pdb
    #pdb.set_trace()
    #print train_set.X.shape[1]

    # =====<Create the MLP Model>=====

    h2_layer = NoisyRELU(layer_name='h1',
                         sparse_init=15,
                         noise_factor=5,
                         dim=1000,
                         desired_active_rate=0.2,
                         bias_factor=20,
                         max_col_norm=1)
    #h2_layer = RectifiedLinear(layer_name='h2', dim=100, sparse_init=15, max_col_norm=1)
    #print h1_layer.get_params()
    #h2 = RectifiedLinear(layer_name='h2', dim=500, sparse_init=15, max_col_norm=1)
    y_layer = Softmax(layer_name='y', n_classes=10, irange=0., max_col_norm=1)

    mlp = MLP(batch_size=200,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={'valid': valid_set},
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.001, N=50))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model1.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    #train_obj.setup_extensions()

    #import pdb
    #pdb.set_trace()
    train_obj.main_loop()

Example #19
    def create_algorithm(self):
        cost_crit = MonitorBased(channel_name=self.optimize_for,
                                 prop_decrease=0.,
                                 N=10)
        epoch_cnt_crit = EpochCounter(max_epochs=self.max_epochs)
        term = And(criteria=[cost_crit, epoch_cnt_crit])
        self.algorithm = SGD(batch_size=100,
                             learning_rate=.01,
                             monitoring_dataset=self.alg_datasets,
                             termination_criterion=term)
Example #20
def test_train_supervised():
    """
    Train a supervised GSN.
    """
    # initialize the GSN
    gsn = GSN.new(
        layer_sizes=[ds.X.shape[1], 1000, ds.y.shape[1]],
        activation_funcs=["sigmoid", "tanh", rescaled_softmax],
        pre_corruptors=[GaussianCorruptor(0.5)] * 3,
        post_corruptors=[
            SaltPepperCorruptor(.3), None,
            SmoothOneHotCorruptor(.5)
        ],
        layer_samplers=[BinomialSampler(), None,
                        MultinomialSampler()],
        tied=False)

    # average over costs rather than summing
    _rcost = MeanBinaryCrossEntropy()
    reconstruction_cost = lambda a, b: _rcost.cost(a, b) / ds.X.shape[1]

    _ccost = MeanBinaryCrossEntropy()
    classification_cost = lambda a, b: _ccost.cost(a, b) / ds.y.shape[1]

    # combine costs into GSNCost object
    c = GSNCost(
        [
            # reconstruction on layer 0 with weight 1.0
            (0, 1.0, reconstruction_cost),

            # classification on layer 2 with weight 2.0
            (2, 2.0, classification_cost)
        ],
        walkback=WALKBACK,
        mode="supervised")

    alg = SGD(
        LEARNING_RATE,
        init_momentum=MOMENTUM,
        cost=c,
        termination_criterion=EpochCounter(MAX_EPOCHS),
        batches_per_iter=BATCHES_PER_EPOCH,
        batch_size=BATCH_SIZE,
        monitoring_dataset=ds,
        monitoring_batches=10,
    )

    trainer = Train(ds,
                    gsn,
                    algorithm=alg,
                    save_path="gsn_sup_example.pkl",
                    save_freq=10,
                    extensions=[MonitorBasedLRAdjuster()])
    trainer.main_loop()
    print("done training")
Example #21
def test_sgd_unspec_num_mon_batch():

    # tests that if you don't specify a number of
    # monitoring batches, SGD configures the monitor
    # to run on all the data

    m = 25

    visited = [False] * m
    rng = np.random.RandomState([25, 9, 2012])
    X = np.zeros((m, 1))
    X[:, 0] = np.arange(m)
    dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(1)

    learning_rate = 1e-3
    batch_size = 5

    cost = DummyCost()

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=5,
                    monitoring_batches=None,
                    monitoring_dataset=dataset,
                    termination_criterion=None,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    algorithm.setup(dataset=dataset, model=model)

    monitor = Monitor.get_monitor(model)

    X = T.matrix()

    def tracker(*data):
        X, = data
        assert X.shape[1] == 1
        for i in xrange(X.shape[0]):
            visited[int(X[i, 0])] = True

    monitor.add_channel(name='tracker',
                        ipt=X,
                        val=0.,
                        prereqs=[tracker],
                        data_specs=(model.get_input_space(),
                                    model.get_input_source()))

    monitor()

    if False in visited:
        print visited
        assert False
Example #22
def testing_multiple_datasets_with_specified_dataset_in_monitor_based_lr():
    # tests that the class MonitorBasedLRAdjuster in sgd.py can properly use
    # the specified dataset_name in the constructor when multiple datasets
    # exist.

    dim = 3
    m = 10

    rng = np.random.RandomState([6, 2, 2014])

    X = rng.randn(m, dim)
    Y = rng.randn(m, dim)

    learning_rate = 1e-2
    batch_size = 5

    # We need to include this so the test actually stops running at some point
    epoch_num = 1

    # Including monitoring datasets lets us test that
    # the monitor works with multiple datasets
    monitoring_train = DenseDesignMatrix(X=X)
    monitoring_test = DenseDesignMatrix(X=Y)

    cost = DummyCost()

    model = SoftmaxModel(dim)

    dataset = DenseDesignMatrix(X=X)

    termination_criterion = EpochCounter(epoch_num)

    monitoring_dataset = {'train': monitoring_train, 'test': monitoring_test}

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    dataset_name = monitoring_dataset.keys()[0]
    monitor_lr = MonitorBasedLRAdjuster(dataset_name=dataset_name)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=[monitor_lr])

    train.main_loop()
Example #23
def test_adagrad():
    """
    Make sure that learning_rule.AdaGrad obtains the same parameter values as
    with a hand-crafted AdaGrad implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "Adaptive subgradient methods for online learning and
    stochastic optimization", Duchi J, Hazan E, Singer Y.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaGrad(),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['sg2'] = np.zeros(param_shape)

    def adagrad_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adagrad
            pstate['sg2'] += param_val**2
            dx_t = -(scale * learning_rate / np.sqrt(pstate['sg2']) *
                     param_val)
            rval += [param_val + dx_t]
        return rval

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
Example #24
    def set_training_criteria(self,
                              learning_rate=0.05,
                              cost=MeanSquaredReconstructionError(),
                              batch_size=10,
                              max_epochs=10):

        self.training_alg = SGD(learning_rate=learning_rate,
                                cost=cost,
                                batch_size=batch_size,
                                monitoring_dataset=self.datasets,
                                termination_criterion=EpochCounter(max_epochs))
Example #25
    def set_training_criteria(self,
                              learning_rate=0.05,
                              cost=Default(),
                              batch_size=10,
                              max_epochs=10):

        self.training_alg = SGD(learning_rate=learning_rate,
                                cost=cost,
                                batch_size=batch_size,
                                monitoring_dataset=self.datasets,
                                termination_criterion=EpochCounter(max_epochs))
Example #26
def test_lr_scalers_momentum():
    """
    Tests that SGD respects Model.get_lr_scalers when using
    momentum.
    """

    cost = SumOfParams()

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            super(ModelWithScalers, self).__init__()
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()

    dataset = ArangeDataset(1)

    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              init_momentum=momentum,
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for param, scale in zip(manual, scales)]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale + i * momentum
        for param, scale, i in zip(manual, scales, inc)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
Example #27
    def __init__(self,
                 runner,
                 model_params,
                 resume=False,
                 resume_data=None,
                 s3_data=None,
                 **kwargs):
        self.model_params = model_params
        self.out_nonlin = runner.model['out_nonlin']
        if self.out_nonlin == 'LINEARGAUSSIAN':
            outputs_num = None
            cost = None
        else:
            outputs_num = runner.dp.uniq_outputs_num
            cost = self.get_cost_fn()
        dataset = self.construct_datasets(runner.dp.train_set_x,
                                          runner.dp.train_set_y, outputs_num)
        valid_dataset = self.construct_datasets(runner.dp.test_set_x,
                                                runner.dp.test_set_y,
                                                outputs_num)
        if resume:
            model = self.resume_model(model_params, resume_data)
            lr_init = model_params['learning_rate']['init'] / (
                model_params['learning_rate']['decay_factor']**
                model.monitor.get_batches_seen())
        else:
            model = self.new_model(model_params, dataset=dataset)
            lr_init = model_params['learning_rate']['init']

        batches_per_iter = get_batches_per_iter(model_params, dataset)
        termination_criterion = MaxEpochNumber(model_params['maxnum_iter'])
        update_callbacks, extensions = construct_update(
            model_params, resume, resume_data)
        algorithm = SGD(learning_rate=lr_init,
                        init_momentum=model_params['momentum']['init'],
                        monitoring_dataset={
                            'valid': valid_dataset,
                            'train': dataset
                        },
                        cost=cost,
                        termination_criterion=termination_criterion,
                        update_callbacks=update_callbacks,
                        batches_per_iter=batches_per_iter)
        self.train_obj = Train(dataset=dataset,
                               model=model,
                               algorithm=algorithm,
                               extensions=extensions)
        ext = MLPStatReporter(model,
                              runner,
                              resume=resume,
                              resume_data=resume_data,
                              save_freq=model_params['save_freq'])
        self.train_obj.extensions.append(ext)
Example #28
def model2():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====

    h1_layer = RectifiedLinear(layer_name='h1', dim=1000, irange=0.5)
    #print h1_layer.get_params()
    h2_layer = RectifiedLinear(layer_name='h2',
                               dim=1000,
                               sparse_init=15,
                               max_col_norm=1)
    y_layer = Softmax(layer_name='y',
                      n_classes=train_set.y.shape[1],
                      irange=0.5)

    mlp = MLP(batch_size=100,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=100,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={
                  'valid': train_set,
                  'test': test_set
              },
              cost=SumOfCosts(costs=[
                  MethodCost('cost_from_X'),
                  WeightDecay(coeffs=[0.00005, 0.00005, 0.00005])
              ]),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.0001, N=5))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.99)]

    # =====<Create Training Object>=====
    save_path = './mlp_model2.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    #train_obj.setup_extensions()

    train_obj.main_loop()
Example #29
def test_rmsprop():
    """
    Make sure that learning_rule.RMSProp obtains the same parameter values as
    with a hand-crafted RMSProp implementation, given a dummy model and
    learning rate scaler for each parameter.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    decay = 0.90
    max_scaling = 1e5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=RMSProp(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)

    def rmsprop_manual(model, state):
        inc = []
        rval = []
        epsilon = 1. / max_scaling
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin rmsprop
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val**2
            rms_g_t = np.maximum(np.sqrt(pstate['g2']), epsilon)
            dx_t = -scale * learning_rate / rms_g_t * param_val
            rval += [param_val + dx_t]
        return rval

    manual = rmsprop_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
Example #30
def model3():
    #pdb.set_trace()
    # load the SVHN training set (dimensions differ from the MNIST models above)
    train_set = SVHN_On_Memory(which_set='train')
    # load the SVHN test set
    test_set = SVHN_On_Memory(which_set='test')

    # =====<Create the MLP Model>=====

    h1_layer = NoisyRELU(layer_name='h1',
                         dim=2000,
                         threshold=5,
                         sparse_init=15,
                         max_col_norm=1)
    #print h1_layer.get_params()
    #h2_layer = NoisyRELU(layer_name='h2', dim=100, threshold=15, sparse_init=15, max_col_norm=1)

    y_layer = Softmax(layer_name='y',
                      n_classes=train_set.y.shape[1],
                      irange=0.5)

    mlp = MLP(batch_size=64,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=64,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={
                  'valid': train_set,
                  'test': test_set
              },
              cost=MethodCost('cost_from_X'),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.001, N=50))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.9)]

    # =====<Create Training Object>=====
    save_path = './mlp_model.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=10)
    #train_obj.setup_extensions()

    train_obj.main_loop()