Example #1
def test_lr_scalers():
    """
    Tests that SGD respects Model.get_lr_scalers
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            super(ModelWithScalers, self).__init__()
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def __call__(self, X):
            # Implemented only so that DummyCost would work
            return X

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()

    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(.0),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    manual = [
        param - learning_rate * scale for param, scale in zip(manual, scales)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale for param, scale in zip(manual, scales)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
Example #2
def test_adadelta():
    """
    Make sure that learning_rule.AdaDelta obtains the same parameter values as
    with a hand-crafted AdaDelta implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "AdaDelta: An Adaptive Learning Rate Method", Matthew D. Zeiler.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    decay = 0.95

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaDelta(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)
        state[param]['dx2'] = np.zeros(param_shape)

    def adadelta_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adadelta
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val**2
            rms_g_t = np.sqrt(pstate['g2'] + scale * learning_rate)
            rms_dx_tm1 = np.sqrt(pstate['dx2'] + scale * learning_rate)
            dx_t = -rms_dx_tm1 / rms_g_t * param_val
            pstate['dx2'] = decay * pstate['dx2'] + (1 - decay) * dx_t**2
            rval += [param_val + dx_t]
        return rval

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
Example #3
def test_adagrad():
    """
    Make sure that learning_rule.AdaGrad obtains the same parameter values as
    with a hand-crafted AdaGrad implementation, given a dummy model and
    learning rate scaler for each parameter.

    Reference:
    "Adaptive subgradient methods for online learning and
    stochastic optimization", Duchi J, Hazan E, Singer Y.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaGrad(),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['sg2'] = np.zeros(param_shape)

    def adagrad_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adagrad
            pstate['sg2'] += param_val**2
            dx_t = -(scale * learning_rate / np.sqrt(pstate['sg2']) *
                     param_val)
            rval += [param_val + dx_t]
        return rval

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
Example #4
    def get_costs(self, cost_array):
        costs = []
        for cost_id in cost_array:
            costs.extend(self.get_cost(cost_id))

        if len(costs) > 1:
            cost = SumOfCosts(costs)
        else:
            cost = costs[0]

        return cost
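
For context, here is a minimal sketch of a class that could own the get_costs
helper above. The registry, the cost ids, and the CostBook name are
illustrative assumptions, not part of the original source; only the combining
logic is taken from the helper itself.

from pylearn2.costs.cost import MethodCost, SumOfCosts
from pylearn2.costs.mlp import WeightDecay


class CostBook(object):
    """Hypothetical owner of get_cost/get_costs, for illustration only."""

    def __init__(self):
        # Map each cost id to a list of pylearn2 Cost instances.
        self._registry = {
            'xent': [MethodCost('cost_from_X')],
            'l2': [WeightDecay(coeffs=[.0005, .0005])],
        }

    def get_cost(self, cost_id):
        return self._registry[cost_id]

    def get_costs(self, cost_array):
        costs = []
        for cost_id in cost_array:
            costs.extend(self.get_cost(cost_id))
        # A single cost is returned as-is; several are wrapped in SumOfCosts.
        return SumOfCosts(costs) if len(costs) > 1 else costs[0]


# CostBook().get_costs(['xent', 'l2']) yields a SumOfCosts over both terms;
# CostBook().get_costs(['xent']) yields the MethodCost directly.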
Example #5
def test_rmsprop():
    """
    Make sure that learning_rule.RMSProp obtains the same parameter values as
    with a hand-crafted RMSProp implementation, given a dummy model and
    learning rate scaler for each parameter.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    decay = 0.90
    max_scaling = 1e5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=RMSProp(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)

    def rmsprop_manual(model, state):
        rval = []
        epsilon = 1. / max_scaling
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin rmsprop
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val**2
            rms_g_t = np.maximum(np.sqrt(pstate['g2']), epsilon)
            dx_t = -scale * learning_rate / rms_g_t * param_val
            rval += [param_val + dx_t]
        return rval

    manual = rmsprop_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
Example #6
def model2():
    #pdb.set_trace()
    # train set X has dim (60,000, 784), y has dim (60,000, 10)
    train_set = MNIST(which_set='train', one_hot=True)
    # test set X has dim (10,000, 784), y has dim (10,000, 10)
    test_set = MNIST(which_set='test', one_hot=True)

    # =====<Create the MLP Model>=====

    h1_layer = RectifiedLinear(layer_name='h1', dim=1000, irange=0.5)
    #print h1_layer.get_params()
    h2_layer = RectifiedLinear(layer_name='h2',
                               dim=1000,
                               sparse_init=15,
                               max_col_norm=1)
    y_layer = Softmax(layer_name='y',
                      n_classes=train_set.y.shape[1],
                      irange=0.5)

    mlp = MLP(batch_size=100,
              input_space=VectorSpace(dim=train_set.X.shape[1]),
              layers=[h1_layer, h2_layer, y_layer])

    # =====<Create the SGD algorithm>=====
    sgd = SGD(batch_size=100,
              init_momentum=0.1,
              learning_rate=0.01,
              monitoring_dataset={
                  'valid': train_set,
                  'test': test_set
              },
              cost=SumOfCosts(costs=[
                  MethodCost('cost_from_X'),
                  WeightDecay(coeffs=[0.00005, 0.00005, 0.00005])
              ]),
              termination_criterion=MonitorBased(
                  channel_name='valid_y_misclass', prop_decrease=0.0001, N=5))
    #sgd.setup(model=mlp, dataset=train_set)

    # =====<Extensions>=====
    ext = [MomentumAdjustor(start=1, saturate=10, final_momentum=0.99)]

    # =====<Create Training Object>=====
    save_path = './mlp_model2.pkl'
    train_obj = Train(dataset=train_set,
                      model=mlp,
                      algorithm=sgd,
                      extensions=ext,
                      save_path=save_path,
                      save_freq=0)
    #train_obj.setup_extensions()

    train_obj.main_loop()
Example #7
def test_nesterov_momentum():
    """
    Make sure that learning_rule.Momentum obtains the same parameter values as
    with a hand-crafted sgd w/ momentum implementation, given a dummy model and
    learning rate scaler for each parameter.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(momentum, nesterov_momentum=True),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    vel = [-learning_rate * scale for scale in scales]
    updates = [
        -learning_rate * scale + v * momentum
        for scale, v in izip(scales, vel)
    ]
    manual = [param + update for param, update in izip(manual, updates)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    vel = [
        -learning_rate * scale + i * momentum
        for scale, i in izip(scales, vel)
    ]
    updates = [
        -learning_rate * scale + v * momentum
        for scale, v in izip(scales, vel)
    ]
    manual = [param + update for param, update in izip(manual, updates)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
Example #8
def test_momentum():
    """
    Make sure that learning_rule.Momentum obtains the same parameter values as
    with a hand-crafted sgd w/ momentum implementation, given a dummy model and
    learning rate scaler for each parameter.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(momentum),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for param, scale in zip(manual, scales)]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale + i * momentum
        for param, scale, i in zip(manual, scales, inc)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
Example #9
def test_lr_scalers_momentum():
    """
    Tests that SGD respects Model.get_lr_scalers when using
    momentum.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              init_momentum=momentum,
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for param, scale in zip(manual, scales)]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale + i * momentum
        for param, scale, i in zip(manual, scales, inc)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
Example #10
def prepare_adagrad_test(dataset_type='arange', model_type='random'):
    """
    Factor out common code for AdaGrad tests.
    Parameters
    ----------
    dataset_type : string, optional
        Can use either `arange` to use an ArangeDataset instance or
        `zeros` to create an all-zeros DenseDesignMatrix.
    model_type : string, optional
        How to initialize the model; `random` will initialize parameters
        to random values, `zeros` to zero.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # is applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales, init_type=model_type)
    if dataset_type == 'arange':
        dataset = ArangeDataset(1)
    elif dataset_type == 'zeros':
        X = np.zeros((1, 1))
        X[:, 0] = np.arange(1)
        dataset = DenseDesignMatrix(X)
    else:
        raise ValueError('Unknown value for dataset_type: %s' % dataset_type)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaGrad(),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['sg2'] = np.zeros(param_shape)

    return (cost, model, dataset, sgd, state)
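
A hedged sketch of a consumer of this helper, following the pattern of
test_adagrad above; it assumes an adagrad_manual helper in scope like the one
nested in that test.

def check_adagrad_step():
    # Run one SGD step and compare against the hand-computed AdaGrad step.
    cost, model, dataset, sgd, state = prepare_adagrad_test()
    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(np.allclose(manual_param, sgd_param.get_value())
               for manual_param, sgd_param in izip(manual,
                                                   model.get_params()))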
Example #11
def test_correctness():
    """
    Test that the cost function works with float64
    """
    x_train, y_train, x_valid, y_valid = create_dataset()

    trainset = DenseDesignMatrix(X=np.array(x_train), y=y_train)
    validset = DenseDesignMatrix(X=np.array(x_valid), y=y_valid)

    n_inputs = trainset.X.shape[1]
    n_outputs = 1
    n_hidden = 10

    hidden_istdev = 4 * (6 / float(n_inputs + n_hidden)) ** 0.5
    output_istdev = 4 * (6 / float(n_hidden + n_outputs)) ** 0.5

    model = MLP(layers=[Sigmoid(dim=n_hidden, layer_name='hidden',
                                istdev=hidden_istdev),
                        Sigmoid(dim=n_outputs, layer_name='output',
                                istdev=output_istdev)],
                nvis=n_inputs, seed=[2013, 9, 16])

    termination_criterion = And([EpochCounter(max_epochs=1),
                                 MonitorBased(prop_decrease=1e-7,
                                 N=2)])

    cost = SumOfCosts([(0.99, Default()),
                       (0.01, L1WeightDecay({}))])

    algo = SGD(1e-1,
               update_callbacks=[ExponentialDecay(decay_factor=1.00001,
                                 min_lr=1e-10)],
               cost=cost,
               monitoring_dataset=validset,
               termination_criterion=termination_criterion,
               monitor_iteration_mode='even_shuffled_sequential',
               batch_size=2)

    train = Train(model=model, dataset=trainset, algorithm=algo)
    train.main_loop()
Example #12
def test_fixed_vars():
    """
    A very basic test of the fixed vars interface.
    Checks that the costs' expr and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize dataset with a linear decision boundary
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):
        def expr(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X = data
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(
                self, model, data, unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, data, **kwargs):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}
            rval.data_specs = data_specs

            # The input to function should be a flat, non-redundant tuple
            mapping = DataSpecsMapping(data_specs)
            data_tuple = mapping.flatten(data, return_tuple=True)
            theano_func = function(data_tuple,
                                   updates=[(unsup_counter, unsup_counter + 1)
                                            ])
            # the on_load_batch function will take numerical data formatted
            # as rval.data_specs, so we have to flatten it inside the
            # returned function too.
            # Using default argument binds the variables used in the lambda
            # function to the value they have when the lambda is defined.
            on_load = (lambda batch, mapping=mapping, theano_func=theano_func:
                       theano_func(*mapping.flatten(batch, return_tuple=True)))
            rval.on_load_batch = [on_load]

            return rval

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def expr(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X, Y = data
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars,
                         self).get_gradients(model=model,
                                             data=data,
                                             sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, data):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}
            rval.data_specs = data_specs

            # data has to be flattened into a tuple before being passed
            # to `function`.
            mapping = DataSpecsMapping(data_specs)
            flat_data = mapping.flatten(data, return_tuple=True)
            theano_func = function(flat_data,
                                   updates=[(sup_counter, sup_counter + 1)])
            # the on_load_batch function will take numerical data formatted
            # as rval.data_specs, so we have to flatten it inside the
            # returned function too.
            # Using default argument binds the variables used in the lambda
            # function to the value they have when the lambda is defined.
            on_load = (lambda batch, mapping=mapping, theano_func=theano_func:
                       theano_func(*mapping.flatten(batch, return_tuple=True)))
            rval.on_load_batch = [on_load]
            return rval

        def get_data_specs(self, model):
            space = CompositeSpace(
                (model.get_input_space(), model.get_output_space()))
            source = (model.get_input_source(), model.get_target_source())
            return (space, source)

    cost = SumOfCosts(
        costs=[UnsupervisedCostWithFixedVars(),
               SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost,
                    batch_size=batch_size,
                    conjugate=1,
                    line_search_mode='exhaustive',
                    updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right amount of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right amount of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
Example #13
def test_fixed_vars():
    """
    A very basic test of the fixed vars interface.
    Checks that the costs' expr and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """
    """
    Notes: this test is fairly messy. PL made some change to how
    FixedVarDescr worked. FixedVarDescr got an added data_specs
    field. But BGD itself was never changed to obey this data_specs.
    Somehow these tests passed regardless. It looks like PL just built
    a lot of machinery into the test itself to make the individual
    callbacks reformat data internally. This mechanism required the
    data_specs field to be present. Weirdly, the theano functions
    never actually used any of the data, so their data_specs should
    have just been NullSpace anyway. IG deleted a lot of this useless
    code from these tests but there is still a lot of weird stuff here
    that he has not attempted to clean up.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize dataset with a linear decision boundary
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):
        def expr(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X = data
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(
                self, model, data, unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, data, **kwargs):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}

            # The input to function should be a flat, non-redundant tuple
            mapping = DataSpecsMapping(data_specs)
            data_tuple = mapping.flatten(data, return_tuple=True)
            theano_func = function([],
                                   updates=[(unsup_counter, unsup_counter + 1)
                                            ])

            def on_load(batch, mapping=mapping, theano_func=theano_func):
                return theano_func()

            rval.on_load_batch = [on_load]

            return rval

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def expr(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X, Y = data
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars,
                         self).get_gradients(model=model,
                                             data=data,
                                             sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, data):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}

            theano_func = function([],
                                   updates=[(sup_counter, sup_counter + 1)])

            def on_load(data):
                theano_func()

            rval.on_load_batch = [on_load]
            return rval

        def get_data_specs(self, model):
            space = CompositeSpace(
                (model.get_input_space(), model.get_output_space()))
            source = (model.get_input_source(), model.get_target_source())
            return (space, source)

    cost = SumOfCosts(
        costs=[UnsupervisedCostWithFixedVars(),
               SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost,
                    batch_size=batch_size,
                    conjugate=1,
                    line_search_mode='exhaustive',
                    updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right amount of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right amount of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
Example #14
def test_fixed_vars():
    """
    A very basic test of the fixed vars interface.
    Checks that the costs' __call__ and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize dataset with a linear decision boundary
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):
        def __call__(self, model, X, Y=None, unsup_aux_var=None, **kwargs):
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self,
                          model,
                          X,
                          Y=None,
                          unsup_aux_var=None,
                          **kwargs):
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(
                self, model, X, Y, unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, X, Y, **kwargs):
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}
            Y = T.matrix()
            theano_func = function([X, Y],
                                   updates=[(unsup_counter, unsup_counter + 1)
                                            ])
            rval.on_load_batch = [theano_func]

            return rval

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def __call__(self, model, X, Y=None, sup_aux_var=None, **kwargs):
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, X, Y=None, sup_aux_var=None, **kwargs):
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars,
                         self).get_gradients(model=model,
                                             X=X,
                                             Y=Y,
                                             sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, X, Y=None):
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}
            rval.on_load_batch = [
                function([X, Y], updates=[(sup_counter, sup_counter + 1)])
            ]
            return rval

    cost = SumOfCosts(
        costs=[UnsupervisedCostWithFixedVars(),
               SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost,
                    batch_size=batch_size,
                    conjugate=1,
                    line_search_mode='exhaustive',
                    updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right amount of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right amount of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
Example #15
def supervisedLayerwisePRL(trainset, testset):
    '''
    The supervised layerwise training as used in the PRL paper.

    Parameters
    ----------
    trainset : A path to an hdf5 file created through h5py.
    testset : A path to an hdf5 file created through h5py.
    '''
    batch_size = 100

    # Both train and test h5py files are expected to contain 'topo_view' and
    # 'y' datasets, with 'topo_view' in the 'b01c' format used by pylearn2
    # and 'y' holding the one-hot encoded labels.
    trn = HDF5Dataset(filename=trainset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    tst = HDF5Dataset(filename=testset,
                      topo_view='topo_view',
                      y='y',
                      load_all=False)
    '''
    The 1st Convolution and Pooling Layers are added below.
    '''
    h1 = mlp.ConvRectifiedLinear(layer_name='h1',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    layers = [h1, fc, output]

    mdl = mlp.MLP(layers,
                  input_space=Conv2DSpace(shape=(70, 70), num_channels=1))

    trainer = sgd.SGD(
        learning_rate=0.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(
            costs=[Default(),
                   WeightDecay(coeffs=[0.0005, 0.0005, 0.0005])]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            # Assumption: no separate validation set is built here, so the
            # test set is monitored under the 'valid' channel name that
            # MonitorBasedSaveBest watches.
            'valid': tst
        })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best1.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()

    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best1.pkl')
    mdl = push_monitor(mdl, 'k')
    '''
    The 2nd Convolution and Pooling Layers are added below.
    '''
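    # Layerwise pattern: strip the trained softmax and fully-connected
    # layers off the model, splice in a new conv block, and retrain.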
    h2 = mlp.ConvRectifiedLinear(layer_name='h2',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h2, fc, output])

    trainer = sgd.SGD(learning_rate=0.002,
                      batch_size=batch_size,
                      learning_rule=learning_rule.RMSProp(),
                      cost=SumOfCosts(costs=[
                          Default(),
                          WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005])
                      ]),
                      train_iteration_mode='shuffled_sequential',
                      monitor_iteration_mode='sequential',
                      termination_criterion=EpochCounter(max_epochs=15),
                      monitoring_dataset={
                          'test': tst,
                          'valid': tst
                      })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best2.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()

    del mdl
    mdl = serial.load('./Saved Models/conv_supervised_layerwise_best2.pkl')
    mdl = push_monitor(mdl, 'l')
    '''
    The 3rd Convolution and Pooling Layers are added below.
    '''
    h3 = mlp.ConvRectifiedLinear(layer_name='h3',
                                 output_channels=64,
                                 irange=0.05,
                                 kernel_shape=[4, 4],
                                 pool_shape=[4, 4],
                                 pool_stride=[2, 2],
                                 max_kernel_norm=1.9365)

    fc = mlp.RectifiedLinear(layer_name='fc', dim=1500, irange=0.05)
    output = mlp.Softmax(layer_name='y',
                         n_classes=171,
                         irange=.005,
                         max_col_norm=1.9365)

    del mdl.layers[-1]
    mdl.layer_names.remove('y')
    del mdl.layers[-1]
    mdl.layer_names.remove('fc')
    mdl.add_layers([h3, fc, output])

    trainer = sgd.SGD(
        learning_rate=.002,
        batch_size=batch_size,
        learning_rule=learning_rule.RMSProp(),
        cost=SumOfCosts(costs=[
            Default(),
            WeightDecay(coeffs=[0.0005, 0.0005, 0.0005, 0.0005, 0.0005])
        ]),
        train_iteration_mode='shuffled_sequential',
        monitor_iteration_mode='sequential',
        termination_criterion=EpochCounter(max_epochs=15),
        monitoring_dataset={
            'test': tst,
            'valid': tst
        })

    watcher = best_params.MonitorBasedSaveBest(
        channel_name='valid_y_misclass',
        save_path='./Saved Models/conv_supervised_layerwise_best3.pkl')

    decay = sgd.LinearDecayOverEpoch(start=8, saturate=15, decay_factor=0.1)

    experiment = Train(
        dataset=trn,
        model=mdl,
        algorithm=trainer,
        extensions=[watcher, decay],
    )

    experiment.main_loop()
Example #16
y = Softmax(n_classes = 2,
            layer_name = "y",
            irange = 0.1)

inputSpace = Conv2DSpace(shape = [cropSize,cropSize],
                         num_channels = 3)
  
model = MLP(layers = [h0, h1, y], 
            batch_size = batchSize, 
            input_space = inputSpace)
 
algorithm = SGD(learning_rate = 1E-3, 
                cost = SumOfCosts([
                                   MethodCost("cost_from_X"),
                                   Dropout(default_input_include_prob = 0.25,
                                           default_input_scale = 1.3333)
                                  ]), 
                batch_size = batchSize, 
                monitoring_batch_size = batchSize,
                monitoring_dataset = {'train': train,
                                      'valid':valid}, 
                monitor_iteration_mode = "even_batchwise_shuffled_sequential", 
                termination_criterion = EpochCounter(max_epochs = 200),
                learning_rule = Momentum(init_momentum = 0.0),
                train_iteration_mode = "even_batchwise_shuffled_sequential")
     
train = Train(dataset = train, 
              model = model, 
              algorithm = algorithm, 
              save_path = "ConvNet8.pkl",