Example #1
def test_arangedataset():
    """
    Verify that ArangeDataset can be used with preprocessors.
    """
    preprocessor = RemoveMean()
    dataset = ArangeDataset(1000, preprocessor=preprocessor,
                            fit_preprocessor=True)
    dataset_no_preprocessing = ArangeDataset(1000)
    assert (dataset.get_data() !=
            dataset_no_preprocessing.get_data()).any()
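For intuition, the assertion amounts to the following plain-NumPy check (a minimal sketch, assuming ArangeDataset wraps np.arange as a one-column design matrix and RemoveMean subtracts the per-feature mean; both are assumptions about the pylearn2 helpers, not taken from this file):

import numpy as np

# Sketch of the assertion above: removing the mean shifts every value,
# so the preprocessed data differs from the raw arange data everywhere.
X = np.arange(1000, dtype='float64').reshape(-1, 1)  # assumed ArangeDataset layout
X_centered = X - X.mean(axis=0)                      # assumed RemoveMean behavior
assert (X_centered != X).any()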
Example #2
def test_adadelta():
    """
    Make sure that learning_rule.AdaDelta obtains the same parameter values as
    a hand-crafted AdaDelta implementation, given a dummy model and a
    learning rate scaler for each parameter.

    Reference:
    "AdaDelta: An Adaptive Learning Rate Method", Matthew D. Zeiler.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    decay = 0.95

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaDelta(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)
        state[param]['dx2'] = np.zeros(param_shape)

    def adadelta_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adadelta
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val**2
            rms_g_t = np.sqrt(pstate['g2'] + scale * learning_rate)
            rms_dx_tm1 = np.sqrt(pstate['dx2'] + scale * learning_rate)
            dx_t = -rms_dx_tm1 / rms_g_t * param_val
            pstate['dx2'] = decay * pstate['dx2'] + (1 - decay) * dx_t**2
            rval += [param_val + dx_t]
        return rval

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    manual = adadelta_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
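For reference, the recurrences coded in adadelta_manual above are Zeiler's AdaDelta updates, with the test substituting scale * learning_rate for the paper's \epsilon inside both RMS terms:

\[
E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2, \qquad
\Delta x_t = -\frac{\sqrt{E[\Delta x^2]_{t-1} + \epsilon}}{\sqrt{E[g^2]_t + \epsilon}}\; g_t, \qquad
E[\Delta x^2]_t = \rho\, E[\Delta x^2]_{t-1} + (1-\rho)\, \Delta x_t^2
\]

Here \rho is decay, and because the non-zero cost term is SumOfOneHalfParamsSquared (one half the sum of squared parameters), the gradient g_t equals the current parameter value, which is why param_val stands in for the gradient throughout the manual implementation.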
Example #3
def test_lr_scalers():
    """
    Tests that SGD respects Model.get_lr_scalers
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            super(ModelWithScalers, self).__init__()
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def __call__(self, X):
            # Implemented only so that DummyCost would work
            return X

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()

    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(.0),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    manual = [
        param - learning_rate * scale for param, scale in zip(manual, scales)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale for param, scale in zip(manual, scales)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
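The expected values follow from a short worked step: SumOfParams has gradient 1 with respect to every parameter (the DummyCost term is weighted by zero), so with a per-parameter scaler s the SGD update is

\[
\theta \leftarrow \theta - \eta\, s \cdot 1,
\]

i.e. each epoch subtracts learning_rate * scale from every element, which is exactly what the two manual updates above compute.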
Example #4
def test_lr_scalers_momentum():
    """
    Tests that SGD respects Model.get_lr_scalers when using
    momentum.
    """

    cost = SumOfParams()

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    learning_rate = .001

    class ModelWithScalers(Model):
        def __init__(self):
            super(ModelWithScalers, self).__init__()
            self._params = [sharedX(np.zeros(shape)) for shape in shapes]
            self.input_space = VectorSpace(1)

        def get_lr_scalers(self):
            return dict(zip(self._params, scales))

    model = ModelWithScalers()

    dataset = ArangeDataset(1)

    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              init_momentum=momentum,
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for scale in scales]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale + i * momentum
        for param, scale, i in zip(manual, scales, inc)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
Example #5
def test_adagrad():
    """
    Make sure that learning_rule.AdaGrad obtains the same parameter values as
    a hand-crafted AdaGrad implementation, given a dummy model and a
    learning rate scaler for each parameter.

    Reference:
    "Adaptive subgradient methods for online learning and
    stochastic optimization", Duchi J, Hazan E, Singer Y.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaGrad(),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['sg2'] = np.zeros(param_shape)

    def adagrad_manual(model, state):
        rval = []
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin adagrad
            pstate['sg2'] += param_val**2
            dx_t = -(scale * learning_rate / np.sqrt(pstate['sg2']) *
                     param_val)
            rval += [param_val + dx_t]
        return rval

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    manual = adagrad_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
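The accumulator in adagrad_manual is the per-coordinate AdaGrad rule of Duchi et al., again with g_t = param_val because the active cost term is one half the sum of squared parameters:

\[
G_t = G_{t-1} + g_t^2, \qquad
\Delta x_t = -\frac{s\,\eta}{\sqrt{G_t}}\; g_t
\]

where s is the per-parameter lr scaler and \eta the learning rate.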
Example #6
def test_rmsprop():
    """
    Make sure that learning_rule.RMSProp obtains the same parameter values as
    a hand-crafted RMSProp implementation, given a dummy model and a
    learning rate scaler for each parameter.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    decay = 0.90
    max_scaling = 1e5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=RMSProp(decay),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['g2'] = np.zeros(param_shape)

    def rmsprop_manual(model, state):
        rval = []
        epsilon = 1. / max_scaling
        for scale, param in izip(scales, model.get_params()):
            pstate = state[param]
            param_val = param.get_value()
            # begin rmsprop
            pstate['g2'] = decay * pstate['g2'] + (1 - decay) * param_val**2
            rms_g_t = np.maximum(np.sqrt(pstate['g2']), epsilon)
            dx_t = -scale * learning_rate / rms_g_t * param_val
            rval += [param_val + dx_t]
        return rval

    manual = rmsprop_manual(model, state)
    sgd.train(dataset=dataset)
    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
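rmsprop_manual implements the usual leaky average of squared gradients, with max_scaling capping the effective rescaling via \epsilon = 1 / max_scaling, and g_t again equal to param_val for this cost:

\[
E[g^2]_t = \rho\, E[g^2]_{t-1} + (1-\rho)\, g_t^2, \qquad
\Delta x_t = -\frac{s\,\eta}{\max\!\left(\sqrt{E[g^2]_t},\, \epsilon\right)}\; g_t
\]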
Example #7
def test_nesterov_momentum():
    """
    Make sure that learning_rule.Momentum with nesterov_momentum=True obtains
    the same parameter values as a hand-crafted SGD-with-Nesterov-momentum
    implementation, given a dummy model and a learning rate scaler for each
    parameter.
    """

    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(momentum, nesterov_momentum=True),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    vel = [-learning_rate * scale for scale in scales]
    updates = [
        -learning_rate * scale + v * momentum
        for scale, v in izip(scales, vel)
    ]
    manual = [param + update for param, update in izip(manual, updates)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))

    vel = [
        -learning_rate * scale + i * momentum
        for scale, i in izip(scales, vel)
    ]
    updates = [
        -learning_rate * scale + v * momentum
        for scale, v in izip(scales, vel)
    ]
    manual = [param + update for param, update in izip(manual, updates)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in izip(manual, model.get_params()))
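The two manual steps above unroll the Nesterov form of momentum: the velocity is updated first, and the parameter step then reuses the momentum-scaled new velocity,

\[
v_t = \mu\, v_{t-1} - \eta\, s\, g_t, \qquad
\theta_t = \theta_{t-1} + \mu\, v_t - \eta\, s\, g_t
\]

with g_t = 1 since the active cost term is SumOfParams.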
Example #8
def test_sgd_sequential():

    # tests that requesting train_iteration_mode = 'sequential'
    # works

    dim = 1
    batch_size = 3
    m = 5 * batch_size

    dataset = ArangeDataset(m)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    visited = [False] * m

    def visit(X):
        assert X.shape[1] == 1
        assert np.all(X[1:] == X[0:-1] + 1)
        start = int(X[0, 0])
        if start > 0:
            assert visited[start - 1]
        for i in xrange(batch_size):
            assert not visited[start + i]
            visited[start + i] = 1

    data_specs = (model.get_input_space(), model.get_input_source())
    cost = CallbackCost(visit, data_specs)

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = SGD(learning_rate,
                    cost,
                    batch_size=5,
                    train_iteration_mode='sequential',
                    monitoring_dataset=None,
                    termination_criterion=termination_criterion,
                    update_callbacks=None,
                    init_momentum=None,
                    set_batch_size=False)

    algorithm.setup(dataset=dataset, model=model)

    algorithm.train(dataset)

    assert all(visited)
Example #10
def test_momentum():
    """
    Make sure that learning_rule.Momentum obtains the same parameter values as
    a hand-crafted SGD-with-momentum implementation, given a dummy model and a
    learning rate scaler for each parameter.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=Momentum(momentum),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for scale in scales]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale + i * momentum
        for param, scale, i in zip(manual, scales, inc)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
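These are the classical momentum recurrences, with g_t = 1 for SumOfParams:

\[
v_t = \mu\, v_{t-1} - \eta\, s\, g_t, \qquad
\theta_t = \theta_{t-1} + v_t
\]

The second manual update above is \theta + v_2 written out with v_2 = \mu v_1 - \eta s, where v_1 = inc = -\eta s from the first step.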
Example #11
def test_lr_scalers_momentum():
    """
    Tests that SGD respects Model.get_lr_scalers when using
    momentum.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfParams(), (0., DummyCost())])

    scales = [.01, .02, .05, 1., 5.]
    shapes = [(1, ), (9, ), (8, 7), (6, 5, 4), (3, 2, 2, 2)]

    model = DummyModel(shapes, lr_scalers=scales)
    dataset = ArangeDataset(1)
    learning_rate = .001
    momentum = 0.5

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              init_momentum=momentum,
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    manual = [param.get_value() for param in model.get_params()]
    inc = [-learning_rate * scale for scale in scales]
    manual = [param + i for param, i in zip(manual, inc)]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))

    manual = [
        param - learning_rate * scale + i * momentum
        for param, scale, i in zip(manual, scales, inc)
    ]

    sgd.train(dataset=dataset)

    assert all(
        np.allclose(manual_param, sgd_param.get_value())
        for manual_param, sgd_param in zip(manual, model.get_params()))
Example #12
def prepare_adagrad_test(dataset_type='arange', model_type='random'):
    """
    Factor out common code for AdaGrad tests.

    Parameters
    ----------
    dataset_type : string, optional
        Can use either `arange` to use an ArangeDataset instance or
        `zeros` to create an all-zeros DenseDesignMatrix.
    model_type : string, optional
        How to initialize the model; `random` will initialize parameters
        to random values, `zeros` to zero.
    """
    # We include a cost other than SumOfParams so that data is actually
    # queried from the training set, and the expected number of updates
    # are applied.
    cost = SumOfCosts([SumOfOneHalfParamsSquared(), (0., DummyCost())])
    model = DummyModel(shapes, lr_scalers=scales, init_type=model_type)
    if dataset_type == 'arange':
        dataset = ArangeDataset(1)
    elif dataset_type == 'zeros':
        X = np.zeros((1, 1))
        X[:, 0] = np.arange(1)
        dataset = DenseDesignMatrix(X)
    else:
        raise ValueError('Unknown value for dataset_type: %s' % dataset_type)

    sgd = SGD(cost=cost,
              learning_rate=learning_rate,
              learning_rule=AdaGrad(),
              batch_size=1)

    sgd.setup(model=model, dataset=dataset)

    state = {}
    for param in model.get_params():
        param_shape = param.get_value().shape
        state[param] = {}
        state[param]['sg2'] = np.zeros(param_shape)

    return (cost, model, dataset, sgd, state)
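A caller would unpack the returned tuple and drive the update/comparison loop itself. A hypothetical usage sketch (the test body and the adagrad_manual helper are assumed here for illustration, mirroring the standalone AdaGrad test above):

def test_adagrad_arange():
    cost, model, dataset, sgd, state = prepare_adagrad_test(
        dataset_type='arange', model_type='random')
    # Compute the expected parameters by hand, then let SGD take one step.
    manual = adagrad_manual(model, state)  # assumed helper, as in test_adagrad
    sgd.train(dataset=dataset)
    assert all(np.allclose(m, p.get_value())
               for m, p in izip(manual, model.get_params()))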
Example #13
def test_determinism():

    # Verifies that running SGD twice results in the same examples getting visited
    # in the same order

    for mode in _iteration_schemes:
        dim = 1
        batch_size = 3
        num_batches = 5
        m = num_batches * batch_size

        dataset = ArangeDataset(m)

        model = SoftmaxModel(dim)

        learning_rate = 1e-3
        batch_size = 5

        visited = [[-1] * m]

        def visit(X):
            mx = max(visited[0])
            counter = mx + 1
            for i in X[:, 0]:
                i = int(i)
                assert visited[0][i] == -1
                visited[0][i] = counter
                counter += 1

        data_specs = (model.get_input_space(), model.get_input_source())
        cost = CallbackCost(visit, data_specs)

        # We need to include this so the test actually stops running at some point
        termination_criterion = EpochCounter(5)

        def run_algorithm():
            unsupported_modes = ['random_slice', 'random_uniform']
            algorithm = SGD(learning_rate,
                            cost,
                            batch_size=5,
                            train_iteration_mode=mode,
                            monitoring_dataset=None,
                            termination_criterion=termination_criterion,
                            update_callbacks=None,
                            init_momentum=None,
                            set_batch_size=False)

            algorithm.setup(dataset=dataset, model=model)

            raised = False
            try:
                algorithm.train(dataset)
            except ValueError:
                print mode
                assert mode in unsupported_modes
                raised = True
            if mode in unsupported_modes:
                assert raised
                return True
            return False

        if run_algorithm():
            continue

        visited.insert(0, [-1] * m)

        del model.monitor

        run_algorithm()

        for v in visited:
            assert len(v) == m
            for elem in range(m):
                assert elem in v

        assert len(visited) == 2

        print visited[0]
        print visited[1]
        assert np.all(np.asarray(visited[0]) == np.asarray(visited[1]))
Example #14
def test_revisit():

    # Test that each call to monitor revisits exactly the same data

    BATCH_SIZE = 3
    MAX_BATCH_SIZE = 12
    BATCH_SIZE_STRIDE = 3
    NUM_BATCHES = 10
    num_examples = NUM_BATCHES * BATCH_SIZE

    monitoring_dataset = ArangeDataset(num_examples)

    for mon_batch_size in xrange(BATCH_SIZE, MAX_BATCH_SIZE + 1,
            BATCH_SIZE_STRIDE):
        for num_mon_batches in [ 1, 3, num_examples / mon_batch_size, None ]:
            for mode in sorted(_iteration_schemes):

                if num_mon_batches is None and mode in ['random_uniform', 'random_slice']:
                    continue

                model = DummyModel(1)
                monitor = Monitor.get_monitor(model)

                try:
                    monitor.add_dataset(monitoring_dataset, mode,
                        batch_size=mon_batch_size, num_batches=num_mon_batches)
                except TypeError:
                    monitor.add_dataset(monitoring_dataset, mode,
                        batch_size=mon_batch_size, num_batches=num_mon_batches,
                        seed = 0)

                if num_mon_batches is None:
                    num_mon_batches = int(np.ceil(float(num_examples) / float(mon_batch_size)))

                batches = [ None ] * num_mon_batches
                visited = [ False ] * num_mon_batches

                batch_idx = shared(0)

                class RecorderAndValidator(object):

                    def __init__(self):
                        self.validate = False

                    def __call__(self, *data):
                        """ Initially, records the batches the monitor shows it.
                        When set to validate mode, makes sure the batches shown
                        on the second monitor call match those from the first."""
                        X, = data

                        idx = batch_idx.get_value()
                        batch_idx.set_value(idx + 1)

                        # Note: if the monitor starts supporting variable batch sizes,
                        # take this out. Maybe move it to a new test that the iterator's
                        # uneven property is set accurately
                        warnings.warn("TODO: add unit test that iterators uneven property is set correctly.")
                        # assert X.shape[0] == mon_batch_size

                        if self.validate:
                            previous_batch = batches[idx]
                            assert not visited[idx]
                            visited[idx] = True
                            if not np.allclose(previous_batch, X):
                                print 'Visited different data in batch', idx
                                print previous_batch
                                print X
                                print 'Iteration mode', mode
                                assert False
                        else:
                            batches[idx] = X
                        # end if
                    # end __call__
                #end class

                prereq = RecorderAndValidator()

                monitor.add_channel(name = 'dummy',
                    ipt = model.input_space.make_theano_batch(),
                    val = 0.,
                    prereqs = [ prereq ],
                    data_specs=(model.get_input_space(),
                                model.get_input_source()))

                try:
                    monitor()
                except RuntimeError:
                    print 'monitor raised RuntimeError for iteration mode', mode
                    raise


                assert None not in batches

                batch_idx.set_value(0)
                prereq.validate = True

                monitor()

                assert all(visited)
Example #15
def test_revisit():

    # Test that each call to monitor revisits exactly the same data

    BATCH_SIZE = 3
    MAX_BATCH_SIZE = 12
    BATCH_SIZE_STRIDE = 3
    NUM_BATCHES = 10
    num_examples = NUM_BATCHES * BATCH_SIZE

    monitoring_dataset = ArangeDataset(num_examples)

    for mon_batch_size in xrange(BATCH_SIZE, MAX_BATCH_SIZE + 1,
            BATCH_SIZE_STRIDE):
        for num_mon_batches in [ 1, 3, num_examples / mon_batch_size, None ]:
            for mode in sorted(_iteration_schemes):

                if num_mon_batches is None and mode in ['random_uniform', 'random_slice']:
                    continue

                model = DummyModel(1)
                monitor = Monitor.get_monitor(model)

                try:
                    try:
                        monitor.add_dataset(monitoring_dataset, mode,
                            batch_size=mon_batch_size, num_batches=num_mon_batches)
                    except TypeError:
                        monitor.add_dataset(monitoring_dataset, mode,
                            batch_size=mon_batch_size, num_batches=num_mon_batches,
                            seed = 0)
                except NotImplementedError:
                    # Monitor does not currently support uneven iterators, so skip
                    # uneven iteration modes
                    # Check that this is what caused the error
                    if num_mon_batches is not None and mon_batch_size * num_mon_batches > num_examples:
                        continue
                    if num_mon_batches is None and num_examples % mon_batch_size != 0:
                        continue
                    print num_mon_batches, mon_batch_size, num_examples, mode
                    raise

                if num_mon_batches is None:
                    num_mon_batches = num_examples / mon_batch_size

                batches = [ None ] * num_mon_batches
                visited = [ False ] * num_mon_batches

                batch_idx = shared(0)

                class RecorderAndValidator:

                    def __init__(self):
                        self.validate = False

                    def __call__(self, X, y):
                        """ Initially, records the batches the monitor shows it.
                        When set to validate mode, makes sure the batches shown
                        on the second monitor call match those from the first."""
                        assert y is None

                        idx = batch_idx.get_value()
                        batch_idx.set_value(idx + 1)

                        # Note: if the monitor starts supporting variable batch sizes,
                        # take this out. Maybe move it to a new test that the iterator's
                        # uneven property is set accurately
                        assert X.shape[0] == mon_batch_size

                        if self.validate:
                            previous_batch = batches[idx]
                            assert not visited[idx]
                            visited[idx] = True
                            if not np.allclose(previous_batch, X):
                                print 'Visited different data in batch', idx
                                print previous_batch
                                print X
                                print 'Iteration mode', mode
                                assert False
                        else:
                            batches[idx] = X
                        # end if
                    # end __call__
                #end class

                prereq = RecorderAndValidator()

                monitor.add_channel(name = 'dummy',
                    ipt = model.input_space.make_theano_batch(),
                    val = 0.,
                    prereqs = [ prereq ])

                monitor()

                assert None not in batches

                batch_idx.set_value(0)
                prereq.validate = True

                monitor()

                assert all(visited)