Example #1
def test_bgd_unsup():

    # Tests that we can run the BGD algorithm on an unsupervised cost.
    # Does not test for correctness at all, just that the algorithm
    # runs without dying.

    dim = 3
    m = 10

    rng = np.random.RandomState([25, 9, 2012])

    X = rng.randn(m, dim)

    dataset = DenseDesignMatrix(X=X)

    m = 15
    X = rng.randn(m, dim)

    # including a monitoring dataset lets us test that
    # the monitor works with unsupervised data
    monitoring_dataset = DenseDesignMatrix(X=X)

    model = SoftmaxModel(dim)

    learning_rate = 1e-3
    batch_size = 5

    class DummyCost(Cost):
        def expr(self, model, data):
            self.get_data_specs(model)[0].validate(data)
            X = data
            return T.square(model(X) - X).mean()

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    cost = DummyCost()

    # We need to include this so the test actually stops running at some point
    termination_criterion = EpochCounter(5)

    algorithm = BGD(cost,
                    batch_size=batch_size,
                    monitoring_batches=2,
                    monitoring_dataset=monitoring_dataset,
                    termination_criterion=termination_criterion)

    train = Train(dataset,
                  model,
                  algorithm,
                  save_path=None,
                  save_freq=0,
                  extensions=None)

    train.main_loop()
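
For contrast with the unsupervised DummyCost above, a supervised cost would declare a composite data_specs so that BGD feeds it both inputs and targets. The following is only a sketch of the Cost side: the class name is illustrative, CompositeSpace is assumed to be imported from pylearn2.space, and whether the SoftmaxModel test helper defines a matching output space is not shown above.

class DummySupervisedCost(Cost):
    """Illustrative supervised counterpart of DummyCost (sketch only)."""

    supervised = True

    def expr(self, model, data, **kwargs):
        self.get_data_specs(model)[0].validate(data)
        X, Y = data
        # Penalize the squared difference between the model output and Y.
        return T.square(model(X) - Y).mean()

    def get_data_specs(self, model):
        space = CompositeSpace((model.get_input_space(),
                                model.get_output_space()))
        source = (model.get_input_source(), model.get_target_source())
        return (space, source)

In a real run the DenseDesignMatrix instances above would also need a y argument so that the 'targets' source can actually be served.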
Example #2
def loadAlgo(self, p_algo):
    # setup algo
    #print self.DataLoader.data
    if p_algo.algo_type == 0:
        self.algo = SGD(learning_rate=p_algo.learning_rate,
                        cost=p_algo.cost,
                        batch_size=p_algo.batch_size,
                        monitoring_batches=p_algo.monitoring_batches,
                        monitoring_dataset=p_algo.monitoring_dataset,
                        monitor_iteration_mode=p_algo.monitor_iteration_mode,
                        termination_criterion=p_algo.termination_criterion,
                        update_callbacks=p_algo.update_callbacks,
                        learning_rule=p_algo.learning_rule,
                        init_momentum=p_algo.init_momentum,
                        set_batch_size=p_algo.set_batch_size,
                        train_iteration_mode=p_algo.train_iteration_mode,
                        batches_per_iter=p_algo.batches_per_iter,
                        theano_function_mode=p_algo.theano_function_mode,
                        monitoring_costs=p_algo.monitoring_costs,
                        seed=p_algo.seed)
    elif p_algo.algo_type == 1:
        self.algo = BGD(cost=p_algo.cost,
                        batch_size=p_algo.batch_size,
                        batches_per_iter=p_algo.batches_per_iter,
                        updates_per_batch=p_algo.updates_per_batch,
                        monitoring_batches=p_algo.monitoring_batches,
                        monitoring_dataset=p_algo.monitoring_dataset,
                        termination_criterion=p_algo.termination_criterion,
                        set_batch_size=p_algo.set_batch_size,
                        reset_alpha=p_algo.reset_alpha,
                        conjugate=p_algo.conjugate,
                        min_init_alpha=p_algo.min_init_alpha,
                        reset_conjugate=p_algo.reset_conjugate,
                        line_search_mode=p_algo.line_search_mode,
                        verbose_optimization=p_algo.verbose_optimization,
                        scale_step=p_algo.scale_step,
                        theano_function_mode=p_algo.theano_function_mode,
                        init_alpha=p_algo.init_alpha,
                        seed=p_algo.seed)
    self.algo.setup(self.model, self.DataLoader.data['train'])
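
The p_algo argument is just a parameter container with one attribute per keyword forwarded above. A minimal illustrative configuration for the BGD branch might look like the sketch below; the AlgoParams container and every value are placeholders chosen for illustration, not library defaults, and EpochCounter is assumed to come from pylearn2.termination_criteria as in the tests above.

from pylearn2.termination_criteria import EpochCounter

class AlgoParams(object):
    """Placeholder parameter container for loadAlgo (illustrative only)."""
    pass

p_algo = AlgoParams()
p_algo.algo_type = 1                    # 1 selects the BGD branch above
p_algo.cost = None                      # replace with a pylearn2 Cost instance
p_algo.batch_size = 100
p_algo.batches_per_iter = None
p_algo.updates_per_batch = 10
p_algo.monitoring_batches = None
p_algo.monitoring_dataset = None
p_algo.termination_criterion = EpochCounter(10)
p_algo.set_batch_size = False
p_algo.reset_alpha = True
p_algo.conjugate = True
p_algo.min_init_alpha = 1e-3
p_algo.reset_conjugate = True
p_algo.line_search_mode = 'exhaustive'
p_algo.verbose_optimization = False
p_algo.scale_step = 1.0
p_algo.theano_function_mode = None
p_algo.init_alpha = None
p_algo.seed = None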
Example #3
def test_fixed_vars():

    """
    A very basic test of the fixed vars interface.
    Checks that the costs' expr and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """

    """
    Notes: this test is fairly messy. PL made some change to how
    FixedVarDescr worked. FixedVarDescr got an added data_specs
    field. But BGD itself was never changed to obey this data_specs.
    Somehow these tests passed regardless. It looks like PL just built
    a lot of machinery into the test itself to make the individual
    callbacks reformat data internally. This mechanism required the
    data_specs field to be present. Weirdly, the theano functions
    never actually used any of the data, so their data_specs should
    have just been NullSpace anyway. IG deleted a lot of this useless
    code from these tests but there is still a lot of weird stuff here
    that he has not attempted to clean up.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize a random dataset (w is not actually used below: the
    # targets are random rather than defined by a linear decision boundary)
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):

        def expr(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X = data
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(self, model, data,
                    unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, data, **kwargs):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}

            # The input to function should be a flat, non-redundant tuple
            mapping = DataSpecsMapping(data_specs)
            data_tuple = mapping.flatten(data, return_tuple=True)
            theano_func = function([],
                    updates=[(unsup_counter, unsup_counter + 1)])
            def on_load(batch, mapping=mapping, theano_func=theano_func):
                return theano_func()
            rval.on_load_batch = [on_load]

            return rval

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def expr(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X, Y = data
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars, self).get_gradients(
                    model=model, data=data, sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, data):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}

            theano_func = function([], updates=[(sup_counter,
                sup_counter + 1)])
            def on_load(data):
                theano_func()
            rval.on_load_batch = [on_load]
            return rval

        def get_data_specs(self, model):
            space = CompositeSpace((model.get_input_space(),
                                   model.get_output_space()))
            source = (model.get_input_source(), model.get_target_source())
            return (space, source)

    cost = SumOfCosts(costs=[UnsupervisedCostWithFixedVars(),
                             SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost, batch_size=batch_size,
            conjugate=1, line_search_mode='exhaustive',
            updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right number of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right number of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
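
Distilled from the two cost classes above, the contract that BGD consumes is small: get_fixed_var_descr returns a FixedVarDescr whose fixed_vars dict supplies extra keyword arguments to expr/get_gradients, and whose on_load_batch list holds callbacks the algorithm invokes once per batch. A minimal unsupervised sketch, assuming the same imports and the SoftmaxModel test helper used above (the names aux_counter, aux_var and MinimalFixedVarCost are illustrative):

aux_counter = shared(0)

class MinimalFixedVarCost(Cost):
    """Sketch of a small Cost exercising the fixed-vars hooks."""

    def expr(self, model, data, aux_var=None, **kwargs):
        self.get_data_specs(model)[0].validate(data)
        X = data
        return (model.P * X).sum()

    def get_fixed_var_descr(self, model, data, **kwargs):
        rval = FixedVarDescr()
        # 'aux_var' becomes a keyword argument of expr/get_gradients.
        rval.fixed_vars = {'aux_var': aux_counter}
        # Called once for every batch the training algorithm loads.
        bump = function([], updates=[(aux_counter, aux_counter + 1)])
        rval.on_load_batch = [lambda batch: bump()]
        return rval

    def get_data_specs(self, model):
        return (model.get_input_space(), model.get_input_source())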
Example #4
def test_fixed_vars():
    """
    A very basic test of the fixed vars interface.
    Checks that the costs' expr and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize a random dataset (w is not actually used below: the
    # targets are random rather than defined by a linear decision boundary)
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):
        def expr(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X = data
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(
                self, model, data, unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, data, **kwargs):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}
            rval.data_specs = data_specs

            # The input to function should be a flat, non-redundant tuple
            mapping = DataSpecsMapping(data_specs)
            data_tuple = mapping.flatten(data, return_tuple=True)
            theano_func = function(data_tuple,
                                   updates=[(unsup_counter, unsup_counter + 1)
                                            ])
            # the on_load_batch function will take numerical data formatted
            # as rval.data_specs, so we have to flatten it inside the
            # returned function too.
            # Using default argument binds the variables used in the lambda
            # function to the value they have when the lambda is defined.
            on_load = (lambda batch, mapping=mapping, theano_func=theano_func:
                       theano_func(*mapping.flatten(batch, return_tuple=True)))
            rval.on_load_batch = [on_load]

            return rval

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def expr(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X, Y = data
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars,
                         self).get_gradients(model=model,
                                             data=data,
                                             sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, data):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}
            rval.data_specs = data_specs

            # data has to be flattened into a tuple before being passed
            # to `function`.
            mapping = DataSpecsMapping(data_specs)
            flat_data = mapping.flatten(data, return_tuple=True)
            theano_func = function(flat_data,
                                   updates=[(sup_counter, sup_counter + 1)])
            # the on_load_batch function will take numerical data formatted
            # as rval.data_specs, so we have to flatten it inside the
            # returned function too.
            # Using default argument binds the variables used in the lambda
            # function to the value they have when the lambda is defined.
            on_load = (lambda batch, mapping=mapping, theano_func=theano_func:
                       theano_func(*mapping.flatten(batch, return_tuple=True)))
            rval.on_load_batch = [on_load]
            return rval

        def get_data_specs(self, model):
            space = CompositeSpace(
                (model.get_input_space(), model.get_output_space()))
            source = (model.get_input_source(), model.get_target_source())
            return (space, source)

    cost = SumOfCosts(
        costs=[UnsupervisedCostWithFixedVars(),
               SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost,
                    batch_size=batch_size,
                    conjugate=1,
                    line_search_mode='exhaustive',
                    updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right number of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right number of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
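
Example #4 differs from Example #3 mainly in that it sets FixedVarDescr.data_specs and flattens each nested batch before handing it to the compiled function. A stand-alone illustration of that flattening step, under the assumption that DataSpecsMapping lives in pylearn2.utils.data_specs and VectorSpace/CompositeSpace in pylearn2.space (as in the pylearn2 versions these tests come from):

import numpy as np
from pylearn2.space import CompositeSpace, VectorSpace
from pylearn2.utils.data_specs import DataSpecsMapping

space = CompositeSpace((VectorSpace(2), VectorSpace(2)))
source = ('features', 'targets')
mapping = DataSpecsMapping((space, source))

X_batch = np.random.randn(5, 2)
y_batch = np.random.randn(5, 2)
# A nested (features, targets) batch becomes a flat tuple that can be
# passed positionally, e.g. theano_func(*flat_batch).
flat_batch = mapping.flatten((X_batch, y_batch), return_tuple=True)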
Example #5
    def run_bgd(mode):
        # Must be seeded the same both times run_bgd is called
        disturb_mem.disturb_mem()
        rng = np.random.RandomState([2012, 11, 27, 8])

        batch_size = 5
        train_batches = 3
        valid_batches = 4
        num_features = 2

        # Synthesize dataset with a linear decision boundary
        w = rng.randn(num_features)

        def make_dataset(num_batches):
            disturb_mem.disturb_mem()
            m = num_batches * batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m, 1))
            y[:, 0] = np.dot(X, w) > 0.

            rval = DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = ""  # suppress no yaml_src warning

            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval

        train = make_dataset(train_batches)
        valid = make_dataset(valid_batches)

        num_chunks = 10
        chunk_width = 2

        class ManyParamsModel(Model):
            """
            Make a model with lots of parameters, so that there are many
            opportunities for their updates to get accidentally re-ordered
            non-deterministically. This makes non-determinism bugs manifest
            more frequently.
            """
            def __init__(self):
                self.W1 = [
                    sharedX(rng.randn(num_features, chunk_width))
                    for i in xrange(num_chunks)
                ]
                disturb_mem.disturb_mem()
                self.W2 = [
                    sharedX(rng.randn(chunk_width)) for i in xrange(num_chunks)
                ]
                self._params = safe_union(self.W1, self.W2)
                self.input_space = VectorSpace(num_features)
                self.output_space = VectorSpace(1)

        disturb_mem.disturb_mem()
        model = ManyParamsModel()
        disturb_mem.disturb_mem()

        class LotsOfSummingCost(Cost):
            """
            Make a cost whose gradient on the parameters involves summing many terms together,
            so that T.grad is more likely to sum things in a random order.
            """

            supervised = True

            def expr(self, model, data, **kwargs):
                self.get_data_specs(model)[0].validate(data)
                X, Y = data
                disturb_mem.disturb_mem()

                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(
                    mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred - Y[:, 0]).sum()

            def get_data_specs(self, model):
                data = CompositeSpace(
                    (model.get_input_space(), model.get_output_space()))
                source = (model.get_input_source(), model.get_target_source())
                return (data, source)

        cost = LotsOfSummingCost()

        disturb_mem.disturb_mem()

        algorithm = BGD(cost=cost,
                        batch_size=batch_size,
                        updates_per_batch=5,
                        scale_step=.5,
                        conjugate=1,
                        reset_conjugate=0,
                        monitoring_dataset={
                            'train': train,
                            'valid': valid
                        },
                        termination_criterion=EpochCounter(max_epochs=5))

        disturb_mem.disturb_mem()

        train_object = Train(dataset=train,
                             model=model,
                             algorithm=algorithm,
                             save_freq=0)

        disturb_mem.disturb_mem()

        train_object.main_loop()
Example #6
def test_fixed_vars():
    """
    A very basic test of the fixed vars interface.
    Checks that the costs' expr and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """
    """
    Notes: this test is fairly messy. PL made some change to how
    FixedVarDescr worked. FixedVarDescr got an added data_specs
    field. But BGD itself was never changed to obey this data_specs.
    Somehow these tests passed regardless. It looks like PL just built
    a lot of machinery into the test itself to make the individual
    callbacks reformat data internally. This mechanism required the
    data_specs field to be present. Weirdly, the theano functions
    never actually used any of the data, so their data_specs should
    have just been NullSpace anyway. IG deleted a lot of this useless
    code from these tests but there is still a lot of weird stuff here
    that he has not attempted to clean up.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize a random dataset (w is not actually used below: the
    # targets are random rather than defined by a linear decision boundary)
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):
        def expr(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X = data
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(
                self, model, data, unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, data, **kwargs):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}

            # The input to function should be a flat, non-redundant tuple
            mapping = DataSpecsMapping(data_specs)
            data_tuple = mapping.flatten(data, return_tuple=True)
            theano_func = function([],
                                   updates=[(unsup_counter, unsup_counter + 1)
                                            ])

            def on_load(batch, mapping=mapping, theano_func=theano_func):
                return theano_func()

            rval.on_load_batch = [on_load]

            return rval

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def expr(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X, Y = data
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars,
                         self).get_gradients(model=model,
                                             data=data,
                                             sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, data):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}

            theano_func = function([],
                                   updates=[(sup_counter, sup_counter + 1)])

            def on_load(data):
                theano_func()

            rval.on_load_batch = [on_load]
            return rval

        def get_data_specs(self, model):
            space = CompositeSpace(
                (model.get_input_space(), model.get_output_space()))
            source = (model.get_input_source(), model.get_target_source())
            return (space, source)

    cost = SumOfCosts(
        costs=[UnsupervisedCostWithFixedVars(),
               SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost,
                    batch_size=batch_size,
                    conjugate=1,
                    line_search_mode='exhaustive',
                    updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right number of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right number of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
Example #7
def test_fixed_vars():
    """
    A very basic test of the fixed vars interface.
    Checks that the costs' __call__ and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize a random dataset (w is not actually used below: the
    # targets are random rather than defined by a linear decision boundary)
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):
        def __call__(self, model, X, Y=None, unsup_aux_var=None, **kwargs):
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self,
                          model,
                          X,
                          Y=None,
                          unsup_aux_var=None,
                          **kwargs):
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(
                self, model, X, Y, unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, X, Y, **kwargs):
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}
            Y = T.matrix()
            theano_func = function([X, Y],
                                   updates=[(unsup_counter, unsup_counter + 1)
                                            ])
            rval.on_load_batch = [theano_func]

            return rval

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def __call__(self, model, X, Y=None, sup_aux_var=None, **kwargs):
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, X, Y=None, sup_aux_var=None, **kwargs):
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars,
                         self).get_gradients(model=model,
                                             X=X,
                                             Y=Y,
                                             sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, X, Y=None):
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}
            rval.on_load_batch = [
                function([X, Y], updates=[(sup_counter, sup_counter + 1)])
            ]
            return rval

    cost = SumOfCosts(
        costs=[UnsupervisedCostWithFixedVars(),
               SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost,
                    batch_size=batch_size,
                    conjugate=1,
                    line_search_mode='exhaustive',
                    updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right number of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right number of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
Example #8
def test_fixed_vars():

    """
    A very basic test of the fixed vars interface.
    Checks that the costs' __call__ and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize a random dataset (w is not actually used below: the
    # targets are random rather than defined by a linear decision boundary)
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):

        def __call__(self, model, X, Y=None, unsup_aux_var=None, **kwargs):
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self, model, X, Y=None, unsup_aux_var=None, **kwargs):
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(self, model, X, Y, unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, X, Y, **kwargs):
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}
            Y = T.matrix()
            theano_func = function([X, Y], updates=[(unsup_counter, unsup_counter + 1)])
            rval.on_load_batch = [theano_func]

            return rval

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def __call__(self, model, X, Y=None, sup_aux_var=None, **kwargs):
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, X, Y=None, sup_aux_var=None, **kwargs):
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars, self).get_gradients(model=model, X=X, Y=Y, sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, X, Y=None):
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}
            rval.on_load_batch = [
                function([X, Y], updates=[(sup_counter, sup_counter + 1)])]
            return rval

    cost = SumOfCosts(costs=[UnsupervisedCostWithFixedVars(), SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost, batch_size=batch_size, conjugate=1, line_search_mode='exhaustive',
            updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right number of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right number of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
Example #9
def test_fixed_vars():

    """
    A very basic test of the fixed vars interface.
    Checks that the costs' expr and get_gradients methods
    are called with the right parameters and that the updates
    functions are called the right number of times.
    """

    rng = np.random.RandomState([2012, 11, 27, 9])

    batch_size = 5
    updates_per_batch = 4
    train_batches = 3
    num_features = 2

    # Synthesize a random dataset (w is not actually used below: the
    # targets are random rather than defined by a linear decision boundary)
    w = rng.randn(num_features)

    def make_dataset(num_batches):
        m = num_batches * batch_size
        X = rng.randn(m, num_features)
        y = rng.randn(m, num_features)

        rval = DenseDesignMatrix(X=X, y=y)

        rval.yaml_src = ""  # suppress no yaml_src warning

        return rval

    train = make_dataset(train_batches)

    model = SoftmaxModel(num_features)

    unsup_counter = shared(0)
    grad_counter = shared(0)

    called = [False, False, False, False]

    class UnsupervisedCostWithFixedVars(Cost):

        def expr(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X = data
            assert unsup_aux_var is unsup_counter
            called[0] = True
            return (model.P * X).sum()

        def get_gradients(self, model, data, unsup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert unsup_aux_var is unsup_counter
            called[1] = True
            gradients, updates = Cost.get_gradients(self, model, data, unsup_aux_var=unsup_aux_var)
            updates[grad_counter] = grad_counter + 1
            return gradients, updates

        def get_fixed_var_descr(self, model, data, **kwargs):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'unsup_aux_var': unsup_counter}
            rval.data_specs = data_specs

            # The input to function should be a flat, non-redundant tuple
            mapping = DataSpecsMapping(data_specs)
            data_tuple = mapping.flatten(data, return_tuple=True)
            theano_func = function(data_tuple,
                    updates=[(unsup_counter, unsup_counter + 1)])
            # the on_load_batch function will take numerical data formatted
            # as rval.data_specs, so we have to flatten it inside the
            # returned function too.
            # Using default argument binds the variables used in the lambda
            # function to the value they have when the lambda is defined.
            on_load = (lambda batch, mapping=mapping, theano_func=theano_func:
                    theano_func(*mapping.flatten(batch, return_tuple=True)))
            rval.on_load_batch = [on_load]

            return rval

        def get_data_specs(self, model):
            return (model.get_input_space(), model.get_input_source())

    sup_counter = shared(0)

    class SupervisedCostWithFixedVars(Cost):

        supervised = True

        def expr(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            X, Y = data
            assert sup_aux_var is sup_counter
            called[2] = True
            return (model.P * X * Y).sum()

        def get_gradients(self, model, data, sup_aux_var=None, **kwargs):
            self.get_data_specs(model)[0].validate(data)
            assert sup_aux_var is sup_counter
            called[3] = True
            return super(SupervisedCostWithFixedVars, self).get_gradients(
                    model=model, data=data, sup_aux_var=sup_aux_var)

        def get_fixed_var_descr(self, model, data):
            data_specs = self.get_data_specs(model)
            data_specs[0].validate(data)
            rval = FixedVarDescr()
            rval.fixed_vars = {'sup_aux_var': sup_counter}
            rval.data_specs = data_specs

            # data has to be flattened into a tuple before being passed
            # to `function`.
            mapping = DataSpecsMapping(data_specs)
            flat_data = mapping.flatten(data, return_tuple=True)
            theano_func = function(flat_data,
                                 updates=[(sup_counter, sup_counter + 1)])
            # the on_load_batch function will take numerical data formatted
            # as rval.data_specs, so we have to flatten it inside the
            # returned function too.
            # Using default argument binds the variables used in the lambda
            # function to the value they have when the lambda is defined.
            on_load = (lambda batch, mapping=mapping, theano_func=theano_func:
                    theano_func(*mapping.flatten(batch, return_tuple=True)))
            rval.on_load_batch = [on_load]
            return rval

        def get_data_specs(self, model):
            space = CompositeSpace((model.get_input_space(),
                                   model.get_output_space()))
            source = (model.get_input_source(), model.get_target_source())
            return (space, source)

    cost = SumOfCosts(costs=[UnsupervisedCostWithFixedVars(),
                             SupervisedCostWithFixedVars()])

    algorithm = BGD(cost=cost, batch_size=batch_size,
            conjugate=1, line_search_mode='exhaustive',
            updates_per_batch=updates_per_batch)

    algorithm.setup(model=model, dataset=train)

    # Make sure all the right methods were used to compute the updates
    assert all(called)

    algorithm.train(dataset=train)

    # Make sure the load_batch callbacks were called the right number of times
    assert unsup_counter.get_value() == train_batches
    assert sup_counter.get_value() == train_batches

    # Make sure the gradient updates were run the right number of times
    assert grad_counter.get_value() == train_batches * updates_per_batch
Example #10
from pylearn2.training_algorithms.bgd import BGD
from pylearn2.devtools.record import RecordMode
allocate_random()
from pylearn2.costs.cost import Cost

class DummyCost(Cost):
    supervised = True
    def __call__(self, model, X, Y, **kwargs):
        return sum([x.sum() for x in (model.get_params()+[X, Y])])


algorithm = BGD(theano_function_mode=RecordMode(
                    path='nondeterminism_2_record.txt',
                    replay=replay),
                line_search_mode='exhaustive',
                batch_size=100,
                set_batch_size=1,
                updates_per_batch=1,
                reset_alpha=0,
                conjugate=1,
                reset_conjugate=0,
                cost=DummyCost())

algorithm.setup(model=model, dataset=None)
algorithm.optimizer._cache_values()
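
Note that replay and model are defined earlier in the original script and are not shown here. RecordMode is meant to be exercised twice against the same record file: once with replay=False to write nondeterminism_2_record.txt, and once with replay=True so that any divergence in the recorded sequence of Theano computations is reported. A purely hypothetical driver (run_trial is an assumed wrapper around the block above, not part of the original script):

for replay in (False, True):
    run_trial(replay)  # assumed wrapper that executes the block above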

Example #11
model = DummyModel()

from pylearn2.training_algorithms.bgd import BGD
from pylearn2.devtools.record import RecordMode
allocate_random()
from pylearn2.costs.cost import Cost


class DummyCost(Cost):
    supervised = True

    def __call__(self, model, X, Y, **kwargs):
        # The constant below short-circuits the cost; the parameter-sum
        # expression after it is unreachable (apparently left in while
        # narrowing down the nondeterminism).
        return sharedX(0.)
        return sum([x.sum() for x in (model.get_params() + [X, Y])])


algorithm = BGD(theano_function_mode=RecordMode(
                    path='nondeterminism_2_record.txt', replay=replay),
                conjugate=1,
                batch_size=100,
                cost=DummyCost())

algorithm.setup(model=model, dataset=None)
algorithm.optimizer._cache_values()
Example #12
class DBL_model(object):
    def __init__(self, algo_id, model_id, num_epoch, num_dim, train_id, test_id):
        self.algo_id = algo_id
        self.model_id = model_id
        self.num_epoch = num_epoch
        self.num_dim = num_dim
        self.train_id = train_id
        self.test_id = test_id

        self.path_train = None
        self.path_test = None
        self.p_data = None
        self.batch_size = None
        self.do_savew = True

        self.param = paramSet()
        self.p_monitor = {}
    def loadData(self,basepath,which_set,data_ind=None):
        self.DataLoader.loadData(self.p_data,basepath,which_set,data_ind)
            
    def loadWeight(self, fname):
        # load saved parameters and rebuild the model weights
        if fname[-3:] == 'pkl':
            layer_params = cPickle.load(open(fname))
        elif fname[-3:] == 'mat':
            mat = scipy.io.loadmat(fname)            
            layer_params = mat['param']            
        else:
            raise ValueError('cannot recognize: ' + fname)

        layer_id = 0
        num_layers = len(self.model.layers)
        for layer in self.model.layers:
            # squeeze for matlab structure
            #aa=layer.get_params();print aa[0].shape,aa[1].shape
            dims =[np.squeeze(layer_params[layer_id][k]).ndim for k in [0,1]]
            if fname[-3:] == 'mat':
                for id in [0,1]:
                    if dims[id] ==0:
                        layer_params[layer_id][id] = layer_params[layer_id][id][0]
                    
            if dims[0]>=dims[1]:
                layer.set_weights(layer_params[layer_id][0])
                layer.set_biases(layer_params[layer_id][1])
                #tmp = np.squeeze(layer_params[layer_id][1])                
            else:
                layer.set_weights(layer_params[layer_id][1])
                layer.set_biases(layer_params[layer_id][0])
                #tmp = np.squeeze(layer_params[layer_id][0])
            #print "aa:",layer_params[layer_id][1].shape,layer_params[layer_id][0].shape
            #print "sss:",layer_params[layer_id][1][:10]
            #print "ttt:",layer_params[layer_id][0][0]
            layer_id = layer_id + 1                            

    def saveWeight(self,pklname):                
        # save the model
        layer_params = []
        for layer in self.model.layers:
            param = layer.get_params()      
            #print param
            #print param[0].get_value().shape
            #print param[1].get_value().shape
            layer_params.append([param[0].get_value(), param[1].get_value()])
            
        cPickle.dump(layer_params, open(pklname, 'wb'))
    
    def loadAlgo(self, p_algo):
        # setup algo
        #print self.DataLoader.data
        if p_algo.algo_type == 0:
            self.algo = SGD(learning_rate=p_algo.learning_rate,
                            cost=p_algo.cost,
                            batch_size=p_algo.batch_size,
                            monitoring_batches=p_algo.monitoring_batches,
                            monitoring_dataset=p_algo.monitoring_dataset,
                            monitor_iteration_mode=p_algo.monitor_iteration_mode,
                            termination_criterion=p_algo.termination_criterion,
                            update_callbacks=p_algo.update_callbacks,
                            learning_rule=p_algo.learning_rule,
                            init_momentum=p_algo.init_momentum,
                            set_batch_size=p_algo.set_batch_size,
                            train_iteration_mode=p_algo.train_iteration_mode,
                            batches_per_iter=p_algo.batches_per_iter,
                            theano_function_mode=p_algo.theano_function_mode,
                            monitoring_costs=p_algo.monitoring_costs,
                            seed=p_algo.seed)
        elif p_algo.algo_type == 1:
            self.algo = BGD(cost=p_algo.cost,
                            batch_size=p_algo.batch_size,
                            batches_per_iter=p_algo.batches_per_iter,
                            updates_per_batch=p_algo.updates_per_batch,
                            monitoring_batches=p_algo.monitoring_batches,
                            monitoring_dataset=p_algo.monitoring_dataset,
                            termination_criterion=p_algo.termination_criterion,
                            set_batch_size=p_algo.set_batch_size,
                            reset_alpha=p_algo.reset_alpha,
                            conjugate=p_algo.conjugate,
                            min_init_alpha=p_algo.min_init_alpha,
                            reset_conjugate=p_algo.reset_conjugate,
                            line_search_mode=p_algo.line_search_mode,
                            verbose_optimization=p_algo.verbose_optimization,
                            scale_step=p_algo.scale_step,
                            theano_function_mode=p_algo.theano_function_mode,
                            init_alpha=p_algo.init_alpha,
                            seed=p_algo.seed)
        self.algo.setup(self.model, self.DataLoader.data['train'])

    def setup(self):
        self.setupParam()
        self.check_setupParam()

        self.dl_id = str(self.algo_id)+'_'+str(self.model_id)+'_'+str(self.num_dim).strip('[]').replace(', ','_')+'_'+str(self.train_id)+'_'+str(self.num_epoch)
        self.param_pkl = 'dl_p'+self.dl_id+'.pkl'
        self.result_mat = 'result/'+self.dl_id+'/dl_r'+str(self.test_id)+'.mat'
        self.buildModel()
        self.buildLayer()                
        
        self.DataLoader = DBL_Data()
        self.do_test = True
        print self.param_pkl
        if not os.path.exists(self.param_pkl):
            self.do_test = False
            # training
            self.loadData_train()
            self.buildAlgo()


    def setupParam(self):
        raise NotImplementedError(str(type(self)) + " does not implement: setupParam().")
    def check_setupParam(self):
        varnames = ['path_train','path_test','p_data','batch_size']
        for varname in varnames:
            if getattr(self, varname) is None:
                raise ValueError('Need to set "'+varname+'" in setupParam()')
    def buildModel(self):
        raise NotImplementedError(str(type(self)) + " does not implement: buildModel().")
    def buildAlgo(self):
        raise NotImplementedError(str(type(self)) + " does not implement: buildAlgo().")
    def train(self):
        raise NotImplementedError(str(type(self)) + " does not implement: train().")
    def test(self):
        raise NotImplementedError(str(type(self)) + " does not implement: test().")
    def loadData_train(self):
        raise NotImplementedError(str(type(self)) + " does not implement: loadData_train().")
    def run(self):
        if self.do_test:
            self.test()
        else:
            # training
            self.train()
    
    def buildLayer(self):    
        # setup layer
        self.layers = []
        for param in self.p_layers:            
            if param[0].param_type==0:
                self.layers = self.layers + DBL_ConvLayers(param)
            elif param[0].param_type==1:
                self.layers = self.layers + DBL_FcLayers(param)
            elif param[0].param_type==2:
                self.layers = self.layers + DBL_CfLayers(param)        
        self.model = MLP(self.layers, input_space=self.ishape)

        # load available weight
        pre_dl_id = self.param_pkl[:self.param_pkl.rfind('_')+1]
        fns = glob.glob(pre_dl_id+'*.pkl')
        epoch_max = 0
        if len(fns)==0:
            # first time to do it, load matlab prior
            mat_init = 'init_p'+str(self.model_id)+'_'+str(self.train_id)+'.mat'
            if os.path.exists(mat_init):
                print "load initial mat weight: ", mat_init
                self.loadWeight(mat_init)
        else:
            for fn in fns:
                epoch_id = int(fn[fn.rfind('_')+1:fn.find('.pkl')])
                if (epoch_id>epoch_max and epoch_id<=self.num_epoch):
                    epoch_max = epoch_id
            if epoch_max>0:
                print "load weight at epoch: ", epoch_max
                self.loadWeight(pre_dl_id+str(epoch_max)+'.pkl')
                self.num_epoch -= epoch_max
        self.p_monitor['epoch'] = epoch_max

    def runTrain(self):        
        self.loadAlgo(self.p_algo)
        self.train_monitor = trainMonitor(self.model.monitor,self.p_monitor)
        #self.model.monitor.report_epoch()            
        self.train_monitor.run()
        while self.algo.continue_learning(self.model):
            self.algo.train(self.DataLoader.data['train'])            
            self.train_monitor.run()
            if self.do_savew and (self.train_monitor.monitor._epochs_seen+1)%10 == 0:
                self.saveWeight(self.param_pkl)
            #self.model.monitor()            
        if self.do_savew:
            self.saveWeight(self.param_pkl)


    def runTest(self,data_test=None,metric=-1):
        """
        metric: evaluation metric
        0: classification error
        1: L1 regression error
        2: L2 regression error
        """
        if data_test is None:
            data_test = self.DataLoader.data['test']
        batch_size = self.batch_size
        # make batches
        m = data_test.X.shape[0]
        extra = (batch_size - m % batch_size) % batch_size
        #print extra,batch_size,m
        assert (m + extra) % batch_size == 0
        #print data_test.X[0]
        if extra > 0:
            data_test.X = np.concatenate((data_test.X, np.zeros((extra, data_test.X.shape[1]),
                    dtype=data_test.X.dtype)), axis=0)
            assert data_test.X.shape[0] % batch_size == 0
        X = self.model.get_input_space().make_batch_theano()
        Y = self.model.fprop(X)
        """
        print 'load param:'
        param = self.model.layers[0].get_params()
        aa = param[0].get_value()
        bb = param[1].get_value()
        print aa[:3,:3],bb[:10]   
        """
        from theano import function
        if metric==0:
            from theano import tensor as T
            y = T.argmax(Y, axis=1)        
            f = function([X], y)
        else:
            f = function([X], Y)
        
        yhat = []
        for i in xrange(data_test.X.shape[0] / batch_size):
            x_arg = data_test.X[i*batch_size:(i+1)*batch_size,:]
            if X.ndim > 2:
                x_arg = data_test.get_topological_view(x_arg)
            yhat.append(f(x_arg.astype(X.dtype)))
        #print "ww:",x_arg.shape
        #print f(x_arg.astype(X.dtype)).shape
        yhat = np.concatenate(yhat)
        yhat = yhat[:m]
        data_test.X = data_test.X[:m,:]
        y = data_test.y
        #print m,extra
        acc = -1
        if y is not None:
            if metric == 0:
                if data_test.y.ndim>1:
                    y = np.argmax(data_test.y,axis=1)
                assert len(y)==len(yhat)
                acc = float(np.sum(y-yhat==0))/m
            elif metric == 1:
                acc = float(np.sum(abs(y-yhat)))/m
            elif metric == 2: 
                #print y.shape,yhat.shape
                #print float(np.sum((y-yhat)**2))
                print y[:30]
                print yhat[:30]
                print m
                acc = float(np.sum((y-np.reshape(yhat,y.shape))**2))/m
                #print "y: ",y
                #print "yhat: ",yhat
            print "acc: ",acc
            
        return [[yhat],[acc]]
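
The zero-padding trick used in runTest above (pad the design matrix to a multiple of batch_size, predict, then discard the outputs for the padding rows) is easy to reuse on its own. A stand-alone sketch, assuming only a predictor f that maps a (batch_size, n_features) array to per-row predictions:

import numpy as np

def predict_in_batches(f, X, batch_size):
    # Pad X with zero rows so its length is a multiple of batch_size,
    # run the batched predictor, then drop the outputs for the padding.
    m = X.shape[0]
    extra = (batch_size - m % batch_size) % batch_size
    if extra > 0:
        pad = np.zeros((extra, X.shape[1]), dtype=X.dtype)
        X = np.concatenate((X, pad), axis=0)
    yhat = [f(X[i * batch_size:(i + 1) * batch_size])
            for i in xrange(X.shape[0] // batch_size)]
    return np.concatenate(yhat)[:m]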