Example #1
0
 def __init__(self):
     self.W1 = [sharedX(rng.randn(num_features, chunk_width)) for i
         in xrange(num_chunks)]
     disturb_mem.disturb_mem()
     self.W2 = [sharedX(rng.randn(chunk_width)) for i in xrange(num_chunks)]
     self._params = safe_union(self.W1, self.W2)
     self.input_space = VectorSpace(num_features)
     self.output_space = VectorSpace(1)
Example #2
0
 def __init__(self):
     self.W1 = [sharedX(rng.randn(num_features, chunk_width)) for i
         in xrange(num_chunks)]
     disturb_mem.disturb_mem()
     self.W2 = [sharedX(rng.randn(chunk_width)) for i in xrange(num_chunks)]
     self._params = safe_union(self.W1, self.W2)
     self.input_space = VectorSpace(num_features)
     self.output_space = VectorSpace(1)
Example #3
0
            def __call__(self, model, X, Y=None, **kwargs):
                disturb_mem.disturb_mem()
                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred-Y[:,0]).sum()
Example #4
0
        def make_dataset(num_batches):
            disturb_mem.disturb_mem()
            m = num_batches*batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m,1))
            y[:,0] = np.dot(X, w) > 0.

            rval =  DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = "" # suppress no yaml_src warning

            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval
Example #5
0
        def make_dataset(num_batches):
            disturb_mem.disturb_mem()
            m = num_batches * batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m, 1))
            y[:, 0] = np.dot(X, w) > 0.

            rval = DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = ""  # suppress no yaml_src warning

            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval
Example #6
0
            def expr(self, model, data, **kwargs):
                self.get_data_specs(model)[0].validate(data)
                X, Y = data
                disturb_mem.disturb_mem()
                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred-Y[:,0]).sum()
Example #7
0
            def __call__(self, model, X, Y=None, **kwargs):
                disturb_mem.disturb_mem()

                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(
                    mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred - Y[:, 0]).sum()
    def run(replay, log=None):

        if not replay:
            log = StringIO()
        else:
            log = StringIO(log)
        record = Record(replay=replay, file_object=log)

        disturb_mem.disturb_mem()

        mode = RecordMode(record=record)

        b = sharedX(np.zeros((2, )), name='b')
        channels = OrderedDict()

        disturb_mem.disturb_mem()

        v_max = b.max(axis=0)
        v_min = b.min(axis=0)
        v_range = v_max - v_min

        updates = []
        for i, val in enumerate([
                v_max.max(),
                v_max.min(),
                v_range.max(),
        ]):
            disturb_mem.disturb_mem()
            s = sharedX(0., name='s_' + str(i))
            updates.append((s, val))

        for var in theano.gof.graph.ancestors(update for _, update in updates):
            if var.name is not None and var.name is not 'b':
                if var.name[0] != 's' or len(var.name) != 2:
                    var.name = None

        for key in channels:
            updates.append((s, channels[key]))
        f = theano.function([],
                            mode=mode,
                            updates=updates,
                            on_unused_input='ignore',
                            name='f')
        for output in f.maker.fgraph.outputs:
            mode.record.handle_line(var_descriptor(output) + '\n')
        disturb_mem.disturb_mem()
        f()

        mode.record.f.flush()

        if not replay:
            return log.getvalue()
Example #9
0
    def run(replay, log=None):

        if not replay:
            log = StringIO()
        else:
            log = StringIO(log)
        record = Record(replay=replay, file_object=log)

        disturb_mem.disturb_mem()

        mode = RecordMode(record=record)

        b = sharedX(np.zeros((2,)), name='b')
        channels = OrderedDict()

        disturb_mem.disturb_mem()

        v_max = b.max(axis=0)
        v_min = b.min(axis=0)
        v_range = v_max - v_min

        updates = []
        for i, val in enumerate([
                v_max.max(),
                v_max.min(),
                v_range.max(),
                ]):
            disturb_mem.disturb_mem()
            s = sharedX(0., name='s_' + str(i))
            updates.append((s, val))

        for var in theano.gof.graph.ancestors(update for _, update in updates):
            if var.name is not None and var.name is not 'b':
                if var.name[0] != 's' or len(var.name) != 2:
                    var.name = None

        for key in channels:
            updates.append((s, channels[key]))
        f = theano.function([], mode=mode, updates=updates,
                            on_unused_input='ignore', name='f')
        for output in f.maker.fgraph.outputs:
            mode.record.handle_line(var_descriptor(output) + '\n')
        disturb_mem.disturb_mem()
        f()

        mode.record.f.flush()

        if not replay:
            return log.getvalue()
Example #10
0
    def run_sgd(mode):
        # Must be seeded the same both times run_sgd is called
        disturb_mem.disturb_mem()
        rng = np.random.RandomState([2012, 11, 27])

        batch_size = 5
        train_batches = 3
        valid_batches = 4
        num_features = 2

        # Synthesize dataset with a linear decision boundary
        w = rng.randn(num_features)

        def make_dataset(num_batches):
            disturb_mem.disturb_mem()
            m = num_batches*batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m,1))
            y[:,0] = np.dot(X, w) > 0.

            rval =  DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = "" # suppress no yaml_src warning

            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval

        train = make_dataset(train_batches)
        valid = make_dataset(valid_batches)

        num_chunks = 10
        chunk_width = 2
        class ManyParamsModel(Model):
            """
            Make a model with lots of parameters, so that there are many
            opportunities for their updates to get accidentally re-ordered
            non-deterministically. This makes non-determinism bugs manifest
            more frequently.
            """

            def __init__(self):
                self.W1 = [sharedX(rng.randn(num_features, chunk_width)) for i
                    in xrange(num_chunks)]
                disturb_mem.disturb_mem()
                self.W2 = [sharedX(rng.randn(chunk_width)) for i in xrange(num_chunks)]
                self._params = safe_union(self.W1, self.W2)
                self.input_space = VectorSpace(num_features)
                self.output_space = VectorSpace(1)

        disturb_mem.disturb_mem()
        model = ManyParamsModel()
        disturb_mem.disturb_mem()


        class LotsOfSummingCost(Cost):
            """
            Make a cost whose gradient on the parameters involves summing many terms together,
            so that T.grad is more likely to sum things in a random order.
            """

            supervised = True

            def __call__(self, model, X, Y=None, **kwargs):
                disturb_mem.disturb_mem()
                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred-Y[:,0]).sum()

        cost = LotsOfSummingCost()

        disturb_mem.disturb_mem()

        algorithm = SGD(cost=cost,
                batch_size=batch_size,
                init_momentum=.5,
                learning_rate=1e-3,
                monitoring_dataset={'train': train, 'valid':valid},
                update_callbacks=[ExponentialDecay(decay_factor=2., min_lr=.0001)],
                termination_criterion=EpochCounter(max_epochs=5))

        disturb_mem.disturb_mem()

        train_object = Train(
                dataset=train,
                model=model,
                algorithm=algorithm,
                extensions=[
                    PolyakAveraging(start=0),
                    MomentumAdjustor(final_momentum=.9, start=1, saturate=5),
                    ],
                save_freq=0)

        disturb_mem.disturb_mem()

        train_object.main_loop()
Example #11
0
    def run_bgd(mode):
        # Must be seeded the same both times run_bgd is called
        disturb_mem.disturb_mem()
        rng = np.random.RandomState([2012, 11, 27, 8])

        batch_size = 5
        train_batches = 3
        valid_batches = 4
        num_features = 2

        # Synthesize dataset with a linear decision boundary
        w = rng.randn(num_features)

        def make_dataset(num_batches):
            disturb_mem.disturb_mem()
            m = num_batches*batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m,1))
            y[:,0] = np.dot(X, w) > 0.

            rval =  DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = "" # suppress no yaml_src warning

            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval

        train = make_dataset(train_batches)
        valid = make_dataset(valid_batches)

        num_chunks = 10
        chunk_width = 2
        class ManyParamsModel(Model):
            """
            Make a model with lots of parameters, so that there are many
            opportunities for their updates to get accidentally re-ordered
            non-deterministically. This makes non-determinism bugs manifest
            more frequently.
            """

            def __init__(self):
                super(ManyParamsModel, self).__init__()
                self.W1 = [sharedX(rng.randn(num_features, chunk_width)) for i
                    in xrange(num_chunks)]
                disturb_mem.disturb_mem()
                self.W2 = [sharedX(rng.randn(chunk_width)) for i in
                        xrange(num_chunks)]
                self._params = safe_union(self.W1, self.W2)
                self.input_space = VectorSpace(num_features)
                self.output_space = VectorSpace(1)

        disturb_mem.disturb_mem()
        model = ManyParamsModel()
        disturb_mem.disturb_mem()


        class LotsOfSummingCost(Cost):
            """
            Make a cost whose gradient on the parameters involves summing many
            terms together,
            so that T.grad is more likely to sum things in a random order.
            """

            supervised = True

            def expr(self, model, data, **kwargs):
                self.get_data_specs(model)[0].validate(data)
                X, Y = data
                disturb_mem.disturb_mem()
                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = [non_linearity(z) for z in Z]
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(mlp_pred, [T.nnet.sigmoid,
                    T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred-Y[:,0]).sum()

            def get_data_specs(self, model):
                data = CompositeSpace((model.get_input_space(),
                                       model.get_output_space()))
                source = (model.get_input_source(), model.get_target_source())
                return (data, source)

        cost = LotsOfSummingCost()

        disturb_mem.disturb_mem()

        algorithm = BGD(cost=cost,
                batch_size=batch_size,
                updates_per_batch=5,
                scale_step=.5,
                conjugate=1,
                reset_conjugate=0,
                monitoring_dataset={'train': train, 'valid':valid},
                termination_criterion=EpochCounter(max_epochs=5))

        disturb_mem.disturb_mem()

        train_object = Train(
                dataset=train,
                model=model,
                algorithm=algorithm,
                save_freq=0)

        disturb_mem.disturb_mem()

        train_object.main_loop()
Example #12
0
    def run_sgd(mode):
        # Must be seeded the same both times run_sgd is called
        disturb_mem.disturb_mem()
        rng = np.random.RandomState([2012, 11, 27])

        batch_size = 5
        train_batches = 3
        valid_batches = 4
        num_features = 2

        # Synthesize dataset with a linear decision boundary
        w = rng.randn(num_features)

        def make_dataset(num_batches):
            disturb_mem.disturb_mem()
            m = num_batches * batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m, 1))
            y[:, 0] = np.dot(X, w) > 0.

            rval = DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = ""  # suppress no yaml_src warning

            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval

        train = make_dataset(train_batches)
        valid = make_dataset(valid_batches)

        num_chunks = 10
        chunk_width = 2

        class ManyParamsModel(Model):
            """
            Make a model with lots of parameters, so that there are many
            opportunities for their updates to get accidentally re-ordered
            non-deterministically. This makes non-determinism bugs manifest
            more frequently.
            """
            def __init__(self):
                self.W1 = [
                    sharedX(rng.randn(num_features, chunk_width))
                    for i in xrange(num_chunks)
                ]
                disturb_mem.disturb_mem()
                self.W2 = [
                    sharedX(rng.randn(chunk_width)) for i in xrange(num_chunks)
                ]
                self._params = safe_union(self.W1, self.W2)
                self.input_space = VectorSpace(num_features)
                self.output_space = VectorSpace(1)

        disturb_mem.disturb_mem()
        model = ManyParamsModel()
        disturb_mem.disturb_mem()

        class LotsOfSummingCost(Cost):
            """
            Make a cost whose gradient on the parameters involves summing many terms together,
            so that T.grad is more likely to sum things in a random order.
            """

            supervised = True

            def expr(self, model, data, **kwargs):
                self.get_data_specs(model)[0].validate(data)
                X, Y = data
                disturb_mem.disturb_mem()

                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(
                    mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred - Y[:, 0]).sum()

            def get_data_specs(self, model):
                data = CompositeSpace(
                    (model.get_input_space(), model.get_output_space()))
                source = (model.get_input_source(), model.get_target_source())
                return (data, source)

        cost = LotsOfSummingCost()

        disturb_mem.disturb_mem()

        algorithm = SGD(
            cost=cost,
            batch_size=batch_size,
            init_momentum=.5,
            learning_rate=1e-3,
            monitoring_dataset={
                'train': train,
                'valid': valid
            },
            update_callbacks=[ExponentialDecay(decay_factor=2., min_lr=.0001)],
            termination_criterion=EpochCounter(max_epochs=5))

        disturb_mem.disturb_mem()

        train_object = Train(dataset=train,
                             model=model,
                             algorithm=algorithm,
                             extensions=[
                                 PolyakAveraging(start=0),
                                 MomentumAdjustor(final_momentum=.9,
                                                  start=1,
                                                  saturate=5),
                             ],
                             save_freq=0)

        disturb_mem.disturb_mem()

        train_object.main_loop()
Example #13
0
    def run_bgd(mode):
        # Must be seeded the same both times run_bgd is called
        disturb_mem.disturb_mem()
        rng = np.random.RandomState([2012, 11, 27, 8])

        batch_size = 5
        train_batches = 3
        valid_batches = 4
        num_features = 2

        # Synthesize dataset with a linear decision boundary
        w = rng.randn(num_features)

        def make_dataset(num_batches):
            disturb_mem.disturb_mem()
            m = num_batches * batch_size
            X = rng.randn(m, num_features)
            y = np.zeros((m, 1))
            y[:, 0] = np.dot(X, w) > 0.

            rval = DenseDesignMatrix(X=X, y=y)

            rval.yaml_src = ""  # suppress no yaml_src warning

            X = rval.get_batch_design(batch_size)
            assert X.shape == (batch_size, num_features)

            return rval

        train = make_dataset(train_batches)
        valid = make_dataset(valid_batches)

        num_chunks = 10
        chunk_width = 2

        class ManyParamsModel(Model):
            """
            Make a model with lots of parameters, so that there are many
            opportunities for their updates to get accidentally re-ordered
            non-deterministically. This makes non-determinism bugs manifest
            more frequently.
            """
            def __init__(self):
                self.W1 = [
                    sharedX(rng.randn(num_features, chunk_width))
                    for i in xrange(num_chunks)
                ]
                disturb_mem.disturb_mem()
                self.W2 = [
                    sharedX(rng.randn(chunk_width)) for i in xrange(num_chunks)
                ]
                self._params = safe_union(self.W1, self.W2)
                self.input_space = VectorSpace(num_features)
                self.output_space = VectorSpace(1)

        disturb_mem.disturb_mem()
        model = ManyParamsModel()
        disturb_mem.disturb_mem()

        class LotsOfSummingCost(Cost):
            """
            Make a cost whose gradient on the parameters involves summing many terms together,
            so that T.grad is more likely to sum things in a random order.
            """

            supervised = True

            def __call__(self, model, X, Y=None, **kwargs):
                disturb_mem.disturb_mem()

                def mlp_pred(non_linearity):
                    Z = [T.dot(X, W) for W in model.W1]
                    H = map(non_linearity, Z)
                    Z = [T.dot(h, W) for h, W in safe_izip(H, model.W2)]
                    pred = sum(Z)
                    return pred

                nonlinearity_predictions = map(
                    mlp_pred, [T.nnet.sigmoid, T.nnet.softplus, T.sqr, T.sin])
                pred = sum(nonlinearity_predictions)
                disturb_mem.disturb_mem()

                return abs(pred - Y[:, 0]).sum()

        cost = LotsOfSummingCost()

        disturb_mem.disturb_mem()

        algorithm = BGD(cost=cost,
                        batch_size=batch_size,
                        updates_per_batch=5,
                        scale_step=.5,
                        conjugate=1,
                        reset_conjugate=0,
                        monitoring_dataset={
                            'train': train,
                            'valid': valid
                        },
                        termination_criterion=EpochCounter(max_epochs=5))

        disturb_mem.disturb_mem()

        train_object = Train(dataset=train,
                             model=model,
                             algorithm=algorithm,
                             save_freq=0)

        disturb_mem.disturb_mem()

        train_object.main_loop()