def __init__(self,
                 nnet,
                 nb_pos_steps=25,
                 nb_neg_steps=5,
                 init_chain=None,
                 batch_size=None):
        """PCD Trainer.
        
        Need to provide either the init_chain or the batch_size.
        
        # Arguments:
            nnet: DBM object
            nb_pos_steps: int, optional. Number of updates in the positive, or data phase.
            nb_neg_steps: int, optional. Number of updates in the negative, or model phase.
            init_chain: list of tensors, optional. Initial starting point of model persistent chains.
            batch_size: int, optional. Need batch size to generate appropriate number of persistent chains.
        """
        super().__init__(nnet, nb_pos_steps, nb_neg_steps)

        assert (init_chain is not None) or (batch_size is not None)
        if init_chain is None:
            init_chain = []
            for size in self.nnet.layer_size_list:
                sample = B.eval(
                    B.random_binomial(shape=(batch_size, size), p=0.5))
                init_chain.append(B.variable(sample))
        else:
            init_chain = [B.variable(ic) for ic in init_chain]

            batch_size = B.eval(init_chain[0].shape)[0]
            for ic, size in zip(init_chain, self.nnet.layer_size_list):
                ic_shape = B.eval(ic.shape)
                assert ic_shape[0] == batch_size
                assert ic_shape[1] == size

        self.persist_chain = init_chain
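
# Plain NumPy sketch (not from the original source) of the chain initialization
# performed above when only `batch_size` is given: one Bernoulli(0.5) sample
# matrix per layer, shaped (batch_size, layer_size), which becomes a backend
# variable holding that layer's persistent chain.
import numpy as np

layer_size_list = [784, 500, 500]        # stand-in for nnet.layer_size_list
batch_size = 100
init_chain = [np.random.binomial(1, 0.5, size=(batch_size, size)).astype('float32')
              for size in layer_size_list]
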
def _test_optimizer(optimizer):

    mbs = 10

    dataset = random.Random('probability')
    data = B.eval(dataset.train.data[0:mbs])
    pixels = data.shape[1]

    W0 = B.variable(np.random.normal(size=(pixels, )),
                    dtype=B.floatx(),
                    name='W0')
    W1 = B.variable(np.random.normal(size=(pixels, )),
                    dtype=B.floatx(),
                    name='W1')
    params = [W0, W1]
    inputs = B.placeholder((mbs, pixels), dtype=B.floatx())
    loss = B.sum(B.dot(inputs, B.square(W0) + B.square(W1)))

    updates = optimizer.get_updates(params, loss)

    f = B.function([inputs], [loss], updates=updates)

    output = f(data)
    assert len(output) == 1
    assert output[0].size == 1
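
# Minimal optimizer sketch (not from the original source) showing the only
# interface `_test_optimizer` relies on: `get_updates(params, loss)` must return
# update pairs that `B.function` accepts. It assumes the backend exposes a
# Keras-style `B.gradients(loss, params)`; if it does not, substitute whatever
# gradient helper the backend provides.
class PlainSGD(object):
    def __init__(self, lr=0.01):
        self.lr = lr

    def get_updates(self, params, loss):
        grads = B.gradients(loss, params)          # assumed backend helper
        # one (variable, new_value) pair per parameter
        return [(p, p - self.lr * g) for p, g in zip(params, grads)]

# _test_optimizer(PlainSGD(lr=0.01))
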
    def predict_on_batch(self, x):
        """Runs a single gradient update on a single batch of data.
        # Arguments
            x: Numpy array of training data,
                or list of Numpy arrays if the model has multiple inputs.
                If all inputs in the model are named,
                you can also pass a dictionary
                mapping input names to Numpy arrays.
        # Returns
            Scalar training loss
            (if the model has a single output and no metrics)
            or list of scalars (if the model has multiple outputs
            and/or metrics).
        """

        # makes the generic indices to access data
        batch_size = B.eval(x.shape)[0]
        self.test_index = B.placeholder(shape=(batch_size,),
                                        dtype=B.intx(), name='test_index')
        self.test_data = x
        index = np.arange(batch_size)

        self._make_test_function()
        outputs = self.test_function(index)

        return outputs
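
# Plain NumPy stand-in (not from the original source) for the "generic index"
# pattern used above: the data stay in one shared array, and the compiled
# backend function only ever receives a vector of row indices selecting the
# batch it should evaluate.
import numpy as np

data = np.random.rand(1000, 784).astype('float32')   # stand-in for self.test_data

def test_function(index):
    batch = data[index]                               # what the backend graph does symbolically
    return [float(batch.mean())]                      # stand-in for the model's test outputs

outputs = test_function(np.arange(data.shape[0]))
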
    def run_logZ(self):
        """Performs calculatations of AIS runs.
        
        Must be called before estimates.
        """

        # initial sample
        sample_ls = self.init_sample_ls

        # this is the initial beta=0 case
        log_ais_w = B.eval(self.dbm_a.free_energy_sumover_even(sample_ls, 1.0))

        log_ais_w = B.variable(log_ais_w, name='log_ais_w')
        index = B.variable(1, name='index', dtype=B.intx())

        scan_out, updates = B.scan(self._update,
                                   outputs_info=[log_ais_w, index] + sample_ls,
                                   n_steps=self.n_betas - 2,
                                   name='scan_ais')

        log_ais_w = scan_out[0][-1]
        sample_ls = [s[-1] for s in scan_out[2:]]

        # this is the final beta=1 case
        log_ais_w -= self.dbm_b.free_energy_sumover_even(sample_ls, 1.0)

        logZ_fn = B.function([], [log_ais_w], updates=updates)

        self.logZ = self.logZa + logZ_fn()
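
# Toy NumPy illustration (not from the original source) of the quantity
# `run_logZ` estimates: annealed importance sampling between a tractable base
# model "a" (standard normal, playing the role of dbm_a) and a target model
# "b" (N(mu, sigma^2), playing the role of dbm_b). Gaussians are used only
# because every intermediate distribution can be sampled exactly; the
# ingredients mirror the code above: a beta=0 base term, weight updates across
# the intermediate betas, and the final beta=1 model.
import numpy as np

def toy_ais(mu=1.5, sigma=2.0, n_runs=500, n_betas=100, seed=0):
    rng = np.random.RandomState(seed)
    betas = np.linspace(0.0, 1.0, n_betas)

    def log_f(x, beta):
        # unnormalized log density of the intermediate model (log f = -free energy)
        return (1.0 - beta) * (-0.5 * x ** 2) + beta * (-0.5 * (x - mu) ** 2 / sigma ** 2)

    x = rng.randn(n_runs)                      # exact samples from model "a" (beta = 0)
    log_w = np.zeros(n_runs)
    for b_prev, b_next in zip(betas[:-1], betas[1:]):
        log_w += log_f(x, b_next) - log_f(x, b_prev)
        # refresh the chains with an exact draw from the intermediate Gaussian at b_next
        lam = (1.0 - b_next) + b_next / sigma ** 2        # precision
        mean = (b_next * mu / sigma ** 2) / lam
        x = mean + rng.randn(n_runs) / np.sqrt(lam)

    logZ_a = 0.5 * np.log(2.0 * np.pi)                    # exact, like self.logZa above
    logZ_b_est = logZ_a + np.log(np.mean(np.exp(log_w)))  # AIS estimate of log Z_b
    logZ_b_true = 0.5 * np.log(2.0 * np.pi * sigma ** 2)
    return logZ_b_est, logZ_b_true

# print(toy_ais())   # the two values should be close
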
    def fit(self,
            x,
            batch_size=100,
            n_epoch=10,
            callbacks=None,
            validation_data=None,
            shuffle=True,
            initial_epoch=0):
        """Trains the model for a fixed number of epochs (iterations on a dataset).
        
        # Arguments
            x: Theano shared array of training data
            batch_size: integer. Number of samples per gradient update.
            n_epoch: integer, the number of times to iterate
                over the training data arrays.
            callbacks: list of callbacks to be called during training.
            validation_data: Theano shared array of data on which to evaluate
                the loss and any model metrics at the end of each epoch.
                The model will not be trained on this data.
            shuffle: boolean, whether to shuffle the training data
                before each epoch.
            initial_epoch: epoch at which to start training
                (useful for resuming a previous training run)
        
        # Returns
            A `History` instance. Its `history` attribute contains
            all information collected during training.
        """
        self.train_data = x
        self.n_train_sample = B.eval(x.shape[0])
        self.validation_data = validation_data

        # makes the generic indices to access data
        self.train_index = B.placeholder(shape=(batch_size,),
                                         dtype=B.intx(), name='train_index')

        # makes the training functions
        self._make_train_function()
        f = self.train_function

        # preps for validation
        out_labels = ['cost']
        if validation_data:
            self.valid_index = B.placeholder(shape=(batch_size,),
                                             dtype=B.intx(), name='valid_index')
            callback_metrics = copy.copy(out_labels) + ['val_' + n for n in out_labels]
            self._make_validation_function()
            val_f = self.validation_function
        else:
            callback_metrics = copy.copy(out_labels)
            val_f = None

        # delegate logic to _fit_loop
        return self._fit_loop(f, out_labels=out_labels,
                              batch_size=batch_size, n_epoch=n_epoch,
                              callbacks=callbacks,
                              val_f=val_f, shuffle=shuffle,
                              callback_metrics=callback_metrics,
                              initial_epoch=initial_epoch)
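
# Usage sketch (not from the original source). `model` is assumed to be an
# already-built model exposing the `fit` defined above; the data are Theano
# shared arrays as the docstring requires, and the history keys follow the
# 'cost' label used in `out_labels`.
#
# import theano
# x_train = theano.shared(train_images.astype(B.floatx()))
# x_valid = theano.shared(valid_images.astype(B.floatx()))
# history = model.fit(x_train, batch_size=100, n_epoch=10,
#                     validation_data=x_valid)
# print(history.history['cost'], history.history.get('val_cost'))
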
    def __init__(self, dbm, data, n_runs, n_betas=None, betas=None):
        """
        Initialize an object to perform AIS.
                
        # Arguments:
            dbm: DBM object
            data: numpy array, needed for data base rate model
            n_runs: int, number of parallel AIS estimates to run
            n_betas: int, optional. Will create evenly spaced betas. Need either n_betas or betas.
            betas: numpy array, optional. Betas for intermediate distributions. Need either n_betas or betas.
            
            
        # References:
            1. On the quantitative analysis of deep belief networks by R Salakhutdinov and I Murray. ACM 2008.
            2. Deep boltzmann machines by R Salakhutdinov and G Hinton. AIS, 2009.

        """

        self.dbm_b = dbm
        self.n_runs = n_runs

        if (n_betas is not None) and (betas is None):
            self.n_betas = n_betas
            betas = np.linspace(0, 1, n_betas)
        elif (n_betas is None) and (betas is not None):
            self.n_betas = betas.shape[0]
        else:
            raise ValueError(
                'Provide exactly one of n_betas or betas')

        self.betas = B.variable(betas, name='betas')

        # this is the data base rate model of reference 1
        # The AIS estimate is very sensitive to this base rate model, so the
        # standard practice in the literature is to use this setup
        vis_mean = np.clip(np.mean(data, axis=0), B.epsilon(), 1 - B.epsilon())
        p_ruslan = (vis_mean + 0.05) / 1.05
        b0_a = np.log(p_ruslan / (1 - p_ruslan)).astype(B.floatx())
        self.dbm_a = DBM(layer_size_list=self.dbm_b.layer_size_list,
                         topology_dict=self.dbm_b.topology_dict)
        B.set_value(self.dbm_a.layers[0].b, b0_a)

        # make the initial sample
        # visible layer depends on data base rate bias
        p0 = np.tile(1. / (1 + np.exp(-b0_a)), (n_runs, 1))
        s0 = np.array(p0 > np.random.random_sample(p0.shape), dtype=B.floatx())
        sample_ls = [s0]
        # rest of layers are uniform sample
        for n in self.dbm_b.layer_size_list[1:]:
            s = B.random_binomial((self.n_runs, n), p=0.5)
            sample_ls.append(B.variable(B.eval(s)))
        self.init_sample_ls = sample_ls

        # this is the exact partition function of the base rate model
        self.logZa = np.sum(self.dbm_b.layer_size_list[1:]) * np.log(2)
        self.logZa += np.sum(np.log(1 + np.exp(b0_a)))

        # This is the sampler for the final model
        self.dbm_b_sampler = Sampler(self.dbm_b)
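
# Usage sketch (not from the original source). The class name `AIS` and the
# import/module paths are assumptions; the constructor arguments, `run_logZ`,
# and the `logZ` attribute are taken from the definitions above.
#
# data = B.eval(dataset.train.data)               # numpy array for the data base-rate model
# ais = AIS(dbm=trained_dbm, data=data, n_runs=100, n_betas=10000)
# ais.run_logZ()                                  # must be called before reading estimates
# print(ais.logZ)                                 # log partition estimate(s) from the parallel runs
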
def test_free_energy(nnet_type, beta, propup, fe_type):

    nnet, x = _dbm_prep(nnet_type)

    if propup:
        input_ls = nnet.propup(x, beta=beta)
    else:
        input_ls = x

    if fe_type == 'fe':
        fe = nnet.free_energy(input_ls, beta=beta)
    elif fe_type == 'fe_odd':
        fe = nnet.free_energy_sumover_odd(input_ls, beta=beta)
    elif fe_type == 'fe_even':
        fe = nnet.free_energy_sumover_even(input_ls, beta=beta)

    assert B.eval(fe).shape[0] == B.eval(x.shape)[0]
def test_init_standard(nnet_type):

    nnet = nnet_for_testing(nnet_type)
    dataset = random.Random('probability')

    b_ls = []
    for layer in nnet.layers:
        b_ls.append(B.eval(layer.b.shape))

    W_ls = []
    for synapse in nnet.synapses:
        W_ls.append(B.eval(synapse.W.shape))

    nnet = initializers.init_standard(nnet, dataset)

    for size, layer in zip(b_ls, nnet.layers):
        assert size == B.eval(layer.b.shape)
    for size, synapse in zip(W_ls, nnet.synapses):
        assert_allclose(size, B.eval(synapse.W.shape))
def init_standard(nnet, dataset):
    """My standard initialization for a neural network """

    # use orthogonal initialization of all W's
    for synapse in nnet.synapses:
        W_start = synapse.W
        shape = B.eval(W_start.shape)
        W_final = orthogonal(shape)
        B.set_value(W_start, W_final)

    # set visible bias based on data
    # Reference: A Practical Guide to Training Restricted Boltzmann Machines by Geoffrey Hinton
    pixel_mean = B.mean(dataset.train.data, axis=0)
    p = B.clip(pixel_mean, B.epsilon(), 1 - B.epsilon())
    b_start = nnet.layers[0].b
    b_final = B.eval(B.log(p / (1 - p)))
    B.set_value(b_start, b_final)

    return nnet
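
# Plain NumPy sketch (not from the original source) of the visible-bias rule
# used above, following Hinton's "A Practical Guide to Training Restricted
# Boltzmann Machines": set each visible bias to log(p / (1 - p)), where p is
# that unit's mean activation over the training data, clipped away from 0 and 1.
import numpy as np

train_data = np.random.rand(1000, 784)    # stand-in for dataset.train.data
eps = 1e-7                                # stand-in for B.epsilon()
p = np.clip(train_data.mean(axis=0), eps, 1 - eps)
visible_bias = np.log(p / (1 - p))        # one bias per visible unit, shape (784,)
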
def test_base_Sampler(nnet_type, constant, sampler_type):
    beta = 1.0
    nnet = nnet_for_testing(nnet_type)
    batch_size = 30

    constant_ls = []
    if constant is not None:
        constant_ls = [constant]

    sampler = samplers.Sampler(nnet)

    input_ls = [
        B.variable(np.ones((batch_size, size)))
        for size in nnet.layer_size_list
    ]

    sampler.set_param(beta=beta, constant=constant_ls)

    if sampler_type == 'probability':
        prob_ls = sampler.probability(*input_ls)
    elif sampler_type == 'sample':
        prob_ls = sampler.sample(*input_ls)
    elif sampler_type == 'sample_inputs':
        prob_ls = sampler.sample_inputs(*input_ls)
    else:
        raise NotImplementedError

    assert len(prob_ls) == len(input_ls)
    for i, p in enumerate(prob_ls):
        if i in constant_ls:
            assert p == input_ls[i]
        else:
            m = np.ones((batch_size, nnet.layer_size_list[i]))
            pp = B.eval(p)
            if sampler_type == 'sample':
                assert_allclose((pp + np.logical_not(pp)), m)
            else:
                assert_allclose(pp, 0.5 * m)
    def __init__(self,
                 name,
                 init_b,
                 up_dict=None,
                 down_dict=None,
                 activation=B.sigmoid,
                 regularizer=None):
        """Creates the layer of neurons.
        
        # Arguments:
            name: str
            init_b: numpy array, initial bias of neurons
            up_dict: dict, contains weights that connect lower numbered layers to this layer
            down_dict: dict, contains weights that connect higher numbered layers to this layer
            activation: function, internal neuron activity to final activity
            regularizer: Regularizer object
        """
        super().__init__(name)

        self.activation = activation

        self.dim = init_b.shape[0]
        self.b = B.variable(init_b, name=self.name+'_b')
        self.trainable_weights = self.b
        
        self.regularizer = regularizer
        if regularizer is not None:
            self.losses = self.regularizer(self.b)

        up_dict = dict_2_OrderedDict(up_dict)
        down_dict = dict_2_OrderedDict(down_dict)

        self.index_up = list(up_dict.keys())
        self.index_down = list(down_dict.keys())

        self.W_up = list(up_dict.values())
        W_down = list(down_dict.values())
        self.W_down = [W.T for W in W_down]

        # check that the weight shapes are sensible
        W_all = self.W_up + self.W_down
        for W in W_all:
            W_shape = B.eval(W).shape
            msg = 'Dimension mismatch. Expected {} but {} has shape {}'.format(self.dim, W.name, W_shape)
            assert W_shape[1] == self.dim, msg

        # assign layer properties
        n_up = len(up_dict)
        n_down = len(down_dict)
        if n_up > 0 and n_down > 0:
            self.direction = 'both'
            # need to compensate for reduced inputs when only sampling in a single direction
            # Reference: Deep Boltzmann Machines by R. Salakhutdinov and G. Hinton. AISTATS 2009.
            self._z_up_adj = (n_up+n_down)/n_up
            self._z_down_adj = (n_up+n_down)/n_down
        elif n_up > 0 and n_down == 0:
            self.direction = 'up'
        elif n_up == 0 and n_down > 0:
            self.direction = 'down'
        else:
            raise ValueError('Both up_dict and down_dict cannot be empty.')
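
# Construction sketch (not from the original source). The class name `Layer`
# and the convention that up_dict/down_dict are keyed by the connecting layer
# index are assumptions; what the checks above do guarantee is that every
# weight must have its second dimension (after the transpose applied to the
# down weights) equal to this layer's size.
#
# import numpy as np
# W01 = B.variable(np.random.normal(scale=0.01, size=(784, 500)), name='W01')
# hidden = Layer('h1',
#                init_b=np.zeros(500, dtype=B.floatx()),
#                up_dict={0: W01},      # weights from lower-numbered layer 0 (784 units)
#                down_dict=None)        # no higher-numbered layers attached -> direction 'up'
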
    def _fit_loop(self,
                  f,
                  out_labels=None,
                  batch_size=100,
                  n_epoch=100,
                  callbacks=None,
                  val_f=None,
                  shuffle=True,
                  callback_metrics=None,
                  initial_epoch=0):
        """Abstract fit function for f.
        Assume that f returns a list, labeled by out_labels.
        
        # Arguments
            f: Backend function returning a list of tensors
            out_labels: list of strings, display names of
                the outputs of `f`
            batch_size: integer batch size
            n_epoch: number of times to iterate over the data
            callbacks: list of callbacks to be called during training
            val_f: Backend function to call for validation
            shuffle: whether to shuffle the data at the beginning of each epoch
            callback_metrics: list of strings, the display names of the metrics
                passed to the callbacks. They should be the concatenation of
                the list of display names of the outputs of `f` and the list
                of display names of the outputs of `val_f`.
            initial_epoch: epoch at which to start training
                (useful for resuming a previous training run)
        # Returns
            `History` object.
        """

        time_start = time.time()

        do_validation = False
        n_valid_sample = 0
        if val_f:
            do_validation = True
            n_valid_sample = B.eval(self.validation_data.shape[0])

        index_array = np.arange(self.n_train_sample, dtype='int32')

        self.history = cbks.History()
        # CSVLogger needs to be second to last callback
        # otherwise AIS results are not recorded 
        callbacks = callbacks or []
        index_csv = None
        for i, cb in enumerate(callbacks):
            if isinstance(cb, CSVLogger):
                index_csv = i
        if index_csv is not None:
            cb_csv = callbacks.pop(index_csv)
            callbacks.append(cb_csv)
        callbacks = [cbks.BaseLogger()] + callbacks + [self.history]
        callbacks = cbks.CallbackList(callbacks)
        out_labels = out_labels or []
        callbacks.set_model(self)
        callbacks.set_params({
                            'batch_size': batch_size,
                            'n_epoch': n_epoch,
                            'n_sample': self.n_train_sample,
                            'do_validation': do_validation,
                            'metrics': callback_metrics or [],
                            })

        callbacks.on_train_begin()

        self.stop_training = False

        for epoch in range(initial_epoch, n_epoch):
            callbacks.on_epoch_begin(epoch)

            if shuffle:
                np.random.shuffle(index_array)

            batches = make_batches(self.n_train_sample, batch_size, epoch)
            epoch_logs = {}
            for batch_index, (batch_start, batch_end) in enumerate(batches):
                batch_ids = index_array[batch_start:batch_end]
                batch_logs = {}
                batch_logs['batch'] = batch_index
                batch_logs['size'] = len(batch_ids)

                callbacks.on_batch_begin(batch_index, batch_logs)

                # actual training
                outs = f(batch_ids)
                if not isinstance(outs, list):
                    outs = [outs]
                for l, o in zip(out_labels, outs):
                    batch_logs[l] = o

                callbacks.on_batch_end(batch_index, batch_logs)

                if batch_index == len(batches) - 1:  # last batch
                    # validation
                    if do_validation:
                        val_outs = self._valid_loop(val_f, n_valid_sample,
                                                    batch_size=batch_size)
                        if not isinstance(val_outs, list):
                            val_outs = [val_outs]
                        # same labels assumed
                        for l, o in zip(out_labels, val_outs):
                            epoch_logs['val_' + l] = o

            callbacks.on_epoch_end(epoch, epoch_logs)

            if self.stop_training:
                break

        # Tracks the timing of everything except train_end
        # Skips train_end otherwise timing can't be included in summary callback
        fit_total_time = time.time() - time_start
        fit_callback_time = callbacks.cb_time
        self.history.fit_total_time = fit_total_time
        self.history.fit_callback_time = fit_callback_time
        self.history.fit_train_time = fit_total_time - fit_callback_time
        
        callbacks.on_train_end()

        return self.history
def test_fast():

    name = 'fast'
    datatype_ls = ['probability', 'sampled', 'threshold']

    # delete files if they exist
    filepath = os.path.dirname(os.path.abspath(__file__))
    folder = os.path.abspath(os.path.join(filepath, '..', '..', 'data'))
    for datatype in datatype_ls:
        filename = os.path.join(folder, name + '_' + datatype + '.npz')
        try:
            os.remove(filename)
        except OSError:
            pass

    train_samples = 1000
    other_samples = 100

    # this checks on creating and loading datasets
    for datatype in datatype_ls:
        data = fast.Fast(datatype)
        assert B.eval(data.train.data).shape[0] == train_samples
        assert B.eval(data.valid.data).shape[0] == other_samples
        assert B.eval(data.test.data).shape[0] == other_samples
        assert B.eval(data.train.lbl).shape[0] == train_samples
        assert B.eval(data.valid.lbl).shape[0] == other_samples
        assert B.eval(data.test.lbl).shape[0] == other_samples

    # this checks on loading existing
    for datatype in datatype_ls:
        data = fast.Fast(datatype)
        assert B.eval(data.train.data).shape[0] == train_samples
        assert B.eval(data.valid.data).shape[0] == other_samples
        assert B.eval(data.test.data).shape[0] == other_samples
        assert B.eval(data.train.lbl).shape[0] == train_samples
        assert B.eval(data.valid.lbl).shape[0] == other_samples
        assert B.eval(data.test.lbl).shape[0] == other_samples