def __init__(self, nnet, nb_pos_steps=25, nb_neg_steps=5,
             init_chain=None, batch_size=None):
    """PCD Trainer. Need to provide either init_chain or batch_size.

    # Arguments:
        nnet: DBM object
        nb_pos_steps: int, optional. Number of updates in the positive,
            or data, phase.
        nb_neg_steps: int, optional. Number of updates in the negative,
            or model, phase.
        init_chain: list of tensors, optional. Initial starting point of
            the model's persistent chains.
        batch_size: int, optional. Needed to generate the appropriate
            number of persistent chains.
    """
    super().__init__(nnet, nb_pos_steps, nb_neg_steps)

    assert (init_chain is not None) or (batch_size is not None)
    if init_chain is None:
        init_chain = []
        for size in self.nnet.layer_size_list:
            sample = B.eval(
                B.random_binomial(shape=(batch_size, size), p=0.5))
            init_chain.append(B.variable(sample))
    else:
        init_chain = [B.variable(ic) for ic in init_chain]
        batch_size = B.eval(init_chain[0].shape)[0]
        for ic, size in zip(init_chain, self.nnet.layer_size_list):
            assert B.eval(ic.shape)[0] == batch_size
            assert B.eval(ic.shape)[1] == size

    self.persist_chain = init_chain
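# Hypothetical usage sketch (not part of the library): constructing the PCD
# trainer above either from a batch size or from an explicit set of chains.
# The class names `DBM` and `PCD` and the `topology_dict` layout are
# placeholders inferred from the rest of this code base; adjust them to the
# real package layout before running.
def _example_pcd_usage():
    dbm = DBM(layer_size_list=[784, 500, 250],
              topology_dict={0: [1], 1: [2]})  # assumed constructor layout

    # Option 1: let the trainer build its own persistent chains
    trainer = PCD(dbm, nb_pos_steps=25, nb_neg_steps=5, batch_size=100)

    # Option 2: provide an explicit starting point, one array per layer
    init_chain = [np.random.binomial(1, 0.5, size=(100, size)).astype(B.floatx())
                  for size in dbm.layer_size_list]
    trainer = PCD(dbm, init_chain=init_chain)
    return trainer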
def _test_optimizer(optimizer):
    mbs = 10
    dataset = random.Random('probability')
    data = B.eval(dataset.train.data[0:mbs])
    pixels = data.shape[1]

    W0 = B.variable(np.random.normal(size=(pixels, )),
                    dtype=B.floatx(), name='W0')
    W1 = B.variable(np.random.normal(size=(pixels, )),
                    dtype=B.floatx(), name='W1')
    params = [W0, W1]
    inputs = B.placeholder((mbs, pixels), dtype=B.floatx())
    loss = B.sum(B.dot(inputs, B.square(W0) + B.square(W1)))

    updates = optimizer.get_updates(params, loss)
    f = B.function([inputs], [loss], updates=updates)

    output = f(data)
    assert len(output) == 1
    assert output[0].size == 1
def predict_on_batch(self, x):
    """Runs the model's test function on a single batch of data.

    # Arguments
        x: Theano shared array (or backend tensor) of data; the whole
            array is treated as one batch.

    # Returns
        The outputs of the test function for this batch, e.g. the cost.
    """
    # makes the generic indices to access data
    batch_size = B.eval(x.shape)[0]
    self.test_index = B.placeholder(shape=(batch_size,), dtype=B.intx(),
                                    name='test_index')
    self.test_data = x
    index = np.arange(batch_size)

    self._make_test_function()
    outputs = self.test_function(index)

    return outputs
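# Hypothetical usage sketch (assumptions: `model` is an instance of the class
# that defines predict_on_batch above, and the dataset exposes shared arrays
# the same way `fit` expects them; names are placeholders).
def _example_predict_on_batch(model, dataset):
    # evaluates the test function on the whole shared test array in one batch
    outputs = model.predict_on_batch(dataset.test.data)
    return outputs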
def run_logZ(self):
    """Performs the calculations of the AIS runs.

    Must be called before the estimates.
    """
    # initial sample
    sample_ls = self.init_sample_ls

    # this is the initial beta=0 case
    log_ais_w = B.eval(self.dbm_a.free_energy_sumover_even(sample_ls, 1.0))
    log_ais_w = B.variable(log_ais_w, name='log_ais_w')

    index = B.variable(1, name='index', dtype=B.intx())

    scan_out, updates = B.scan(self._update,
                               outputs_info=[log_ais_w, index] + sample_ls,
                               n_steps=self.n_betas - 2,
                               name='scan_ais')

    log_ais_w = scan_out[0][-1]
    sample_ls = [s[-1] for s in scan_out[2:]]

    # this is the final beta=1 case
    log_ais_w -= self.dbm_b.free_energy_sumover_even(sample_ls, 1.0)

    logZ_fn = B.function([], [log_ais_w], updates=updates)

    self.logZ = self.logZa + logZ_fn()
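# Math note (standard AIS bookkeeping, see reference 1 in the AIS __init__;
# the exact split of terms inside self._update is an assumption). With
# intermediate distributions p_k(x) \propto p_A(x)^{1-\beta_k} p_B(x)^{\beta_k}
# and unnormalized log-probabilities written as negative free energies, each
# run accumulates the telescoping sum
#
#   \log w = \sum_{k=1}^{K-1} [ \log p^*_{k}(x_{k-1}) - \log p^*_{k-1}(x_{k-1}) ]
#          = F_A(x_0) + (\text{terms for } 0 < \beta < 1) - F_B(x_{K-1}),
#
# which is why run_logZ seeds log_ais_w with the beta=0 free energy of dbm_a,
# lets the scan over self._update accumulate the intermediate betas, and
# subtracts the beta=1 free energy of dbm_b outside the scan. self.logZ then
# stores log Z_A (known exactly for the base rate model) plus the per-run
# log importance weights.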
def fit(self,
        x,
        batch_size=100,
        n_epoch=10,
        callbacks=None,
        validation_data=None,
        shuffle=True,
        initial_epoch=0):
    """Trains the model for a fixed number of epochs (iterations on a
    dataset).

    # Arguments
        x: Theano shared array of training data.
        batch_size: integer. Number of samples per gradient update.
        n_epoch: integer, the number of times to iterate over the training
            data arrays.
        callbacks: list of callbacks to be called during training.
        validation_data: Theano shared array of data on which to evaluate
            the loss and any model metrics at the end of each epoch.
            The model will not be trained on this data.
        shuffle: boolean, whether to shuffle the training data before each
            epoch.
        initial_epoch: epoch at which to start training (useful for
            resuming a previous training run).

    # Returns
        A `History` instance. Its `history` attribute contains all
        information collected during training.
    """
    self.train_data = x
    self.n_train_sample = B.eval(x.shape[0])
    self.validation_data = validation_data

    # makes the generic indices to access data
    self.train_index = B.placeholder(shape=(batch_size,), dtype=B.intx(),
                                     name='train_index')

    # makes the training functions
    self._make_train_function()
    f = self.train_function

    # preps for validation
    out_labels = ['cost']
    if validation_data:
        self.valid_index = B.placeholder(shape=(batch_size,),
                                         dtype=B.intx(),
                                         name='valid_index')
        callback_metrics = copy.copy(out_labels) + ['val_' + n
                                                    for n in out_labels]
        self._make_validation_function()
        val_f = self.validation_function
    else:
        callback_metrics = copy.copy(out_labels)
        val_f = None

    # delegate logic to _fit_loop
    return self._fit_loop(f, out_labels=out_labels,
                          batch_size=batch_size, n_epoch=n_epoch,
                          callbacks=callbacks, val_f=val_f,
                          shuffle=shuffle,
                          callback_metrics=callback_metrics,
                          initial_epoch=initial_epoch)
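# Hypothetical usage sketch (names are placeholders): training with the fit
# method above on one of the shared-array datasets used elsewhere in this
# code base. Assumes `model` already wraps a DBM, trainer, and optimizer, and
# that CSVLogger takes a filename argument (Keras-style); adjust as needed.
def _example_fit(model, dataset):
    history = model.fit(dataset.train.data,
                        batch_size=100,
                        n_epoch=10,
                        callbacks=[CSVLogger('training_log.csv')],
                        validation_data=dataset.valid.data,
                        shuffle=True)
    # per-epoch costs (and val_cost, if validation data was given)
    return history.history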
def __init__(self, dbm, data, n_runs, n_betas=None, betas=None):
    """Initializes an object to perform AIS.

    Need to provide exactly one of n_betas or betas.

    # Arguments:
        dbm: DBM object
        data: numpy array, needed for the data base rate model
        n_runs: int, number of parallel AIS estimates to run
        n_betas: int, optional. Will create evenly spaced betas.
        betas: numpy array, optional. Betas for the intermediate
            distributions.

    # References:
        1. On the quantitative analysis of deep belief networks by
           R. Salakhutdinov and I. Murray. ICML 2008.
        2. Deep Boltzmann machines by R. Salakhutdinov and G. Hinton.
           AISTATS 2009.
    """
    self.dbm_b = dbm
    self.n_runs = n_runs
    if (n_betas is not None) and (betas is None):
        self.n_betas = n_betas
        betas = np.linspace(0, 1, n_betas)
    elif (n_betas is None) and (betas is not None):
        self.n_betas = betas.shape[0]
    else:
        raise ValueError(
            'Need to provide exactly one of n_betas or betas')
    self.betas = B.variable(betas, name='betas')

    # this is the data base rate model of reference 1
    # The AIS estimate is very sensitive to this base rate model, so the
    # standard practice in the literature is to use this setup
    vis_mean = np.clip(np.mean(data, axis=0), B.epsilon(), 1 - B.epsilon())
    p_ruslan = (vis_mean + 0.05) / 1.05
    b0_a = np.log(p_ruslan / (1 - p_ruslan)).astype(B.floatx())
    self.dbm_a = DBM(layer_size_list=self.dbm_b.layer_size_list,
                     topology_dict=self.dbm_b.topology_dict)
    B.set_value(self.dbm_a.layers[0].b, b0_a)

    # make the initial sample
    # visible layer depends on the data base rate bias
    p0 = np.tile(1. / (1 + np.exp(-b0_a)), (n_runs, 1))
    s0 = np.array(p0 > np.random.random_sample(p0.shape), dtype=B.floatx())
    sample_ls = [s0]
    # the rest of the layers are uniform samples
    for n in self.dbm_b.layer_size_list[1:]:
        s = B.random_binomial((self.n_runs, n), p=0.5)
        sample_ls.append(B.variable(B.eval(s)))
    self.init_sample_ls = sample_ls

    # this is the exact partition function of the base rate model
    self.logZa = np.sum(self.dbm_b.layer_size_list[1:]) * np.log(2)
    self.logZa += np.sum(np.log(1 + np.exp(b0_a)))

    # This is the sampler for the final model
    self.dbm_b_sampler = Sampler(self.dbm_b)
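# Hypothetical usage sketch (the class name `AIS` is a placeholder for the
# class whose __init__ appears above; the n_runs and n_betas values are only
# illustrative): estimating log Z for a trained DBM from the training data
# base rates.
def _example_ais(dbm, dataset):
    data = B.eval(dataset.train.data)   # numpy array for the base rate model
    ais = AIS(dbm=dbm, data=data, n_runs=100, n_betas=15000)
    ais.run_logZ()                       # must be called before the estimates
    return ais.logZ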
def test_free_energy(nnet_type, beta, propup, fe_type):
    nnet, x = _dbm_prep(nnet_type)

    if propup:
        input_ls = nnet.propup(x, beta=beta)
    else:
        input_ls = x

    if fe_type == 'fe':
        fe = nnet.free_energy(input_ls, beta=beta)
    elif fe_type == 'fe_odd':
        fe = nnet.free_energy_sumover_odd(input_ls, beta=beta)
    elif fe_type == 'fe_even':
        fe = nnet.free_energy_sumover_even(input_ls, beta=beta)

    # free energy is one scalar per sample
    assert B.eval(fe).shape[0] == B.eval(x.shape)[0]
def test_init_standard(nnet_type):
    nnet = nnet_for_testing(nnet_type)
    dataset = random.Random('probability')

    b_ls = []
    for layer in nnet.layers:
        b_ls.append(B.eval(layer.b.shape))
    W_ls = []
    for synapse in nnet.synapses:
        W_ls.append(B.eval(synapse.W.shape))

    nnet = initializers.init_standard(nnet, dataset)

    for size, layer in zip(b_ls, nnet.layers):
        assert size == B.eval(layer.b.shape)
    for size, synapse in zip(W_ls, nnet.synapses):
        assert_allclose(size, B.eval(synapse.W.shape))
def init_standard(nnet, dataset):
    """My standard initialization for a neural network."""
    # use orthogonal initialization for all the W's
    for synapse in nnet.synapses:
        W_start = synapse.W
        shape = B.eval(W_start.shape)
        W_final = orthogonal(shape)
        B.set_value(W_start, W_final)

    # set the visible bias based on the data
    # Reference: A Practical Guide to Training Restricted Boltzmann
    # Machines by Geoffrey Hinton
    pixel_mean = B.mean(dataset.train.data, axis=0)
    p = B.clip(pixel_mean, B.epsilon(), 1 - B.epsilon())
    b_start = nnet.layers[0].b
    b_final = B.eval(B.log(p / (1 - p)))
    B.set_value(b_start, b_final)

    return nnet
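# Worked example of the visible-bias formula above, b_i = log(p_i / (1 - p_i)):
# a pixel that is on 90% of the time in the training data gets
# b_i = log(0.9 / 0.1) ≈ 2.197, while a pixel that is almost always off is
# first clipped to B.epsilon() so the log stays finite.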
def test_base_Sampler(nnet_type, constant, sampler_type):
    beta = 1.0
    nnet = nnet_for_testing(nnet_type)
    batch_size = 30

    constant_ls = []
    if constant is not None:
        constant_ls = [constant]

    sampler = samplers.Sampler(nnet)
    input_ls = [
        B.variable(np.ones((batch_size, size)))
        for size in nnet.layer_size_list
    ]

    sampler.set_param(beta=beta, constant=constant_ls)
    if sampler_type == 'probability':
        prob_ls = sampler.probability(*input_ls)
    elif sampler_type == 'sample':
        prob_ls = sampler.sample(*input_ls)
    elif sampler_type == 'sample_inputs':
        prob_ls = sampler.sample_inputs(*input_ls)
    else:
        raise NotImplementedError

    assert len(prob_ls) == len(input_ls)
    for i, p in enumerate(prob_ls):
        if i in constant_ls:
            assert p == input_ls[i]
        else:
            m = np.ones((batch_size, nnet.layer_size_list[i]))
            pp = B.eval(p)
            if sampler_type == 'sample':
                assert_allclose(pp + np.logical_not(pp), m)
            else:
                assert_allclose(pp, 0.5 * m)
def __init__(self, name, init_b, up_dict=None, down_dict=None,
             activation=B.sigmoid, regularizer=None):
    """Creates the layer of neurons.

    # Arguments:
        name: str
        init_b: numpy array, initial bias of the neurons
        up_dict: dict, contains the weights that connect lower numbered
            layers to this layer
        down_dict: dict, contains the weights that connect higher numbered
            layers to this layer
        activation: function, maps internal neuron activity to final
            activity
        regularizer: Regularizer object
    """
    super().__init__(name)
    self.activation = activation
    self.dim = init_b.shape[0]
    self.b = B.variable(init_b, name=self.name + '_b')
    self.trainable_weights = self.b
    self.regularizer = regularizer
    if regularizer is not None:
        self.losses = self.regularizer(self.b)

    up_dict = dict_2_OrderedDict(up_dict)
    down_dict = dict_2_OrderedDict(down_dict)

    self.index_up = list(up_dict.keys())
    self.index_down = list(down_dict.keys())

    self.W_up = list(up_dict.values())
    W_down = list(down_dict.values())
    self.W_down = [W.T for W in W_down]

    # check that the weight shapes are sensible
    W_all = self.W_up + self.W_down
    for W in W_all:
        W_shape = B.eval(W).shape
        msg = 'Dimension mismatch. Expected {} but {} has shape {}'.format(
            self.dim, W.name, W_shape)
        assert W_shape[1] == self.dim, msg

    # assign layer properties
    n_up = len(up_dict)
    n_down = len(down_dict)
    if n_up > 0 and n_down > 0:
        self.direction = 'both'
        # need to compensate for reduced inputs when only going in a
        # single direction
        # Reference: Deep Boltzmann machines by R. Salakhutdinov and
        # G. Hinton. AISTATS 2009.
        self._z_up_adj = (n_up + n_down) / n_up
        self._z_down_adj = (n_up + n_down) / n_down
    elif n_up > 0 and n_down == 0:
        self.direction = 'up'
    elif n_up == 0 and n_down > 0:
        self.direction = 'down'
    else:
        raise ValueError('Both up_dict and down_dict cannot be empty.')
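# Note on the _z_up_adj / _z_down_adj factors above (see the Salakhutdinov &
# Hinton DBM reference; the exact place these factors are applied is in the
# layer's input code, not shown here): when a layer with connections in both
# directions is driven by only its bottom-up (or only its top-down) input, it
# sees a smaller total input than during a full update. Scaling that input by
# (n_up + n_down) / n_up (or / n_down) compensates. For the common case of one
# connection on each side, both factors equal 2, i.e. the familiar
# "double the weights" trick used when initializing a DBM layer by layer.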
def _fit_loop(self, f, out_labels=None, batch_size=100,
              n_epoch=100, callbacks=None, val_f=None,
              shuffle=True, callback_metrics=None,
              initial_epoch=0):
    """Abstract fit function for f.

    Assumes that f returns a list, labeled by out_labels.

    # Arguments
        f: Backend function returning a list of tensors
        out_labels: list of strings, display names of the outputs of `f`
        batch_size: integer batch size
        n_epoch: number of times to iterate over the data
        callbacks: list of callbacks to be called during training
        val_f: Backend function to call for validation
        shuffle: whether to shuffle the data at the beginning of each epoch
        callback_metrics: list of strings, the display names of the metrics
            passed to the callbacks. They should be the concatenation of the
            display names of the outputs of `f` and the display names of the
            outputs of `val_f`.
        initial_epoch: epoch at which to start training (useful for
            resuming a previous training run)

    # Returns
        `History` object.
    """
    time_start = time.time()
    do_validation = False
    n_valid_sample = 0
    if val_f:
        do_validation = True
        n_valid_sample = B.eval(self.validation_data.shape[0])

    index_array = np.arange(self.n_train_sample, dtype='int32')

    self.history = cbks.History()
    # CSVLogger needs to be the second to last callback,
    # otherwise AIS results are not recorded
    callbacks = callbacks or []
    index_csv = None
    for i, cb in enumerate(callbacks):
        if isinstance(cb, CSVLogger):
            index_csv = i
    if index_csv is not None:
        cb_csv = callbacks.pop(index_csv)
        callbacks.append(cb_csv)
    callbacks = [cbks.BaseLogger()] + callbacks + [self.history]
    callbacks = cbks.CallbackList(callbacks)
    out_labels = out_labels or []

    callbacks.set_model(self)
    callbacks.set_params({
        'batch_size': batch_size,
        'n_epoch': n_epoch,
        'n_sample': self.n_train_sample,
        'do_validation': do_validation,
        'metrics': callback_metrics or [],
    })
    callbacks.on_train_begin()
    self.stop_training = False

    for epoch in range(initial_epoch, n_epoch):
        callbacks.on_epoch_begin(epoch)
        if shuffle:
            np.random.shuffle(index_array)

        batches = make_batches(self.n_train_sample, batch_size, epoch)
        epoch_logs = {}
        for batch_index, (batch_start, batch_end) in enumerate(batches):
            batch_ids = index_array[batch_start:batch_end]
            batch_logs = {}
            batch_logs['batch'] = batch_index
            batch_logs['size'] = len(batch_ids)
            callbacks.on_batch_begin(batch_index, batch_logs)

            # actual training
            outs = f(batch_ids)
            if not isinstance(outs, list):
                outs = [outs]
            for l, o in zip(out_labels, outs):
                batch_logs[l] = o

            callbacks.on_batch_end(batch_index, batch_logs)

            if batch_index == len(batches) - 1:  # last batch
                # validation
                if do_validation:
                    val_outs = self._valid_loop(val_f, n_valid_sample,
                                                batch_size=batch_size)
                    if not isinstance(val_outs, list):
                        val_outs = [val_outs]
                    # same labels assumed
                    for l, o in zip(out_labels, val_outs):
                        epoch_logs['val_' + l] = o

        callbacks.on_epoch_end(epoch, epoch_logs)
        if self.stop_training:
            break

    # Tracks the timing of everything except train_end.
    # Skips train_end, otherwise the timing can't be included in the
    # summary callback.
    fit_total_time = time.time() - time_start
    fit_callback_time = callbacks.cb_time
    self.history.fit_total_time = fit_total_time
    self.history.fit_callback_time = fit_callback_time
    self.history.fit_train_time = fit_total_time - fit_callback_time

    callbacks.on_train_end()
    return self.history
def test_fast():
    name = 'fast'
    datatype_ls = ['probability', 'sampled', 'threshold']

    # delete the files if they exist
    filepath = os.path.dirname(os.path.abspath(__file__))
    folder = os.path.abspath(os.path.join(filepath, '..', '..', 'data'))
    for datatype in datatype_ls:
        filename = os.path.join(folder, name + '_' + datatype + '.npz')
        try:
            os.remove(filename)
        except OSError:
            pass

    train_samples = 1000
    other_samples = 100

    # this checks creating and loading the datasets
    for datatype in datatype_ls:
        data = fast.Fast(datatype)
        assert B.eval(data.train.data).shape[0] == train_samples
        assert B.eval(data.valid.data).shape[0] == other_samples
        assert B.eval(data.test.data).shape[0] == other_samples
        assert B.eval(data.train.lbl).shape[0] == train_samples
        assert B.eval(data.valid.lbl).shape[0] == other_samples
        assert B.eval(data.test.lbl).shape[0] == other_samples

    # this checks loading the existing datasets
    for datatype in datatype_ls:
        data = fast.Fast(datatype)
        assert B.eval(data.train.data).shape[0] == train_samples
        assert B.eval(data.valid.data).shape[0] == other_samples
        assert B.eval(data.test.data).shape[0] == other_samples
        assert B.eval(data.train.lbl).shape[0] == train_samples
        assert B.eval(data.valid.lbl).shape[0] == other_samples
        assert B.eval(data.test.lbl).shape[0] == other_samples