def init_variables(self):
    self.variables.update(
        h_samples=theano.shared(
            name='algo:rbm/matrix:hidden-samples',
            value=asint(np.zeros((self.batch_size, self.n_hidden))),
        ),
    )
def test_asint(self):
    float2int_types = {
        'float32': 'int32',
        'float64': 'int64',
    }
    int_type = float2int_types[theano.config.floatX]

    # Sparse matrix
    sparse_matrix = csr_matrix((3, 4), dtype=np.int8)
    self.assertIs(sparse_matrix, asint(sparse_matrix))

    # Numpy array-like elements
    x = np.array([1, 2, 3], dtype=int_type)
    self.assertIs(x, asint(x))

    x = np.array([1, 2, 3], dtype=np.int8)
    self.assertIsNot(x, asint(x))

    # Python list
    x = [1, 2, 3]
    self.assertEqual(asint(x).shape, (3,))

    # Theano variables
    x = theano.tensor.fmatrix()
    self.assertNotEqual(x.dtype, int_type)
    self.assertEqual(asint(x).dtype, int_type)
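
# A hedged sketch of what `asint` appears to do, reconstructed only
# from the assertions in the test above (the real implementation may
# differ): leave sparse matrices untouched, cast NumPy arrays and
# Python lists to the int type that matches theano.config.floatX,
# and cast Theano variables to that same type.
import numpy as np
import theano
import theano.tensor as T
from scipy.sparse import issparse

def asint_sketch(value):
    int_type = {'float32': 'int32',
                'float64': 'int64'}[theano.config.floatX]
    if issparse(value):
        return value
    if isinstance(value, theano.Variable):
        return T.cast(value, int_type)
    return np.asarray(value, dtype=int_type)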
def categorical_hinge(expected, predicted, delta=1):
    """
    Computes the multi-class hinge loss between predictions
    and targets.

    .. math::
        L_i = \\max_{j \\neq t_i} \\max(0, p_{i, j} - p_{i, t_i} + \\delta)

    Parameters
    ----------
    expected : Theano 2D tensor or 1D tensor
        Either a vector of int giving the correct class index
        per data point or a 2D tensor of one-hot encoding of
        the correct class in the same layout as predictions
        (non-binary targets in [0, 1] do not work!).

    predicted : Theano 2D tensor
        Predictions in (0, 1), such as softmax output of
        a neural network, with data points in rows and class
        probabilities in columns.

    delta : scalar
        The hinge loss margin. Defaults to ``1``.

    Returns
    -------
    Theano scalar
        An expression for the average multi-class hinge loss.

    Notes
    -----
    This is an alternative to the categorical cross-entropy
    loss for multi-class classification problems.
    """
    n_classes = predicted.shape[1]

    if expected.ndim == (predicted.ndim - 1):
        expected = T.extra_ops.to_one_hot(asint(expected), n_classes)

    if expected.ndim != predicted.ndim:
        raise TypeError('Rank mismatch between expected and '
                        'predicted values')

    # Positions of the correct classes in the one-hot encoding
    correct_class_indices = expected.nonzero()
    # Positions of all the other (incorrect) classes
    other_class_indices = (1 - expected).nonzero()

    # For every sample, take the largest prediction among
    # the incorrect classes.
    new_shape = (-1, n_classes - 1)
    rest = T.reshape(predicted[other_class_indices], new_shape)
    rest = T.max(rest, axis=1)

    # Predictions assigned to the correct classes
    corrects = predicted[correct_class_indices]

    error = T.nnet.relu(rest - corrects + delta)
    return error.mean()
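
# A minimal usage sketch for categorical_hinge (the shapes and values
# below are my own illustration, not from the original code): evaluate
# the loss for a batch of three samples with integer class labels.
import numpy as np
import theano
import theano.tensor as T

expected = T.ivector('expected')
predicted = T.fmatrix('predicted')
hinge_loss = theano.function([expected, predicted],
                             categorical_hinge(expected, predicted))

labels = np.array([0, 2, 1], dtype='int32')
probabilities = np.array([
    [0.7, 0.2, 0.1],
    [0.1, 0.3, 0.6],
    [0.2, 0.5, 0.3],
], dtype='float32')
# Per-sample losses are 0.5, 0.7 and 0.8, so this prints roughly 0.667
print(hinge_loss(labels, probabilities))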
def output(self, Q, input_state_1, input_state_2):
    # The number of samples depends on the state batch size.
    # On each iteration we can try to predict the direction
    # from multiple different starting points at the same time.
    n_states = input_state_1.shape[1]

    # Output is a matrix that has n_samples * n_states rows
    # and n_filters (which is Q.shape[1]) columns.
    return Q[
        # The number of repetitions depends on the size
        # of the state batch
        T.extra_ops.repeat(T.arange(Q.shape[0]), n_states),
        # Extract all channels
        :,
        # Each state is a coordinate (x and y) that points
        # to some place on the grid.
        asint(input_state_1.flatten()),
        asint(input_state_2.flatten()),
    ]
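
# A small NumPy sketch (my own illustration, not from the original
# code) of the advanced-indexing pattern used above: repeat the sample
# axis once per state and pick one (x, y) cell per state from a 4D
# tensor, keeping all channels.
import numpy as np

n_samples, n_filters, height, width = 2, 3, 4, 4
Q = np.arange(n_samples * n_filters * height * width)
Q = Q.reshape(n_samples, n_filters, height, width)

# Two states per sample, each one an (x, y) coordinate on the grid
x_coords = np.array([[0, 1], [2, 3]])  # shape: (n_samples, n_states)
y_coords = np.array([[3, 2], [1, 0]])

n_states = x_coords.shape[1]
rows = np.repeat(np.arange(n_samples), n_states)

output = Q[rows, :, x_coords.flatten(), y_coords.flatten()]
print(output.shape)  # (n_samples * n_states, n_filters) == (4, 3)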
def init_methods(self):
    def free_energy(visible_sample):
        wx_b = T.dot(visible_sample, self.weight) + self.hidden_bias
        visible_bias_term = T.dot(visible_sample, self.visible_bias)
        hidden_term = T.log(asfloat(1) + T.exp(wx_b)).sum(axis=1)
        return -visible_bias_term - hidden_term

    def visible_to_hidden(visible_sample):
        wx_b = T.dot(visible_sample, self.weight) + self.hidden_bias
        return T.nnet.sigmoid(wx_b)

    def hidden_to_visible(hidden_sample):
        wx_b = T.dot(hidden_sample, self.weight.T) + self.visible_bias
        return T.nnet.sigmoid(wx_b)

    def sample_hidden_from_visible(visible_sample):
        theano_random = self.theano_random
        hidden_prob = visible_to_hidden(visible_sample)
        hidden_sample = theano_random.binomial(
            n=1, p=hidden_prob, dtype=theano.config.floatX)
        return hidden_sample

    def sample_visible_from_hidden(hidden_sample):
        theano_random = self.theano_random
        visible_prob = hidden_to_visible(hidden_sample)
        visible_sample = theano_random.binomial(
            n=1, p=visible_prob, dtype=theano.config.floatX)
        return visible_sample

    network_input = self.variables.network_input
    n_samples = asfloat(network_input.shape[0])
    theano_random = self.theano_random

    weight = self.weight
    h_bias = self.hidden_bias
    v_bias = self.visible_bias
    h_samples = self.variables.h_samples
    step = asfloat(self.step)

    sample_indices = theano_random.random_integers(
        low=0, high=n_samples - 1,
        size=(self.batch_size,))

    v_pos = ifelse(
        T.eq(n_samples, self.batch_size),
        network_input,
        # In case the final batch has fewer samples
        # than expected
        network_input[sample_indices])
    h_pos = visible_to_hidden(v_pos)

    v_neg = sample_visible_from_hidden(h_samples)
    h_neg = visible_to_hidden(v_neg)

    weight_update = v_pos.T.dot(h_pos) - v_neg.T.dot(h_neg)
    h_bias_update = (h_pos - h_neg).mean(axis=0)
    v_bias_update = (v_pos - v_neg).mean(axis=0)

    # Stochastic pseudo-likelihood: round the input to make it
    # binary, then flip one randomly chosen visible feature.
    feature_index_to_flip = theano_random.random_integers(
        low=0,
        high=self.n_visible - 1,
    )
    rounded_input = T.round(network_input)
    rounded_input_flip = T.set_subtensor(
        rounded_input[:, feature_index_to_flip],
        1 - rounded_input[:, feature_index_to_flip])

    error = T.mean(self.n_visible * T.log(
        T.nnet.sigmoid(
            free_energy(rounded_input_flip)
            - free_energy(rounded_input))))

    self.methods.update(
        train_epoch=theano.function(
            [network_input],
            error,
            name='algo:rbm/func:train-epoch',
            updates=[
                (weight, weight + step * weight_update / n_samples),
                (h_bias, h_bias + step * h_bias_update),
                (v_bias, v_bias + step * v_bias_update),
                (h_samples, asint(theano_random.binomial(n=1, p=h_neg))),
            ]),
        prediction_error=theano.function(
            [network_input],
            error,
            name='algo:rbm/func:prediction-error',
        ),
        visible_to_hidden=theano.function(
            [network_input],
            visible_to_hidden(network_input),
            name='algo:rbm/func:visible-to-hidden',
        ),
        hidden_to_visible=theano.function(
            [network_input],
            hidden_to_visible(network_input),
            name='algo:rbm/func:hidden-to-visible',
        ),
        gibbs_sampling=theano.function(
            [network_input],
            sample_visible_from_hidden(
                sample_hidden_from_visible(network_input)),
            name='algo:rbm/func:gibbs-sampling',
        ))
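
# A NumPy re-statement (my own sketch, with hypothetical parameter
# values) of the free energy used above,
#     F(v) = -v.b_v - sum_j log(1 + exp((v.W + b_h)_j)),
# which the pseudo-likelihood error compares before and after flipping
# one randomly chosen visible feature.
import numpy as np

def numpy_free_energy(v, weight, visible_bias, hidden_bias):
    wx_b = v.dot(weight) + hidden_bias
    return -v.dot(visible_bias) - np.log1p(np.exp(wx_b)).sum(axis=1)

v = np.random.binomial(n=1, p=0.5, size=(4, 6)).astype('float64')
weight = np.random.randn(6, 3)
print(numpy_free_energy(v, weight, np.zeros(6), np.zeros(3)).shape)  # (4,)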
def output(self, input_value):
    return self.weight[asint(input_value)]
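
# This indexing acts like an embedding lookup: each integer in
# `input_value` selects one row of the weight matrix. A NumPy sketch
# with hypothetical values:
import numpy as np

weight = np.random.randn(5, 3)        # 5 embeddings of dimension 3
input_value = np.array([0, 2, 2, 4])  # indices into the weight rows
print(weight[input_value].shape)      # (4, 3)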
def loss_function(expected, predicted):
    # Clip predictions to avoid log(0)
    epsilon = 1e-7
    log_predicted = T.log(T.clip(predicted, epsilon, 1.0 - epsilon))
    # Pick the log-probability of the correct class for every sample
    errors = log_predicted[
        T.arange(expected.size),
        asint(expected.flatten()),
    ]
    return -T.mean(errors)
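
# A minimal usage sketch (hypothetical values, not from the original
# code): this is the categorical cross-entropy with integer targets.
import numpy as np
import theano
import theano.tensor as T

expected = T.ivector('expected')
predicted = T.fmatrix('predicted')
cross_entropy = theano.function([expected, predicted],
                                loss_function(expected, predicted))

labels = np.array([0, 1], dtype='int32')
probabilities = np.array([[0.9, 0.1],
                          [0.2, 0.8]], dtype='float32')
print(cross_entropy(labels, probabilities))  # -(log(0.9) + log(0.8)) / 2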
def init_variables(self):
    self.init_layers()
    self.variables.update(
        h_samples=theano.shared(
            name='h_samples',
            value=asint(np.zeros((self.batch_size, self.n_hidden))),
        ),
    )