def expected_energy(self, V_hat, H_hat):
    """
    Expected energy of the model under the mean field distribution
    defined by V_hat and H_hat.

    Alternatively, this can be the expectation of the energy function
    across a batch of examples, where every element of V_hat and H_hat
    is a binary observation.
    """

    V_name = make_name(V_hat, 'anon_V_hat')

    assert isinstance(H_hat, (list, tuple))

    H_names = []
    for i in xrange(len(H_hat)):
        H_names.append(make_name(H_hat[i], 'anon_H_hat[%d]' % (i,)))

    m = V_hat.shape[0]
    m.name = V_name + '.shape[0]'

    assert len(H_hat) == len(self.rbms)

    v = T.mean(V_hat, axis=0)

    v_bias_contrib = T.dot(v, self.bias_vis)

    exp_vh = T.dot(V_hat.T, H_hat[0]) / m

    v_weights_contrib = T.sum(self.W[0] * exp_vh)
    v_weights_contrib.name = 'v_weights_contrib(' + V_name + ',' + H_names[0] + ')'

    total = v_bias_contrib + v_weights_contrib

    for i in xrange(len(H_hat) - 1):
        lower_H = H_hat[i]
        low = T.mean(lower_H, axis=0)
        higher_H = H_hat[i + 1]

        exp_lh = T.dot(lower_H.T, higher_H) / m

        lower_bias = self.bias_hid[i]
        W = self.W[i + 1]

        lower_bias_contrib = T.dot(low, lower_bias)

        # exp_lh already carries the 1/m factor, so the weight term must not
        # be divided by m a second time
        weights_contrib = T.sum(W * exp_lh)

        total = total + lower_bias_contrib + weights_contrib

    highest_bias_contrib = T.dot(T.mean(H_hat[-1], axis=0), self.bias_hid[-1])

    total = total + highest_bias_contrib

    assert len(total.type.broadcastable) == 0

    rval = - total
    #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

    return rval
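For orientation, a sketch of the scalar this method builds (informal notation, not taken from the source): writing $m$ for the batch size, $\bar{v}$, $\bar{h}^{(i)}$ for batch means of the mean-field parameters, and $L$ for the number of hidden layers,

$$
T = b_v^{\top}\bar{v}
  + \frac{1}{m}\sum_{n}\hat{v}_n^{\top} W^{(1)}\hat{h}^{(1)}_{n}
  + \sum_{i=1}^{L-1}\Big[b^{(i)\top}\bar{h}^{(i)}
  + \frac{1}{m}\sum_{n}\hat{h}^{(i)\top}_{n} W^{(i+1)}\hat{h}^{(i+1)}_{n}\Big]
  + b^{(L)\top}\bar{h}^{(L)},
$$

and the method returns $-T$, the expected energy under the factorial mean field distribution.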
def __call__(self, X, Y=None, X_space=None):
    """
    .. todo::

        WRITEME

    Note that calling this repeatedly will yield the same random
    numbers each time.
    """
    assert X_space is not None
    self.called = True
    assert X.dtype == config.floatX
    if not hasattr(self, 'seed'):
        self.seed = default_seed
    theano_rng = RandomStreams(self.seed)
    if X.ndim == 2 and self.sync_channels:
        raise NotImplementedError()

    p = self.drop_prob

    if not hasattr(self, 'drop_prob_y') or self.drop_prob_y is None:
        yp = p
    else:
        yp = self.drop_prob_y

    batch_size = X_space.batch_size(X)

    if self.balance:
        flip = theano_rng.binomial(size=(batch_size,), p=0.5, n=1,
                                   dtype=X.dtype)

        yp = flip * (1 - p) + (1 - flip) * p

        dimshuffle_args = ['x'] * X.ndim

        if X.ndim == 2:
            dimshuffle_args[0] = 0
            assert not self.sync_channels
        else:
            dimshuffle_args[X_space.axes.index('b')] = 0
            if self.sync_channels:
                del dimshuffle_args[X_space.axes.index('c')]

        flip = flip.dimshuffle(*dimshuffle_args)

        p = flip * (1 - p) + (1 - flip) * p

    #size needs to have a fixed length at compile time or the
    #theano random number generator will be angry
    size = [X.shape[i] for i in xrange(X.ndim)]
    if self.sync_channels:
        # entries can only be removed from a list, so drop the channel
        # axis before converting to the fixed-length tuple
        del size[X_space.axes.index('c')]
    size = tuple(size)

    drop_mask = theano_rng.binomial(size=size, p=p, n=1, dtype=X.dtype)

    X_name = make_name(X, 'anon_X')
    drop_mask.name = 'drop_mask(%s)' % X_name

    if Y is not None:
        assert isinstance(yp, float) or yp.ndim < 2
        drop_mask_Y = theano_rng.binomial(size=(batch_size,), p=yp, n=1,
                                          dtype=X.dtype)
        assert drop_mask_Y.ndim == 1
        Y_name = make_name(Y, 'anon_Y')
        drop_mask_Y.name = 'drop_mask_Y(%s)' % Y_name
        #drop_mask = Print('drop_mask',attrs=['sum'])(drop_mask)
        #drop_mask_Y = Print('drop_mask_Y',attrs=['sum'])(drop_mask_Y)
        return drop_mask, drop_mask_Y

    return drop_mask
def expected_energy_batch(self, V_hat, H_hat, no_v_bias=False):
    """
    Expected energy of the model under the mean field distribution
    defined by V_hat and H_hat.

    Alternatively, this can be the expectation of the energy function
    across a batch of examples, where every element of V_hat and H_hat
    is a binary observation.

    If no_v_bias is True, ignores the contribution from biases on
    visible units.
    """

    warnings.warn("TODO: write unit test verifying expected_energy_batch/m = expected_energy")

    V_name = make_name(V_hat, 'anon_V_hat')

    assert isinstance(H_hat, (list, tuple))

    H_names = []
    for i in xrange(len(H_hat)):
        H_names.append(make_name(H_hat[i], 'anon_H_hat[%d]' % (i,)))

    assert len(H_hat) == len(self.rbms)

    if no_v_bias:
        v_bias_contrib = 0.
    else:
        v_bias_contrib = T.dot(V_hat, self.bias_vis)

    assert len(V_hat.type.broadcastable) == 2
    assert len(self.W[0].type.broadcastable) == 2
    assert len(H_hat[0].type.broadcastable) == 2

    interm1 = T.dot(V_hat, self.W[0])
    assert len(interm1.type.broadcastable) == 2
    interm2 = interm1 * H_hat[0]
    assert len(interm2.type.broadcastable) == 2

    v_weights_contrib = interm2.sum(axis=1)
    v_weights_contrib.name = 'v_weights_contrib(' + V_name + ',' + H_names[0] + ')'

    assert len(v_weights_contrib.type.broadcastable) == 1

    total = v_bias_contrib + v_weights_contrib

    for i in xrange(len(H_hat) - 1):
        lower_H = H_hat[i]
        higher_H = H_hat[i + 1]
        #exp_lh = T.dot(lower_H.T, higher_H) / m
        lower_bias = self.bias_hid[i]
        W = self.W[i + 1]

        lower_bias_contrib = T.dot(lower_H, lower_bias)

        #weights_contrib = T.sum( W * exp_lh) / m
        weights_contrib = (T.dot(lower_H, W) * higher_H).sum(axis=1)

        cur_contrib = lower_bias_contrib + weights_contrib
        assert len(cur_contrib.type.broadcastable) == 1
        total = total + cur_contrib

    highest_bias_contrib = T.dot(H_hat[-1], self.bias_hid[-1])

    total = total + highest_bias_contrib

    assert len(total.type.broadcastable) == 1

    rval = - total
    #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

    return rval
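A minimal sketch of the consistency check requested by the warning above, assuming `dbm` is an already-constructed model instance exposing both `expected_energy` and `expected_energy_batch`; `check_expected_energy_consistency`, `n_vis` and `layer_sizes` are hypothetical names, not from the source.

import numpy as np
import theano
import theano.tensor as T


def check_expected_energy_consistency(dbm, n_vis, layer_sizes, m=5, seed=0):
    # dbm, n_vis and layer_sizes are hypothetical; they must match the
    # visible and hidden layer sizes of the model being tested.
    rng = np.random.RandomState(seed)
    V_hat = T.matrix('V_hat')
    H_hat = [T.matrix('H_hat[%d]' % i) for i in xrange(len(layer_sizes))]

    scalar_energy = dbm.expected_energy(V_hat, H_hat)
    batch_energy = dbm.expected_energy_batch(V_hat, H_hat)

    # the per-example energies should average to the scalar expected energy
    f = theano.function([V_hat] + H_hat,
                        [scalar_energy, batch_energy.mean()])

    V_val = rng.uniform(size=(m, n_vis)).astype(theano.config.floatX)
    H_vals = [rng.uniform(size=(m, n)).astype(theano.config.floatX)
              for n in layer_sizes]

    scalar_val, averaged_val = f(V_val, *H_vals)
    assert np.allclose(scalar_val, averaged_val)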
def expected_energy(self, V_hat, H_hat, Y_hat=None, no_v_bias=False):
    """
    Expected energy of the model under the mean field distribution
    defined by V_hat and H_hat.

    Alternatively, this can be the expectation of the energy function
    across a batch of examples, where every element of V_hat and H_hat
    is a binary observation.

    If no_v_bias is True, ignores the contribution from biases on
    visible units.
    """

    assert (Y_hat is None) == (self.num_classes == 0)

    V_name = make_name(V_hat, 'anon_V_hat')

    assert isinstance(H_hat, (list, tuple))

    H_names = []
    for i in xrange(len(H_hat)):
        H_names.append(make_name(H_hat[i], 'anon_H_hat[%d]' % (i,)))

    m = V_hat.shape[0]
    m.name = V_name + '.shape[0]'

    assert len(H_hat) == len(self.rbms)

    v = T.mean(V_hat, axis=0)

    if no_v_bias:
        v_bias_contrib = 0.
    else:
        v_bias_contrib = T.dot(v, self.bias_vis)

    #exp_vh = T.dot(V_hat.T,H_hat[0]) / m
    #v_weights_contrib = T.sum(self.W[0] * exp_vh)
    v_weights_contrib = (T.dot(V_hat, self.W[0]) * H_hat[0]).sum(axis=1).mean()
    v_weights_contrib.name = 'v_weights_contrib(' + V_name + ',' + H_names[0] + ')'

    total = v_bias_contrib + v_weights_contrib

    for i in xrange(len(H_hat) - 1):
        lower_H = H_hat[i]
        low = T.mean(lower_H, axis=0)
        higher_H = H_hat[i + 1]
        #exp_lh = T.dot(lower_H.T, higher_H) / m
        lower_bias = self.bias_hid[i]
        W = self.W[i + 1]

        lower_bias_contrib = T.dot(low, lower_bias)

        #weights_contrib = T.sum( W * exp_lh) / m
        weights_contrib = (T.dot(lower_H, W) * higher_H).sum(axis=1).mean()

        total = total + lower_bias_contrib + weights_contrib

    highest_bias_contrib = T.dot(T.mean(H_hat[-1], axis=0), self.bias_hid[-1])

    total = total + highest_bias_contrib

    assert len(total.type.broadcastable) == 0

    if Y_hat is not None:
        weights_contrib = (T.dot(H_hat[-1], self.W_class) * Y_hat).sum(axis=1).mean()
        bias_contrib = T.dot(T.mean(Y_hat, axis=0), self.bias_class)
        total = total + weights_contrib + bias_contrib

    rval = - total
    #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

    return rval
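When `Y_hat` is supplied, the sketch of $T$ given earlier gains a class-label term (same informal notation, not taken from the source), with $\bar{y}$ the batch mean of the label mean-field parameters:

$$
\frac{1}{m}\sum_{n}\hat{h}^{(L)\top}_{n} W_{\mathrm{class}}\,\hat{y}_{n}
  + b_{\mathrm{class}}^{\top}\bar{y}.
$$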
def expected_energy_batch(self, V_hat, H_hat, Y_hat=None, no_v_bias=False):
    """
    Expected energy of the model under the mean field distribution
    defined by V_hat and H_hat.

    Alternatively, this can be the expectation of the energy function
    across a batch of examples, where every element of V_hat and H_hat
    is a binary observation.

    If no_v_bias is True, ignores the contribution from biases on
    visible units.
    """

    warnings.warn("TODO: write unit test verifying expected_energy_batch/m = expected_energy")

    assert (Y_hat is None) == (self.num_classes == 0)

    V_name = make_name(V_hat, 'anon_V_hat')

    assert isinstance(H_hat, (list, tuple))

    H_names = []
    for i in xrange(len(H_hat)):
        H_names.append(make_name(H_hat[i], 'anon_H_hat[%d]' % (i,)))

    assert len(H_hat) == len(self.rbms)

    if no_v_bias:
        v_bias_contrib = 0.
    else:
        v_bias_contrib = T.dot(V_hat, self.bias_vis)

    assert len(V_hat.type.broadcastable) == 2
    assert len(self.W[0].type.broadcastable) == 2
    assert len(H_hat[0].type.broadcastable) == 2

    interm1 = T.dot(V_hat, self.W[0])
    assert len(interm1.type.broadcastable) == 2
    interm2 = interm1 * H_hat[0]
    assert len(interm2.type.broadcastable) == 2

    v_weights_contrib = interm2.sum(axis=1)
    v_weights_contrib.name = 'v_weights_contrib(' + V_name + ',' + H_names[0] + ')'

    assert len(v_weights_contrib.type.broadcastable) == 1

    total = v_bias_contrib + v_weights_contrib

    for i in xrange(len(H_hat) - 1):
        lower_H = H_hat[i]
        higher_H = H_hat[i + 1]
        #exp_lh = T.dot(lower_H.T, higher_H) / m
        lower_bias = self.bias_hid[i]
        W = self.W[i + 1]

        lower_bias_contrib = T.dot(lower_H, lower_bias)

        #weights_contrib = T.sum( W * exp_lh) / m
        weights_contrib = (T.dot(lower_H, W) * higher_H).sum(axis=1)

        cur_contrib = lower_bias_contrib + weights_contrib
        assert len(cur_contrib.type.broadcastable) == 1
        total = total + cur_contrib

    highest_bias_contrib = T.dot(H_hat[-1], self.bias_hid[-1])

    total = total + highest_bias_contrib

    if Y_hat is not None:
        weights_contrib = (T.dot(H_hat[-1], self.W_class) * Y_hat).sum(axis=1)
        assert weights_contrib.ndim == 1
        bias_contrib = T.dot(Y_hat, self.bias_class)
        assert bias_contrib.ndim == 1
        total = total + weights_contrib + bias_contrib

    assert len(total.type.broadcastable) == 1

    rval = - total
    #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

    return rval
def expected_energy(self, V_hat, H_hat, Y_hat=None, no_v_bias=False):
    """
    .. todo::

        WRITEME properly

    Expected energy of the model under the mean field distribution
    defined by V_hat and H_hat.

    Alternatively, this can be the expectation of the energy function
    across a batch of examples, where every element of V_hat and H_hat
    is a binary observation.

    If no_v_bias is True, ignores the contribution from biases on
    visible units.
    """

    assert (Y_hat is None) == (self.num_classes == 0)

    V_name = make_name(V_hat, 'anon_V_hat')

    assert isinstance(H_hat, (list, tuple))

    H_names = []
    for i in xrange(len(H_hat)):
        H_names.append(make_name(H_hat[i], 'anon_H_hat[%d]' % (i,)))

    m = V_hat.shape[0]
    m.name = V_name + '.shape[0]'

    assert len(H_hat) == len(self.rbms)

    v = T.mean(V_hat, axis=0)

    if no_v_bias:
        v_bias_contrib = 0.
    else:
        v_bias_contrib = T.dot(v, self.bias_vis)

    #exp_vh = T.dot(V_hat.T,H_hat[0]) / m
    #v_weights_contrib = T.sum(self.W[0] * exp_vh)
    v_weights_contrib = (T.dot(V_hat, self.W[0]) * H_hat[0]).sum(axis=1).mean()
    v_weights_contrib.name = 'v_weights_contrib(' + V_name + ',' + H_names[0] + ')'

    total = v_bias_contrib + v_weights_contrib

    for i in xrange(len(H_hat) - 1):
        lower_H = H_hat[i]
        low = T.mean(lower_H, axis=0)
        higher_H = H_hat[i + 1]
        #exp_lh = T.dot(lower_H.T, higher_H) / m
        lower_bias = self.bias_hid[i]
        W = self.W[i + 1]

        lower_bias_contrib = T.dot(low, lower_bias)

        #weights_contrib = T.sum( W * exp_lh) / m
        weights_contrib = (T.dot(lower_H, W) * higher_H).sum(axis=1).mean()

        total = total + lower_bias_contrib + weights_contrib

    highest_bias_contrib = T.dot(T.mean(H_hat[-1], axis=0), self.bias_hid[-1])

    total = total + highest_bias_contrib

    assert len(total.type.broadcastable) == 0

    if Y_hat is not None:
        weights_contrib = (T.dot(H_hat[-1], self.W_class) * Y_hat).sum(axis=1).mean()
        bias_contrib = T.dot(T.mean(Y_hat, axis=0), self.bias_class)
        total = total + weights_contrib + bias_contrib

    rval = - total
    #rval.name = 'dbm_expected_energy('+V_name+','+str(H_names)+')'

    return rval
def __call__(self, X, Y=None, X_space=None):
    """
    Provides the mask for multi-prediction training. A 1 in the mask
    corresponds to a variable that should be used as an input to the
    inference process. A 0 corresponds to a variable that should be
    used as a prediction target of the multi-prediction training
    criterion.

    Parameters
    ----------
    X : Variable
        A batch of input features to mask for multi-prediction training
    Y : Variable
        A batch of input class labels to mask for multi-prediction
        training

    Returns
    -------
    drop_mask : Variable
        A Theano expression for a random binary mask in the same shape
        as `X`
    drop_mask_Y : Variable, only returned if `Y` is not None
        A Theano expression for a random binary mask in the same shape
        as `Y`

    Notes
    -----
    Calling this repeatedly will yield the same random numbers each
    time.
    """
    assert X_space is not None
    self.called = True
    assert X.dtype == config.floatX
    theano_rng = make_theano_rng(getattr(self, 'seed', None), default_seed,
                                 which_method="binomial")

    if X.ndim == 2 and self.sync_channels:
        raise NotImplementedError()

    p = self.drop_prob

    if not hasattr(self, 'drop_prob_y') or self.drop_prob_y is None:
        yp = p
    else:
        yp = self.drop_prob_y

    batch_size = X_space.batch_size(X)

    if self.balance:
        flip = theano_rng.binomial(size=(batch_size,), p=0.5, n=1,
                                   dtype=X.dtype)

        yp = flip * (1 - p) + (1 - flip) * p

        dimshuffle_args = ['x'] * X.ndim

        if X.ndim == 2:
            dimshuffle_args[0] = 0
            assert not self.sync_channels
        else:
            dimshuffle_args[X_space.axes.index('b')] = 0
            if self.sync_channels:
                del dimshuffle_args[X_space.axes.index('c')]

        flip = flip.dimshuffle(*dimshuffle_args)

        p = flip * (1 - p) + (1 - flip) * p

    # size needs to have a fixed length at compile time or the
    # theano random number generator will be angry
    size = [X.shape[i] for i in xrange(X.ndim)]
    if self.sync_channels:
        # entries can only be removed from a list, so drop the channel
        # axis before converting to the fixed-length tuple
        del size[X_space.axes.index('c')]
    size = tuple(size)

    drop_mask = theano_rng.binomial(size=size, p=p, n=1, dtype=X.dtype)

    X_name = make_name(X, 'anon_X')
    drop_mask.name = 'drop_mask(%s)' % X_name

    if Y is not None:
        assert isinstance(yp, float) or yp.ndim < 2
        drop_mask_Y = theano_rng.binomial(size=(batch_size,), p=yp, n=1,
                                          dtype=X.dtype)
        assert drop_mask_Y.ndim == 1
        Y_name = make_name(Y, 'anon_Y')
        drop_mask_Y.name = 'drop_mask_Y(%s)' % Y_name

        return drop_mask, drop_mask_Y

    return drop_mask
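A hedged usage sketch for this mask generator, assuming `mask_gen` is an already-constructed instance of the class this `__call__` belongs to (its constructor is not shown here), the features live in a flat `VectorSpace`, and `sync_channels` is off; `build_mask_fn` and the label/feature dimensions are made-up names and values.

import theano
import theano.tensor as T
from pylearn2.space import VectorSpace


def build_mask_fn(mask_gen, dim):
    # mask_gen: hypothetical, an instance of the class defining __call__ above
    X = T.matrix('X')  # batch of features; T.matrix defaults to config.floatX
    Y = T.matrix('Y')  # batch of one-hot class labels
    drop_mask, drop_mask_Y = mask_gen(X, Y, X_space=VectorSpace(dim=dim))
    # drop_mask matches the shape of X; drop_mask_Y has one entry per example
    return theano.function([X, Y], [drop_mask, drop_mask_Y])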
def __call__(self, X, Y=None, X_space=None):
    """
    .. todo::

        WRITEME

    Note that calling this repeatedly will yield the same random
    numbers each time.
    """
    assert X_space is not None
    self.called = True
    assert X.dtype == config.floatX
    if not hasattr(self, 'seed'):
        self.seed = default_seed
    theano_rng = RandomStreams(self.seed)
    if X.ndim == 2 and self.sync_channels:
        raise NotImplementedError()

    p = self.drop_prob

    if not hasattr(self, 'drop_prob_y') or self.drop_prob_y is None:
        yp = p
    else:
        yp = self.drop_prob_y

    batch_size = X_space.batch_size(X)

    if self.balance:
        flip = theano_rng.binomial(size=(batch_size,), p=0.5, n=1,
                                   dtype=X.dtype)

        yp = flip * (1 - p) + (1 - flip) * p

        dimshuffle_args = ['x'] * X.ndim

        if X.ndim == 2:
            dimshuffle_args[0] = 0
            assert not self.sync_channels
        else:
            dimshuffle_args[X_space.axes.index('b')] = 0
            if self.sync_channels:
                del dimshuffle_args[X_space.axes.index('c')]

        flip = flip.dimshuffle(*dimshuffle_args)

        p = flip * (1 - p) + (1 - flip) * p

    #size needs to have a fixed length at compile time or the
    #theano random number generator will be angry
    size = [X.shape[i] for i in xrange(X.ndim)]
    if self.sync_channels:
        # entries can only be removed from a list, so drop the channel
        # axis before converting to the fixed-length tuple
        del size[X_space.axes.index('c')]
    size = tuple(size)

    drop_mask = theano_rng.binomial(size=size, p=p, n=1, dtype=X.dtype)

    X_name = make_name(X, 'anon_X')
    drop_mask.name = 'drop_mask(%s)' % X_name

    if Y is not None:
        assert isinstance(yp, float) or yp.ndim < 2
        drop_mask_Y = theano_rng.binomial(size=(batch_size,), p=yp, n=1,
                                          dtype=X.dtype)
        assert drop_mask_Y.ndim == 1
        Y_name = make_name(Y, 'anon_Y')
        drop_mask_Y.name = 'drop_mask_Y(%s)' % Y_name
        #drop_mask = Print('drop_mask',attrs=['sum'])(drop_mask)
        #drop_mask_Y = Print('drop_mask_Y',attrs=['sum'])(drop_mask_Y)
        return drop_mask, drop_mask_Y

    return drop_mask