def get_pseudo_likelihood_cost(self, updates):
    """Stochastic approximation to the pseudo-likelihood"""

    # index of bit i in expression p(x_i | x_{\i})
    bit_i_idx = theano.shared(value=0, name='bit_i_idx')

    # binarize the input image by rounding to nearest integer
    xi = tensor.round(self.input)

    # calculate free energy for the given bit configuration
    fe_xi = self.free_energy(xi)

    # flip bit x_i of matrix xi and preserve all other bits x_{\i}
    # Equivalent to xi[:, bit_i_idx] = 1 - xi[:, bit_i_idx], but assigns
    # the result to xi_flip, instead of working in place on xi.
    xi_flip = tensor.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx])

    # calculate free energy with bit flipped
    fe_xi_flip = self.free_energy(xi_flip)

    # log P(x_i | x_{\i}) = log sigmoid(FE(xi_flip) - FE(xi))
    #                     = -softplus(FE(xi) - FE(xi_flip))
    cost = -tensor.mean(self.n_visible *
                        nnet.softplus(fe_xi - fe_xi_flip))

    # increment bit_i_idx % number as part of updates
    updates[bit_i_idx] = (bit_i_idx + 1) % self.n_visible

    return cost
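# Illustration (not part of the original snippet): the cost line above relies on the
# identity log(sigmoid(z)) == -softplus(-z). A minimal numpy check of that identity,
# using made-up values, looks like this:
import numpy as np

def np_softplus(x):
    return np.log1p(np.exp(x))

def np_log_sigmoid(z):
    return np.log(1.0 / (1.0 + np.exp(-z)))

z = np.array([-3.0, -0.5, 0.0, 2.0])
assert np.allclose(np_log_sigmoid(z), -np_softplus(-z))
# hence  mean(n_visible * log(sigmoid(fe_xi_flip - fe_xi)))
#     == -mean(n_visible * softplus(fe_xi - fe_xi_flip))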
def free_energy_given_s_h(self, s, h, With_fast=False):
    alpha = self.get_alpha(With_fast)
    mu = self.get_mu(With_fast)
    W = self.get_filters(With_fast)
    h_bias = self.get_h_bias(With_fast)
    conv_v_bias = self.get_conv_v_bias(With_fast)
    out_softplus = (0.5 * alpha * (s ** 2)
                    - alpha * mu * s * h
                    + 0.5 * alpha * (mu ** 2) * h
                    - h_bias * h)
    rval = (tensor.sum(out_softplus, axis=[1, 2, 3])
            - tensor.sum(nnet.softplus(self.convdot(s * h, W) + conv_v_bias),
                         axis=[1, 2, 3]))
    assert rval.ndim == 1
    return rval
def free_energy_given_v(self, v):
    # This is accurate up to a multiplicative constant
    # because I dropped some terms involving 2pi
    def pre_sigmoid(x):
        assert x.owner and x.owner.op == nnet.sigmoid
        return x.owner.inputs[0]
    pre_convhs_h = pre_sigmoid(self.mean_convhs_h_given_v(v))
    rval = tensor.add(
        -tensor.sum(nnet.softplus(pre_convhs_h), axis=[1, 2, 3, 4]),
        # the shape of pre_convhs_h: 64 x 11 x 32 x 8 x 8
        (0.5 / self.sigma) * tensor.sum((v - self.bias_v) ** 2, axis=[1, 2, 3]),
        # shape: 64 x 1 x 98 x 98
    )
    assert rval.ndim == 1
    return rval
def free_energy(self, sample, type='vis'):
    '''Function to compute the free energy'''
    assert type in ('vis', 'hid')
    if type == 'vis':
        wx_b = T.dot(sample, self.W) + self.hbias
        bias_term = T.dot(sample, self.vbias)
    else:
        wx_b = T.dot(sample, self.W.T) + self.vbias
        bias_term = T.dot(sample, self.hbias)
    hidden_term = T.sum(nnet.softplus(wx_b), axis=1)
    # hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1)
    return -hidden_term - bias_term
def free_energy_given_v(self, v, With_fast=False):
    # This is accurate up to a multiplicative constant
    # because I dropped some terms involving 2pi
    def pre_sigmoid(x):
        assert x.owner and x.owner.op == nnet.sigmoid
        return x.owner.inputs[0]
    pre_convhs_h = pre_sigmoid(self.mean_convhs_h_given_v(v, With_fast))
    rval = tensor.add(
        -tensor.sum(nnet.softplus(pre_convhs_h), axis=[1, 2, 3, 4]),
        # the shape of pre_convhs_h: 64 x 11 x 32 x 8 x 8
        0.5 * tensor.sum(self.get_v_prec(With_fast) * (v ** 2), axis=[1, 2, 3]),
        # shape: 64 x 1 x 98 x 98
    )
    assert rval.ndim == 1
    return rval
def free_energy_given_v(self, v, With_fast=False):
    # This is accurate up to a multiplicative constant
    # because I dropped some terms involving 2pi
    alpha = self.get_conv_alpha(With_fast)
    W = self.get_filters_hs(With_fast)
    vW = self.convdot(v, W)
    # change 64 x 11 x 32 x 8 x 8 to 64 x 8 x 8 x 11 x 32 for broadcasting
    vW_broadcastable = vW.dimshuffle(0, 3, 4, 1, 2)
    pre_convhs_h_parts = (self.get_conv_mu(With_fast) * vW_broadcastable
                          + self.get_conv_bias_hs(With_fast)
                          + 0.5 * (vW_broadcastable ** 2) / alpha)
    pre_convhs_h = tensor.add(
        pre_convhs_h_parts.dimshuffle(0, 3, 4, 1, 2),
        -0.5 * self.conv_problem_term(v, With_fast))
    rval = tensor.add(
        -tensor.sum(nnet.softplus(pre_convhs_h), axis=[1, 2, 3, 4]),
        # the shape of pre_convhs_h: 64 x 11 x 32 x 8 x 8
        0.5 * tensor.sum(self.get_v_prec(With_fast) * (v ** 2), axis=[1, 2, 3]),
        # shape: 64 x 1 x 98 x 98
    )
    assert rval.ndim == 1
    return rval
def free_energy_given_h(self, h):
    """
    Calculate the free energy of a hidden unit configuration by
    marginalizing over the visible units.

    Parameters
    ----------
    h : tensor_like
        Theano symbolic representing the hidden unit states, with the
        first dimension indexing training examples and the second
        indexing data dimensions.

    Returns
    -------
    f : tensor_like
        1-dimensional tensor (vector) representing the free energy
        associated with each row of h.
    """
    sigmoid_arg = self.input_to_v_from_h(h)
    return (-tensor.dot(h, self.bias_hid)
            - nnet.softplus(sigmoid_arg).sum(axis=1))
def free_energy_given_h(self, h):
    """
    Calculate the free energy of a hidden unit configuration by
    marginalizing over the visible units.

    Parameters
    ----------
    h : tensor_like
        Theano symbolic representing the hidden unit states, with the
        first dimension indexing training examples and the second
        indexing data dimensions.

    Returns
    -------
    f : tensor_like
        1-dimensional tensor (vector) representing the free energy
        associated with each row of h.
    """
    sigmoid_arg = self.input_to_v_from_h(h)
    return (-tensor.dot(h, self.hidbias)
            - nnet.softplus(sigmoid_arg).sum(axis=1))
def free_energy_given_v(self, v):
    """
    Calculate the free energy of a visible unit configuration by
    marginalizing over the hidden units.

    Parameters
    ----------
    v : tensor_like
        Theano symbolic representing the visible unit states for a
        batch of training examples, with the first dimension indexing
        training examples and the second indexing data dimensions.

    Returns
    -------
    f : tensor_like
        1-dimensional tensor (vector) representing the free energy
        associated with each row of v.
    """
    hid_inp = self.input_to_h_from_v(v)
    squared_term = (self.visbias - v) ** 2 / self.sigma
    return squared_term.sum(axis=1) - nnet.softplus(hid_inp).sum(axis=1)
def free_energy_given_v(self, v):
    sigmoid_arg = self.input_to_h_from_v(v)
    return (-T.dot(v, self.get_bv())
            - nnet.softplus(sigmoid_arg).sum())
def theano_softplus(self, x): return nets.softplus(x)
def __init__(self, rng, input, filter_shape, poolsize=(2, 2), stride=None,
             if_pool=False, act=None, share_with=None, tied=None,
             border_mode='valid'):

    self.input = input

    if share_with:
        self.W = share_with.W
        self.b = share_with.b
        self.W_delta = share_with.W_delta
        self.b_delta = share_with.b_delta

    elif tied:
        self.W = tied.W.dimshuffle(1, 0, 2, 3)
        self.b = tied.b
        self.W_delta = tied.W_delta.dimshuffle(1, 0, 2, 3)
        self.b_delta = tied.b_delta

    else:
        fan_in = np.prod(filter_shape[1:])
        poolsize_size = np.prod(poolsize) if poolsize else 1
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) / poolsize_size)
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            np.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)

        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        # EHA: delta terms hold the update history for the momentum gradient
        self.W_delta = theano.shared(
            np.zeros(filter_shape, dtype=theano.config.floatX),
            borrow=True)
        self.b_delta = theano.shared(value=b_values, borrow=True)

    conv_out = nnet.conv2d(
        input=input,
        filters=self.W,
        filter_shape=filter_shape,
        border_mode=border_mode)

    if if_pool:
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize,
            st=stride,
            ignore_border=True)
        tmp = pooled_out + self.b.dimshuffle('x', 0, 'x', 'x')
    else:
        tmp = conv_out + self.b.dimshuffle('x', 0, 'x', 'x')

    if act == 'tanh':
        self.output = T.tanh(tmp)
    elif act == 'sigmoid':
        self.output = nnet.sigmoid(tmp)
    elif act == 'relu':
        # self.output = nnet.relu(tmp)
        self.output = 0.5 * (tmp + abs(tmp)) + 1e-9
    elif act == 'softplus':
        # self.output = T.log2(1 + T.exp(tmp))
        self.output = nnet.softplus(tmp)
    else:
        # 'linear' (or no activation given): identity output
        self.output = tmp

    # store parameters of this layer
    self.params = [self.W, self.b]
    self.deltas = [self.W_delta, self.b_delta]
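# Illustration (not part of the original layer code): the 'softplus' branch above uses
# log(1 + exp(x)) as a smooth stand-in for the 'relu' branch. A small numpy sketch with
# made-up inputs shows how the two activations relate:
import numpy as np

def np_relu(x):
    return np.maximum(x, 0.0)

def np_softplus(x):
    # numerically stable log(1 + exp(x))
    return np.logaddexp(0.0, x)

x = np.linspace(-6, 6, 7)
print(np_relu(x))       # hard threshold at 0
print(np_softplus(x))   # smooth, strictly positive, approaches relu(x) for large |x|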
def __init__(self, rng, input, signal_shape, filter_shape, poolsize=(2, 2, 2),
             stride=None, if_pool=False, if_hidden_pool=False, act=None,
             share_with=None, tied=None, border_mode='valid'):

    self.input = input

    if share_with:
        self.W = share_with.W
        self.b = share_with.b
        self.W_delta = share_with.W_delta
        self.b_delta = share_with.b_delta

    elif tied:
        self.W = tied.W.dimshuffle(1, 0, 2, 3)
        self.b = tied.b
        self.W_delta = tied.W_delta.dimshuffle(1, 0, 2, 3)
        self.b_delta = tied.b_delta

    else:
        fan_in = np.prod(filter_shape[1:])
        poolsize_size = np.prod(poolsize) if poolsize else 1
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) / poolsize_size)
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            np.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)

        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        self.W_delta = theano.shared(
            np.zeros(filter_shape, dtype=theano.config.floatX),
            borrow=True)
        self.b_delta = theano.shared(value=b_values, borrow=True)

    # convolution
    conv_out = nnet.conv3d(
        input,
        filters=self.W,
        input_shape=signal_shape,
        filter_shape=filter_shape,
        border_mode=border_mode)

    if if_pool:
        # max_pool_3d works on the last 3 dimensions
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
        pooled_out = pools.pool_3d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True)
        tmp_out = pooled_out.dimshuffle(0, 2, 1, 3, 4)
        tmp = tmp_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    elif if_hidden_pool:
        pooled_out = pools.pool_2d(
            input=conv_out,
            ds=poolsize[:2],
            st=stride,
            ignore_border=True)
        tmp = pooled_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    else:
        tmp = conv_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')

    if act == 'tanh':
        self.output = T.tanh(tmp)
    elif act == 'sigmoid':
        self.output = nnet.sigmoid(tmp)
    elif act == 'relu':
        # self.output = tmp * (tmp > 0)
        self.output = 0.5 * (tmp + abs(tmp)) + 1e-9
    elif act == 'softplus':
        # self.output = T.log2(1 + T.exp(tmp))
        self.output = nnet.softplus(tmp)
    else:
        self.output = tmp

    self.get_activation = theano.function(
        [self.input],
        self.output,
        updates=None,
        name='get hidden activation')

    # store parameters of this layer
    self.params = [self.W, self.b]
    self.deltas = [self.W_delta, self.b_delta]
def free_energy_given_h(self, h):
    sigmoid_arg = self.input_to_v_from_h(h)
    return (-T.dot(h, self.get_bh())
            - nnet.softplus(sigmoid_arg).sum())
def free_energy(self, v_sample):
    '''Function to compute the free energy'''
    wx_b = tensor.dot(v_sample, self.W) + self.hbias
    vbias_term = tensor.dot(v_sample, self.vbias)
    hidden_term = tensor.sum(nnet.softplus(wx_b), axis=1)
    return -hidden_term - vbias_term
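# Illustration (not part of the original snippet): the binary-visible free energy above
# is F(v) = -v . vbias - sum_j softplus(v . W + hbias)_j. A minimal numpy sketch with
# made-up weights and a random binary batch gives one free-energy value per example:
import numpy as np

rng = np.random.RandomState(0)
n_visible, n_hidden, batch = 6, 4, 3
W = 0.1 * rng.randn(n_visible, n_hidden)
vbias = np.zeros(n_visible)
hbias = np.zeros(n_hidden)
v = (rng.rand(batch, n_visible) > 0.5).astype(float)

wx_b = v.dot(W) + hbias                                        # shape (batch, n_hidden)
free_energy = -np.log1p(np.exp(wx_b)).sum(axis=1) - v.dot(vbias)
print(free_energy)                                             # shape (batch,)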
def __init__(self, rng, input, signal_shape, filter_shape, poolsize=(2, 2, 2),
             stride=None, if_pool=False, if_hidden_pool=False, act=None,
             share_with=None, tied=None, border_mode='valid'):

    self.input = input

    if share_with:
        self.W = share_with.W
        self.b = share_with.b
        self.W_delta = share_with.W_delta
        self.b_delta = share_with.b_delta

    elif tied:
        self.W = tied.W.dimshuffle(1, 0, 2, 3)
        self.b = tied.b
        self.W_delta = tied.W_delta.dimshuffle(1, 0, 2, 3)
        self.b_delta = tied.b_delta

    else:
        fan_in = np.prod(filter_shape[1:])
        poolsize_size = np.prod(poolsize) if poolsize else 1
        fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) / poolsize_size)
        W_bound = np.sqrt(6. / (fan_in + fan_out))
        self.W = theano.shared(
            np.asarray(
                rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
                dtype=theano.config.floatX),
            borrow=True)

        b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
        self.b = theano.shared(value=b_values, borrow=True)

        self.W_delta = theano.shared(
            np.zeros(filter_shape, dtype=theano.config.floatX),
            borrow=True)
        self.b_delta = theano.shared(value=b_values, borrow=True)

    # convolution
    conv_out = conv3d2d.conv3d(
        signals=input,
        filters=self.W,
        signals_shape=signal_shape,
        filters_shape=filter_shape,
        border_mode=border_mode)

    if if_pool:
        # maxpool3d works on the last 3 dimensions
        conv_out = conv_out.dimshuffle(0, 2, 1, 3, 4)
        pooled_out = maxpool3d.max_pool_3d(
            input=conv_out,
            ds=poolsize,
            ignore_border=True)
        tmp_out = pooled_out.dimshuffle(0, 2, 1, 3, 4)
        tmp = tmp_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    elif if_hidden_pool:
        pooled_out = downsample.max_pool_2d(
            input=conv_out,
            ds=poolsize[:2],
            st=stride,
            ignore_border=True)
        tmp = pooled_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')
    else:
        tmp = conv_out + self.b.dimshuffle('x', 'x', 0, 'x', 'x')

    if act == 'tanh':
        self.output = T.tanh(tmp)
    elif act == 'sigmoid':
        self.output = nnet.sigmoid(tmp)
    elif act == 'relu':
        # self.output = tmp * (tmp > 0)
        self.output = 0.5 * (tmp + abs(tmp)) + 1e-9
    elif act == 'softplus':
        # self.output = T.log2(1 + T.exp(tmp))
        self.output = nnet.softplus(tmp)
    else:
        self.output = tmp

    self.get_activation = theano.function(
        [self.input],
        self.output,
        updates=None,
        name='get hidden activation')

    # store parameters of this layer
    self.params = [self.W, self.b]
    self.deltas = [self.W_delta, self.b_delta]
def free_energy(self, v_sample):
    wx_b = tensor.dot(v_sample, self.W) + self.hbias
    vbias_term = 0.5 * tensor.sqr(v_sample - self.vbias).sum(axis=1)
    hidden_term = nnet.softplus(wx_b).sum(axis=1)
    return -hidden_term + vbias_term
def __init__(self, rng, input, filter_shape, image_shape, poolsize):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    :type rng: np.random.RandomState
    :param rng: a random number generator used to initialize weights

    :type input: theano.tensor.dtensor4
    :param input: symbolic image tensor, of shape image_shape

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of filters, num input feature maps,
                          filter height, filter width)

    :type image_shape: tuple or list of length 4
    :param image_shape: (batch size, num input feature maps,
                         image height, image width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """

    assert image_shape[1] == filter_shape[1]
    self.input = input
    self.filter_shape = filter_shape

    # there are "num input feature maps * filter height * filter width"
    # inputs to each hidden unit
    fan_in = np.prod(filter_shape[1:])
    # each unit in the lower layer receives a gradient from:
    # "num output feature maps * filter height * filter width" / pooling size
    fan_out = (filter_shape[0] * np.prod(filter_shape[2:]) //
               np.prod(poolsize))
    # initialize weights with random weights
    W_bound = np.sqrt(6. / (fan_in + fan_out))
    self.W = theano.shared(
        np.asarray(
            rng.uniform(low=-W_bound, high=W_bound, size=filter_shape),
            dtype=theano.config.floatX),
        borrow=True)

    # the bias is a 1D tensor -- one bias per output feature map
    b_values = np.zeros((filter_shape[0],), dtype=theano.config.floatX)
    self.b = theano.shared(value=b_values, borrow=True)

    # convolve input feature maps with filters
    conv_out = conv2d(
        input=input,
        filters=self.W,
        filter_shape=filter_shape,
        input_shape=image_shape)

    # downsample each feature map individually, using maxpooling
    pooled_out = downsample.max_pool_2d(
        input=conv_out,
        ds=poolsize,
        ignore_border=True)

    # add the bias term. Since the bias is a vector (1D array), we first
    # reshape it to a tensor of shape (1, n_filters, 1, 1). Each bias will
    # thus be broadcasted across mini-batches and feature map width & height
    self.output = softplus(pooled_out + self.b.dimshuffle('x', 0, 'x', 'x'))

    # store parameters of this layer
    self.params = [self.W, self.b]

    # keep track of model input
    self.input = input
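# Illustration (not part of the original layer code): W_bound above is the usual
# sqrt(6 / (fan_in + fan_out)) uniform-initialization range. For a hypothetical
# filter_shape of (20, 1, 5, 5) and 2x2 pooling, the numbers work out as follows:
import numpy as np

filter_shape = (20, 1, 5, 5)
poolsize = (2, 2)
fan_in = np.prod(filter_shape[1:])                                           # 1*5*5 = 25
fan_out = filter_shape[0] * np.prod(filter_shape[2:]) // np.prod(poolsize)   # 20*25//4 = 125
W_bound = np.sqrt(6. / (fan_in + fan_out))                                   # sqrt(6/150) = 0.2
print(fan_in, fan_out, W_bound)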