def get_nade_k_LL_ensemble_theano(self, k, n_orderings):
    """Compile a Theano function for the ensemble log-likelihood of ONE example.

    Treats the NADE-k models induced by different orderings as a mixture,
    Equ (18) in the paper:
        LL(x) = log( (1/O) * sum_o p(x | o) )
              = logsumexp_o( log p(x | o) ) - log(O)

    Parameters
    ----------
    k : int
        Number of mean-field steps per conditional.
    n_orderings : int
        Number of orderings O in the ensemble.

    Returns
    -------
    A compiled function f(x, ordering) -> scalar LL, where x is a
    (1, n_visible) float matrix (only a single example is supported) and
    ordering is an int32 (O, D) matrix whose rows are visible-unit
    permutations.
    """
    ordering = T.imatrix('ordering')
    # (O, D): one permutation of the D visible units per row
    ordering.tag.test_value = numpy.repeat(
        numpy.arange(self.n_visible)[numpy.newaxis, :],
        n_orderings, axis=0).astype('int32')
    # (O, D) mask carried through the scan; 1 marks bits already
    # conditioned on. Starts all-zero.
    input_mask_init = constantX(
        numpy.zeros((n_orderings, self.n_visible), dtype=floatX))
    x = T.fmatrix('samples')
    x.tag.test_value = numpy.random.binomial(
        n=1, p=0.5, size=(1, self.n_visible)).astype(floatX)

    def compute_LL_one_column(
            this_bit_vector,  # (O,): the bit each ordering predicts this step
            input_mask,       # (O, D): 1 indicates bits already sampled
            x,                # (1, D) test example
            W1, Wflags, c):
        # W1, Wflags, c are threaded through scan's non_sequences so the
        # parameters appear explicitly in the graph; get_nade_k_mean_field
        # reaches them through self.
        one = theano.tensor.constant(1, dtype=floatX)
        # broadcast the single example against the O orderings
        x_ = T.addbroadcast(x, 0)
        means = self.get_nade_k_mean_field(x_, input_mask, k)
        # use the mean coming from the last step of mean field: (O, D)
        use_mean = means[-1]
        # pick each ordering's predicted bit; squash away from 0/1 so the
        # logs below stay finite
        mean_column = use_mean[T.arange(use_mean.shape[0]),
                               this_bit_vector] * constantX(0.9999) + \
            constantX(0.0001 * 0.5)
        x_column = x.flatten()[this_bit_vector]
        # Bernoulli log-likelihood of this step's bit, per ordering: (O,)
        LL = x_column * T.log(mean_column) + \
            (constantX(1) - x_column) * T.log(constantX(1) - mean_column)
        # mark this step's bits as observed for the next scan step: (O, D)
        input_mask = T.set_subtensor(
            input_mask[T.arange(input_mask.shape[0]), this_bit_vector],
            one)
        return LL, input_mask

    [LLs, input_mask], updates = theano.scan(
        fn=compute_LL_one_column,
        outputs_info=[None, input_mask_init],
        sequences=[ordering.T],
        non_sequences=[x, self.W1, self.Wflags, self.c],
    )
    # LLs: (D, O). Summing over D gives log p(x | o) per ordering; the
    # mixture then needs log-mean-exp over the O orderings.
    # BUG FIX: normalize by the number of orderings O = ordering.shape[0],
    # not D = ordering.shape[1] — consistent with the minibatch variant.
    LL = utils.log_sum_exp_theano(LLs.sum(axis=0), axis=-1) - T.log(
        ordering.shape[0])
    f = theano.function(inputs=[x, ordering],
                        outputs=LL,
                        updates=updates,
                        name='LL_on_one_example_fn')
    return f
def get_nade_k_LL_ensemble_theano_minibatch(self, k, n_orderings):
    """Compile a Theano function for the ensemble log-likelihood of a minibatch.

    As a mixture model over orderings, Equ (18) in the paper:
        LL(x) = logsumexp_o( log p(x | o) ) - log(O)

    Returns a compiled function f(x, ordering) -> [LL, LL_orders] where
    x is (minibatch_size, n_visible), ordering is an int32 (O, D) matrix
    of visible-unit permutations, LL is the per-example mixture
    log-likelihood and LL_orders is the (M, O) per-ordering
    log-likelihoods.
    """
    ordering = T.imatrix('ordering')
    # (O, D): one permutation of the D visible units per row
    ordering.tag.test_value = numpy.repeat(numpy.arange(
        self.n_visible)[numpy.newaxis, :],
        n_orderings, axis=0).astype('int32')
    # (O, D) mask carried through the scan; 1 marks bits already
    # conditioned on. Starts all-zero.
    input_mask_init = constantX(
        numpy.zeros((n_orderings, self.n_visible), dtype=floatX))
    x = T.fmatrix('samples')
    x.tag.test_value = numpy.random.binomial(
        n=1, p=0.5,
        size=(self.minibatch_size, self.n_visible)).astype(floatX)
    # (M, 1, D): broadcastable middle axis lets one minibatch pair with
    # all O orderings inside the mean-field computation
    x_ = x.dimshuffle(0, 'x', 1)

    def compute_LL_one_column(
            this_bit_vector,  # (O,): the bit each ordering predicts this step
            input_mask,       # (O, D): 1 indicates bits already sampled
            x, x_,            # testset minibatch, (M, D) and (M, 1, D)
            W1, Wflags, c):
        # W1, Wflags, c ride along as scan non_sequences so the parameters
        # appear explicitly in the graph; the mean-field call reaches them
        # through self.
        one = theano.tensor.constant(1, dtype=floatX)
        #means = self.get_nade_k_mean_field(x_, input_mask.dimshuffle('x',0,1), k)
        means = self.get_nade_k_mean_field(x_, input_mask, k)
        # use the mean coming from the last step of mean field: (M, O, D)
        use_mean = means[-1]
        # Flatten (O, D) -> (O*D,) and build flat indices so that one
        # advanced-indexing op selects, for every ordering o, the entry
        # use_mean[:, o, this_bit_vector[o]] — i.e. a fancy-index over two
        # trailing axes done on a reshaped tensor. Result: (M, O).
        use_mean_shape = use_mean.shape
        use_mean = use_mean.reshape(
            [use_mean_shape[0], use_mean_shape[1] * use_mean_shape[2]])
        idx = use_mean_shape[2] * T.arange(
            use_mean_shape[1]) + this_bit_vector
        # squash predictions away from 0/1 so the logs below stay finite
        mean_column = use_mean[:, idx] * constantX(0.9999) + constantX(
            0.0001 * 0.5)
        #mean_column = use_mean[:,T.arange(use_mean.shape[1]), \
        #              this_bit_vector]*constantX(0.9999)+ \
        #              constantX(0.0001*0.5)
        # (M, O): each example's true bit value at every ordering's
        # current position
        x_column = x_.reshape(
            [x_.shape[0], x_.shape[2]])[:, this_bit_vector]
        # Bernoulli log-likelihood of this step's bits: (M, O)
        LL = x_column*T.log(mean_column) + \
            (constantX(1)-x_column)*T.log(constantX(1)-mean_column)
        # set the new input mask: (O, D). Same flat-index trick as above
        # to scatter ones at (o, this_bit_vector[o]) for every ordering.
        input_mask_shape = input_mask.shape
        input_mask = input_mask.flatten()
        idx = input_mask_shape[1] * T.arange(
            input_mask_shape[0]) + this_bit_vector
        input_mask = T.set_subtensor(input_mask[idx], one)
        input_mask = input_mask.reshape(input_mask_shape)
        #input_mask = T.set_subtensor(input_mask[T.arange(input_mask.shape[0]),
        #                             this_bit_vector],one)
        return LL, input_mask

    [LLs, input_mask], updates = theano.scan(
        fn=compute_LL_one_column,
        outputs_info=[None, input_mask_init],
        sequences=[ordering.T],
        non_sequences=[x, x_, self.W1, self.Wflags, self.c],
    )
    # LLs: (D, M, O). Summing over D gives log p(x | o) per example and
    # ordering; log-mean-exp over the O orderings forms the mixture.
    LL = utils.log_sum_exp_theano(LLs.sum(axis=0), axis=-1) - T.log(
        ordering.shape[0])
    # (M, O) per-ordering log-likelihoods, returned for diagnostics
    LL_orders = LLs.sum(axis=0)
    f = theano.function(inputs=[x, ordering],
                        outputs=[LL, LL_orders],
                        updates=updates,
                        name='LL_on_one_example_fn')
    return f