Example #1
0
    def get_nade_k_LL_ensemble_theano(self, k, n_orderings):
        """Compile a theano function for the ensemble log-likelihood of
        NADE-k over several orderings, for a single example.

        The orderings form a uniform mixture:
            log p(x) = logsumexp_o log p(x | o) - log(n_orderings)
        i.e. 1/M sum_M log (sum_O 1/O p(x_m | o)), here with M == 1:
        only a matrix x with first dim 1 is supported.

        Parameters
        ----------
        k : int
            Number of mean-field iterations per conditional.
        n_orderings : int
            Number of orderings O in the ensemble.

        Returns
        -------
        A compiled theano function ``f(x, ordering) -> LL`` where ``x``
        is a (1, D) float matrix and ``ordering`` an (O, D) int matrix.
        """
        ordering = T.imatrix('ordering')
        # (O,D): each row is one permutation of the D visible indices
        ordering.tag.test_value = numpy.repeat(numpy.arange(
            self.n_visible)[numpy.newaxis, :],
                                               n_orderings,
                                               axis=0).astype('int32')
        # (O,D) mask of already-conditioned-on bits, initially all zero
        input_mask_init = constantX(
            numpy.zeros((n_orderings, self.n_visible), dtype=floatX))
        x = T.fmatrix('samples')
        x.tag.test_value = numpy.random.binomial(
            n=1, p=0.5, size=(1, self.n_visible)).astype(floatX)

        def compute_LL_one_column(
                this_bit_vector,  # (O,) next bit index under each ordering
                input_mask,  # (O,D); 1 indicates bits already sampled
                x,  # test example, (1,D)
                W1,
                Wflags,
                c):
            one = theano.tensor.constant(1, dtype=floatX)
            x_ = T.addbroadcast(x, 0)
            # mean-field returns a list of (O,D) mean tensors, one per step
            means = self.get_nade_k_mean_field(x_, input_mask, k)
            # use the mean coming from the last step of mean field
            # (O,D)
            use_mean = means[-1]
            # squash the means strictly inside (0,1) so the logs stay finite
            mean_column = use_mean[T.arange(use_mean.shape[0]), \
                                    this_bit_vector]*constantX(0.9999)+ \
                                    constantX(0.0001*0.5)
            x_column = x.flatten()[this_bit_vector]
            # Bernoulli log-likelihood of this column per ordering, (O,)
            LL = x_column*T.log(mean_column) + \
                   (constantX(1)-x_column)*T.log(constantX(1)-mean_column)
            # set the new input mask: (O,D)
            input_mask = T.set_subtensor(
                input_mask[T.arange(input_mask.shape[0]), this_bit_vector],
                one)
            return LL, input_mask

        [LLs, input_mask], updates = theano.scan(
            fn=compute_LL_one_column,
            outputs_info=[None, input_mask_init],
            sequences=[ordering.T],
            non_sequences=[x, self.W1, self.Wflags, self.c],
        )
        # LLs: (D,O); summing over bits gives per-ordering log-likelihoods.
        # Mixture over orderings: logsumexp minus log of the NUMBER of
        # orderings. BUGFIX: normalize by ordering.shape[0] (= O), not
        # shape[1] (= D), consistent with the minibatch variant below.
        LL = utils.log_sum_exp_theano(LLs.sum(axis=0), axis=-1) - T.log(
            ordering.shape[0])
        f = theano.function(inputs=[x, ordering],
                            outputs=LL,
                            updates=updates,
                            name='LL_on_one_example_fn')
        return f
Example #2
0
    def get_nade_k_LL_ensemble_theano_minibatch(self, k, n_orderings):
        """Compile the minibatch ensemble log-likelihood of NADE-k.

        Treats the set of orderings as a uniform mixture model,
        Equ (18) in the paper:
            log p(x) = logsumexp_o log p(x | o) - log(O)

        Parameters
        ----------
        k : int
            Number of mean-field iterations per conditional.
        n_orderings : int
            Number of orderings O in the ensemble.

        Returns
        -------
        A compiled theano function ``f(x, ordering) -> [LL, LL_orders]``
        with ``x`` of shape (M, D) and ``ordering`` of shape (O, D);
        ``LL`` holds per-example mixture log-likelihoods and
        ``LL_orders`` the per-ordering log-likelihoods, shape (M, O).
        """
        ordering = T.imatrix('ordering')
        # (O,D): every row is one ordering of the D visible units
        ordering.tag.test_value = numpy.tile(
            numpy.arange(self.n_visible),
            (n_orderings, 1)).astype('int32')
        # (O,D) mask of bits already observed, all zeros at the start
        mask0 = constantX(
            numpy.zeros((n_orderings, self.n_visible), dtype=floatX))
        x = T.fmatrix('samples')
        x.tag.test_value = numpy.random.binomial(
            n=1, p=0.5,
            size=(self.minibatch_size, self.n_visible)).astype(floatX)
        # (M,1,D) so the minibatch broadcasts against the O orderings
        x_ = x.dimshuffle(0, 'x', 1)

        def step_one_column(bit_idx,   # (O,) current bit per ordering
                            mask,      # (O,D): 1 marks already-observed bits
                            x, x_,     # minibatch as (M,D) and (M,1,D)
                            W1, Wflags, c):
            one = theano.tensor.constant(1, dtype=floatX)
            # final mean-field iterate, (M,O,D)
            cur_mean = self.get_nade_k_mean_field(x_, mask, k)[-1]
            mshape = cur_mean.shape
            # gather cur_mean[:, o, bit_idx[o]] by flattening the last two
            # axes, since a direct batched gather is not available here
            flat_mean = cur_mean.reshape([mshape[0], mshape[1] * mshape[2]])
            gather_idx = mshape[2] * T.arange(mshape[1]) + bit_idx
            # keep the mean strictly inside (0,1) so log() is finite
            picked = flat_mean[:, gather_idx] * constantX(0.9999) \
                + constantX(0.0001 * 0.5)
            # (M,O) target bits for this column
            bits = x_.reshape([x_.shape[0], x_.shape[2]])[:, bit_idx]
            # Bernoulli log-likelihood of this column, (M,O)
            col_LL = bits * T.log(picked) \
                + (constantX(1) - bits) * T.log(constantX(1) - picked)
            # flag this bit as observed, again via flat indexing: (O,D)
            maskshape = mask.shape
            flat_mask = mask.flatten()
            set_idx = maskshape[1] * T.arange(maskshape[0]) + bit_idx
            new_mask = T.set_subtensor(flat_mask[set_idx], one)
            return col_LL, new_mask.reshape(maskshape)

        [LLs, final_mask], updates = theano.scan(
            fn=step_one_column,
            outputs_info=[None, mask0],
            sequences=[ordering.T],
            non_sequences=[x, x_, self.W1, self.Wflags, self.c],
        )
        # LLs: (D,M,O); summing over bits gives per-ordering LLs, (M,O)
        LL_orders = LLs.sum(axis=0)
        # uniform mixture over the O orderings
        LL = utils.log_sum_exp_theano(LL_orders, axis=-1) - T.log(
            ordering.shape[0])
        f = theano.function(inputs=[x, ordering],
                            outputs=[LL, LL_orders],
                            updates=updates,
                            name='LL_on_one_example_fn')
        return f