Example #1
    def test_random_integers_vector(self):
        random = RandomStreams(utt.fetch_seed())
        low = tensor.lvector()
        high = tensor.lvector()
        out = random.random_integers(low=low, high=high)
        assert out.ndim == 1
        f = function([low, high], out)

        low_val = [100, 200, 300]
        high_val = [110, 220, 330]
        seed_gen = numpy.random.RandomState(utt.fetch_seed())
        numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30)))

        # Arguments of size (3,)
        val0 = f(low_val, high_val)
        numpy_val0 = numpy.asarray([numpy_rng.random_integers(low=lv, high=hv)
            for lv, hv in zip(low_val, high_val)])
        assert numpy.all(val0 == numpy_val0)

        # arguments of size (2,)
        val1 = f(low_val[:-1], high_val[:-1])
        numpy_val1 = numpy.asarray([numpy_rng.random_integers(low=lv, high=hv)
            for lv, hv in zip(low_val[:-1], high_val[:-1])])
        assert numpy.all(val1 == numpy_val1)

        # Specifying the size explicitly
        g = function([low, high], random.random_integers(low=low, high=high, size=(3,)))
        val2 = g(low_val, high_val)
        numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30)))
        numpy_val2 = numpy.asarray([numpy_rng.random_integers(low=lv, high=hv)
            for lv, hv in zip(low_val, high_val)])
        assert numpy.all(val2 == numpy_val2)
        self.assertRaises(ValueError, g, low_val[:-1], high_val[:-1])
Example #2
    def test_random_integers_vector(self):
        random = RandomStreams(utt.fetch_seed())
        low = tensor.lvector()
        high = tensor.lvector()
        out = random.random_integers(low=low, high=high)
        assert out.ndim == 1
        f = function([low, high], out)

        low_val = [100, 200, 300]
        high_val = [110, 220, 330]
        seed_gen = numpy.random.RandomState(utt.fetch_seed())
        numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30)))

        # Arguments of size (3,)
        val0 = f(low_val, high_val)
        numpy_val0 = numpy.asarray([numpy_rng.randint(low=lv, high=hv+1)
            for lv, hv in zip(low_val, high_val)])
        assert numpy.all(val0 == numpy_val0)

        # arguments of size (2,)
        val1 = f(low_val[:-1], high_val[:-1])
        numpy_val1 = numpy.asarray([numpy_rng.randint(low=lv, high=hv+1)
            for lv, hv in zip(low_val[:-1], high_val[:-1])])
        assert numpy.all(val1 == numpy_val1)

        # Specifying the size explicitly
        g = function([low, high], random.random_integers(low=low, high=high, size=(3,)))
        val2 = g(low_val, high_val)
        numpy_rng = numpy.random.RandomState(int(seed_gen.randint(2**30)))
        numpy_val2 = numpy.asarray([numpy_rng.randint(low=lv, high=hv+1)
            for lv, hv in zip(low_val, high_val)])
        assert numpy.all(val2 == numpy_val2)
        self.assertRaises(ValueError, g, low_val[:-1], high_val[:-1])
Example #3
class Sample2DLayer(Layer):
    """
        Sample random patches from 2D input. Result is batch of patches with shape
         (patches_per_example * input_batch_size, num_channels, image_h, image_w).
    """
    def __init__(self,
                 incoming,
                 patches_per_example,
                 patch_size,
                 pad=True,
                 **kwargs):
        self.rng = RandomStreams()
        super(Sample2DLayer, self).__init__(incoming, **kwargs)
        self.patch_size = patch_size
        self.patches_per_example = patches_per_example
        self.pad = pad

    def get_output_for(self, input, **kwargs):
        def sample_one_image(img, y, x):
            return theano.map(
                lambda x, y, image: image[:, y:(y + self.patch_size[0]), x:
                                          (x + self.patch_size[1])],
                sequences=[x, y],
                non_sequences=img)[0]

        if self.pad:
            shp = (input.shape[0], input.shape[1],
                   input.shape[2] + self.patch_size[0] * 2 - 2,
                   input.shape[3] + self.patch_size[1] * 2 - 2)

            padded_input = T.zeros(shp)
            padded_input = T.set_subtensor(
                padded_input[:, :, (self.patch_size[0] -
                                    1):(-self.patch_size[0] + 1),
                             (self.patch_size[1] - 1):(-self.patch_size[1] +
                                                       1)], input)

            input = padded_input

        y = self.rng.random_integers(size=(input.shape[0],
                                           self.patches_per_example),
                                     low=0,
                                     high=input.shape[2] - self.patch_size[0])
        x = self.rng.random_integers(size=(input.shape[0],
                                           self.patches_per_example),
                                     low=0,
                                     high=input.shape[3] - self.patch_size[1])

        return theano.map(sample_one_image,
                          sequences=[input, y, x])[0].reshape(
                              (-1, input.shape[1], self.patch_size[0],
                               self.patch_size[1]))

    def get_output_shape_for(self, input_shape):
        if input_shape[0] is None:
            return (None, input_shape[1], self.patch_size[0],
                    self.patch_size[1])
        else:
            return (input_shape[0] * self.patches_per_example, input_shape[1],
                    self.patch_size[0], self.patch_size[1])
Example #4
    def __init__(self, latent_dim, hidden_dim, exploration_probability, clip_value, value_decay, data,
                 batch_size, exploration_decay_rate):
        self.latent_dim = latent_dim
        self.words = data["words"]
        self.depth = 1 + max(len(w) for w in self.words)
        depth = self.depth
        self.hidden_dim = hidden_dim
        self.characters = data["characters"]
        self.charset = data["charset"]
        self.charmap = data["charmap"]
        self.wordcount = len(self.words)
        self.charcount = len(self.charset)
        self.generator = Generator("generator", latent_dim, depth, self.charcount, hidden_dim, exploration_probability,
                                   exploration_decay_rate)
        self.discriminator = Discriminator("discriminator", depth, self.charcount, hidden_dim)
        self.clip_value = np.float32(clip_value)
        self.value_decay = theano.shared(np.float32(value_decay), "value_decay")

        self.batch_size = batch_size
        self.word_vectors = np.vstack([self.word_to_vector(word).reshape((1, -1)) for word in self.words]).astype(
            np.int32)
        xreal = Input((depth,), name="xreal", dtype="int32")
        batch_n = T.iscalar("batch_n")
        srng = RandomStreams(seed=234)
        z = srng.normal(size=(batch_n, latent_dim))
        e = srng.uniform(size=(batch_n, depth), low=0, high=1)
        ex = srng.random_integers(size=(batch_n, latent_dim), low=0, high=self.charcount)
        # z = Input((latent_dim,), name="z", dtype="float32")
        # e = Input((depth,), name="e", dtype="float32")
        # ex = Input((depth,), name="ex", dtype="int32")
        # xreal = T.imatrix("xreal")
        # z = T.fmatrix("z")
        # e = T.fmatrix("e")
        # ex = T.imatrix("ex")
        _, xfake = self.generator.policy(z, e, ex)
        xfake = theano.gradient.zero_grad(xfake)
        # print("xfake: {}, {}".format(xfake, xfake.type))
        # print("xreal: {}, {}".format(xreal, xreal.type))
        _, yfake = self.discriminator.discriminator(xfake)
        _, yreal = self.discriminator.discriminator(xreal)
        dloss = T.mean(yfake, axis=None) - T.mean(yreal, axis=None)
        dconstraints = {p: ClipConstraint(self.clip_value) for p in self.discriminator.clip_params}
        dopt = Adam(1e-4)
        dupdates = dopt.get_updates(self.discriminator.params, dconstraints, dloss)

        n = z.shape[0]
        outputs_info = [T.zeros((n,), dtype='float32')]
        yfaker = T.transpose(yfake[:, ::-1], (1, 0))
        vtarget, _ = theano.scan(reward_function, outputs_info=outputs_info, sequences=yfaker,
                                 non_sequences=self.value_decay)
        vtarget = T.transpose(vtarget, (1, 0))[:, ::-1]
        # print("vtarget: {}, {}, {}".format(vtarget, vtarget.ndim, vtarget.type))
        _, vpred = self.generator.value(z, xfake)
        gloss = T.mean(T.abs_(vtarget - vpred), axis=None)
        gopt = Adam(1e-5)
        gupdates = gopt.get_updates(self.generator.params, {}, gloss)
        self.discriminator_train_function = theano.function([xreal, batch_n], [dloss], updates=dupdates)
        self.generator_train_function = theano.function([batch_n], [gloss], updates=gupdates)
        self.generator_sample_function = theano.function([batch_n], [xfake])
        self.test_function = theano.function([xreal, batch_n], [dloss, gloss])
Example #5
def get_conv_xy(layer, deterministic=True):
    w_np = layer.W.get_value()
    input_layer = layer.input_layer
    if layer.pad == 'same':
        input_layer = L.PadLayer(layer.input_layer,
                                 width=np.array(w_np.shape[2:])/2,
                                 batch_ndim=2)
    input_shape = L.get_output_shape(input_layer)
    max_x = input_shape[2] - w_np.shape[2]
    max_y = input_shape[3] - w_np.shape[3]
    srng = RandomStreams()
    patch_x = srng.random_integers(low=0, high=max_x)
    patch_y = srng.random_integers(low=0, high=max_y)

    #print("input_shape shape: ", input_shape)
    #print("pad: \"%s\""% (layer.pad,))
    #print(" stride: " ,layer.stride)
    #print("max_x %d max_y %d"%(max_x,max_y))

    x = L.get_output(input_layer, deterministic=deterministic)
    x = x[:, :,
          patch_x:patch_x + w_np.shape[2], patch_y:patch_y + w_np.shape[3]]
    x = T.flatten(x, 2)  # N,D

    w = layer.W
    if layer.flip_filters:
        w = w[:, :, ::-1, ::-1]
    w = T.flatten(w, outdim=2).T  # D,O
    y = T.dot(x, w) # N,O
    if layer.b is not None:
        y += T.shape_padaxis(layer.b, axis=0)
    return x, y
Example #6
    def call(self, x, **kwargs):
        from theano import tensor as T
        from theano.tensor.shared_randomstreams import RandomStreams
        if K.backend() == "theano":
            import theano
            mask_rng = RandomStreams(self.seed)

            ints = mask_rng.random_integers(size=K.expand_dims(x.shape[0], 0),
                                            high=x.shape[1] - 1)

            def set_value_at_position(i, ns_x):
                zeros = T.zeros_like(ns_x[0, :])
                return T.set_subtensor(zeros[:i], 1)

            result, updates = theano.scan(fn=set_value_at_position,
                                          outputs_info=None,
                                          sequences=ints,
                                          non_sequences=x)
            mask = mask_rng.shuffle_row_elements(result)
        elif K.backend() == "tensorflow":
            import tensorflow as tf
            tf.set_random_seed(self.seed)
            ints = tf.random_uniform(shape=K.expand_dims(tf.shape(x)[0], 0),
                                     maxval=x.shape[1],
                                     dtype=tf.int32)
            result = tf.sequence_mask(ints, maxlen=x.shape[1])
            parallel_iterations = self._deterministic and 1 or 10
            mask = tf.cast(
                tf.map_fn(tf.random_shuffle,
                          result,
                          parallel_iterations=parallel_iterations), K.floatx())
        else:
            raise NotImplementedError()
        return K.concatenate([x * mask, mask])
Example #7
class RomainLayer(lasagne.layers.Layer):
    def __init__(self, incoming, **kwargs):
        super(RomainLayer, self).__init__(incoming, **kwargs)
        self.mask = ones(self.input_shape[2], dtype='float32')
        self.mask[0] = 0
        self.mask[1] = 0
        self.snrg = RandomStreams()

    def get_output_for(self, input, deterministic=False, **kwargs):
        input = input * self.mask.reshape((1, 1, -1, 1))
        if deterministic:
            return input
        shift_temps = self.snrg.random_integers(low=-44000, high=44000)
        shift_freq = self.snrg.random_integers(low=-1, high=1)
        return theano.tensor.roll(theano.tensor.roll(input,
                                                     shift_temps,
                                                     axis=3),
                                  shift_freq,
                                  axis=2)
Example #8
    def cost_from_X_wrong(self, data):
        X, Y = data
        theano_rng = RandomStreams(seed = self.rng.randint(2 ** 15))
        noise = theano_rng.random_integers(size = (X.shape[0] * self.k,), low=0, high = self.dict_size - 1)
        p_n = 1. / self.dict_size

        pos = T.nnet.sigmoid(self.delta(data) - T.log(self.k * p_n))
        neg = T.nnet.sigmoid(self.delta((T.tile(X, (self.k, 1)), noise)) - T.log(self.k * p_n))
        neg = neg.reshape((X.shape[0], self.k))

        rval = -T.log(pos) - T.log(1 - neg).sum(axis=1)
        return rval.mean()
Example #9
class OneHotDistribution(Distribution):
    """Randomly samples from a distribution of one-hot vectors."""
    def __init__(self, space, rng=None):
        super(OneHotDistribution, self).__init__(space)

        self.dim = space.get_total_dimension()
        self.formatter = OneHotFormatter(self.dim, dtype=space.dtype)

        self.rng = RandomStreams() if rng is None else rng

    def sample(self, n):
        idxs = self.rng.random_integers((n, 1), low=0, high=self.dim - 1)
        return self.formatter.theano_expr(idxs, mode='concatenate')
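For intuition, here is a plain-NumPy sketch of what `sample(n)` above produces: `n` uniformly drawn class indices expanded to one-hot rows. It is an illustration only and does not use the pylearn2 `OneHotFormatter` API.

import numpy as np

def sample_one_hot(n, dim, rng=np.random):
    # Draw n class indices uniformly from [0, dim - 1], then one-hot encode them.
    idxs = rng.randint(0, dim, size=n)
    out = np.zeros((n, dim), dtype=np.float32)
    out[np.arange(n), idxs] = 1.0
    return out

print(sample_one_hot(3, 5))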
Example #10
    def score(self, Y, Y_hat):
        # TODO fix me later when using IndexSpace

        assert hasattr(Y_hat, 'owner')
        owner = Y_hat.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            Y_hat, = owner.inputs
            owner = Y_hat.owner
            op = owner.op
        assert isinstance(op, T.nnet.Softmax)
        state_below, = owner.inputs
        assert state_below.ndim == 2

        # TODO make this more generic like above
        state_below = state_below.owner.inputs[0].owner.inputs[0]

        Y = T.argmax(Y, axis = 1)
        k = self.num_noise_samples

        if self.noise_prob is None:
            theano_rng = RandomStreams(seed = self.mlp.rng.randint(2 ** 15))
            noise = theano_rng.random_integers(size = (state_below.shape[0], self.num_noise_samples,), low=0, high = self.n_classes - 1)
            p_n = 1. / self.n_classes
            p_w = T.nnet.sigmoid((state_below * self.W[:, Y].T).sum(axis=1) + self.b[Y])
            p_x = T.nnet.sigmoid((T.concatenate([state_below] * k) * self.W[:, noise.flatten()].T).sum(axis=1) + self.b[noise.flatten()])
            # TODO is this reshape necessary?
            p_x = p_x.reshape((state_below.shape[0], k))

            #pos = k * p_n / (p_w + k * p_n) * T.log(p_w)
            #neg = (p_x / (p_x + k * p_n) * T.log(p_x)).sum(axis=1)
        else:
            #import ipdb
            #ipdb.set_trace()
            theano_rng = MRG_RandomStreams(max(self.mlp.rng.randint(2 ** 15), 1))
            assert self.mlp.batch_size is not None
            noise = theano_rng.multinomial(pvals = np.tile(self.noise_prob.get_value(), (k * self.mlp.batch_size, 1)))
            noise = T.argmax(noise, axis = 1)
            p_n = self.noise_prob
            p_w = T.nnet.sigmoid((state_below * self.W[:, Y].T).sum(axis=1) + self.b[Y])
            p_x = T.nnet.sigmoid((T.concatenate([state_below] * k) * self.W[:, noise.flatten()].T).sum(axis=1) + self.b[noise.flatten()])
            p_x = p_x.reshape((state_below.shape[0], k))

            pos = k * p_n[Y] / (p_w + k * p_n[Y]) * T.log(p_w)
            neg = (p_x / (p_x + k * p_n[noise].reshape(p_x.shape)) * T.log(p_x)).sum(axis=1)


        #return -(pos - neg).mean()
        return p_w, p_x
Example #11
    def test_dtype(self):
        random = RandomStreams(utt.fetch_seed())
        low = tensor.lscalar()
        high = tensor.lscalar()
        out = random.random_integers(low=low, high=high, size=(20,), dtype='int8')
        assert out.dtype == 'int8'
        f = function([low, high], out)

        val0 = f(0, 9)
        assert val0.dtype == 'int8'

        val1 = f(255, 257)
        assert val1.dtype == 'int8'
        assert numpy.all(abs(val1) <= 1)
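The final assertion holds because 255, 256 and 257 do not fit into `int8` and wrap around to -1, 0 and 1 after the cast. A quick NumPy check of that wraparound (behaviour of the cast itself, not of Theano):

import numpy as np

print(np.array([255, 256, 257]).astype('int8'))   # [-1  0  1]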
Example #14
    def test_random_integers(self):
        """Test that RandomStreams.random_integers generates the same results as numpy"""
        # Check over two calls to see if the random state is correctly updated.
        random = RandomStreams(utt.fetch_seed())
        fn = function([], random.random_integers((20, 20), -5, 5))
        fn_val0 = fn()
        fn_val1 = fn()

        rng_seed = numpy.random.RandomState(utt.fetch_seed()).randint(2**30)
        rng = numpy.random.RandomState(int(rng_seed))  # int() is for 32-bit
        numpy_val0 = rng.random_integers(-5, 5, size=(20, 20))
        numpy_val1 = rng.random_integers(-5, 5, size=(20, 20))

        assert numpy.all(fn_val0 == numpy_val0)
        assert numpy.all(fn_val1 == numpy_val1)
Example #16
    def get_gradients(self, model, data, ** kwargs):

        space,  sources = self.get_data_specs(model)
        space.validate(data)
        X, Y = data


        theano_rng = RandomStreams(seed = model.rng.randint(2 ** 15))
        noise = theano_rng.random_integers(size = (X.shape[0] * model.k,), low=0, high = model.dict_size - 1)


        delta = model.delta(data)
        p = model.score(X, Y)
        params = model.get_params()

        pos_ = T.jacobian(model.score(X, Y), params, disconnected_inputs='ignore')
        pos_coeff = 1 - T.nnet.sigmoid(model.delta(data))
        pos = []
        for param in pos_:
            axes = [0]
            axes.extend(['x' for item in range(param.ndim - 1)])
            pos.append(pos_coeff.dimshuffle(axes) * param)
        del pos_, pos_coeff

        noise_x = T.tile(X, (model.k, 1))
        neg_ = T.jacobian(model.score(noise_x, noise), params, disconnected_inputs='ignore')
        neg_coeff = T.nnet.sigmoid(model.delta((noise_x, noise)))
        neg = []
        for param in neg_:
            axes = [0]
            axes.extend(['x' for item in range(param.ndim - 1)])
            tmp = neg_coeff.dimshuffle(axes) * param
            new_shape = [X.shape[0], model.k]
            new_shape.extend([tmp.shape[i] for i in range(1, tmp.ndim)])
            neg.append(tmp.reshape(new_shape).sum(axis=1))
        del neg_, neg_coeff


        grads = [(pos_ - neg_).mean(axis=0) for pos_, neg_ in zip(pos, neg)]
        gradients = OrderedDict(izip(params, grads))
        updates = OrderedDict()

        return gradients, updates
Example #17
    def build_graph_logloss(self):
        #initialize for randomness
        if self.seed is None:
            self.seed = numpy.random.randint(2**30)
        theano_rng = RandomStreams(self.seed)
        self.randstate = numpy.random.RandomState(self.seed)

        #define parameters
        init_p_before_sigmoid = numpy.linspace(start=-self.init_p_width,stop=self.init_p_width, num=self.dim+1)
        self.p_before_sigmoid = theano.shared(init_p_before_sigmoid.astype(theano.config.floatX),name='p_before_sigmoid')
        self.params = [self.p_before_sigmoid]

        #define inputs
        self.x1_idxs = T.ivector()
        self.x2_idxs = T.ivector()
        self.x1_idxs.tag.test_value = numpy.asarray([0,1],dtype=numpy.int32)
        self.x2_idxs.tag.test_value = numpy.asarray([1,2],dtype=numpy.int32)

        #define negative inputs
        choice = theano_rng.binomial(size=self.x1_idxs.shape)
        alternative = theano_rng.random_integers(size=self.x1_idxs.shape,low=0,high=self.n_entities-1)
        self.x1_idxs_negative = T.switch(choice,self.x1_idxs,alternative)
        self.x2_idxs_negative = T.switch(choice,alternative,self.x2_idxs)

        #define graph from inputs to probabilities and to log loss
        def get_embed(index_tensor):
            return self.embeddings[index_tensor].reshape((index_tensor.shape[0],self.dim))

        self.x1_emb = get_embed(self.x1_idxs)
        self.x2_emb = get_embed(self.x2_idxs)
        self.x1neg_emb = get_embed(self.x1_idxs_negative)
        self.x2neg_emb = get_embed(self.x2_idxs_negative)

        def get_prob(embed_tensor1,embed_tensor2):
            distances = T.sum(embed_tensor1*embed_tensor2 + (1-embed_tensor1)*(1-embed_tensor2), axis=1)
            return sigmoid(self.p_before_sigmoid[distances])

        self.pos_probs = get_prob(self.x1_emb,self.x2_emb)
        self.neg_probs = get_prob(self.x1neg_emb,self.x2neg_emb)
        self.loss = -T.mean(T.log(self.pos_probs) + T.log(1.0-self.neg_probs))
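The negative sampling above corrupts exactly one side of each (x1, x2) pair: a per-pair coin flip decides which side gets replaced by a random entity id. A small NumPy sketch of the same corruption, with arbitrary example values, purely for illustration:

import numpy as np

def corrupt_pairs(x1, x2, n_entities, rng=np.random):
    # choice == 1: keep x1 and corrupt x2; choice == 0: corrupt x1 and keep x2.
    choice = rng.binomial(1, 0.5, size=x1.shape)
    alternative = rng.randint(0, n_entities, size=x1.shape)
    x1_neg = np.where(choice, x1, alternative)
    x2_neg = np.where(choice, alternative, x2)
    return x1_neg, x2_neg

x1 = np.array([0, 1, 2])
x2 = np.array([1, 2, 3])
print(corrupt_pairs(x1, x2, n_entities=10))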
Example #18
    def cost_(self, Y, Y_hat):
        # TODO fix me later when using IndexSpace

        assert hasattr(Y_hat, 'owner')
        owner = Y_hat.owner
        assert owner is not None
        op = owner.op
        if isinstance(op, Print):
            assert len(owner.inputs) == 1
            Y_hat, = owner.inputs
            owner = Y_hat.owner
            op = owner.op
        assert isinstance(op, T.nnet.Softmax)
        state_below, = owner.inputs
        assert state_below.ndim == 2

        # TODO make this more generic like above
        state_below = state_below.owner.inputs[0].owner.inputs[0]

        #import ipdb
        #ipdb.set_trace()
        Y = T.argmax(Y, axis = 1)
        #Y = Y.astype('uint32')
        theano_rng = RandomStreams(seed = self.mlp.rng.randint(2 ** 15))
        noise = theano_rng.random_integers(size = (state_below.shape[0], self.num_noise_samples,), low=0, high = self.n_classes - 1)
        k = self.num_noise_samples
        p_n = 1. / self.n_classes

        pos = T.nnet.sigmoid((state_below * self.W[:, Y].T).sum(axis=1) + self.b[Y] - T.log(k * p_n))
        neg = T.nnet.sigmoid((T.concatenate([state_below] * k) * self.W[:, noise.flatten()].T).sum(axis=1) + self.b[noise.flatten()] - T.log(k * p_n))
        # TODO is this reshape necessary?
        neg = neg.reshape((state_below.shape[0], k)).sum(axis=1)


        rval =  -T.log(pos) - T.log(1 - neg)
        return rval.mean()
Example #19
class LDmodel():
	
	'''
	Models discrete-time continuous data as a linear transformation
	of a linear dynamical system with sparse "noise".
	
	x: data
	s: latent variable
	u: sparse noise
	n: gaussian noise
	W: generative matrix
	M: dynamical matrix
	
	s_(t+1) = M*s_t + u
	x_t = W*s_t + n
	
	Approximate EM learning is performed via minibatched gradient 
	ascent on the log-likelihood. Inference/sampling is achieved with
	particle filtering. The proposal distribution in the particle filter
	ignores (for now) the predictive part and samples directly from the
	posterior specified by the generative part (as if the top equation
	didn't exist.)
	
	'''
	
	
	def __init__(self, nx, ns, npcl, xvar=1.0):
		
		#generative matrix
		init_W=np.asarray(np.random.randn(nx,ns)/0.1,dtype='float32')
		
		#normalize the columns of W to be unit length
		#(maybe unnecessary if sampling?)
		init_W=init_W/np.sqrt(np.sum(init_W**2,axis=0))
		
		#dynamical matrix
		init_M=np.asarray(np.eye(ns),dtype='float32')
		
		#sparsity parameters
		#parametrized as the exponent of ln_b to ensure positivity
		init_ln_b=np.asarray(np.zeros(ns),dtype='float32')
		
		self.W=theano.shared(init_W)
		self.M=theano.shared(init_M)
		self.ln_b=theano.shared(init_ln_b)
		
		#for ease of use
		self.b=T.exp(self.ln_b)
		
		#square root of covariance matrix of proposal distribution
		#initialized to the true root covariance
		init_cov_inv=np.dot(init_W.T, init_W)/(xvar**2) + 0.5*np.eye(ns)*np.exp(-2.0*init_ln_b)
		init_cov=spla.inv(init_cov_inv)
		init_C=spla.sqrtm(init_cov)
		init_C=np.asarray(np.real(init_C),dtype='float32')
		
		init_s_now=np.asarray(np.zeros((npcl,ns)),dtype='float32')
		init_weights_now=np.asarray(np.ones(npcl)/float(npcl),dtype='float32')
		
		init_s_past=np.asarray(np.zeros((npcl,ns)),dtype='float32')
		init_weights_past=np.asarray(np.ones(npcl)/float(npcl),dtype='float32')
		
		self.C=theano.shared(init_C)
		
		self.s_now=theano.shared(init_s_now)
		self.weights_now=theano.shared(init_weights_now)
		
		self.s_past=theano.shared(init_s_past)
		self.weights_past=theano.shared(init_weights_past)
		
		self.xvar=np.asarray(xvar,dtype='float32')
		
		self.nx=nx		#dimensionality of observed variables
		self.ns=ns		#dimensionality of latent variables
		self.npcl=npcl	#number of particles in particle filter
		
		#this is used for the resampling
		nummat=np.repeat(np.reshape(np.arange(npcl),(npcl,1)),npcl,axis=1)
		self.idx_mat=theano.shared(nummat.T)
		
		#for ease of use and efficient computation (these are used a lot)
		self.CCT=T.dot(self.C, self.C.T)
		self.cov_inv=T.dot(self.W.T, self.W)/(self.xvar**2) + 0.5*T.eye(self.ns)/(self.b**2)
		
		self.theano_rng = RandomStreams()
		
		self.init_multi_samp=theano.shared(np.asarray(np.arange(npcl),dtype='int64'))
		
		self.params=							[self.W, self.M, self.ln_b]
		self.rel_lrates=theano.shared(np.asarray([  1.0,    1.0,     1.0]   ,dtype='float32'))
		
		self.meta_params=     							[self.C]
		self.meta_rel_lrates=theano.shared(np.asarray([   1.0  ], dtype='float32'))
	
	
	def sample_proposal_s(self, s, xp):
		
		#s is npcl-by-ns
		#xp is 1-by-nx
		
		n=self.theano_rng.normal(size=T.shape(self.s_now))
		
		mean_term=T.dot(xp, self.W)/(self.xvar**2) + T.dot(s,self.M.T*0.5/(self.b**2))
		prop_mean=T.dot(mean_term, self.CCT)
		
		s_prop=prop_mean + T.dot(n, self.C)
		
		#I compute the term inside the exponent for the pdf of the proposal distrib
		prop_term=-T.sum(n**2)/2.0
		
		#return T.cast(s_prop,'float32'), T.cast(s_pred,'float32'), T.cast(prop_term,'float32'), prop_mean
		return s_prop, prop_term, prop_mean
	
	
	def forward_filter_step(self, xp):
		
		#need to sample from the proposal distribution first
		s_samps, prop_terms, prop_means = self.sample_proposal_s(self.s_now, xp)
		
		updates={}
		
		#now that we have samples from the proposal distribution, we need to reweight them
		
		recons=T.dot(self.W, s_samps.T)
		s_pred=self.get_prediction(self.s_now)
		
		x_terms=-T.sum((recons-T.reshape(xp,(self.nx,1)))**2,axis=0)/(2.0*self.xvar**2)
		s_terms=-T.sum(T.abs_((s_samps-s_pred)/self.b),axis=1)
		
		energies=x_terms+s_terms-prop_terms
		
		#to avoid exponentiating large or very small numbers, I 
		#"re-center" the reweighting factors by adding a constant, 
		#as this has no impact on the resulting new weights
		
		#energies_recentered=energies-T.max(energies)
		
		alpha=T.exp(energies) #these are the reweighting factors
		
		new_weights=self.weights_now*alpha
		#normalizer=T.sum(new_weights_unnorm)
		#new_weights=new_weights_unnorm/normalizer  #need to normalize new weights
		
		updates[self.s_past]=T.cast(self.s_now,'float32')
		
		updates[self.s_now]=T.cast(s_samps,'float32')
		
		updates[self.weights_past]=T.cast(self.weights_now,'float32')
		updates[self.weights_now]=T.cast(new_weights,'float32')
		
		#return normalizer, energies_recentered, s_samps, s_pred, T.dot(self.W.T,(xp-self.c)), updates
		#return normalizer, energies_recentered, updates
		#return h_samps, updates
		return updates
	
	
	def proposal_loss(self,C):
		
		#calculates how far off self.CCT is from the true posterior covariance
		CCT=T.dot(C, C.T)
		prod=T.dot(CCT, self.cov_inv)
		diff=prod-T.eye(self.ns)
		tot=T.sum(T.sum(diff**2))  #frobenius norm
		
		return tot
	
	
	def prop_update_step(self, C_now, lr):
		
		loss=self.proposal_loss(C_now)
		gr=T.grad(loss, C_now)
		return [C_now-lr*gr], theano.scan_module.until(loss<1e-6)
	
	
	def update_proposal_distrib(self, n_steps, lr):
		
		#does some gradient descent on self.C, so that self.CCT becomes
		#closer to the true posterior covariance
		C0=self.C
		Cs, updates = theano.scan(fn=self.prop_update_step,
									outputs_info=[C0],
									non_sequences=[lr],
									n_steps=n_steps)
		
		updates[self.C]=Cs[-1]
		
		loss=self.proposal_loss(Cs[-1])
		
		#updates={}
		#updates[self.C]=self.prop_update_step(self.C,lr)
		#loss=self.proposal_loss(self.C)
		
		return loss, updates
		
	
	
	def get_prediction(self, s):
		
		s_pred=T.dot(s, self.M)
		
		return s_pred
	
	
	def sample_joint(self, sp):
		
		#t2_samp=self.theano_rng.multinomial(pvals=T.reshape(self.weights_now,(1,self.npcl))).T
		#s2_samp=T.cast(T.sum(self.s_now*T.addbroadcast(t2_samp,1),axis=0),'float32')
		t2_samp=self.sample_multinomial(self.weights_now,3)
		s2_samp=self.s_now[t2_samp]
		
		diffs=(s2_samp-sp)
		abs_term=T.sum(T.abs_(diffs)/self.b,axis=1)
		alpha=T.exp(-abs_term)
		probs=self.weights_past*alpha
		#probs=probs_unnorm/T.sum(probs_unnorm)
		
		#t1_samp=self.theano_rng.multinomial(pvals=T.reshape(probs,(1,self.npcl))).T
		#s1_samp=T.cast(T.sum(self.s_past*T.addbroadcast(t1_samp,1),axis=0),'float32')
		t1_samp=self.sample_multinomial(probs,3)
		s1_samp=self.s_past[t1_samp]
		
		return [s1_samp, s2_samp]
	
	
	def update_params(self, x1, x2, n_samps, lrate):
		
		#this function samples from the joint posterior and performs
		# a step of gradient ascent on the log-likelihood
		
		sp=self.get_prediction(self.s_past)
									
		#sp should be np by ns
		
		[s1_samps, s2_samps], updates = theano.scan(fn=self.sample_joint,
									outputs_info=[None, None],
									non_sequences=[sp],
									n_steps=n_samps)
		
		
		
		x2_recons=T.dot(self.W, s2_samps.T)
		
		s_pred = self.get_prediction(s1_samps)
		
		sterm=-T.mean(T.sum(T.abs_((s2_samps-s_pred)/self.b),axis=1)) - T.sum(T.log(self.b))
		
		#xterm1=-T.mean(T.sum((x1_recons-T.reshape(x1,(self.nx,1)))**2,axis=0)/(2.0*self.xvar**2))
		xterm2=-T.mean(T.sum((x2_recons-T.reshape(x2,(self.nx,1)))**2,axis=0)/(2.0*self.xvar**2))
		
		#energy = hterm1 + xterm1 + hterm2 + xterm2 + sterm -T.sum(T.sum(self.A**2))
		#energy = hterm1 + xterm2 + sterm 
		energy = xterm2 + sterm 
		
		learning_params=[self.params[i] for i in range(len(self.params)) if self.rel_lrates[i]!=0.0]
		learning_rel_lrates=[self.rel_lrates[i] for i in range(len(self.params)) if self.rel_lrates[i]!=0.0]
		gparams=T.grad(energy, learning_params, consider_constant=[s1_samps, s2_samps])
		
		# constructs the update dictionary
		for gparam, param, rel_lr in zip(gparams, learning_params, learning_rel_lrates):
			#gnat=T.dot(param, T.dot(param.T,param))
			if param==self.M:
				#I do this so the derivative of M doesn't depend on the sparsity parameters
				updates[param] = T.cast(param + gparam*T.reshape(self.b,(1,self.ns))*lrate*rel_lr,'float32')
			elif param==self.b:
				updates[param] = T.cast(param + gparam*T.reshape(1.0/self.b,(1,self.ns))*lrate*rel_lr,'float32')
			else:
				updates[param] = T.cast(param + gparam*lrate*rel_lr,'float32')
		
		return energy, updates
		
	
	def get_ESS(self):
		
		return 1.0/T.sum(self.weights_now**2)
	
	
	def resample(self):
		
		updates={}
		#samp=self.theano_rng.multinomial(size=self.weights_now.shape,pvals=self.weights_now)
		idxs=self.sample_multinomial(self.weights_now,3)
		#idxs=T.cast(T.sum(samp*self.idx_mat,axis=1),'int32')
		s_samps=self.s_now[idxs]
		updates[self.s_now]=s_samps
		updates[self.weights_now]=T.cast(T.ones_like(self.weights_now)/T.cast(self.npcl,'float32'),'float32') #dtype paranoia
		
		return updates
	
	
	def simulate_step(self, s):
		
		s=T.reshape(s,(1,self.ns))
		
		sp=self.get_prediction(s)
		
		xp=T.dot(self.W, sp.T)
		
		return T.cast(sp,'float32'), T.cast(xp,'float32')
		
	
	def simulate_forward(self, n_steps):
		
		
		s0=T.sum(self.s_now*T.reshape(self.weights_now,(self.npcl,1)),axis=0)
		s0=T.reshape(s0,(1,self.ns))
		[sp, xp], updates = theano.scan(fn=self.simulate_step,
										outputs_info=[s0, None],
										n_steps=n_steps)
		
		return sp, xp, updates
	
	
	def multinomial_step(self,samp,weights):
		
		u=self.theano_rng.uniform(size=self.weights_now.shape)
		i=self.theano_rng.random_integers(size=self.weights_now.shape, low=0, high=self.npcl-1)
		Wnow=weights[samp]
		Wstep=weights[i]
		probs=Wstep/Wnow
		out=T.switch(u<probs, i, samp)
		return out
	
	
	def sample_multinomial(self,weights,nsteps):
		
		#this function samples from a multinomial distribution using
		#the Metropolis method as in [Murray, Lee, Jacob 2013]
		#weights are unnormalized
		#this is biased for small nsteps, but could be faster than the
		#native theano multinomial sampler and the use of unnormalized
		#weights improves numerical stability
		samp0=self.init_multi_samp
		samps, updates = theano.scan(fn=self.multinomial_step,
										outputs_info=[samp0],
										non_sequences=[weights],
										n_steps=nsteps)
		
		return samps[-1]
	
	
	def set_rel_lrates(self, new_rel_lrates):
		updates={}
		updates[self.rel_lrates]=new_rel_lrates
		return updates
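`sample_multinomial` above draws approximate multinomial samples with the Metropolis method of Murray, Lee and Jacob (2013): propose a uniform index and accept with the ratio of unnormalized weights. A standalone NumPy sketch of one such chain (the Theano version runs one chain per particle in parallel); weights and step count below are arbitrary:

import numpy as np

def metropolis_multinomial(weights, nsteps, rng=np.random):
    # Approximate sampling from p(i) proportional to weights[i], without normalizing:
    # propose a uniform index and accept with probability weights[prop] / weights[cur].
    npcl = len(weights)
    samp = 0  # arbitrary starting index
    for _ in range(nsteps):
        u = rng.uniform()
        i = rng.randint(0, npcl)
        if u < weights[i] / weights[samp]:
            samp = i
    return samp

w = np.array([0.1, 0.5, 0.2, 0.2])
print([metropolis_multinomial(w, nsteps=50) for _ in range(10)])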
Example #20
class AverageSGM(object):

    ''' A toy example showing the usage of `extheano.NodeDescriptor` and
    `extheano.jit`.
    This class performs the stochastic gradient method (SGM) to find the
    average of given data.

    Usage:
        >> data = np.arange(1000)
        >> a = AverageSGM(data)
        >> for _ in xrange(10000): a.calc_loss_with_onestep_SGM()
        >> est = a.get_estimation()
    '''

    # node descriptors for shared variables
    # whole data of which we will compute the average
    data = extheano.NodeDescriptor()
    # estimate of the average
    mu = extheano.NodeDescriptor()
    # learning rate (will be discounted as SGM goes on)
    lrate = extheano.NodeDescriptor()

    def __init__(self, data, batch_size=10, init_val=0., lrate=0.05,
                 degree=0.75, seed=None):
        '''Set parameters for the SGM

        :param data:        array-like with its dimension one
        :param batch_size:  size of the mini batch in integer
        :param init_val:    initial guess of the average in float
        :param lrate:       initial learning rate in float
        :param degree:      degree of learning rate decreasing in float
        :param seed:        seed for RNG in integer
        '''

        # pure-python variables (assumed to be invariant until recompilation)
        self.batch_size = batch_size
        self.n_batches = len(data) / batch_size
        self.degree = degree
        self.init_lrate = lrate

        # initialize the nodes
        self.data = theano.shared(data.astype(float), 'data', borrow=True)
        self.mu = theano.shared(float(init_val), 'mu')
        self.lrate = theano.shared(float(lrate), 'lrate')

        # shared random streams
        self.rng = RandomStreams(seed)

    def quadratic_loss(self, minibatch):
        '''Get the quadratic loss against the given input'''
        return ((minibatch - self.mu) ** 2).mean()

    def gradient_descent(self, loss, lrate):
        '''Perform one step of the gradient descent on the given loss

        Note that you can update `self.mu` with the normal assignment
        operation since it is a descriptor.
        '''
        # calculate the gradient
        grad = -T.grad(loss, self.mu)
        # update the estimation
        self.mu = self.mu + lrate * grad

    def next_lrate(self, lr):
        '''Return the discounted learning rate

        The learning rate will be proportional to the number of iterations with
        minus `self.degree` on the exponent.
        '''
        time = (self.init_lrate / lr) ** (1. / self.degree)
        ratio = (1. - 1. / (1. + time)) ** self.degree
        return lr * ratio

    # With the decorator `@extheano.jit`, you can compile your theano-function
    # 'just in time'. Use `@extheano.jit.parse` instead if it has arguments with
    # default values.
    @extheano.jit.parse
    def calc_loss_with_onestep_SGM(self, scale=1.):
        '''Calculate the quadratic loss and perform one step of the SGM
        '''
        # assign a random batch to the input
        batch_start = self.batch_size * \
            self.rng.random_integers(low=0, high=self.n_batches - 1)
        batch_stop = batch_start + self.batch_size
        minibatch = self.data[batch_start: batch_stop]

        # perform SGM and discount the learning rate
        loss = self.quadratic_loss(minibatch)
        self.gradient_descent(loss, self.lrate * scale)
        self.lrate = self.next_lrate(self.lrate)
        return loss

    @extheano.jit
    def set_estimation(self, val):
        '''Set the estimation of the average'''
        self.mu = T.cast(val, theano.config.floatX)

    @extheano.jit
    def get_estimation(self):
        '''Get the estimation of the average'''
        return self.mu
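As a side note on `next_lrate` above: repeatedly applying the update gives the usual polynomial decay schedule lrate_t = init_lrate * (1 + t) ** (-degree). A quick standalone check with arbitrary numbers:

def next_lrate(lr, init_lrate=0.05, degree=0.75):
    # Restatement of AverageSGM.next_lrate for the check below.
    time = (init_lrate / lr) ** (1. / degree)
    ratio = (1. - 1. / (1. + time)) ** degree
    return lr * ratio

lr, init, degree = 0.05, 0.05, 0.75
for t in range(5):
    assert abs(lr - init * (1 + t) ** (-degree)) < 1e-12
    lr = next_lrate(lr, init, degree)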
Example #21
    def __init__(self, dim, n_entities, batch_size=None, validation_samples=2):

        self.__dict__.update(locals())
        del self.self

        theano_rng = RandomStreams(numpy.random.randint(2**30))

        #Start by defining the graph

        ##Parameter setup
        self.emb = theano.shared((numpy.random.uniform(
            -1.0, 1.0,
            (self.n_entities, self.dim))).astype(theano.config.floatX))
        self.emb.tag.test_value = (numpy.random.uniform(
            -1.0, 1.0,
            (self.n_entities, self.dim))).astype(theano.config.floatX)

        self.a = theano.shared(numpy.asarray(1.0).astype(theano.config.floatX))
        self.b = theano.shared(numpy.asarray(0.0).astype(theano.config.floatX))

        self.params = [self.emb, self.a, self.b]

        ### Input setup!
        self.x1_idxs = T.ivector()
        self.x2_idxs = T.ivector()
        self.x1_idxs.tag.test_value = numpy.asarray([0, 1], dtype=numpy.int32)
        self.x2_idxs.tag.test_value = numpy.asarray([1, 2], dtype=numpy.int32)

        #generate negative samples
        choice = theano_rng.binomial(size=self.x1_idxs.shape)
        alternative = theano_rng.random_integers(size=self.x1_idxs.shape,
                                                 low=0,
                                                 high=n_entities - 1)
        self.x1_idxs_negative = T.switch(choice, self.x1_idxs, alternative)
        self.x2_idxs_negative = T.switch(choice, alternative, self.x2_idxs)

        ### Define graph from input to predictive loss
        def get_embed(index_tensor):
            return sigmoid(self.emb[index_tensor].reshape(
                (index_tensor.shape[0], self.dim)))

        x1_emb = get_embed(self.x1_idxs)
        x2_emb = get_embed(self.x2_idxs)
        x1neg_emb = get_embed(self.x1_idxs_negative)
        x2neg_emb = get_embed(self.x2_idxs_negative)

        def get_prob1(embed_tensor1, embed_tensor2):
            return sigmoid(
                self.a * T.mean(embed_tensor1 * embed_tensor2 +
                                (1 - embed_tensor1) * (1 - embed_tensor2),
                                axis=1) +
                self.b)  # probability of a link, 0 to 1

        self.loss = T.mean(-T.log(get_prob1(x1_emb, x2_emb)) -
                           T.log(1 - get_prob1(x1neg_emb, x2neg_emb)))

        ###Define graph from input to sampled/validated loss
        randomizationA = theano_rng.uniform(size=(self.validation_samples,
                                                  self.dim))
        randomizationB = theano_rng.uniform(size=(self.validation_samples,
                                                  self.dim))
Example #22
class Neural_network_layer:
    '''Represents the units within a layer and the units
       activations and dropout functions.
    '''
    def __init__(self, size, activation_function, dropout_type, dropout,
                 dropout_decay, batch_size, frequency):

        self.drop_count = 0
        self.size = size
        self.frequency = frequency
        self.dropout = dropout
        self.dropout_init = dropout
        self.dropout_decay = dropout_decay
        self.dropout_type = dropout_type
        self.rdm = RandomStreams(seed=1234)
        self.batch_size = batch_size
        self.sample_range = 100000
        self.create_dropout_sample_functions()
        self.activation_crossvalidation = activation_function
        self.activation_function = self.set_dropout(dropout,
                                                    activation_function)
        self.activation_derivative = lambda X: g(T.mul(X, (1.0 - X)))
        self.activation_tracker = self.set_activation_tracker(
            activation_function)

        pass

    def set_dropout(self, dropout, activation_function):
        action_with_drop = None
        if dropout > 0:
            action_with_drop = lambda X: T.mul(activation_function(X), self.
                                               dropout_function)
            self.activation_cv_dropout = lambda X: T.mul(
                activation_function(X), self.dropout_function_cv)
        else:
            action_with_drop = activation_function
            self.activation_cv_dropout = activation_function

        return action_with_drop

    def set_activation_tracker(self, activation_function):
        '''Sets a tracker function that logs the activations that exceed 0.75.
        '''
        if activation_function == Activation_function.sigmoid:
            activation_tracker = lambda X: T.gt(activation_function(X), 0.75)
        else:
            activation_tracker = None
        return activation_tracker

    def create_dropout_sample_functions(self, reset=False):
        '''Creates functions of sample vectors which can be indexed with random
           integers to create a pseudo random sample for dropout. This greatly
           speeds up sampling as no new samples have to be created.
        '''
        if reset:
            self.dropout = self.dropout_init
            print 'Reset dropout to ' + str(self.dropout)

        self.dropout_function = None
        sample_function = None
        if self.dropout > 0:
            if self.dropout_type == Dropout.drop_activation:
                if reset:
                    self.bino_sample_vector.set_value(np.matrix(
                        np.float32(
                            np.random.binomial(1, 1 - self.dropout,
                                               (10000000, 1)))),
                                                      borrow=True)
                else:
                    self.bino_sample_vector = shared(np.matrix(
                        np.float32(
                            np.random.binomial(1, 1 - self.dropout,
                                               (10000000, 1)))),
                                                     'float32',
                                                     borrow=True)

                sample_function = lambda rand: g(
                    T.reshape(
                        self.bino_sample_vector[rand:rand +
                                                (self.batch_size * self.size)],
                        (self.batch_size, self.size)))
                sample_function_cv = lambda rand: g(
                    T.reshape(
                        self.bino_sample_vector[rand:rand +
                                                (4200 * self.size)],
                        (4200, self.size)))
                self.dropout_function = sample_function(
                    self.rdm.random_integers(low=0, high=self.sample_range))
                self.dropout_function_cv = sample_function_cv(
                    self.rdm.random_integers(low=0, high=self.sample_range))

    def handle_dropout_decay(self, epoch):
        '''Handles automatically the dropout decay by decreasing the dropout by
           the given amount after the given number of epochs.
        '''
        if self.dropout_function and self.frequency[
                self.drop_count] > 0 and epoch % self.frequency[
                    self.drop_count] == 0 and epoch > 0:
            print 'Setting dropout from  ' + str(self.dropout) + ' to ' + str(
                np.float32(self.dropout *
                           (1 - self.dropout_decay[self.drop_count])))

            self.dropout = np.float32(
                self.dropout * (1 - self.dropout_decay[self.drop_count]))

            if self.dropout_type == Dropout.drop_activation:
                self.bino_sample_vector.set_value(np.matrix(
                    np.float32(
                        np.random.binomial(1, 1 - self.dropout,
                                           (10000000, 1)))),
                                                  borrow=True)
            self.drop_count += 1
            if self.drop_count > len(self.dropout_decay) - 1:
                self.drop_count -= 1
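The speed-up documented in `create_dropout_sample_functions` above, generating one long Bernoulli sample pool once and slicing it at a random offset instead of drawing a fresh mask per batch, can be sketched in plain NumPy as follows (pool size, batch size and layer size are made up):

import numpy as np

rng = np.random.RandomState(1234)
dropout, batch_size, size, sample_range = 0.5, 128, 256, 100000

# One large pool of Bernoulli(1 - dropout) samples, generated once.
bino_sample_vector = rng.binomial(1, 1 - dropout, 10000000).astype(np.float32)

def dropout_mask():
    # Cheap pseudo-resampling: pick a random offset into the pool and reshape a slice.
    start = rng.randint(0, sample_range)
    return bino_sample_vector[start:start + batch_size * size].reshape(batch_size, size)

mask = dropout_mask()
activations = np.tanh(rng.randn(batch_size, size)) * mask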
Example #23
    def __init__(self, num_words, num_rels, vocab_embed_size, lr=0.01, tensor_activation=T.tanh, num_noise_samples=1, init_dense_vocab=None):
        numpy_rng = numpy.random.RandomState(89677)
        theano_rng = RandomStreams(12783)
        rng_box_limit = 4 * numpy.sqrt(6. / (vocab_embed_size + vocab_embed_size + num_rels))
        rng_box_low = 0
        rng_box_high = rng_box_limit
        init_box = numpy.asarray(numpy_rng.uniform(low=rng_box_low, high=rng_box_high, size=(vocab_embed_size, vocab_embed_size, num_rels)))
        rng_proj_low = -4 * numpy.sqrt(6. / (num_words + vocab_embed_size))
        rng_proj_high = 4 * numpy.sqrt(6. / (num_words + vocab_embed_size))
        if init_dense_vocab is None:
            init_dense_vocab = numpy.asarray(numpy_rng.uniform(low=rng_proj_low, high=rng_proj_high, size=(num_words, vocab_embed_size)))
        init_rev_dense_vocab = numpy.asarray(numpy_rng.uniform(low=rng_proj_low, high=rng_proj_high, size=(vocab_embed_size, num_words)))
        self.B = theano.shared(value=init_box, name='B')
        self.P = theano.shared(value=init_dense_vocab, name='P')
        self.P_hat = theano.shared(value=init_rev_dense_vocab, name='P_hat')
        self.vocab = T.eye(num_words)
        word_activation = T.nnet.softmax
        self.rel = T.eye(num_rels)
        rel_activation = T.nnet.softmax

        self.lr = lr

        self.x_ind, self.y_ind, self.r_ind = T.iscalars('x_ind', 'y_ind', 'r_ind')
        x = self.vocab[self.x_ind]
        self.x_rep = T.dot(x, self.P)
        y = self.vocab[self.y_ind]
        self.y_rep = T.dot(y, self.P)
        r = self.rel[self.r_ind]
        # Assumption: Corresponding dimensions: 0 -> x, 1 -> y, 2 -> r
        # TODO: Where do we apply activations? Do we have to, at all?
        pred_xy = tensor_activation(T.tensordot(r, self.B, axes=(0,2)))
        pred_y = T.dot(T.tensordot(self.x_rep, pred_xy, axes=(0,0)), self.P_hat)
        self.prob_y = word_activation(pred_y)
        pred_x = T.dot(T.tensordot(self.y_rep, pred_xy, axes=(0,1)), self.P_hat)
        self.prob_x = word_activation(pred_x)
        pred_yr = tensor_activation(T.tensordot(self.x_rep, self.B, axes=(0,0)))
        self.prob_r = rel_activation(T.tensordot(self.y_rep, pred_yr, axes=(0,0)))

        self.score = T.dot(y, T.dot(T.tensordot(self.x_rep, T.tensordot(r, self.B, axes=(0,2)), axes=(0,0)), self.P_hat).T)
        # y \times (((x \times P) \times (r \otimes B)) \times P_hat)
        rand_margin_score = T.constant(0)
        noise_log_likelihood = T.constant(0)
        # The noise distribution is one where words and the relation are independent of each other.  The probability of the right tuple and the corrupted tuple are both equal in this distribution.
        noise_prob = num_noise_samples/float(num_words * num_words * num_rels)
        rand_x_ind = theano_rng.random_integers(low=0, high=num_words-1)
        rand_y_ind = theano_rng.random_integers(low=0, high=num_words-1)
        rand_r_ind = theano_rng.random_integers(low=0, high=num_rels-1)
        rand_x = self.vocab[rand_x_ind]
        rand_x_rep = T.dot(rand_x, self.P)
        rand_y = self.vocab[rand_y_ind]
        rand_y_rep = T.dot(rand_y, self.P)
        rand_r = self.rel[rand_r_ind]
        rand_score = T.dot(rand_y, T.dot(T.tensordot(rand_x_rep, T.tensordot(rand_r, self.B, axes=(0,2)), axes=(0,0)), self.P_hat).T)
        for _ in range(num_noise_samples):
            rand_margin_score += rand_score
            noise_log_likelihood += T.log(noise_prob/(T.abs_(rand_score) + noise_prob))
        self.nce_margin_loss = T.maximum(0, 1 - self.score + rand_margin_score)
        
        # NCE negative log likelihood:-1 * {log(score/(score + num_noise_samples*noise_prob)) + \sum_{i=1}^k (log(noise_prob/(rand_score + noise_prob)))}
        self.nce_prob_loss = -(T.log(T.abs_(self.score)/(T.abs_(self.score) + noise_prob)) + noise_log_likelihood)
        self.cost_inputs = [self.x_ind, self.y_ind, self.r_ind]
        self.params = [self.B, self.P, self.P_hat]

        self.x_loss = self.ce(x, self.prob_x)
        self.y_loss = self.ce(y, self.prob_y)
        self.r_loss = self.ce(r, self.prob_r)
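Written out, the NCE negative log-likelihood as it is implemented above (note that `noise_prob` already carries the factor `num_noise_samples`, and that the same sampled noise triple is reused in all k terms) is:

\[
\mathcal{L}_{\mathrm{NCE}} = -\Bigl[\log\frac{|s|}{|s| + p_n} + \sum_{i=1}^{k}\log\frac{p_n}{|\tilde{s}| + p_n}\Bigr],
\qquad p_n = \frac{k}{\text{num\_words}^2 \cdot \text{num\_rels}},
\]

where \(s\) is `self.score` and \(\tilde{s}\) is `rand_score`.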
Example #24
    def __init__(self, numargs, embed_size, pred_vocab_size, arg_vocab_size, initial_pred_rep=None, initial_arg_rep = None, margin = 5, lr=0.01, activation=T.nnet.sigmoid):
        numpy_rng = numpy.random.RandomState(12345)
        theano_rng = RandomStreams(54321)
        self.lr = lr
        #margin = 5
        # Initializing predicate representations
        if initial_pred_rep is not None:
            num_preds, pred_dim = initial_pred_rep.shape
            assert pred_vocab_size == num_preds, "Initial predicate representation is not the same size as pred_vocab_size"
            assert embed_size == pred_dim, "Initial predicate representation does not have the same dimensionality as embed_size"
        else:
            initial_pred_rep_range = 4 * numpy.sqrt(6. / (pred_vocab_size + embed_size))
            initial_pred_rep = numpy.asarray(numpy_rng.uniform(low = -initial_pred_rep_range, high = initial_pred_rep_range, size = (pred_vocab_size, embed_size)))
            
        self.pred_rep = theano.shared(value=initial_pred_rep, name='P')
        
        # Initializing argument representations
        if initial_arg_rep is not None:
            arg_rep_len, arg_dim = initial_arg_rep.shape
            assert arg_vocab_size == arg_rep_len, "Initial argument representation is not the same size as arg_vocab_size"
            assert embed_size == arg_dim, "Initial argument representation does not have the same dimensionality as embed_size"
        else:
            initial_arg_rep_range = 4 * numpy.sqrt(6. / (arg_vocab_size + embed_size))
            initial_arg_rep = numpy.asarray(numpy_rng.uniform(low = -initial_arg_rep_range, high = initial_arg_rep_range, size = (arg_vocab_size, embed_size)))
            
        self.arg_rep = theano.shared(value=initial_arg_rep, name='A')
        
        # Initialize scorer
        scorer_dim = embed_size * (numargs + 1) # Predicate is +1
        initial_scorer_range = 4 * numpy.sqrt(6. / scorer_dim)
        initial_scorer = numpy.asarray(numpy_rng.uniform(low = -initial_scorer_range, high = initial_scorer_range, size = scorer_dim))
        self.scorer = theano.shared(value=initial_scorer, name='s')
        
        # Initialize indicator
        indicator_dim = embed_size * (numargs + 1) # Predicate is +1
        initial_indicator_range = 4 * numpy.sqrt(6. / (indicator_dim + numargs))
        initial_indicator = numpy.asarray(numpy_rng.uniform(low = -initial_indicator_range, high = initial_indicator_range, size = (indicator_dim, numargs)))
        self.indicator = theano.shared(value=initial_indicator, name='I')
        
        # Define symbolic pred-arg
        self.pred_ind = T.iscalar('p')
        self.arg_inds = T.iscalars(numargs)
        pred = self.pred_rep[self.pred_ind].reshape((1, embed_size))
        args = self.arg_rep[self.arg_inds].reshape((1, embed_size * numargs))
        pred_arg = activation(T.concatenate([pred, args], axis=1))
        
        # Define symbolic rand pred-arg for training scorer
        rand_pred_ind = theano_rng.random_integers(low=0, high=pred_vocab_size-1)
        rand_arg_inds = theano_rng.random_integers([1, numargs], low=0, high=arg_vocab_size-1)
        rand_pred = self.pred_rep[rand_pred_ind].reshape((1, embed_size))
        rand_args = self.arg_rep[rand_arg_inds].reshape((1, embed_size * numargs))
        rand_pred_arg = activation(T.concatenate([rand_pred, rand_args], axis=1))

        # Define symbolic pred_rand-arg for training indicator
        pred_rand_arg = activation(T.concatenate([pred, rand_args], axis=1))

        # Define scores and loss
        self.corr_score = T.sum(T.dot(pred_arg, self.scorer))
        rand_score = T.sum(T.dot(rand_pred_arg, self.scorer))
        self.margin_loss = T.maximum(0, margin - self.corr_score + rand_score)
        
        # Define indicator values and loss
        orig_ind_labels = T.constant(numpy.zeros(numargs))
        self.indicator_pred = T.nnet.sigmoid(T.dot(pred_arg, self.indicator))
        rand_ind_labels = T.constant(numpy.ones(numargs))
        rand_indicator_pred = T.nnet.sigmoid(T.dot(pred_rand_arg, self.indicator))
        self.indicator_loss = T.mean((self.indicator_pred - orig_ind_labels) ** 2) + T.mean((rand_indicator_pred - rand_ind_labels) ** 2)
        
        # Define params and inputs
        self.score_params = [self.pred_rep, self.arg_rep, self.scorer]
        self.indicator_params = [self.pred_rep, self.arg_rep, self.indicator]
        self.score_ind_inputs = [self.pred_ind] + list(self.arg_inds)
Example #25
    def __init__(self,
                 cooccurrence,
                 z_k,
                 opt,
                 pz_weight_regularizer=None,
                 pz_regularizer=None,
                 eps=1e-8,
                 scale=1e-2,
                 beta=0.01,
                 batch_gibbs=True):
        srng = RandomStreams(123)
        cooccurrence = cooccurrence.astype(np.float32)
        self.cooccurrence = cooccurrence
        self.z_k = z_k
        self.opt = opt
        x_k = cooccurrence.shape[0]
        self.x_k = x_k
        self.pz_weight_regularizer = pz_weight_regularizer
        self.pz_regularizer = pz_regularizer
        self.batch_gibbs = batch_gibbs

        # cooccurrence matrix
        n = np.sum(cooccurrence, axis=None)
        _co = cooccurrence / n
        co = T.constant(_co, name="co")  # (x_k, x_k)
        _co_m = np.sum(_co, axis=1, keepdims=True)
        co_m = T.constant(_co_m, name="co_m")  # (x_k,1)
        _co_c = _co / (eps + _co_m)
        _co_h = np.sum(_co * -np.log(eps + _co_c), axis=1,
                       keepdims=True)  # (x_k, 1)
        print "H(Y|X): {}".format(np.sum(_co_h))
        co_h = T.constant(_co_h, name="co_h")

        # parameters
        # P(z1=k,z2=k)
        tril = np.tril_indices(n=x_k, k=-1)
        initial_param = np.random.normal(loc=0,
                                         scale=scale,
                                         size=(tril[0].shape[0], )).astype(
                                             np.float32)
        param = K.variable(initial_param, name="param", dtype='float32')
        pz = T.zeros((x_k, x_k))
        pz = T.set_subtensor(pz[tril], param)
        pz += T.transpose(pz, (1, 0))  # symmetric
        pz = T.nnet.sigmoid(pz)  # (x_k, x_k) squash
        params = [param]

        # current sample
        initial_sample = np.random.random_integers(low=0,
                                                   high=z_k - 1,
                                                   size=(x_k, )).astype(
                                                       np.int32)
        current_sample = K.variable(initial_sample,
                                    name="current_sample",
                                    dtype='int32')
        current_oh = tensor_one_hot(current_sample, k=z_k)  # (x_k, z_k)

        # probability of sample
        matches = T.eq(current_sample.dimshuffle((0, 'x')),
                       current_sample.dimshuffle(('x', 0)))  # (x_k, x_k)
        p1 = T.nnet.sigmoid(param)
        p2 = matches[tril]
        lp = (p2 * T.log(eps + p1)) + (
            (1. - p2) * T.log(eps + 1 - p1))  # (tril,)
        sample_logp = T.sum(lp)

        # gibbs sampling
        if batch_gibbs:
            idx = T.ivector()
            pzidx = pz[idx, :]  # (n, x_k)
            current_masked = T.set_subtensor(current_oh[idx,
                                                        current_sample[idx]],
                                             0)  # (x_k, z_k)
            # todo: test this p calculation
            e_add = T.dot(
                T.log(eps + pzidx) - T.log(eps + 1 - pzidx),
                current_masked)  # (n, z_k)
            #e_add = T.dot(T.log(eps + pzidx), current_masked)  # (n, z_k)
            p_add = softmax_nd(e_add)
            cs = T.cumsum(p_add, axis=1)
            rnd = srng.uniform(low=0., high=1., size=(idx.shape[0], ))
            bucket = T.sum(T.gt(rnd.dimshuffle((0, 'x')), cs), axis=1)  # (n,)
            bucket = T.clip(bucket, 0, z_k - 1)  # (n,)
            new_sample = T.set_subtensor(current_sample[idx], bucket)
            gibbs_updates = [(current_sample, new_sample)]
            self.gibbs_fun = theano.function([idx], [], updates=gibbs_updates)
        else:
            idx = srng.random_integers(low=0, high=x_k - 1)  # scalar
            pzidx = pz[idx, :]  # (x_k,)
            current_masked = T.set_subtensor(current_oh[idx,
                                                        current_sample[idx]],
                                             0)  # (x_k, z_k)
            # todo: test this p calculation
            e_add = T.dot(
                T.log(eps + pzidx) - T.log(eps + 1 - pzidx),
                current_masked)  # (Z_k,)
            p_add = softmax_nd(e_add)
            cs = T.cumsum(p_add)
            rnd = srng.uniform(low=0., high=1.)
            bucket = T.sum(T.gt(rnd, cs))
            bucket = T.clip(bucket, 0, z_k - 1)
            new_sample = T.set_subtensor(current_sample[idx], bucket)
            gibbs_updates = [(current_sample, new_sample)]
            self.gibbs_fun = theano.function([], [], updates=gibbs_updates)

        # loss of sample
        p_b = T.dot(T.transpose(current_oh, (1, 0)), co)  # (z_k, x_k)
        marg = T.sum(p_b, axis=1, keepdims=True)  # (z_k, 1)
        cond = p_b / (marg + eps)  # (z_k, x_k)
        current_nll = T.sum(p_b * -T.log(eps + cond), axis=None)  # scalar
        current_nll = theano.gradient.zero_grad(current_nll)

        avg_nll = K.variable(0., name='avg_nll', dtype='float32')
        new_avg = ((1. - beta) * avg_nll) + (beta * current_nll)
        avg_updates = [(avg_nll, new_avg)]

        # REINFORCE
        glp = T.grad(sample_logp, param)
        # todo: check sign
        sampled_grad = -(current_nll - avg_nll) * glp

        self.regularize = False

        assert isinstance(opt, keras.optimizers.Optimizer)

        def get_gradients(loss, params):
            assert len(params) == 1
            assert params[0] == param
            return [sampled_grad]

        opt.get_gradients = get_gradients
        updates = opt.get_updates(loss=current_nll, params=params)

        self.val_fun = theano.function([], current_nll)
        self.encodings_fun = theano.function([], current_sample)  # (x_k,)
        self.train_fun = theano.function([],
                                         current_nll,
                                         updates=updates + avg_updates)
        self.weights = params + opt.weights + [current_sample, avg_nll]

        t = self.calc_utilization()
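
        # A hedged usage sketch (not from the original source, and assuming the
        # default batch_gibbs=True): with `model` an instance of this class, one
        # training step interleaves a Gibbs sweep over the current assignment with a
        # REINFORCE update of `param`, e.g.
        #
        #     idx = np.arange(model.x_k, dtype=np.int32)
        #     model.gibbs_fun(idx)      # resample z for every x in one batched sweep
        #     nll = model.train_fun()   # REINFORCE step; also updates the NLL baseline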
Exemple #26
0
    def __init__(self,
                 numargs,
                 embed_size,
                 pred_vocab_size,
                 arg_vocab_size,
                 initial_pred_rep=None,
                 initial_arg_rep=None,
                 margin=5,
                 lr=0.01,
                 activation=T.nnet.sigmoid):
        numpy_rng = numpy.random.RandomState(12345)
        theano_rng = RandomStreams(54321)
        self.lr = lr
        #margin = 5
        # Initializing predicate representations
        if initial_pred_rep is not None:
            num_preds, pred_dim = initial_pred_rep.shape
            assert pred_vocab_size == num_preds, "Initial predicate representation is not the same size as pred_vocab_size"
            assert embed_size == pred_dim, "Initial predicate representation does not have the same dimensionality as embed_size"
        else:
            initial_pred_rep_range = 4 * numpy.sqrt(
                6. / (pred_vocab_size + embed_size))
            initial_pred_rep = numpy.asarray(
                numpy_rng.uniform(low=-initial_pred_rep_range,
                                  high=initial_pred_rep_range,
                                  size=(pred_vocab_size, embed_size)))

        self.pred_rep = theano.shared(value=initial_pred_rep, name='P')

        # Initializing argument representations
        if initial_arg_rep is not None:
            arg_rep_len, arg_dim = initial_arg_rep.shape
            assert arg_vocab_size == arg_rep_len, "Initial argument representation is not the same size as arg_vocab_size"
            assert embed_size == arg_dim, "Initial argument representation does not have the same dimensionality as embed_size"
        else:
            initial_arg_rep_range = 4 * numpy.sqrt(
                6. / (arg_vocab_size + embed_size))
            initial_arg_rep = numpy.asarray(
                numpy_rng.uniform(low=-initial_arg_rep_range,
                                  high=initial_arg_rep_range,
                                  size=(arg_vocab_size, embed_size)))

        self.arg_rep = theano.shared(value=initial_arg_rep, name='A')

        # Initialize scorer
        scorer_dim = embed_size * (numargs + 1)  # Predicate is +1
        initial_scorer_range = 4 * numpy.sqrt(6. / scorer_dim)
        initial_scorer = numpy.asarray(
            numpy_rng.uniform(low=-initial_scorer_range,
                              high=initial_scorer_range,
                              size=scorer_dim))
        self.scorer = theano.shared(value=initial_scorer, name='s')

        # Initialize indicator
        indicator_dim = embed_size * (numargs + 1)  # Predicate is +1
        initial_indicator_range = 4 * numpy.sqrt(6. /
                                                 (indicator_dim + numargs))
        initial_indicator = numpy.asarray(
            numpy_rng.uniform(low=-initial_indicator_range,
                              high=initial_indicator_range,
                              size=(indicator_dim, numargs)))
        self.indicator = theano.shared(value=initial_indicator, name='I')

        # Define symbolic pred-arg
        self.pred_ind = T.iscalar('p')
        self.arg_inds = T.iscalars(numargs)
        pred = self.pred_rep[self.pred_ind].reshape((1, embed_size))
        args = self.arg_rep[self.arg_inds].reshape((1, embed_size * numargs))
        pred_arg = activation(T.concatenate([pred, args], axis=1))

        # Define symbolic rand pred-arg for training scorer
        rand_pred_ind = theano_rng.random_integers(low=0,
                                                   high=pred_vocab_size - 1)
        rand_arg_inds = theano_rng.random_integers([1, numargs],
                                                   low=0,
                                                   high=arg_vocab_size - 1)
        rand_pred = self.pred_rep[rand_pred_ind].reshape((1, embed_size))
        rand_args = self.arg_rep[rand_arg_inds].reshape(
            (1, embed_size * numargs))
        rand_pred_arg = activation(
            T.concatenate([rand_pred, rand_args], axis=1))

        # Define symbolic pred_rand-arg for training indicator
        pred_rand_arg = activation(T.concatenate([pred, rand_args], axis=1))

        # Define scores and loss
        self.corr_score = T.sum(T.dot(pred_arg, self.scorer))
        rand_score = T.sum(T.dot(rand_pred_arg, self.scorer))
        self.margin_loss = T.maximum(0, margin - self.corr_score + rand_score)

        # Define indicator values and loss
        orig_ind_labels = T.constant(numpy.zeros(numargs))
        self.indicator_pred = T.nnet.sigmoid(T.dot(pred_arg, self.indicator))
        rand_ind_labels = T.constant(numpy.ones(numargs))
        rand_indicator_pred = T.nnet.sigmoid(
            T.dot(pred_rand_arg, self.indicator))
        self.indicator_loss = T.mean(
            (self.indicator_pred - orig_ind_labels)**2) + T.mean(
                (rand_indicator_pred - rand_ind_labels)**2)

        # Define params and inputs
        self.score_params = [self.pred_rep, self.arg_rep, self.scorer]
        self.indicator_params = [self.pred_rep, self.arg_rep, self.indicator]
        self.score_ind_inputs = [self.pred_ind] + list(self.arg_inds)
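
        # A hedged sketch (an illustration, not part of the original class) of how a
        # scorer training step could be compiled from the pieces above, using plain
        # SGD on the margin loss:
        #
        #     grads = T.grad(self.margin_loss, self.score_params)
        #     updates = [(p, p - self.lr * g) for p, g in zip(self.score_params, grads)]
        #     self.train_scorer = theano.function(self.score_ind_inputs,
        #                                         self.margin_loss, updates=updates)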
Exemple #27
0
rn_b = srng.binomial(size=(3,),n=100,p=.7) #Say we want to generate an array of 3 independent binomial rvs

binom = function([], rn_b, no_default_updates=True)
print "First Binomial vector ", binom()
print "Second Binomial without changing random number generator", binom()

############Normal RV

rn_n = srng.normal(size=(), avg=0.0, std=2.3)
norm = function([],rn_n)
print "Single Normal ", norm()

#############Random integer list

rn_i = srng.random_integers(size=(4,), low=1, high=900)
inte = function([], rn_i)
print "Integer list ", inte()

#############Generating a permutation uniformly at random

rn_p = srng.permutation(size=(), n=10)
perm = function([], rn_p)
print "Random permutation of 0 to 9", perm()

#############Choosing from a list randomly

rn_list = srng.choice(size=(), a=[2, 3, 4.5, 6], replace=True, p=[.5, 0, .5, 0], dtype='float64')
lis = function([], rn_list)
print "Choosing twice from the specified list ", lis()
print lis()
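
# A hedged aside (not part of the original snippet): by default each call to a
# compiled function advances the stream's state, so omitting no_default_updates
# gives fresh draws on every call, while no_default_updates=True (as used for
# `binom` above) replays the same draws.
rn_b2 = srng.binomial(size=(3,), n=100, p=.7)
binom_updating = function([], rn_b2)                         # state advances per call
binom_frozen = function([], rn_b2, no_default_updates=True)  # state never advances
print "Updating stream, two different draws ", binom_updating(), binom_updating()
print "Frozen stream, identical draws       ", binom_frozen(), binom_frozen()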
Exemple #28
0
def selectRandomJ_(i, m):
    rstr = RandomStreams()
    # TODO: make somehow sure that the random integer is not i
    randint = rstr.random_integers(None, 0, m - 1, ndim=0)
#    randint = tPrint("Taking random j: ")(randint)
    return randint
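
# A hedged sketch addressing the TODO above (an assumption, not the original
# author's fix): draw an offset uniformly from [1, m-1] and add it to i modulo m,
# so the result is uniform over {0, ..., m-1} with i excluded. The helper name
# selectRandomJNotI_ is hypothetical.
def selectRandomJNotI_(i, m):
    rstr = RandomStreams()
    offset = rstr.random_integers(None, 1, m - 1, ndim=0)
    return (i + offset) % m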
Exemple #29
0
    def build_graph(self):
        if self.seed is None:
            self.seed = numpy.random.randint(2**30)
        theano_rng = RandomStreams(self.seed)
        randstate = numpy.random.RandomState(self.seed)

        ##################
        ##Parameter setup
        ##################
        self.emb = theano.shared(
            (randstate.uniform(-1.0, 1.0, (self.n_entities, self.dim))).astype(
                theano.config.floatX))
        self.emb.tag.test_value = (randstate.uniform(
            -1.0, 1.0,
            (self.n_entities, self.dim))).astype(theano.config.floatX)

        self.a = theano.shared(
            numpy.asarray(self.init_a).astype(theano.config.floatX))
        self.b = theano.shared(
            numpy.asarray(self.init_b).astype(theano.config.floatX))

        self.params = [self.emb, self.a, self.b]

        if self.embedding_type == 'REAL_TRAINED':
            self.coef_defaults = [2.0, -.5, -.5, -.5, -.5]
            self.coefs = [
                theano.shared(
                    numpy.asarray(coef_default).astype(theano.config.floatX))
                for coef_default in self.coef_defaults
            ]
            self.params = self.params + self.coefs
        if self.embedding_type == 'REAL_TRAINED_L1':
            self.coef_defaults = [2.0, -.5, -.5, 0, 0]
            self.coefs = [
                theano.shared(
                    numpy.asarray(coef_default).astype(theano.config.floatX))
                for coef_default in self.coef_defaults
            ]
            self.params = self.params + self.coefs[:-2]

        ################
        ### Input setup!
        #################
        self.x1_idxs = T.ivector()
        self.x2_idxs = T.ivector()
        self.x1_idxs.tag.test_value = numpy.asarray([0, 1], dtype=numpy.int32)
        self.x2_idxs.tag.test_value = numpy.asarray([1, 2], dtype=numpy.int32)

        #generate negative samples
        choice = theano_rng.binomial(size=self.x1_idxs.shape)
        alternative = theano_rng.random_integers(size=self.x1_idxs.shape,
                                                 low=0,
                                                 high=self.n_entities - 1)
        self.x1_idxs_negative = T.switch(choice, self.x1_idxs, alternative)
        self.x2_idxs_negative = T.switch(choice, alternative, self.x2_idxs)
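
        # (Hedged note, not in the original:) for each positive pair a fair coin
        # decides which endpoint gets corrupted; when choice == 1 the second entity
        # is replaced by `alternative`, otherwise the first one is, yielding exactly
        # one negative pair per positive pair.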

        ### Define graph from input to predictive loss
        def get_embed(index_tensor):
            #index_tensor: (samples)
            if self.parameterization == 'SIGMOID':
                return sigmoid(self.emb[index_tensor].reshape(
                    (index_tensor.shape[0], self.dim)))
            elif self.parameterization == 'DIRECT':
                return self.emb[index_tensor].reshape(
                    (index_tensor.shape[0], self.dim))

        self.x1_emb = get_embed(self.x1_idxs)
        self.x2_emb = get_embed(self.x2_idxs)
        self.x1neg_emb = get_embed(self.x1_idxs_negative)
        self.x2neg_emb = get_embed(self.x2_idxs_negative)

        def get_prob(embed_tensor1, embed_tensor2):
            #embed_tensorX: (n_batches,dim,*)
            if self.embedding_type == 'BIT':
                return sigmoid(
                    self.a * T.mean(embed_tensor1 * embed_tensor2 +
                                    (1 - embed_tensor1) * (1 - embed_tensor2),
                                    axis=1) + self.b)  #returns (n_batches,_,*)
            if self.embedding_type == 'BIT_INTERNALB':
                return sigmoid(
                    self.a *
                    (T.mean(2.0 * embed_tensor1 * embed_tensor2 -
                            embed_tensor1 - embed_tensor2 + 1.0,
                            axis=1) + self.b))  #returns (n_batches,_,*)
            if self.embedding_type == 'BIT_AND':
                return sigmoid(
                    self.a *
                    T.mean(2.0 * embed_tensor1 * embed_tensor2, axis=1) +
                    self.b)  #returns (n_batches,_,*)
            elif self.embedding_type == 'REAL':
                return sigmoid(
                    self.a * T.mean(2.0 * embed_tensor1 * embed_tensor2 -
                                    embed_tensor1**2 - embed_tensor2**2,
                                    axis=1) + self.b)  #returns (n_batches,_,*)
            elif self.embedding_type == 'REAL_INTERNALB':
                return sigmoid(
                    self.a *
                    (T.mean(2.0 * embed_tensor1 * embed_tensor2 -
                            embed_tensor1**2 - embed_tensor2**2,
                            axis=1) + self.b))  #returns (n_batches,_,*)
            elif self.embedding_type == 'REAL_SQRT':
                return sigmoid(self.a *
                               (T.mean(1.0 - (embed_tensor1 - embed_tensor2) *
                                       (embed_tensor1 - embed_tensor2),
                                       axis=1))**.5 +
                               self.b)  #returns (n_batches,_,*)
            elif self.embedding_type == 'REAL_L1':
                return sigmoid(self.a * (T.mean(
                    1.0 - T.abs_(embed_tensor1 - embed_tensor2), axis=1)) +
                               self.b)  #returns (n_batches,_,*)
            elif self.embedding_type == 'REAL_TRAINED' or self.embedding_type == 'REAL_TRAINED_L1':
                terms = [
                    embed_tensor1 * embed_tensor2, embed_tensor1,
                    embed_tensor2, embed_tensor1**2, embed_tensor2**2
                ]
                expr = sum(
                    [term * coef for term, coef in zip(terms, self.coefs)])
                return sigmoid(self.a * T.mean(expr, axis=1) + self.b)

        def get_prob_sampled(embed_tensor1, embed_tensor2, n_samples):
            randomizationA = theano_rng.uniform(
                size=(embed_tensor1.shape[0], embed_tensor1.shape[1],
                      n_samples))  #(n_batches,dim,val)
            randomizationB = theano_rng.uniform(
                size=(embed_tensor2.shape[0], embed_tensor2.shape[1],
                      n_samples))  #(n_batches,dim,val)
            bithash_1 = T.switch(
                T.lt(randomizationA, embed_tensor1.dimshuffle(0, 1, 'x')), 1,
                0)  #(val,dim)
            bithash_2 = T.switch(
                T.lt(randomizationB, embed_tensor2.dimshuffle(0, 1, 'x')), 1,
                0)  #(val,dim)
            return ([bithash_1, bithash_2], get_prob(bithash_1, bithash_2))

        def get_mean(embed_tensor1, embed_tensor2):
            return self.a * T.mean(
                2.0 * embed_tensor1 * embed_tensor2 - embed_tensor1 -
                embed_tensor2 + 1.0 + self.b,
                axis=1)

        def get_var(embed_tensor1, embed_tensor2):
            p = 2.0 * embed_tensor1 * embed_tensor2 - embed_tensor1 - embed_tensor2 + 1.0
            variances = p * (1 - p)
            total_var = T.sum(variances,
                              axis=1) * (self.a / T.shape(variances)[1])**2
            return total_var

        #build up list of sampling points and sampling weights, according to normal cdf approximation.
        #if objective_samples == None, stick to sub-optimal sampling scheme.
        def get_samples(embed_tensor1, embed_tensor2):
            if self.objective_samples is None:
                return [{
                    'weight': 1.0,
                    'value': get_prob(embed_tensor1, embed_tensor2)
                }]
            else:
                PhiInv = lambda z: 2**.5 * erfinv(2 * z - 1)
                means = get_mean(embed_tensor1, embed_tensor2)
                variances = get_var(embed_tensor1, embed_tensor2)
                # print (variances**.5).tag.test_value
                spacing = 1.0 / (self.objective_samples + 1)
                xs = []
                for i in range(1, self.objective_samples + 1):
                    sample = variances**.5 * PhiInv(float(i) * spacing) + means
                    xs.append({
                        'weight': 1.0 * spacing,
                        'value': sigmoid(sample)
                    })
                xs.append({
                    'weight':
                    0.5 * spacing,
                    'value':
                    sigmoid(variances**.5 * PhiInv(0.5 * spacing) + means)
                })
                xs.append({
                    'weight':
                    0.5 * spacing,
                    'value':
                    sigmoid(variances**.5 * PhiInv(1 - 0.5 * spacing) + means)
                })
                return xs

        pos_losses = [
            -sample['weight'] * T.mean(T.log(sample['value']))
            for sample in get_samples(self.x1_emb, self.x2_emb)
        ]
        neg_losses = [
            -sample['weight'] * T.mean(T.log(1 - sample['value']))
            for sample in get_samples(self.x1neg_emb, self.x2neg_emb)
        ]
        self.loss = sum(pos_losses + neg_losses)
        # for sample in get_samples(self.x1_emb,self.x2_emb):
        #     print "weight: ",sample['weight'], "value: ",sample['value'].tag.test_value
        # for x in pos_losses:
        #     print "pos loss test_value:",x.tag.test_value
        # for x in neg_losses:
        #     print "neg loss test_value:",x.tag.test_value
        #print "loss test value: ",self.loss.tag.test_value

        if self.n_samples is not None:
            self.bithash_1s, self.bit_p1 = get_prob_sampled(
                self.x1_emb, self.x2_emb, self.n_samples)
            self.bithash_2s, self.bit_p2 = get_prob_sampled(
                self.x1neg_emb, self.x2neg_emb, self.n_samples)
            self.sampled_loss = T.mean(-T.log(self.bit_p1) -
                                       T.log(1 - self.bit_p2))
Exemple #30
0
    def __init__(self,
                 cooccurrence,
                 z_k,
                 opt,
                 initializer,
                 initial_pz_weight=None,
                 initial_b=None,
                 pz_regularizer=None,
                 eps=1e-9):
        cooccurrence = cooccurrence.astype(np.float32)
        self.cooccurrence = cooccurrence
        self.z_k = z_k
        self.opt = opt
        x_k = cooccurrence.shape[0]
        self.x_k = x_k

        # cooccurrence matrix
        n = np.sum(cooccurrence, axis=None)
        _co = cooccurrence / n
        co = T.constant(_co, name="co")  # (x_k, x_k)
        _co_m = np.sum(_co, axis=1, keepdims=True)
        co_m = T.constant(_co_m, name="co_m")  # (x_k,1)
        _co_c = _co / (eps + _co_m)
        _co_h = np.sum(_co * -np.log(eps + _co_c), axis=1,
                       keepdims=True)  # (x_k, 1)
        print "COh: {}".format(np.sum(_co_h))
        co_h = T.constant(_co_h, name="co_h")

        if initial_pz_weight is None:
            initial_pz_weight = initializer((x_k, z_k))
        pz_weight = K.variable(initial_pz_weight)
        pz = softmax_nd(pz_weight)
        initial_w = initializer((z_k, x_k))
        w = K.variable(initial_w, name="w")  # (z_k, x_k)
        if initial_b is None:
            initial_b = initializer((x_k, ))
        b = K.variable(initial_b, name="b")
        yw = softmax_nd(w + b)  # (z_k, x_k)
        srng = RandomStreams(123)
        zsamp = srng.random_integers(size=(x_k, ), low=0, high=z_k - 1)

        yt = yw[zsamp, :]  # (x_k, x_k)
        lt = -T.sum(co * T.log(eps + yt), axis=1)  # (x_k,)
        pt = pz[T.arange(pz.shape[0]), zsamp]
        assert lt.ndim == 1
        assert pt.ndim == 1
        nll_loss = T.sum(pt * lt, axis=None) * z_k

        self.params = [pz_weight, w, b]
        reg_loss = T.constant(0.)
        if pz_regularizer:
            reg_loss = pz_regularizer(pz)
        total_loss = nll_loss + reg_loss

        encoding = T.argmax(pz_weight, axis=1)
        one_hot_encoding = tensor_one_hot(encoding, z_k)  # (x_k, z_k)

        pb = T.dot(T.transpose(one_hot_encoding, (1, 0)), co)
        m = T.sum(pb, axis=1, keepdims=True)
        c = pb / (m + eps)
        validation_nll = -T.sum(pb * T.log(eps + c), axis=None)

        utilization = T.sum(T.gt(T.sum(one_hot_encoding, axis=0), 0), axis=0)
        updates = opt.get_updates(loss=total_loss, params=self.params)

        self.val_fun = theano.function([], [validation_nll, utilization])
        self.encodings_fun = theano.function([], encoding)
        self.train_fun = theano.function([], [reg_loss, nll_loss, total_loss],
                                         updates=updates)
        self.weights = self.params + opt.weights
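
        # A hedged usage sketch (not from the original source): with `model` an
        # instance of this class, training repeats the sampled-NLL update and
        # validation evaluates the argmax encoding exactly, e.g.
        #
        #     for step in range(10000):
        #         reg, nll, total = model.train_fun()
        #     val_nll, used_clusters = model.val_fun()
        #     encoding = model.encodings_fun()   # hard assignment of each x to a z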
Exemple #31
0
class DropModality(Layer):
    '''
    Drop a modality altogether.
    '''
    def __init__(self, input_shapes=[], **kwargs):
        self.trng = RandomStreams(seed=np.random.randint(10e6))
        self.params = []
        self.input_shapes = input_shapes

    def set_prev_shape(self, input_shapes):
        self.input_shapes = input_shapes

    def get_output(self, train=False):

        X = self.get_input(train)

        full = T.ones_like(X)
        masks = [full]

        for i in xrange(len(self.input_shapes)):
            mask = T.ones_like(X)
            idx = 0
            for j in xrange(len(self.input_shapes)):
                if i == j:
                    # Infer the rank of the input from the first shape entry; fall
                    # back to rank 1 when the entry is a plain integer width.
                    try:
                        ishape = len(self.input_shapes[0])
                    except TypeError:
                        ishape = 1
                    if ishape == 3:
                        mask = T.set_subtensor(
                            mask[:, :, idx:idx + self.input_shapes[j]], 0)
                    elif ishape == 2:
                        mask = T.set_subtensor(
                            mask[:, idx:idx + self.input_shapes[j]], 0)
                    elif ishape == 1:
                        mask = T.set_subtensor(
                            mask[idx:idx + self.input_shapes[j]], 0)
                    else:
                        raise NotImplementedError()
                idx = idx + self.input_shapes[j]
            masks += [mask]
        masked = T.stack(masks)

        if train:
            index = self.trng.random_integers(size=(1, ),
                                              low=0,
                                              high=len(masks) - 1)[0]
        else:
            index = 0
        masked_output = X * masked[index]
        return masked_output

    def get_masked(self, train=False):
        X = self.get_input(train)

        full = T.ones_like(X)
        masks = [full]

        for i in xrange(len(self.input_shapes)):
            mask = T.ones_like(X)
            idx = 0
            for j in xrange(len(self.input_shapes)):
                if i == j:
                    mask = T.set_subtensor(
                        mask[:, :, idx:idx + self.input_shapes[j]], 0)
                idx = idx + self.input_shapes[j]
            masks += [mask]
        masked = T.stack(masks)
        index = self.trng.random_integers(size=(1, ),
                                          low=0,
                                          high=len(masks) - 1)[0]

        return masked, index

    def get_input_shapes(self):
        return self.input_shapes

    def get_config(self):
        config = {
            "name": self.__class__.__name__,
            "input_shapes": self.input_shapes
        }
        base_config = super(DropModality, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))
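
    # A hedged usage sketch (assumptions, not from the original source): the layer
    # expects its input to be the modalities concatenated along the last axis, with
    # input_shapes listing each modality's width, e.g.
    #
    #     drop = DropModality(input_shapes=[128, 64])  # two modalities of width 128 and 64
    #     drop.set_prev_shape([128, 64])
    #     # at train time one randomly chosen mask is applied: either no modality or
    #     # exactly one modality is zeroed out; at test time the input passes through.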
Exemple #32
0
class DropModality(Layer):
	'''
	drop a modality altogether
	'''
	def __init__(self, input_shapes = [], **kwargs):
		self.trng = RandomStreams(seed=np.random.randint(10e6))
		self.params = []
		self.input_shapes = input_shapes

	def set_prev_shape(self, input_shapes):
		self.input_shapes = input_shapes

	def get_output(self, train=False):

		X = self.get_input(train)

		full = T.ones_like(X)
		masks = [full]

		for i in xrange(len(self.input_shapes)):
			mask = T.ones_like(X)
			idx = 0
			for j in xrange(len(self.input_shapes)):
				if i == j:
					# Infer the rank of the input from the first shape entry; fall
					# back to rank 1 when the entry is a plain integer width.
					try:
						ishape = len(self.input_shapes[0])
					except TypeError:
						ishape = 1
					if ishape == 3:
						mask = T.set_subtensor(mask[:, :, idx:idx + self.input_shapes[j]], 0)
					elif ishape == 2:
						mask = T.set_subtensor(mask[:, idx:idx + self.input_shapes[j]], 0)
					elif ishape == 1:
						mask = T.set_subtensor(mask[idx:idx + self.input_shapes[j]], 0)
					else:
						raise NotImplementedError()
				idx =  idx + self.input_shapes[j]
			masks += [mask]
		masked = T.stack(masks)

		if train:
			index  = self.trng.random_integers(size=(1,),low = 0, high = len(masks)-1)[0]
		else:
			index = 0
		masked_output = X * masked[index]
		return masked_output

	def get_masked(self, train=False):
		X = self.get_input(train)

		full = T.ones_like(X)
		masks = [full]

		for i in xrange(len(self.input_shapes)):
			mask = T.ones_like(X)
			idx = 0
			for j in xrange(len(self.input_shapes)):
				if i == j:
					mask = T.set_subtensor(mask[:,:,idx : idx+ self.input_shapes[j]], 0)
				idx =  idx + self.input_shapes[j]
			masks += [mask]
		masked = T.stack(masks)
		index  = self.trng.random_integers(size=(1,),low = 0, high = len(masks)-1)[0]

		return masked, index

	def get_input_shapes(self):
		return self.input_shapes

	def get_config(self):
		config = {"name": self.__class__.__name__,
				  "input_shapes" : self.input_shapes
				  }
		base_config = super(DropModality, self).get_config()
		return dict(list(base_config.items()) + list(config.items()))
Exemple #33
0
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams


floatX = theano.config.floatX

vocabularySize = 10
embeddingSize = 10
contextSize = 2
samples = 10
wordIndices = T.ivector('wordIndices')

defaultEmbeddings = np.arange(0, vocabularySize * embeddingSize).reshape((vocabularySize, embeddingSize)).astype(floatX)

embeddings = theano.shared(defaultEmbeddings, name='embeddings', borrow=True)

random = RandomStreams(seed=234)
negativeSampleIndices = random.random_integers((contextSize * samples,), 0, vocabularySize - 1)

indicies = T.concatenate([wordIndices, negativeSampleIndices])
indicies = indicies.reshape((samples + 1, contextSize))

output = embeddings[indicies]
output = output.mean(axis=1)

getEmbeddings = theano.function(
    inputs=[wordIndices],
    outputs=output
)

print getEmbeddings(range(0, contextSize))
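
# A hedged note (not part of the original snippet): after the reshape, row 0 of
# `indicies` holds the true context words and rows 1..samples hold the negative
# samples, so getEmbeddings returns mean context embeddings of shape
# (samples + 1, embeddingSize), e.g.
#
#     assert getEmbeddings(range(0, contextSize)).shape == (samples + 1, embeddingSize)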
Exemple #34
0
uniform_sample = shared(np.matrix(np.float32(np.random.rand(10000000,1))),'float32', borrow=True)
bino_input = shared(np.matrix(np.float32(np.random.binomial(1,1-dropout_input,(10000000,1)))),config.floatX, borrow=True)


t1 = T.fmatrix("t1")
a1 = T.fmatrix("a1")
e1 = T.fmatrix("e1")
idx = T.iscalar("idx")
bsize = T.fscalar("bsize")
alpha = T.fscalar("alpha")
cv_size = T.fscalar("cv_size")


drop_input = lambda rand: T.reshape(bino_input[rand:rand + (batch_size*dim_visible)],(batch_size,dim_visible))
input_drop = drop_input(rdm.random_integers(low=0, high=sample_range_dropout))

h = T.nnet.sigmoid(T.add(T.dot(v,w_vh),w_h))


u_w_plus = function([],updates=[(wu_vh, g(T.add(wu_vh,T.dot(v.T,h)))),
                            (wu_v,  g(T.add(T.sum(v[:],axis=0),wu_v))),
                            (wu_h, g(T.add(T.sum(h[:],axis=0),wu_h)))
                            ])

u_w_minus = function([],updates=[(wu_vh, g(T.sub(wu_vh,T.dot(v.T,h)))),
                            (wu_v,  g(T.sub(T.sum(v[:],axis=0),wu_v))),
                            (wu_h, g(T.sub(T.sum(h[:],axis=0),wu_h)))
                            ])

sample = lambda rdm: T.reshape(uniform_sample[rdm:rdm + (dim_hidden*batch_size)],(batch_size,dim_hidden))
class Neural_network_layer:
    '''Represents the units within a layer and the units'
       activations and dropout functions.
    '''
    
    def __init__(self, size, activation_function, dropout_type, dropout, dropout_decay, batch_size, frequency):
        
        
        self.drop_count = 0
        self.size = size  
        self.frequency = frequency
        self.dropout = dropout    
        self.dropout_init = dropout    
        self.dropout_decay = dropout_decay  
        self.dropout_type = dropout_type    
        self.rdm = RandomStreams(seed=1234)  
        self.batch_size = batch_size   
        self.sample_range = 100000       
        self.create_dropout_sample_functions()  
        self.activation_crossvalidation = activation_function 
        self.activation_function = self.set_dropout(dropout, activation_function)
        self.activation_derivative = lambda X: g(T.mul(X, (1.0 - X)))   
        self.activation_tracker = self.set_activation_tracker(activation_function)             
        
        pass
    
    
    def set_dropout(self, dropout, activation_function):
        action_with_drop = None
        if dropout > 0:
            action_with_drop = lambda X: T.mul(activation_function(X),self.dropout_function)            
            self.activation_cv_dropout = lambda X: T.mul(activation_function(X),self.dropout_function_cv)
        else:
            action_with_drop = activation_function
            self.activation_cv_dropout = activation_function
            
        return action_with_drop
     
    def set_activation_tracker(self, activation_function): 
        '''Sets a tracker function that logs the activations that exceed 0.75.
        '''
        if activation_function == Activation_function.sigmoid:
            activation_tracker = lambda X: T.gt(activation_function(X),0.75)
        else:
            activation_tracker = None
        return activation_tracker
    
    def create_dropout_sample_functions(self, reset=False):
        '''Creates dropout sample functions over a large shared sample vector which
           can be indexed with random integers to produce a pseudo-random dropout
           mask. This greatly speeds up sampling as no new samples have to be drawn.
        '''
        if reset:
            self.dropout = self.dropout_init
            print 'Reset dropout to ' + str(self.dropout)
        
        self.dropout_function = None
        sample_function = None
        if self.dropout > 0:
            if self.dropout_type == Dropout.drop_activation:
                if reset:
                    self.bino_sample_vector.set_value(np.matrix(np.float32(
                                        np.random.binomial(1,1-self.dropout,(10000000,1)))),
                                        borrow=True) 
                else:
                    self.bino_sample_vector = shared(np.matrix(np.float32(
                                            np.random.binomial(1,1-self.dropout,(10000000,1)))),
                                            'float32', borrow=True) 
            
                sample_function = lambda rand: g(T.reshape(self.bino_sample_vector[rand:rand + (self.batch_size*self.size)],(self.batch_size,self.size)))
                sample_function_cv = lambda rand: g(T.reshape(self.bino_sample_vector[rand:rand + (4200*self.size)],(4200,self.size)))
                self.dropout_function = sample_function(self.rdm.random_integers(low=0, high=self.sample_range))  
                self.dropout_function_cv = sample_function_cv(self.rdm.random_integers(low=0, high=self.sample_range))  
             
                
    def handle_dropout_decay(self, epoch):
        '''Automatically handles dropout decay by decreasing the dropout rate by
           the given amount after the given number of epochs.
        '''
        if self.dropout_function and self.frequency[self.drop_count] > 0 and epoch % self.frequency[self.drop_count] == 0  and epoch > 0:
            print 'Setting dropout from  '  + str(self.dropout)  + ' to ' + str(np.float32(self.dropout*(1-self.dropout_decay[self.drop_count])))   
            
            self.dropout = np.float32(self.dropout*(1-self.dropout_decay[self.drop_count]))       
            
            if self.dropout_type == Dropout.drop_activation:
                self.bino_sample_vector.set_value(np.matrix(np.float32( 
                                        np.random.binomial(1,1-self.dropout,(10000000,1)))),
                                        borrow=True) 
            self.drop_count += 1   
            if self.drop_count > len(self.dropout_decay)-1:
                self.drop_count -= 1
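
# A hedged, stand-alone sketch of the trick described in
# create_dropout_sample_functions above (shapes, sizes, and the 0.5 dropout rate
# are assumptions for illustration, not the original code): draw one large
# Bernoulli pool once, then build each dropout mask by slicing the pool at a
# random offset instead of resampling.
import numpy as np
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

batch_size, layer_size, dropout = 128, 800, 0.5
pool = theano.shared(np.float32(np.random.binomial(1, 1 - dropout, (10000000, 1))),
                     borrow=True)
rdm = RandomStreams(seed=1234)
offset = rdm.random_integers(low=0, high=100000)
dropout_mask = T.reshape(pool[offset:offset + batch_size * layer_size],
                         (batch_size, layer_size))
get_mask = theano.function([], dropout_mask)
print "approximate keep rate: {}".format(get_mask().mean())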
Exemple #36
0

shared_random_generator = RandomStreams()

x_r = T.iscalar()
y_r = T.iscalar()
p_scalar = T.fscalar('p_scalar')
binomial_f = theano.function([x_r, y_r, p_scalar], outputs=shared_random_generator.
                             binomial(size=(x_r, y_r), n=1, p=p_scalar, dtype='float32'))

rows = T.iscalar()
columns = T.iscalar()
uniform_f = theano.function([rows, columns], outputs=shared_random_generator.
                            uniform(size=(rows, columns), low=-0.1, high=0.1, dtype='float32'))

random_f = theano.function([rows, columns], outputs=shared_random_generator.random_integers(
    size=(rows, columns), low=0, high=10000, dtype='float32')/10000.)


def get_random_input(in_dim):
    return 2 * binomial_f(1, in_dim, 0.5) - np.ones((1, in_dim), dtype=np.float32)


def set_contains_pattern(patterns_set, pattern):
    for pat in patterns_set:
        if get_pattern_correlation(pat, pattern) == 1:
            return True
    return False

pat1 = T.fmatrix()
pat2 = T.fmatrix()
get_pattern_correlation = theano.function([pat1, pat2], outputs=T.sum(pat1 * pat2)/(pat1.shape[0] * pat1.shape[1]))
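
# A hedged usage note (not from the original source): get_random_input(in_dim)
# returns a (1, in_dim) float32 row of +/-1 values, and random_f approximates
# uniform(0, 1) draws at a resolution of 1e-4 by rescaling random integers, e.g.
#
#     x = get_random_input(10)
#     print set_contains_pattern([x], x)   # True: a pattern always matches itself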
Exemple #37
0
    np.float32(np.random.binomial(1, 1 - dropout_input, (10000000, 1)))),
                    config.floatX,
                    borrow=True)

t1 = T.fmatrix("t1")
a1 = T.fmatrix("a1")
e1 = T.fmatrix("e1")
idx = T.iscalar("idx")
bsize = T.fscalar("bsize")
alpha = T.fscalar("alpha")
cv_size = T.fscalar("cv_size")

drop_input = lambda rand: T.reshape(
    bino_input[rand:rand + (batch_size * dim_visible)],
    (batch_size, dim_visible))
input_drop = drop_input(rdm.random_integers(low=0, high=sample_range_dropout))

h = T.nnet.sigmoid(T.add(T.dot(v, w_vh), w_h))

u_w_plus = function([],
                    updates=[(wu_vh, g(T.add(wu_vh, T.dot(v.T, h)))),
                             (wu_v, g(T.add(T.sum(v[:], axis=0), wu_v))),
                             (wu_h, g(T.add(T.sum(h[:], axis=0), wu_h)))])

u_w_minus = function([],
                     updates=[(wu_vh, g(T.sub(wu_vh, T.dot(v.T, h)))),
                              (wu_v, g(T.sub(T.sum(v[:], axis=0), wu_v))),
                              (wu_h, g(T.sub(T.sum(h[:], axis=0), wu_h)))])

sample = lambda rdm: T.reshape(
    uniform_sample[rdm:rdm + (dim_hidden * batch_size)],
Exemple #38
0
x = T.scalar()
z = g * x
gradVal = T.grad(z, x)
f = theano.function([x], gradVal)

#With Scan
def step(lastVal, xval):
    return (g * xval)
outputs, updates = theano.scan(step, sequences = [], non_sequences = [x], outputs_info = [1.0], n_steps = 5)
gradVal = T.grad(outputs[-1], x)
f = theano.function([x], outputs = gradVal)
print f(1), f(1), f(1), f(1)

exit(0)


if __name__ == '__main__':
    rng = RandomStreams(0)

    x = T.vector('x')
    xx = x ** 2
    y = xx[rng.random_integers()]

    dy = T.grad(y, x)
    fdy = function([x], dy)
    for i in range(100):
        print fdy([1, 1])
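
    # A hedged note (not part of the original snippet): the RandomStreams state is
    # updated on every call, so each fdy([1, 1]) draws a fresh index in {0, 1} and
    # returns either [2., 0.] or [0., 2.]; the gradient simply flows through
    # whichever entry of x**2 was selected.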



Exemple #39
0
	def __init__(self, train_x, train_y, valid_x, valid_y, test_x, test_y, batchSize):
		rng = numpy.random.RandomState(42)

		self.train_x = theano.shared(train_x.astype('float32'))
		self.train_y = theano.shared(train_y.astype('int32'))
		self.valid_x = theano.shared(valid_x.astype('float32')).reshape((valid_x.shape[0],1,28,28))
		self.valid_y = theano.shared(valid_y.astype('int32'))
		self.test_x = theano.shared(test_x.astype('float32')).reshape((test_x.shape[0],1,28,28))
		self.test_y = theano.shared(test_y.astype('int32'))

		x = T.matrix()
		y = T.ivector()
		index = T.lscalar()
		learningRate = T.scalar()
		L1_reg = 0.0
		L2_reg = 0.0

		random_stream = RandomStreams(seed=420)
		indices = random_stream.random_integers((batchSize,), low=0, high=train_x.shape[0]-1)
		x = self.train_x.take(indices, axis=0)
		y = self.train_y.take(indices, axis=0)

		layer0Input = x.reshape((batchSize,1,28,28))

		layer0 = ConvPoolLayer(
			rng=rng,
			input=layer0Input,
			filter_shape=(64,1,3,3),
			image_shape=(None,1,28,28),
			poolsize=(2,2)
		)

		layer1 = ConvPoolLayer(
			rng=rng,
			input=layer0.output,
			filter_shape=(128,64,3,3),
			image_shape=(None,64,13,13),
			poolsize=(2,2)
		)

		layer1Out = layer1.output.flatten(2)

		layer2 = HiddenLayer(
			rng=rng,
			input=layer1Out,
			n_in=128*5*5,
			n_out=512,
			activation=relu
		)

		layer3 = LogisticRegression(
			rng=rng,
			input=layer2.output,
			n_in=layer2.n_out,
			n_out=10
		)

		L1 = abs(layer0.W).sum() + abs(layer1.W).sum() + abs(layer2.W).sum() + abs(layer3.W).sum()
		L2 = (layer0.W**2).sum() + (layer1.W**2).sum() + (layer2.W**2).sum() + (layer3.W**2).sum()
		cost = layer3.negative_log_likelihood(y) + L1_reg * L1 + L2_reg * L2

		self.test_model = theano.function(
			[index],
			layer3.errors(y),
			givens={
				layer0Input: self.test_x[index * 1000:(index+1)*1000,:,:,:],
				y: self.test_y[index * 1000:(index+1)*1000]
			}
		)

		self.validate_model = theano.function(
			[index],
			[layer3.errors(y), cost],
			givens={
				layer0Input: self.valid_x[index * 1000:(index+1)*1000,:,:,:],
				y: self.valid_y[index * 1000:(index+1)*1000]
			}
		)
		
		self.forward = theano.function([layer0Input], [layer3.p_y_given_x])
		
		self.params = layer3.params + layer2.params + layer1.params + layer0.params
		updates = self.rmsProp(cost, self.params, 0.7, 0.01, learningRate)
		self.train_model = theano.function(
			[learningRate],
			cost,
			updates=updates
		)
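
		# A hedged usage sketch (not from the original source; the class name is not
		# shown in this snippet): because the minibatch indices are drawn inside the
		# graph via random_stream.random_integers, every call to train_model samples a
		# fresh random batch of size batchSize, e.g.
		#
		#     net = <this class>(train_x, train_y, valid_x, valid_y, test_x, test_y, batchSize=128)
		#     for step in range(2000):
		#         cost = net.train_model(numpy.float32(0.001))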