Python softmax_nd Examples, discrete_skip_gram.tensor_util.softmax_nd Python Examples

Example #1

0

Show file

File: tree_parameterization.py Project: bstriner/discrete-skip-gram

    def __init__(self, x_k, z_depth, z_k, scale=1e-1, **kwargs):
        """Create one weight tensor per tree level and chain them into p(z|x).

        Level ``d`` owns a shared tensor of shape ``(x_k, z_k**d, z_k)`` that
        is appended to ``self.params``.  Each level's ``softmax_nd`` output is
        multiplied by the flattened result of the previous level, and the
        flattened products are collected in ``self.pzs``.

        Args:
            x_k: size of the first axis of every weight tensor.
            z_depth: number of tree levels (one weight tensor per level).
            z_k: branching factor; size of the last axis of every tensor.
            scale: half-width of the uniform range used for initialisation.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        super(ParameterizationFull, self).__init__(
            x_k=x_k, z_depth=z_depth, z_k=z_k, **kwargs)

        # One trainable tensor per level, registered on the parent's list.
        for level in range(z_depth):
            n_buckets = int(z_k**level)
            values = np.random.uniform(-scale, scale, (x_k, n_buckets, z_k))
            self.params.append(
                theano.shared(values.astype(self.type_np),
                              name="pz_{}".format(level)))

        # Level 0 has a single bucket, so its tensor collapses to (x_k, z_k).
        # NOTE(review): softmax_nd presumably normalises the last axis --
        # confirm against tensor_util.
        parent = T.reshape(softmax_nd(self.params[0]), (x_k, z_k))
        level_probs = [parent]
        for level in range(1, z_depth):
            child = softmax_nd(self.params[level])
            # Broadcast the parent bucket value over its z_k children.
            joint = parent.dimshuffle((0, 1, 'x')) * child
            # Merge (bucket, child) into one axis to form the next parent.
            parent = T.reshape(
                joint, (joint.shape[0], joint.shape[1] * joint.shape[2]))
            level_probs.append(parent)

        self.pzs = level_probs
        self.encoding = self.calc_encoding()

Example #2

0

Show file

File: tree_parameterization.py Project: bstriner/discrete-skip-gram

    def __init__(self, x_k, z_depth, z_k, scale=1e-1, weight=1e2, **kwargs):
        """Give every tree level its own softmax and penalise inconsistency.

        Level ``depth`` owns an independent shared weight matrix of shape
        ``(x_k, z_k**(depth + 1))``, appended to ``self.params``.  The
        ``softmax_nd`` output of each matrix is stored in ``self.pzs``.

        ``self.loss`` accumulates, for every pair of a coarser level and a
        strictly finer level, ``weight`` times the summed squared difference
        between the coarser level and the finer level after the finer one has
        been summed back down to the coarser level's bucket count.  With
        ``z_depth <= 1`` there are no pairs and ``self.loss`` stays ``0.``.

        Args:
            x_k: number of rows of every weight matrix.
            z_depth: number of tree levels.
            z_k: branching factor.
            scale: half-width of the uniform range used for initialisation.
            weight: multiplier applied to each consistency penalty.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        super(ParameterizationReg, self).__init__(x_k=x_k,
                                                  z_depth=z_depth,
                                                  z_k=z_k,
                                                  **kwargs)
        for depth in range(z_depth):
            buckets = int(z_k**(depth + 1))
            initial_weight = np.random.uniform(
                -scale, scale, (x_k, buckets)).astype(self.type_np)
            pz_weight = theano.shared(
                initial_weight,
                name="pz_{}".format(depth))  # (x_k, buckets)
            self.params.append(pz_weight)

        # calculate p(z|x): each level is normalised independently
        pzs = [softmax_nd(self.params[depth]) for depth in range(z_depth)]
        self.pzs = pzs

        weight = T.constant(weight)
        # loss
        self.loss = 0.
        for i0 in range(0, z_depth - 1):
            p0 = pzs[i0]  # (x_k, b0)
            # Start at i0 + 1: pairing a level with itself reshapes it to
            # (x_k, b0, 1), sums straight back to the same tensor and so
            # contributes exactly zero while still adding graph nodes.
            for i1 in range(i0 + 1, z_depth):
                p1 = pzs[i1]
                # Group the finer buckets under their coarser ancestor.
                p1r = T.reshape(p1, (x_k, p0.shape[1], -1))  # (x_k, b0, -1)
                p1s = T.sum(p1r, axis=2)  # (x_k, b0)
                l2 = T.sum(T.square(p1s - p0), axis=None)
                self.loss += l2 * weight
        self.encoding = self.calc_encoding()

Example #3

0

Show file

File: tree_parameterization.py Project: bstriner/discrete-skip-gram

    def __init__(self, x_k, z_depth, z_k, scale=1e-1, weight=1e2, **kwargs):
        """Parameterise the finest level and learn how buckets merge upward.

        A single ``(x_k, z_k**z_depth)`` weight matrix feeds ``softmax_nd``
        to give the bottom-level values.  For each step from ``d`` levels
        down to ``d - 1`` a further ``(z_k**d, z_k**(d - 1))`` matrix is
        created; its ``softmax_nd`` output is right-multiplied onto the
        current level to obtain the next coarser one.  ``self.pzs`` lists
        the levels from coarsest to finest.  ``self.encoding`` is ``None``.

        Args:
            x_k: number of rows of the bottom-level weight matrix.
            z_depth: number of tree levels.
            z_k: branching factor.
            scale: half-width of the uniform range used for initialisation.
            weight: accepted for signature compatibility; not read here.
            **kwargs: forwarded unchanged to the parent constructor.
        """
        super(ParameterizationBU, self).__init__(
            x_k=x_k, z_depth=z_depth, z_k=z_k, **kwargs)

        # Bottom level: one logit per leaf bucket.
        n_leaves = int(z_k**z_depth)
        leaf_init = np.random.uniform(-scale, scale, (x_k, n_leaves))
        leaf_weight = theano.shared(leaf_init.astype(self.type_np),
                                    name="pz_weight")  # (x_k, n_leaves)
        self.params.append(leaf_weight)
        leaf_probs = softmax_nd(leaf_weight)  # (x_k, n_leaves)

        # Merge matrices, finest transition first (z_depth -> z_depth - 1).
        merges = []
        for fine_depth in range(z_depth, 1, -1):
            coarse_depth = fine_depth - 1
            n_fine = int(z_k**fine_depth)
            n_coarse = int(z_k**coarse_depth)
            merge_init = np.random.uniform(-scale, scale, (n_fine, n_coarse))
            merge_weight = theano.shared(
                merge_init.astype(self.type_np),
                name="pc_weight_{}_{}".format(fine_depth, coarse_depth))
            self.params.append(merge_weight)
            merges.append(softmax_nd(merge_weight))  # (n_fine, n_coarse)

        # Walk upward from the leaves, collecting every level on the way.
        bottom_up = [leaf_probs]
        current = leaf_probs
        for merge in merges:
            current = T.dot(current, merge)  # (x_k, n_coarse)
            bottom_up.append(current)

        self.pzs = bottom_up[::-1]
        self.encoding = None

Example #4

0

Show file

File: discrete_full.py Project: bstriner/discrete-skip-gram

 def calc_depth(self, pz, py_weight, cond_pt):
     """Return the per-sample weighted negative log-likelihood of one level.

     Shapes below follow the original annotations and the ranks fixed by
     the dimshuffle patterns.

     Args:
         pz: rank-3 tensor, annotated ``(n, buckets, z_k)``.
         py_weight: rank-3 tensor, annotated ``(buckets, z_k, x_k)``; passed
             through ``softmax_nd`` before the log is taken.
         cond_pt: rank-2 tensor, annotated ``(n, x_k)``.

     Returns:
         A rank-1 tensor, annotated ``(n,)``.
     """
     tiny = 1e-9  # keeps the log finite when a probability is zero
     neg_log_py = -T.log(tiny + softmax_nd(py_weight))  # (buckets, z_k, x_k)
     # Line the two operands up as (n, buckets, z_k, x_k) for broadcasting.
     targets = cond_pt.dimshuffle((0, 'x', 'x', 1))
     costs = neg_log_py.dimshuffle(('x', 0, 1, 2))
     per_bucket = T.sum(targets * costs, axis=3)  # (n, buckets, z_k)
     per_sample = T.sum(per_bucket * pz, axis=[1, 2])  # (n,)
     assert per_sample.ndim == 1
     return per_sample

Example #5

0

Show file

File: tree_parameterization.py Project: bstriner/discrete-skip-gram

 def __init__(self, x_k, z_depth, z_k, scale=1e-1, weight=1e2, **kwargs):
     """Parameterise the finest level only; coarser levels are sums of it.

     One ``(x_k, z_k**z_depth)`` weight matrix is created, appended to
     ``self.params`` and passed through ``softmax_nd``.  Every coarser
     level is obtained by reshaping that result to
     ``(x_k, z_k**level, -1)`` and summing the last axis.  ``self.pzs``
     lists the levels from coarsest to finest.

     Args:
         x_k: number of rows of the weight matrix.
         z_depth: number of tree levels.
         z_k: branching factor.
         scale: half-width of the uniform range used for initialisation.
         weight: accepted for signature compatibility; not read here.
         **kwargs: forwarded unchanged to the parent constructor.
     """
     super(ParameterizationSum, self).__init__(
         x_k=x_k, z_depth=z_depth, z_k=z_k, **kwargs)

     n_leaves = int(z_k**z_depth)
     init = np.random.uniform(-scale, scale, (x_k, n_leaves))
     leaf_weight = theano.shared(init.astype(self.type_np),
                                 name="pz_weight")  # (x_k, n_leaves)
     self.params.append(leaf_weight)
     leaf_probs = softmax_nd(leaf_weight)  # (x_k, n_leaves)

     # calculate p(z|x): fold the leaves under each level's buckets
     coarse_levels = [
         T.sum(T.reshape(leaf_probs, (x_k, int(z_k**level), -1)), axis=2)
         for level in range(1, z_depth)
     ]
     self.pzs = coarse_levels + [leaf_probs]
     self.encoding = self.calc_encoding()

Example #6

0

Show file

    def __init__(self,
                 cooccurrence,
                 z_depth,
                 z_k,
                 opt,
                 schedule,
                 pz_regularizer=None,
                 pz_weight_regularizer=None,
                 eps=1e-9,
                 scale=1e-2):
        """Build the training graph and compile the Theano functions.

        The co-occurrence matrix is cast to float32 and divided by its total.
        One ``(x_k, z_k**z_depth)`` weight matrix is passed through
        ``softmax_nd``; coarser levels are obtained by reshaping and summing.
        A per-level loss is computed from ``dot(cooccurrence, level)`` and
        the levels are combined using ``schedule`` as weights.

        Args:
            cooccurrence: array whose first dimension gives ``x_k``.
            z_depth: number of tree levels; must equal ``schedule.shape[0]``.
            z_k: branching factor.
            opt: optimiser exposing ``get_updates(params, {}, loss)`` and
                ``weights``.
            schedule: 1-D array of per-level loss weights.
            pz_regularizer: optional callable applied to every level.
            pz_weight_regularizer: optional callable applied to the weights.
            eps: constant added inside the division and the log.
            scale: half-width of the uniform range used for initialisation.

        Sets ``train_fun``, ``val_fun``, ``encodings_fun``, ``z_fun``,
        ``params``, ``pzs`` and ``weights`` on the instance.
        """
        # Normalise so that the whole matrix sums to one.
        cooccurrence = cooccurrence.astype(np.float32)
        cooccurrence = cooccurrence / np.sum(cooccurrence, axis=None)
        joint = theano.shared(cooccurrence, name='cooccurrence')
        x_k = cooccurrence.shape[0]

        self.cooccurrence = cooccurrence
        self.z_depth = z_depth
        self.z_k = z_k
        self.x_k = x_k
        self.opt = opt
        self.schedule = schedule
        self.pz_regularizer = pz_regularizer
        self.pz_weight_regularizer = pz_weight_regularizer
        assert schedule.shape[0] == z_depth
        assert schedule.ndim == 1
        level_weights = T.constant(schedule.astype(np.float32),
                                   dtype='float32',
                                   name="schedule")  # (z_depth,)

        # parameterization: a single matrix of leaf logits
        n_leaves = int(z_k**z_depth)
        init = np.random.uniform(-scale, scale, (x_k, n_leaves))
        pz_weight = theano.shared(init.astype(np.float32),
                                  name="pz_weight")  # (x_k, n_leaves)
        self.params = [pz_weight]
        leaf_probs = softmax_nd(pz_weight)  # (x_k, n_leaves)

        # calculate p(z|x): coarser levels are sums over groups of leaves
        level_probs = [
            T.sum(T.reshape(leaf_probs, (x_k, int(z_k**level), -1)), axis=2)
            for level in range(1, z_depth)
        ]
        level_probs.append(leaf_probs)
        self.pzs = level_probs

        def level_nll(pz):
            """Scalar loss for one level, following the original formula."""
            p = T.dot(joint, pz)  # (x_k, b0)
            marg = T.sum(p, axis=0, keepdims=True)  # (1, b0)
            cond = p / (marg + eps)  # (x_k, b0)
            return T.sum(p * -T.log(cond + eps), axis=None)

        # calculate nlls
        nlls = T.stack(
            [level_nll(level_probs[depth]) for depth in range(z_depth)]
        )  # (z_depth,)
        loss = T.sum(level_weights * nlls, axis=0)  # scalar

        # regularization
        reg_loss = T.constant(0.)
        if pz_weight_regularizer:
            reg_loss += pz_weight_regularizer(pz_weight)
        if pz_regularizer:
            penalties = [pz_regularizer(level) for level in level_probs]
            reg_loss += T.sum(T.stack(penalties) * level_weights)

        # training
        loss += reg_loss
        updates = opt.get_updates(self.params, {}, loss)

        # encoding: peel off one place value per level, largest first
        # NOTE(review): T.ge yields a 0/1 flag, so each entry looks like a
        # binary digit; for z_k > 2 this may not be the intended digit --
        # confirm with the author.
        z = T.argmax(leaf_probs, axis=1)  # (x_k,) [int 0-buckets]
        remainder = z
        flags = []
        for exponent in reversed(range(z_depth)):
            place = int(z_k**exponent)
            flag = T.ge(remainder, place)
            remainder -= (place * flag)
            flags.append(flag)
        encodings = T.stack(flags, axis=1)  # (x_k, z_depth)

        # Theano functions
        self.train_fun = theano.function([], [nlls, reg_loss, loss],
                                         updates=updates)
        self.val_fun = theano.function([], [nlls, reg_loss, loss])
        self.encodings_fun = theano.function([], encodings)
        self.z_fun = theano.function([], z)

        self.weights = self.params + opt.weights

Example #7

0

Show file

File: discrete_full.py Project: bstriner/discrete-skip-gram

    def __init__(self, cooccurrence, z_depth, z_k, opt,
                 schedule,
                 type_np=np.float32,
                 type_t='float32',
                 regularizer=None):
        """Build the mini-batch training graph and the encoding function.

        Two stacks of shared tensors are created, one per tree level each:
        ``pz_*`` of shape ``(x_k, z_k**depth, z_k)`` and ``py_*`` of shape
        ``(z_k**depth, z_k, x_k)``.  The training function takes an int32
        index vector and returns the per-level weighted losses, the
        regularisation term and the total loss.

        Args:
            cooccurrence: array whose first dimension gives ``x_k``.
            z_depth: number of tree levels.
            z_k: branching factor.
            opt: optimiser exposing ``get_updates(params, {}, loss)`` and
                ``weights``.
            schedule: array of per-level loss weights.
            type_np: NumPy dtype used for the data and initial weights.
            type_t: Theano dtype string used for constants.
            regularizer: optional callable applied to every parameter.

        Sets ``train_fun``, ``encodings_fun`` and ``all_weights`` on the
        instance.
        """
        cooccurrence = cooccurrence.astype(type_np)
        self.cooccurrence = cooccurrence
        self.type_np = type_np
        self.type_t = type_t
        self.z_depth = z_depth
        init_scale = 1e-2
        x_k = cooccurrence.shape[0]
        level_weights = T.constant(schedule.astype(type_np), dtype=type_t,
                                   name="schedule")  # (z_depth,)

        # marginal probability of each row
        total = np.sum(cooccurrence, axis=None)
        row_marginal = np.sum(cooccurrence, axis=1) / total  # (x_k,)
        marg_p = T.constant(row_marginal, dtype=type_t)
        # Log-marginal shifted so that its maximum is zero.
        log_marginal = np.log(row_marginal)
        log_marg_p = T.constant(log_marginal - np.max(log_marginal),
                                dtype=type_t)  # (x_k,)

        # conditional probability: each row divided by its own sum
        row_conditional = cooccurrence / np.sum(cooccurrence, axis=1,
                                                keepdims=True)
        cond_p = T.constant(row_conditional, dtype=type_t)

        # parameters
        bucket_counts = [int(z_k ** depth) for depth in range(z_depth)]

        # p(z|x) weights -- drawn first so the random stream order is kept
        pz_weights = []
        for depth, buckets in enumerate(bucket_counts):
            pz_init = np.random.uniform(-init_scale, init_scale,
                                        (x_k, buckets, z_k)).astype(type_np)
            pz_weights.append(
                theano.shared(pz_init, name="pz_{}".format(depth)))

        # p(y|z) weights
        py_weights = []
        for depth, buckets in enumerate(bucket_counts):
            py_init = np.random.uniform(-init_scale, init_scale,
                                        (buckets, z_k, x_k)).astype(type_np)
            py_weights.append(
                theano.shared(py_init, name='py_{}'.format(depth)))
        params = pz_weights + py_weights

        # indices of the rows in the current mini-batch
        idx = T.ivector()  # (n,)
        batch = idx.shape[0]

        # calculate p(z|x): start from a single bucket holding all the mass
        running = T.ones((batch, 1, 1), dtype=type_t)  # (n, b0, z_k)
        pzs = []
        for pz_weight in pz_weights:
            branch = softmax_nd(pz_weight[idx, :, :])  # (n, b1, z_k)
            flat = T.reshape(
                running,
                (running.shape[0], running.shape[1] * running.shape[2]))
            running = flat.dimshuffle((0, 1, 'x')) * branch  # (n, b1, z_k)
            pzs.append(running)

        # loss calculation
        cond_pt = cond_p[idx, :]  # (n, x_k)
        marg_pt = marg_p[idx]  # (n,)
        per_level = [
            self.calc_depth(pz, py_weight + log_marg_p, cond_pt)  # (n,)
            for pz, py_weight in zip(pzs, py_weights)
        ]
        nlls = T.stack(per_level, axis=1)  # (n, z_depth)
        wnlls = T.sum(nlls * marg_pt.dimshuffle((0, 'x')),
                      axis=0)  # (z_depth,)
        loss = T.sum(level_weights * wnlls, axis=0)  # scalar
        reg_loss = 0.
        if regularizer:
            reg_loss = sum((regularizer(param) for param in params), reg_loss)
            reg_loss *= T.sum(marg_pt)  # scale to size of batch
            loss += reg_loss
        updates = opt.get_updates(params, {}, loss)
        self.train_fun = theano.function([idx], [wnlls, reg_loss, loss],
                                         updates=updates)

        # Discrete encoding: greedily pick the best child at each level
        bucket = T.zeros((x_k,), dtype='int32')  # (x_k,)
        choices = []
        for pz_weight in pz_weights:
            probs = softmax_nd(pz_weight)  # (x_k, buckets, z_k)
            rows = T.arange(probs.shape[0])
            choice = T.argmax(probs[rows, bucket, :], axis=1)  # (x_k,)
            assert choice.ndim == 1
            # todo (carried over from the original): double-check order
            bucket = (bucket * z_k) + choice  # (x_k,) [int 0-b1]
            choices.append(choice)
        encoding = T.stack(choices, axis=1)  # (x_k, z_depth)
        self.encodings_fun = theano.function([], encoding)
        self.all_weights = params + opt.weights