def __init__(self, rbm, units_list, dimensions_list, W, name=None):
        """Set up a weight tensor ``W`` that couples two groups of units.

        ``units_list`` must hold exactly two entries: the visible units first
        and the hidden units second. ``dimensions_list`` gives, in the same
        order, the number of non-minibatch dimensions of each group (stored
        as ``vd`` and ``hd``). ``W`` is indexed with its ``vd`` visible
        dimensions first, followed by its ``hd`` hidden dimensions. Axis 0 of
        every unit tensor is reserved for the minibatch, so the unit tensors
        have ``vd + 1`` and ``hd + 1`` dimensions respectively.
        """
        super(AdvancedProdParameters, self).__init__(rbm, units_list, name=name)
        assert len(units_list) == 2
        self.var = W
        self.variables = [self.var]
        self.vu = units_list[0]
        self.hu = units_list[1]
        self.vd = dimensions_list[0]
        self.hd = dimensions_list[1]
        # total number of dimensions of the weight tensor
        self.vard = self.vd + self.hd

        def visible_term(vmap):
            # contract the non-minibatch axes of the hiddens with the trailing
            # (hidden) axes of W. The axes are passed as explicit lists so they
            # are concrete sequences on Python 3 as well.
            hidden_axes = list(range(1, self.hd + 1))
            weight_axes = list(range(self.vd, self.vard))
            return tensordot(vmap[self.hu], W, axes=(hidden_axes, weight_axes))

        def hidden_term(vmap):
            # contract the non-minibatch axes of the visibles with the leading
            # (visible) axes of W.
            visible_axes = list(range(1, self.vd + 1))
            weight_axes = list(range(0, self.vd))
            return tensordot(vmap[self.vu], W, axes=(visible_axes, weight_axes))

        self.terms[self.vu] = visible_term
        self.terms[self.hu] = hidden_term

        def gradient(vmap):
            # Pad the visibles with hd trailing broadcastable axes and the
            # hiddens with vd broadcastable axes right after the minibatch
            # axis, so that their elementwise product is a per-example outer
            # product.
            # list(...) is required here: on Python 3 ``range`` is not a list
            # and cannot be concatenated with one using ``+``.
            v_indices = list(range(0, self.vd + 1)) + ['x'] * self.hd
            h_indices = [0] + ['x'] * self.vd + list(range(1, self.hd + 1))
            v_reshaped = vmap[self.vu].dimshuffle(v_indices)
            h_reshaped = vmap[self.hu].dimshuffle(h_indices)
            return v_reshaped * h_reshaped

        self.energy_gradients[self.var] = gradient
        # contracting axis 0 of both tensors sums the outer products over the
        # minibatch without materialising the per-example gradient.
        self.energy_gradient_sums[self.var] = lambda vmap: tensordot(vmap[self.vu], vmap[self.hu], axes=([0], [0]))
 def __init__(self, rbm, units_list, dimensions_list, W, name=None):
     """Set up a weight tensor ``W`` that couples two groups of units.

     ``units_list`` must hold exactly two entries: the visible units first
     and the hidden units second. ``dimensions_list`` gives, in the same
     order, the number of non-minibatch dimensions of each group (stored as
     ``vd`` and ``hd``). ``W`` is indexed with its ``vd`` visible dimensions
     first, followed by its ``hd`` hidden dimensions. Axis 0 of every unit
     tensor is reserved for the minibatch, so the unit tensors have
     ``vd + 1`` and ``hd + 1`` dimensions respectively.
     """
     super(AdvancedProdParameters, self).__init__(rbm, units_list, name=name)
     assert len(units_list) == 2
     self.var = W
     self.variables = [self.var]
     self.vu = units_list[0]
     self.hu = units_list[1]
     self.vd = dimensions_list[0]
     self.hd = dimensions_list[1]
     # total number of dimensions of the weight tensor
     self.vard = self.vd + self.hd

     def visible_term(vmap):
         # contract the non-minibatch axes of the hiddens with the trailing
         # (hidden) axes of W. The axes are passed as explicit lists so they
         # are concrete sequences on Python 3 as well.
         hidden_axes = list(range(1, self.hd + 1))
         weight_axes = list(range(self.vd, self.vard))
         return tensordot(vmap[self.hu], W, axes=(hidden_axes, weight_axes))

     def hidden_term(vmap):
         # contract the non-minibatch axes of the visibles with the leading
         # (visible) axes of W.
         visible_axes = list(range(1, self.vd + 1))
         weight_axes = list(range(0, self.vd))
         return tensordot(vmap[self.vu], W, axes=(visible_axes, weight_axes))

     self.terms[self.vu] = visible_term
     self.terms[self.hu] = hidden_term

     def gradient(vmap):
         # Pad the visibles with hd trailing broadcastable axes and the
         # hiddens with vd broadcastable axes right after the minibatch axis,
         # so that their elementwise product is a per-example outer product.
         # list(...) is required here: on Python 3 ``range`` is not a list and
         # cannot be concatenated with one using ``+``.
         v_indices = list(range(0, self.vd + 1)) + ['x'] * self.hd
         h_indices = [0] + ['x'] * self.vd + list(range(1, self.hd + 1))
         v_reshaped = vmap[self.vu].dimshuffle(v_indices)
         h_reshaped = vmap[self.hu].dimshuffle(h_indices)
         return v_reshaped * h_reshaped

     self.energy_gradients[self.var] = gradient
     # contracting axis 0 of both tensors sums the outer products over the
     # minibatch without materialising the per-example gradient.
     self.energy_gradient_sums[self.var] = lambda vmap: tensordot(vmap[self.vu], vmap[self.hu], axes=([0], [0]))
 def energy_term(self, vmap):
     """Return the negated unit/parameter contraction, summed over the shared axes.

     Axes ``1..nd`` of ``vmap[self.u]`` are contracted with the first ``nd``
     axes of ``self.var``; the last ``sd`` axes of that result are then
     summed out and the sign is flipped.
     """
     units = vmap[self.u]
     params = self.var
     # Padding the parameter with broadcastable axes and contracting everything
     # in a single tensordot does not work, because tensordot cannot handle
     # broadcastable dimensions. Summing the corresponding axes out afterwards
     # comes down to the same thing.
     unit_axes = range(1, self.nd + 1)
     param_axes = range(0, self.nd)
     contracted = tensordot(units, params, axes=(unit_axes, param_axes))
     # the trailing ``sd`` axes of the contraction are the shared dimensions.
     n_axes = contracted.ndim
     shared_axes = range(n_axes - self.sd, n_axes)
     return -T.sum(contracted, axis=shared_axes)
 def energy_term(self, vmap):
     """Return the negated unit/parameter contraction, summed over the shared axes.

     Axes ``1..nd`` of ``vmap[self.u]`` are contracted with the first ``nd``
     axes of ``self.var``; the last ``sd`` axes of that result are then
     summed out and the sign is flipped.
     """
     units = vmap[self.u]
     params = self.var
     # Padding the parameter with broadcastable axes and contracting everything
     # in a single tensordot does not work, because tensordot cannot handle
     # broadcastable dimensions. Summing the corresponding axes out afterwards
     # comes down to the same thing.
     unit_axes = range(1, self.nd + 1)
     param_axes = range(0, self.nd)
     contracted = tensordot(units, params, axes=(unit_axes, param_axes))
     # the trailing ``sd`` axes of the contraction are the shared dimensions.
     n_axes = contracted.ndim
     shared_axes = range(n_axes - self.sd, n_axes)
     return -T.sum(contracted, axis=shared_axes)
Beispiel #5
0
 def energy_term(self, vmap):
     """Return the energy contribution of this weight tensor for ``vmap``.

     The term registered for the hidden units is evaluated first (it stands
     in for contracting the visibles with ``self.var`` directly), and its
     result is then contracted with the hidden units over axes ``1..hd``.
     Axis 0, which is reserved for minibatches, is not contracted.
     """
     hidden_input = self.terms[self.hu](vmap)
     hidden_units = vmap[self.hu]
     lhs_axes = range(1, self.hd + 1)
     rhs_axes = range(1, self.hd + 1)
     # NOTE(review): both operands keep their axis 0, so the result presumably
     # carries two minibatch axes -- confirm that callers expect this shape.
     negative_energy = tensordot(hidden_input, hidden_units, axes=(lhs_axes, rhs_axes))
     # the contraction yields the negative energy, so flip the sign.
     return -negative_energy
 def energy_term(self, vmap):
     """Return the energy contribution of this weight tensor for ``vmap``.

     The term registered for the hidden units is evaluated first (it stands
     in for contracting the visibles with ``self.var`` directly), and its
     result is then contracted with the hidden units over axes ``1..hd``.
     Axis 0, which is reserved for minibatches, is not contracted.
     """
     hidden_input = self.terms[self.hu](vmap)
     hidden_units = vmap[self.hu]
     lhs_axes = range(1, self.hd + 1)
     rhs_axes = range(1, self.hd + 1)
     # NOTE(review): both operands keep their axis 0, so the result presumably
     # carries two minibatch axes -- confirm that callers expect this shape.
     negative_energy = tensordot(hidden_input, hidden_units, axes=(lhs_axes, rhs_axes))
     # the contraction yields the negative energy, so flip the sign.
     return -negative_energy
 def term_u2(vmap):
     """Compute the term for the ``u2`` units from the ``u0`` and ``u1`` units."""
     # contract axis 1 of the u0 units with the first axis of W.
     projected = tensordot(vmap[self.u0], W, axes=([1], [0]))  # (mb, u1, u2)
     # append a broadcastable axis to the u1 units so they line up with the u2 axis.
     u1_padded = vmap[self.u1].dimshuffle(0, 1, 'x')
     weighted = projected * u1_padded
     # sum out the u1 axis.
     return T.sum(weighted, axis=1)  # (mb, u2)
 def term_u1(vmap):
     """Compute the term for the ``u1`` units from the ``u0`` and ``u2`` units."""
     # contract axis 1 of the u0 units with the first axis of W.
     projected = tensordot(vmap[self.u0], W, axes=([1], [0]))  # (mb, u1, u2)
     # insert a broadcastable axis into the u2 units so they line up with the u1 axis.
     u2_padded = vmap[self.u2].dimshuffle(0, 'x', 1)
     weighted = projected * u2_padded
     # sum out the u2 axis.
     return T.sum(weighted, axis=2)  # (mb, u1)
 def energy_term(self, vmap):
     """Return the negated contraction of ``vmap[self.u]`` with ``self.var``.

     Axes ``1..ud`` of the units are contracted with the first ``ud`` axes of
     the parameter; axis 0 of the units is left untouched.
     """
     units = vmap[self.u]
     params = self.var
     unit_axes = range(1, self.ud + 1)
     param_axes = range(0, self.ud)
     contraction = tensordot(units, params, axes=(unit_axes, param_axes))
     return -contraction
 def energy_term(self, vmap):
     """Return the negated contraction of ``vmap[self.u]`` with ``self.var``.

     Axes ``1..ud`` of the units are contracted with the first ``ud`` axes of
     the parameter; axis 0 of the units is left untouched.
     """
     units = vmap[self.u]
     params = self.var
     unit_axes = range(1, self.ud + 1)
     param_axes = range(0, self.ud)
     contraction = tensordot(units, params, axes=(unit_axes, param_axes))
     return -contraction
Beispiel #11
0
 def term_u2(vmap):
     """Compute the term for the ``u2`` units from the ``u0`` and ``u1`` units."""
     # contract axis 1 of the u0 units with the first axis of W.
     projected = tensordot(vmap[self.u0], W, axes=([1], [0]))  # (mb, u1, u2)
     # append a broadcastable axis to the u1 units so they line up with the u2 axis.
     u1_padded = vmap[self.u1].dimshuffle(0, 1, 'x')
     weighted = projected * u1_padded
     # sum out the u1 axis.
     return T.sum(weighted, axis=1)  # (mb, u2)
Beispiel #12
0
 def term_u1(vmap):
     """Compute the term for the ``u1`` units from the ``u0`` and ``u2`` units."""
     # contract axis 1 of the u0 units with the first axis of W.
     projected = tensordot(vmap[self.u0], W, axes=([1], [0]))  # (mb, u1, u2)
     # insert a broadcastable axis into the u2 units so they line up with the u1 axis.
     u2_padded = vmap[self.u2].dimshuffle(0, 'x', 1)
     weighted = projected * u2_padded
     # sum out the u2 axis.
     return T.sum(weighted, axis=2)  # (mb, u1)