def __init__(self, x_k, z_depth, z_k, scale=1e-1, **kwargs):
    """Set up a top-down parameterization with one weight tensor per level.

    Level ``d`` owns a shared tensor of shape ``(x_k, z_k**d, z_k)`` that is
    appended to ``self.params``. The level-0 output of ``softmax_nd`` is
    used directly; each deeper level multiplies the previous level's
    flattened bucket probabilities by its own ``softmax_nd`` output and
    flattens the product again.

    Args:
        x_k: size of the leading axis of every weight tensor.
        z_depth: number of levels.
        z_k: branching factor (size of the trailing axis).
        scale: half-width of the uniform initialisation interval.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(ParameterizationFull, self).__init__(
        x_k=x_k, z_depth=z_depth, z_k=z_k, **kwargs)

    # Create the per-level weights; draw order matches level order.
    for level in range(z_depth):
        parent_count = int(z_k**level)
        start = np.random.uniform(
            -scale, scale, (x_k, parent_count, z_k)).astype(self.type_np)
        # (x_k, parent_count, z_k)
        self.params.append(
            theano.shared(start, name="pz_{}".format(level)))

    # p(z|x) for the first level: the middle axis has length 1, drop it.
    level_probs = [
        T.reshape(softmax_nd(self.params[0]), (x_k, z_k))]  # (x_k, z_k)

    # Deeper levels: parent bucket probability times the child softmax.
    for level in range(1, z_depth):
        child = softmax_nd(self.params[level])  # (x_k, parents, z_k)
        parent = level_probs[-1].dimshuffle((0, 1, 'x'))  # (x_k, parents, 1)
        joint = parent * child  # (x_k, parents, z_k)
        flat = T.reshape(
            joint,
            (joint.shape[0], joint.shape[1] * joint.shape[2]))  # (x_k, children)
        level_probs.append(flat)

    self.pzs = level_probs
    self.encoding = self.calc_encoding()
def __init__(self, x_k, z_depth, z_k, scale=1e-1, weight=1e2, **kwargs):
    """Set up independent per-level distributions tied by a consistency loss.

    Level ``d`` owns a shared matrix of shape ``(x_k, z_k**(d + 1))`` that is
    appended to ``self.params``; ``self.pzs`` holds ``softmax_nd`` of each.
    ``self.loss`` penalises, for every pair of a coarser and a finer level,
    the squared difference between the coarser distribution and the finer
    one summed down to the coarser bucket count.

    Args:
        x_k: number of rows of every weight matrix.
        z_depth: number of levels.
        z_k: branching factor between consecutive levels.
        scale: half-width of the uniform initialisation interval.
        weight: multiplier applied to every consistency term.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(ParameterizationReg, self).__init__(
        x_k=x_k, z_depth=z_depth, z_k=z_k, **kwargs)

    for depth in range(z_depth):
        buckets = int(z_k**(depth + 1))
        initial_weight = np.random.uniform(
            -scale, scale, (x_k, buckets)).astype(self.type_np)
        pz_weight = theano.shared(
            initial_weight, name="pz_{}".format(depth))  # (x_k, buckets)
        self.params.append(pz_weight)

    # calculate p(z|x): every level is parameterized on its own
    pzs = [softmax_nd(self.params[depth]) for depth in range(z_depth)]
    self.pzs = pzs

    weight = T.constant(weight)

    # Consistency loss over pairs (coarse, fine) with coarse < fine.
    # Pairing a level with itself contributes exactly zero, so the inner
    # loop starts one level below the coarse one.
    self.loss = 0.
    for coarse_idx in range(z_depth - 1):
        coarse = pzs[coarse_idx]  # (x_k, b0)
        for fine_idx in range(coarse_idx + 1, z_depth):
            fine = pzs[fine_idx]  # (x_k, b1)
            grouped = T.reshape(
                fine, (x_k, coarse.shape[1], -1))  # (x_k, b0, b1 / b0)
            summed = T.sum(grouped, axis=2)  # (x_k, b0)
            l2 = T.sum(T.square(summed - coarse), axis=None)
            self.loss += l2 * weight

    self.encoding = self.calc_encoding()
def __init__(self, x_k, z_depth, z_k, scale=1e-1, weight=1e2, **kwargs):
    """Set up a bottom-up parameterization with learned bucket merging.

    A shared matrix of shape ``(x_k, z_k**z_depth)`` parameterizes the
    finest level. For every step up, a shared matrix of shape
    ``(z_k**d, z_k**(d - 1))`` is created, passed through ``softmax_nd`` and
    right-multiplied onto the finer distribution to obtain the coarser one.
    ``self.pzs`` is ordered from coarsest to finest.

    Args:
        x_k: number of rows of the finest-level weight matrix.
        z_depth: number of levels.
        z_k: branching factor between consecutive levels.
        scale: half-width of the uniform initialisation interval.
        weight: accepted for signature compatibility; not used here.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(ParameterizationBU, self).__init__(
        x_k=x_k, z_depth=z_depth, z_k=z_k, **kwargs)

    # Finest level: one logit per leaf bucket.
    leaf_count = int(z_k**z_depth)
    leaf_init = np.random.uniform(
        -scale, scale, (x_k, leaf_count)).astype(self.type_np)
    leaf_logits = theano.shared(leaf_init, name="pz_weight")  # (x_k, leaf_count)
    self.params.append(leaf_logits)

    current = softmax_nd(leaf_logits)  # (x_k, leaf_count)
    fine_to_coarse = [current]

    # Walk upwards, creating each merge matrix and applying it immediately.
    for fine_depth in range(z_depth, 1, -1):
        coarse_depth = fine_depth - 1
        fine_count = int(z_k**fine_depth)
        coarse_count = int(z_k**coarse_depth)
        merge_init = np.random.uniform(
            -scale, scale, (fine_count, coarse_count)).astype(self.type_np)
        merge_logits = theano.shared(
            merge_init,
            name="pc_weight_{}_{}".format(fine_depth, coarse_depth))
        self.params.append(merge_logits)  # (fine_count, coarse_count)

        merge = softmax_nd(merge_logits)  # (fine_count, coarse_count)
        current = T.dot(current, merge)  # (x_k, coarse_count)
        fine_to_coarse.append(current)

    # Expose the levels coarsest-first.
    self.pzs = fine_to_coarse[::-1]
    self.encoding = None
def calc_depth(self, pz, py_weight, cond_pt):
    """Return the expected negative log-likelihood per row for one level.

    Args:
        pz: bucket probabilities, (n, buckets, z_k).
        py_weight: logits fed to ``softmax_nd``, (buckets, z_k, x_k).
        cond_pt: target weights per row, (n, x_k).

    Returns:
        A rank-1 tensor of length n: for every row, the ``cond_pt``-weighted
        negative log of ``softmax_nd(py_weight)`` summed over the last axis,
        then averaged over buckets with weights ``pz``.
    """
    smoothing = 1e-9  # keeps the log finite
    neg_log_py = -T.log(smoothing + softmax_nd(py_weight))  # (buckets, z_k, x_k)

    # Broadcast both operands to (n, buckets, z_k, x_k).
    targets = cond_pt.dimshuffle((0, 'x', 'x', 1))
    costs = neg_log_py.dimshuffle(('x', 0, 1, 2))
    weighted = targets * costs

    per_bucket = T.sum(weighted, axis=3)  # (n, buckets, z_k)
    per_row = T.sum(per_bucket * pz, axis=[1, 2])  # (n,)
    assert per_row.ndim == 1
    return per_row
def __init__(self, x_k, z_depth, z_k, scale=1e-1, weight=1e2, **kwargs):
    """Set up a parameterization where coarse levels are sums of the finest.

    A single shared matrix of shape ``(x_k, z_k**z_depth)`` is created and
    appended to ``self.params``. The finest distribution is ``softmax_nd`` of
    it; every coarser level is obtained by grouping consecutive leaf buckets
    and summing them.

    Args:
        x_k: number of rows of the weight matrix.
        z_depth: number of levels.
        z_k: branching factor between consecutive levels.
        scale: half-width of the uniform initialisation interval.
        weight: accepted for signature compatibility; not used here.
        **kwargs: forwarded unchanged to the parent constructor.
    """
    super(ParameterizationSum, self).__init__(
        x_k=x_k, z_depth=z_depth, z_k=z_k, **kwargs)

    leaf_count = int(z_k**z_depth)
    leaf_init = np.random.uniform(
        -scale, scale, (x_k, leaf_count)).astype(self.type_np)
    leaf_logits = theano.shared(leaf_init, name="pz_weight")  # (x_k, leaf_count)
    self.params.append(leaf_logits)
    leaf_probs = softmax_nd(leaf_logits)  # (x_k, leaf_count)

    # Level with z_k**level buckets: fold the leaves into that many groups.
    coarse_levels = [
        T.sum(T.reshape(leaf_probs, (x_k, int(z_k**level), -1)), axis=2)
        for level in range(1, z_depth)
    ]

    self.pzs = coarse_levels + [leaf_probs]
    self.encoding = self.calc_encoding()
def __init__(self, cooccurrence, z_depth, z_k, opt, schedule,
             pz_regularizer=None, pz_weight_regularizer=None,
             eps=1e-9, scale=1e-2):
    """Build the full-batch model and compile its Theano functions.

    The co-occurrence matrix is normalised to sum to one. A single shared
    matrix ``pz_weight`` of shape ``(x_k, z_k**z_depth)`` parameterizes the
    finest distribution; coarser levels are sums over groups of leaf
    buckets. Per-level losses are combined with ``schedule``.

    Args:
        cooccurrence: 2-D array of co-occurrence counts; its first
            dimension defines ``x_k``.
        z_depth: number of levels.
        z_k: branching factor between consecutive levels.
        opt: optimizer exposing ``get_updates(params, constraints, loss)``
            and a ``weights`` list.
        schedule: 1-D array of length ``z_depth`` with per-level weights.
        pz_regularizer: optional callable applied to each level's
            distribution; results are weighted by ``schedule``.
        pz_weight_regularizer: optional callable applied to ``pz_weight``.
        eps: small constant guarding the division and the logarithm.
        scale: half-width of the uniform initialisation interval.

    Attributes set: ``train_fun``, ``val_fun`` (both return
    ``[nlls, reg_loss, loss]``), ``encodings_fun`` (per-level base-``z_k``
    digits of the most likely leaf, shape ``(x_k, z_depth)``), ``z_fun``
    (most likely leaf index per row) and ``weights``.
    """
    cooccurrence = cooccurrence.astype(np.float32)
    cooccurrence = cooccurrence / np.sum(cooccurrence, axis=None)
    co = theano.shared(cooccurrence, name='cooccurrence')

    self.cooccurrence = cooccurrence
    self.z_depth = z_depth
    self.z_k = z_k
    self.x_k = cooccurrence.shape[0]
    self.opt = opt
    self.schedule = schedule
    self.pz_regularizer = pz_regularizer
    self.pz_weight_regularizer = pz_weight_regularizer
    assert schedule.shape[0] == z_depth
    assert schedule.ndim == 1

    x_k = cooccurrence.shape[0]
    schedule = T.constant(schedule.astype(np.float32),
                          dtype='float32', name="schedule")  # (z_depth,)

    # parameterization
    buckets = int(z_k**z_depth)
    initial_weight = np.random.uniform(
        -scale, scale, (x_k, buckets)).astype(np.float32)
    pz_weight = theano.shared(initial_weight, name="pz_weight")  # (x_k, buckets)
    self.params = [pz_weight]
    pz0 = softmax_nd(pz_weight)  # (x_k, buckets)

    # calculate p(z|x): coarse levels are group sums of the leaf buckets
    pzs = []
    for depth in range(z_depth - 1):
        b0 = int(z_k**(depth + 1))
        grouped = T.reshape(pz0, (x_k, b0, -1))  # (x_k, b0, buckets / b0)
        pzs.append(T.sum(grouped, axis=2))  # (x_k, b0)
    pzs.append(pz0)
    self.pzs = pzs

    # calculate nlls, one scalar per level
    nll_array = []
    for pz in pzs:
        p = T.dot(co, pz)  # (x_k, b0)
        marg = T.sum(p, axis=0, keepdims=True)  # (1, b0)
        cond = p / (marg + eps)  # (x_k, b0)
        nll_array.append(T.sum(p * -T.log(cond + eps), axis=None))
    nlls = T.stack(nll_array)  # (z_depth,)
    loss = T.sum(schedule * nlls, axis=0)  # scalar

    # regularization
    reg_loss = T.constant(0.)
    if pz_weight_regularizer:
        reg_loss = reg_loss + pz_weight_regularizer(pz_weight)
    if pz_regularizer:
        pz_loss = [pz_regularizer(pz) for pz in pzs]
        reg_loss = reg_loss + T.sum(T.stack(pz_loss) * schedule)

    # training
    loss = loss + reg_loss
    updates = opt.get_updates(self.params, {}, loss)

    # encoding: write the most likely leaf index in base z_k, most
    # significant digit first. Integer division (rather than a >= test)
    # is needed so that digits larger than 1 are recovered when z_k > 2.
    z = T.argmax(pz0, axis=1)  # (x_k,) [int 0-buckets]
    remainder = z
    digits = []
    for depth in range(z_depth):
        place = int(z_k**(z_depth - depth - 1))
        digit = remainder // place  # (x_k,) [int 0-z_k]
        remainder = remainder - place * digit
        digits.append(digit)
    encodings = T.stack(digits, axis=1)  # (x_k, z_depth)

    # Theano functions
    self.train_fun = theano.function([], [nlls, reg_loss, loss],
                                     updates=updates)
    self.val_fun = theano.function([], [nlls, reg_loss, loss])
    self.encodings_fun = theano.function([], encodings)
    self.z_fun = theano.function([], z)
    self.weights = self.params + opt.weights
def __init__(self, cooccurrence, z_depth, z_k, opt, schedule, type_np=np.float32, type_t='float32', regularizer=None):
    """Build the mini-batch model and compile its Theano functions.

    Two sets of shared weights are created per level ``d``: ``pz_d`` of
    shape ``(x_k, z_k**d, z_k)`` and ``py_d`` of shape
    ``(z_k**d, z_k, x_k)``. Training takes a vector of row indices and
    minimises the schedule-weighted sum of per-level losses computed by
    ``calc_depth``, each row weighted by its marginal probability.

    Args:
        cooccurrence: 2-D array of co-occurrence counts; its first
            dimension defines ``x_k``. Rows that sum to zero produce
            non-finite marginals/conditionals, since both are obtained by
            dividing by row sums.
        z_depth: number of levels.
        z_k: branching factor between consecutive levels.
        opt: optimizer exposing ``get_updates(params, constraints, loss)``
            and a ``weights`` list.
        schedule: array of per-level weights, (z_depth,).
        type_np: NumPy dtype used for constants and initial weights.
        type_t: matching Theano dtype string.
        regularizer: optional callable applied to every parameter; the sum
            is scaled by the marginal mass of the batch.

    Attributes set: ``train_fun`` (takes indices, returns
    ``[wnlls, reg_loss, loss]``), ``encodings_fun`` (greedy per-level code,
    shape ``(x_k, z_depth)``) and ``all_weights``.
    """
    cooccurrence = cooccurrence.astype(type_np)
    self.cooccurrence = cooccurrence
    self.type_np = type_np
    self.type_t = type_t
    self.z_depth = z_depth
    scale = 1e-2
    x_k = cooccurrence.shape[0]
    schedule = T.constant(schedule.astype(type_np),
                          dtype=type_t, name="schedule")  # (z_depth,)

    # marginal probability
    total = np.sum(cooccurrence, axis=None)
    _margin = np.sum(cooccurrence, axis=1) / total  # (x_k,)
    marg_p = T.constant(_margin, dtype=type_t)
    # log-marginal shifted so that its maximum is zero
    log_marg_p = T.constant(np.log(_margin) - np.max(np.log(_margin)),
                            dtype=type_t)  # (x_k,)

    # conditional probability: each row normalised to sum to one
    _cond_p = cooccurrence / np.sum(cooccurrence, axis=1, keepdims=True)
    cond_p = T.constant(_cond_p, dtype=type_t)  # (x_k, x_k)

    # parameters
    # p(z|x) weights
    pz_weights = []
    for depth in range(z_depth):
        buckets = int(z_k ** depth)
        initial_weight = np.random.uniform(
            -scale, scale, (x_k, buckets, z_k)).astype(type_np)
        pz_weight = theano.shared(
            initial_weight, name="pz_{}".format(depth))  # (x_k, buckets, z_k)
        pz_weights.append(pz_weight)

    # p(y|z) weights
    py_weights = []
    for depth in range(z_depth):
        buckets = int(z_k ** depth)
        initial_py = np.random.uniform(
            -scale, scale, (buckets, z_k, x_k)).astype(type_np)
        py_weight = theano.shared(
            initial_py, name='py_{}'.format(depth))  # (buckets, z_k, x_k)
        py_weights.append(py_weight)
    params = pz_weights + py_weights

    # indices of the rows in the current batch
    idx = T.ivector()  # (n,)
    batch = idx.shape[0]

    # calculate p(z|x) for the batch, level by level
    p0 = T.ones((batch, 1, 1), dtype=type_t)  # (n, b0, z_k)
    pzs = []
    for depth in range(z_depth):
        p = softmax_nd(pz_weights[depth][idx, :, :])  # (n, b1, z_k)
        h = T.reshape(p0, (p0.shape[0], p0.shape[1] * p0.shape[2]))  # (n, b1)
        p1 = (h.dimshuffle((0, 1, 'x'))) * p  # (n, b1, z_k)
        p0 = p1
        pzs.append(p1)

    # loss calculation
    cond_pt = cond_p[idx, :]  # (n, x_k)
    marg_pt = marg_p[idx]  # (n,)
    nlls = [
        self.calc_depth(pzs[depth], py_weights[depth] + log_marg_p, cond_pt)
        for depth in range(z_depth)
    ]  # each (n,)
    nlls = T.stack(nlls, axis=1)  # (n, z_depth)
    wnlls = T.sum(nlls * (marg_pt.dimshuffle((0, 'x'))), axis=0)  # (z_depth,)
    loss = T.sum(schedule * wnlls, axis=0)  # scalar

    # Start from a symbolic zero: reg_loss is one of the compiled outputs
    # and theano.function only accepts Variables there, so a Python float
    # would fail when no regularizer is supplied.
    reg_loss = T.constant(0., dtype=type_t)
    if regularizer:
        for p in params:
            reg_loss = reg_loss + regularizer(p)
        reg_loss = reg_loss * T.sum(marg_pt)  # scale to size of batch
    loss = loss + reg_loss
    updates = opt.get_updates(params, {}, loss)
    train = theano.function([idx], [wnlls, reg_loss, loss], updates=updates)

    # Discrete encoding: greedily pick the best child at every level.
    e0 = T.zeros((x_k,), dtype='int32')  # (x_k,)
    encs = []
    for depth in range(z_depth):
        p = softmax_nd(pz_weights[depth])  # (x_k, buckets, z_k)
        enc = T.argmax(p[T.arange(p.shape[0]), e0, :], axis=1)  # (x_k,) [int 0-z_k]
        assert enc.ndim == 1
        # Same flattening as the reshape of (n, b, z_k) to (n, b * z_k)
        # above: child k of bucket b lands at index b * z_k + k.
        e1 = (e0 * z_k) + enc  # (x_k,) [int 0-b1]
        e0 = e1
        encs.append(enc)
    encoding = T.stack(encs, axis=1)  # (x_k, z_depth)
    encodings = theano.function([], encoding)

    self.train_fun = train
    self.encodings_fun = encodings
    self.all_weights = params + opt.weights