def dlogpxmc_dw(self, w, x, n_batch):
    # Monte Carlo estimate of d log p(x) / dw: tile the minibatch,
    # sample (x, z), convert to Theano layout, and evaluate the
    # compiled gradient function.
    x = self.tiled_x(x, n_batch)
    x, z, _ = self.gen_xz(w, x, {}, n_batch=n_batch)
    x, z = self.xz_to_theano(x, z)
    A = self.get_A(x)
    r = self.f_dlogpxmc_dw(*ndict.orderedvals((w, x, z)) + [A])
    # r[0] is the log-likelihood estimate; the rest are per-parameter gradients
    return r[0], dict(zip(ndict.ordered(w).keys(), r[1:]))
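# Usage sketch (not part of the original source): dlogpxmc_dw returns a pair
# (Monte Carlo estimate of log p(x), dict mapping parameter names to
# gradients), so a caller can apply a plain gradient-ascent update. The
# helper below is a hypothetical illustration; it only assumes dicts of
# numpy arrays with matching keys.
def _demo_ascent_step(w, gw, learning_rate=1e-3):
    # One gradient-ascent step on the log-likelihood estimate:
    # w and gw are e.g. the (r[0], dict) pair returned above.
    return {name: w[name] + learning_rate * gw[name] for name in w}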
def gen_xz(self, w, x, z, n_batch=0):
    # Generate (x, z); only conditional generation given x['x'] is supported.
    if 'x' not in x:
        raise Exception('Not implemented')
    if n_batch == 0:
        n_batch = x['x'].shape[1]
    A = np.ones((1, n_batch))
    _z = {}
    if 'y' not in x:
        # Sample a one-hot label per datapoint from the class posterior q(y|x)
        w = ndict.ordered(w)
        py = self.dist_px['y'](*([x['x']] + list(w.values()) + [A]))
        _z['py'] = py
        x['y'] = np.zeros(py.shape)
        for i in range(n_batch):
            x['y'][:, i] = np.random.multinomial(n=1, pvals=py[:, i])
    return x, z, _z
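# Minimal, self-contained sketch of the label-sampling step inside gen_xz:
# each column of `py` is a categorical distribution over n_y classes, and
# np.random.multinomial(n=1, ...) draws one one-hot sample per column. The
# names and sizes below are illustrative; only numpy is assumed.
def _demo_sample_onehot():
    import numpy as np
    n_y, n_batch = 3, 5
    py = np.random.dirichlet(np.ones(n_y), size=n_batch).T  # (n_y, n_batch); columns sum to 1
    y = np.zeros(py.shape)
    for i in range(n_batch):
        y[:, i] = np.random.multinomial(n=1, pvals=py[:, i])
    assert (y.sum(axis=0) == 1).all()  # exactly one class per column
    return y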
def xz_to_theano(self, x, z):
    # Stack the ordered dict entries into single tensors of shape
    # (n_vars, n_dim, n_batch), the layout the Theano functions expect.
    _x = {'x': np.dstack(list(ndict.ordered(x).values())).transpose((2, 0, 1))}
    _z = {'eps': np.dstack(list(ndict.ordered(z).values())).transpose((2, 0, 1))}
    return _x, _z
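# Shape check for xz_to_theano (illustrative, numpy only): stacking K arrays
# of shape (n_dim, n_batch) with np.dstack gives (n_dim, n_batch, K); the
# transpose((2, 0, 1)) then yields (K, n_dim, n_batch), i.e. one leading
# slice per ordered dict entry.
def _demo_stack_shapes():
    import numpy as np
    n_dim, n_batch, K = 4, 8, 3
    arrays = [np.zeros((n_dim, n_batch)) for _ in range(K)]
    stacked = np.dstack(arrays).transpose((2, 0, 1))
    assert stacked.shape == (K, n_dim, n_batch)
    return stacked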
def dlogpw_dw(self, w):
    # Log-prior log p(w) and its gradient w.r.t. each named parameter
    w = ndict.ordered(w)
    r = self.f_dlogpw_dw(*w.values())
    return r[0], dict(zip(w.keys(), r[1:]))
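# Sketch of what f_dlogpw_dw might compute under an isotropic Gaussian prior
# p(w) = N(0, sigma^2 I). The Gaussian form is an assumption made here for
# illustration; the compiled Theano function is the authoritative definition.
# Per parameter tensor:
#   log p(w) = -0.5 * sum(w^2) / sigma^2 + const,  d log p(w) / dw = -w / sigma^2
def _demo_gaussian_logprior(w_dict, sigma=1.0):
    import numpy as np
    logpw = sum(-0.5 * np.sum(w ** 2) / sigma ** 2 for w in w_dict.values())
    grads = {name: -w / sigma ** 2 for name, w in w_dict.items()}
    return logpw, grads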
def f_df(w, minibatch):
    u = w['u']
    v = w['v']
    w = w['w']
    i_minibatch = minibatch[0]

    # Labeled minibatch: sample x from its encoded Gaussian
    _x_l = minibatch[1]
    x_minibatch_l = {'x': np.random.normal(_x_l['mean'], np.exp(0.5 * _x_l['logvar'])),
                     'y': _x_l['y']}
    eps_minibatch_l = model.gen_eps(n_batch_l)

    # Unlabeled minibatch
    _x_u = minibatch[2]
    x_minibatch_u = {'x': np.random.normal(_x_u['mean'], np.exp(0.5 * _x_u['logvar'])),
                     'y': _x_u['y']}
    eps_minibatch_u = [model.gen_eps(n_batch_u) for i in range(n_y)]

    # === Gradient for labeled data
    # Gradient of -KL(q(z|y,x) ~p(x,y) || p(x,y,z))
    logpx, logpz, logqz, gv_labeled, gw_labeled = model.dL_dw(v, w, x_minibatch_l, eps_minibatch_l)
    # Gradient of the classification term E_{~p(x,y)}[log q(y|x)]
    logqy, _, gu_labeled, _ = model_qy.dlogpxz_dwz(u, x_minibatch_l, {})

    # Reweight gu_labeled and logqy
    #beta = alpha / (1.-alpha) * (1. * n_unlabeled / n_labeled)  # old
    beta = alpha * (1. * n_tot / n_labeled)
    for i in u:
        gu_labeled[i] *= beta
    logqy *= beta

    L_labeled = logpx + logpz - logqz + logqy

    # === Gradient for unlabeled data
    # -KL(q(z|x,y)q(y|x) ~p(x) || p(x,y,z))
    # The outer expectation over q(y|x) is taken as an explicit sum over
    # labels (instead of sampling).
    u = ndict.ordered(u)
    py = model_qy.dist_px['y'](*([x_minibatch_u['x']] + list(u.values()) + [np.ones((1, n_batch_u))]))

    if True:
        # Original approach: one weighted pass per label
        _L = np.zeros((n_y, n_batch_u))
        gv_unlabeled = {i: 0 for i in v}
        gw_unlabeled = {i: 0 for i in w}
        for label in range(n_y):
            new_y = np.zeros((n_y, n_batch_u))
            new_y[label, :] = 1
            eps = eps_minibatch_u[label]
            L_unweighted, L_weighted, _gv, _gw = model.dL_weighted_dw(
                v, w, {'x': x_minibatch_u['x'], 'y': new_y}, eps, py[label:label + 1, :])
            _L[label:label + 1, :] = L_unweighted
            for i in v:
                gv_unlabeled[i] += _gv[i]
            for i in w:
                gw_unlabeled[i] += _gw[i]
    else:
        # Alternative: one batched pass over all labels at once. Should be
        # more efficient, but is not in practice.
        _y = np.zeros((n_y, n_batch_u * n_y))
        for label in range(n_y):
            _y[label, label * n_batch_u:(label + 1) * n_batch_u] = 1
        _x = np.tile(x_minibatch_u['x'].astype(np.float32), (1, n_y))
        eps = model.gen_eps(n_batch_u * n_y)
        L_unweighted, L_weighted, gv_unlabeled, gw_unlabeled = model.dL_weighted_dw(
            v, w, {'x': _x, 'y': _y}, eps, py.reshape((1, -1)))
        _L = L_unweighted.reshape((n_y, n_batch_u))

    r = f_du(*([x_minibatch_u['x']] + list(u.values()) + [np.zeros((1, n_batch_u)), _L]))
    L_unlabeled = r[0]
    gu_unlabeled = dict(zip(u.keys(), r[1:]))

    # Gradient of the parameter priors
    logpu, gu_prior = model_qy.dlogpw_dw(u)
    logpv, logpw, gv_prior, gw_prior = model.dlogpw_dw(v, w)

    # Combine gradients and objective (negated: f is minimized)
    gu = {i: ((gu_labeled[i] + gu_unlabeled[i]) * n_minibatches + gu_prior[i]) / (-n_tot) for i in u}
    gv = {i: ((gv_labeled[i] + gv_unlabeled[i]) * n_minibatches + gv_prior[i]) / (-n_tot) for i in v}
    gw = {i: ((gw_labeled[i] + gw_unlabeled[i]) * n_minibatches + gw_prior[i]) / (-n_tot) for i in w}

    f = ((L_labeled.sum() + L_unlabeled.sum()) * n_minibatches + logpu + logpv + logpw) / (-n_tot)
    L[0] += f
    n_L[0] += 1
    return f, {'u': gu, 'v': gv, 'w': gw}
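# Sketch of the unlabeled objective that f_du is responsible for. That f_du
# computes exactly this is an assumption stated here for clarity: with
# per-label bounds _L of shape (n_y, n_batch_u) and class posteriors
# py = q(y|x), the standard semi-supervised bound marginalizes y explicitly
# and adds the entropy of q(y|x):
#   U(x) = sum_y q(y|x) * L(x, y) + H(q(y|x))
def _demo_unlabeled_bound(py, _L):
    import numpy as np
    expected_L = (py * _L).sum(axis=0)                 # E_{q(y|x)}[L(x, y)]
    entropy = -(py * np.log(py + 1e-12)).sum(axis=0)   # H(q(y|x))
    return expected_L + entropy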