def subgrad(self, i, w, z):
    @interactive
    def dlogpxz_dwz(w, z, k):
        _x = ndict.getCols(my_x, k * my_n_batch, (k + 1) * my_n_batch).copy()  # @UndefinedVariable
        if z is None:
            logpx, gw = my_model.dlogpxmc_dw(w, _x)  # @UndefinedVariable
            return logpx, None, gw, None
        else:
            return my_model.dlogpxz_dwz(w, _x, z)  # @UndefinedVariable

    # Dispatch one task per engine, each working on its own column block of z
    tasks = []
    for j in range(len(self.c)):
        _z = z
        if _z is not None:
            _z = ndict.getCols(z, j * self.n_batch, (j + 1) * self.n_batch)
        tasks.append(self.c.load_balanced_view().apply_async(dlogpxz_dwz, w, _z, i))

    res = [task.get() for task in tasks]

    # Sum the per-engine values and gradients
    v, gw, gz = res[0]
    for k in range(1, len(self.c)):
        vi, gwi, gzi = res[k]
        v += vi
        for j in gw:
            gw[j] += gwi[j]
        for j in gz:
            gz[j] += gzi[j]
    return v, gw, gz
def ll(w, z, k):
    _x = ndict.getCols(my_x, k * my_n_batch, (k + 1) * my_n_batch)  # @UndefinedVariable
    if z is None:
        return my_model.logpxmc(w, _x), None  # @UndefinedVariable
    else:
        return my_model.logpxz(w, _x, z)  # @UndefinedVariable
def doEpoch():
    from collections import OrderedDict

    n_tot = x['x'].shape[1]
    idx_from = 0
    L = 0
    scores = []
    while idx_from < n_tot:
        idx_to = min(n_tot, idx_from + n_batch)
        x_minibatch = ndict.getCols(x, idx_from, idx_to)
        idx_from += n_batch
        if byteToFloat:
            x_minibatch['x'] = x_minibatch['x'].astype(np.float32) / 256.
        if bernoulli_x:
            x_minibatch['x'] = np.random.binomial(n=1, p=x_minibatch['x']).astype(np.float32)

        # Do gradient ascent step
        L += model.evalAndUpdate(x_minibatch, {}).sum()
        # model.profmode.print_summary()

    L /= n_tot
    return L
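# Note on the bernoulli_x branch above: np.random.binomial(n=1, p=...) draws a fresh binary
# sample per pixel, treating each intensity in [0, 1] as a Bernoulli probability
# (dynamic binarization). A minimal standalone sketch with hypothetical toy data:
import numpy as np

probs = np.array([[0.1, 0.9], [0.5, 0.0]], dtype=np.float32)  # pixel intensities in [0, 1]
binary = np.random.binomial(n=1, p=probs).astype(np.float32)  # stochastic 0/1 sample, same shape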
def dlogpxz_dwz(w, z, k):
    _x = ndict.getCols(my_x, k * my_n_batch, (k + 1) * my_n_batch).copy()  # @UndefinedVariable
    if z is None:
        logpx, gw = my_model.dlogpxmc_dw(w, _x)  # @UndefinedVariable
        return logpx, None, gw, None
    else:
        return my_model.dlogpxz_dwz(w, _x, z)  # @UndefinedVariable
def infer(data, n_batch=1000):
    size = data['x'].shape[1]
    # res = np.zeros((sum(n_hidden), size))
    # res3 = np.zeros((n_z, size))
    # res1 = np.zeros((n_z, size))
    predy = []
    for i in range(0, size, n_batch):
        idx_to = min(size, i + n_batch)
        x_batch = ndict.getCols(data, i, idx_to)  # may have bugs
        nn_batch = idx_to - i

        _x, _z, _z_confab = model.gen_xz(x_batch, {}, nn_batch)
        x_samples = _z_confab['x']
        predy += list(_z_confab['predy'])
        # for (hi, hidden) in enumerate(_z_confab['hidden']):
        #     res[sum(n_hidden[:hi]):sum(n_hidden[:hi+1]), i:i+nn_batch] = hidden
        # res3[:, i:i+nn_batch] = _z_confab['logvar']
        # res1[:, i:i+nn_batch] = _z_confab['mean']

    stats = dict()
    # if epoch == -1:
    #     print 'features: ', res.shape
    return predy  # (res, predy, _z, res1, res3)
def doEpoch():
    from collections import OrderedDict

    n_tot = next(iter(x.values())).shape[1]
    idx_from = 0
    L = 0
    while idx_from < n_tot:
        idx_to = min(n_tot, idx_from + n_batch)
        x_minibatch = ndict.getCols(x, idx_from, idx_to)
        idx_from += n_batch
        if byteToFloat:
            x_minibatch['x'] = x_minibatch['x'].astype(np.float32) / 256.
        if bernoulli_x:
            x_minibatch['x'] = np.random.binomial(n=1, p=x_minibatch['x']).astype(np.float32)

        # Get gradient
        # raise Exception()
        L += model.evalAndUpdate(x_minibatch, {}).sum()
        # model.profmode.print_summary()

    L /= n_tot
    return L
def est_loglik(self, x, n_batch, n_samples=1, byteToFloat=False):
    n_tot = next(iter(x.values())).shape[1]

    px = 0        # estimate of marginal likelihood
    lowbound = 0  # estimate of lower bound of marginal likelihood
    for _ in range(n_samples):
        _L = np.zeros((1, 0))
        i = 0
        # Iterate over the examples in x
        while i < n_tot:
            i_to = min(n_tot, i + n_batch)
            # Get examples from i to i_to
            _x = ndict.getCols(x, i, i_to)
            if byteToFloat:
                _x = {key: _x[key].astype(np.float32) / 256. for key in _x}
            # Stack arrays horizontally, i.e. append the evaluation of this batch
            _L = np.hstack((_L, self.eval(_x, {})))
            i += n_batch
        lowbound += _L.mean()
        px += np.exp(_L)

    lowbound /= n_samples
    logpx = np.log(px / n_samples).mean()
    return lowbound, logpx
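# For reference, the two quantities returned above: `lowbound` is the variational lower bound
# averaged over data points and over the n_samples repetitions, while `logpx` is the
# importance-sampling style estimate log p(x) ~= log( (1/S) * sum_s exp(L_s(x)) ), averaged
# over data points, where L_s(x) is the bound evaluated with the s-th set of samples.
# A hedged usage sketch (assumes `model` exposes this method and `test_x` is a dict of
# column-major data arrays, as elsewhere in this code):
#     lowbound, logpx = model.est_loglik(test_x, n_batch=100, n_samples=10)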
def subval(self, i, w, z):
    raise Exception("TODO")

    # Replaced my_model.nbatch with my_n_batch, this is UNTESTED
    @interactive
    def ll(w, z, k):
        _x = ndict.getCols(my_x, k * my_n_batch, (k + 1) * my_n_batch)  # @UndefinedVariable
        if z is None:
            return my_model.logpxmc(w, _x), None  # @UndefinedVariable
        else:
            return my_model.logpxz(w, _x, z)  # @UndefinedVariable

    tasks = []
    for j in range(len(self.c)):
        _z = z
        if _z is not None:
            _z = ndict.getCols(z, j * self.n_batch, (j + 1) * self.n_batch)
        tasks.append(self.c.load_balanced_view().apply_async(ll, w, _z, i))

    res = [task.get() for task in tasks]

    raise Exception("TODO: implementation with uncoupled logpx and logpz")
    return sum(res)
def est_loglik(self, x, n_batch, n_samples=1, byteToFloat=False):
    n_tot = x.itervalues().next().shape[1]

    px = 0        # estimate of marginal likelihood
    lowbound = 0  # estimate of lower bound of marginal likelihood
    for _ in range(n_samples):
        _L = np.zeros((1, 0))
        i = 0
        while i < n_tot:
            i_to = min(n_tot, i + n_batch)
            _x = ndict.getCols(x, i, i_to)
            if byteToFloat:
                _x = {i: _x[i].astype(np.float32) / 256. for i in _x}
            _L = np.hstack((_L, self.eval(_x, {})))
            i += n_batch
        lowbound += _L.mean()
        px += np.exp(_L)

    lowbound /= n_samples
    logpx = np.log(px / n_samples).mean()
    return lowbound, logpx
def est_loglik(self, x, n_batch, n_samples=1, byteToFloat=False):
    n_tot = next(iter(x.values())).shape[1]

    px = 0        # estimate of marginal likelihood
    lowbound = 0  # estimate of lower bound of marginal likelihood
    for _ in range(n_samples):
        _L = np.zeros((1, 0))
        i = 0
        while i < n_tot:
            i_to = min(n_tot, i + n_batch)
            _x = ndict.getCols(x, i, i_to)
            if byteToFloat:
                _x = {key: _x[key].astype(np.float32) / 256. for key in _x}
            _L = np.hstack((_L, self.eval(_x, {})))
            i += n_batch
        lowbound += _L.mean()
        px += np.exp(_L)

    lowbound /= n_samples
    logpx = np.log(px / n_samples).mean()
    return lowbound, logpx
def test(self, x, n_batch, n_samples=1, byteToFloat=False):
    n_tot = x.itervalues().next().shape[1]

    px = 0        # estimate of marginal likelihood
    lowbound = 0  # estimate of lower bound of marginal likelihood
    pzzz = 0
    pxxx = 0
    qzzz = 0
    for _ in range(n_samples):
        _L = np.zeros((1, 0))
        px = np.zeros((1, 0))
        pz = np.zeros((1, 0))
        qz = np.zeros((1, 0))
        i = 0
        while i < n_tot:
            i_to = min(n_tot, i + n_batch)
            _x = ndict.getCols(x, i, i_to)
            if byteToFloat:
                _x = {i: _x[i].astype(np.float32) / 256. for i in _x}
            result = self.eval_test(_x, {})
            _L = np.hstack((_L, result[0]))
            px = np.hstack((px, result[1]))
            pz = np.hstack((pz, result[2]))
            qz = np.hstack((qz, result[3]))
            i += n_batch
        lowbound += _L.mean()
        pzzz += pz.mean()
        pxxx += px.mean()
        qzzz += qz.mean()

    lowbound /= n_samples
    pzzz /= n_samples
    pxxx /= n_samples
    qzzz /= n_samples
    return lowbound, pxxx, pzzz, qzzz
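# A hedged reading of the return values above, assuming self.eval_test returns per-example
# arrays (bound, log p(x|z), log p(z), log q(z|x)) in that order: the averaged quantities
# would then satisfy lowbound ~= pxxx + pzzz - qzzz, i.e. the usual variational bound
# E_q[log p(x|z) + log p(z) - log q(z|x)]. This is an interpretation based on the variable
# names only and is not confirmed by the surrounding code.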
def __init__(self, x, model, n_batch):
    raise Exception("TODO")
    self.x = x
    self.c = c = IPython.parallel.Client()
    self.model = model
    self.n_batch = n_batch
    self.clustersize = len(c)

    print('ipcluster size = ' + str(self.clustersize))
    n_train = next(iter(x.values())).shape[1]
    if n_train % (self.n_batch * len(c)) != 0:
        raise BaseException()
    self.blocksize = self.n_batch * len(c)
    self.n_minibatches = n_train // self.blocksize

    # Get pointers to slaves
    c.block = False
    # Remove namespaces on slaves
    c[:].clear()

    # Execute stuff on slaves
    module, function, args = self.model.constr
    c[:].push({'args': args, 'x': x}).wait()
    commands = [
        'import os; cwd = os.getcwd()',
        'import sys; sys.path.append(\'../shared\')',
        'import anglepy.ndict as ndict',
        'import ' + module,
        'my_n_batch = ' + str(n_batch),
        'my_model = ' + module + '.' + function + '(**args)'
    ]
    for cmd in commands:
        c[:].execute(cmd).get()

    # Import data on slaves
    for i in range(len(c)):
        _x = ndict.getCols(x, i * (n_train // len(c)), (i + 1) * (n_train // len(c)))
        c[i].push({'my_x': _x})
    c[:].pull(['my_x']).get()
def test(self, x, n_batch, n_samples=1, byteToFloat=False):
    n_tot = x.itervalues().next().shape[1]

    px = 0        # estimate of marginal likelihood
    lowbound = 0  # estimate of lower bound of marginal likelihood
    pzzz = 0
    pxxx = 0
    qzzz = 0
    for _ in range(n_samples):
        _L = np.zeros((1, 0))
        px = np.zeros((1, 0))
        pz = np.zeros((1, 0))
        qz = np.zeros((1, 0))
        i = 0
        while i < n_tot:
            i_to = min(n_tot, i + n_batch)
            _x = ndict.getCols(x, i, i_to)
            if byteToFloat:
                _x = {i: _x[i].astype(np.float32) / 256. for i in _x}
            result = self.eval_test(_x, {})
            _L = np.hstack((_L, result[0]))
            px = np.hstack((px, result[1]))
            pz = np.hstack((pz, result[2]))
            qz = np.hstack((qz, result[3]))
            i += n_batch
        lowbound += _L.mean()
        pzzz += pz.mean()
        pxxx += px.mean()
        qzzz += qz.mean()

    lowbound /= n_samples
    pzzz /= n_samples
    pxxx /= n_samples
    qzzz /= n_samples
    return lowbound, pxxx, pzzz, qzzz
def infer(data, n_batch=1000):
    # print '--', n_batch
    size = data['x'].shape[1]
    res = np.zeros((sum(n_hidden), size))
    res1 = np.zeros((n_z, size))
    res2 = np.zeros((n_hidden[-1], size))
    res3 = np.zeros((n_z, size))
    for i in range(0, size, n_batch):
        idx_to = min(size, i + n_batch)
        x_batch = ndict.getCols(data, i, idx_to)  # may have bugs
        nn_batch = idx_to - i

        _x, _z, _z_confab = model.gen_xz(x_batch, {}, nn_batch)
        x_samples = _z_confab['x']

        for (hi, hidden) in enumerate(_z_confab['hidden']):
            res[sum(n_hidden[:hi]):sum(n_hidden[:hi + 1]), i:i + nn_batch] = hidden
        res1[:, i:i + nn_batch] = _z_confab['mean']
        res2[:, i:i + nn_batch] = _z_confab['hidden'][-1]
        res3[:, i:i + nn_batch] = _z_confab['logvar']

    # print '--'
    return res, res1, res2, res3
def __init__(self, x, model, n_batch):
    raise Exception("TODO")
    self.x = x
    self.c = c = IPython.parallel.Client()
    self.model = model
    self.n_batch = n_batch
    self.clustersize = len(c)

    print 'ipcluster size = ' + str(self.clustersize)
    n_train = x.itervalues().next().shape[1]
    if n_train % (self.n_batch * len(c)) != 0:
        raise BaseException()
    self.blocksize = self.n_batch * len(c)
    self.n_minibatches = n_train / self.blocksize

    # Get pointers to slaves
    c.block = False
    # Remove namespaces on slaves
    c[:].clear()

    # Execute stuff on slaves
    module, function, args = self.model.constr
    c[:].push({'args': args, 'x': x}).wait()
    commands = [
        'import os; cwd = os.getcwd()',
        'import sys; sys.path.append(\'../shared\')',
        'import anglepy.ndict as ndict',
        'import ' + module,
        'my_n_batch = ' + str(n_batch),
        'my_model = ' + module + '.' + function + '(**args)'
    ]
    for cmd in commands:
        c[:].execute(cmd).get()

    # Import data on slaves
    for i in range(len(c)):
        _x = ndict.getCols(x, i * (n_train / len(c)), (i + 1) * (n_train / len(c)))
        c[i].push({'my_x': _x})
    c[:].pull(['my_x']).get()
def subval(self, i, w, z):
    raise Exception("TODO")

    # Replaced my_model.nbatch with my_n_batch, this is UNTESTED
    @interactive
    def ll(w, z, k):
        _x = ndict.getCols(my_x, k * my_n_batch, (k + 1) * my_n_batch)  # @UndefinedVariable
        if z is None:
            return my_model.logpxmc(w, _x), None  # @UndefinedVariable
        else:
            return my_model.logpxz(w, _x, z)  # @UndefinedVariable

    tasks = []
    for j in range(len(self.c)):
        _z = z
        if _z is not None:
            _z = ndict.getCols(z, j * self.n_batch, (j + 1) * self.n_batch)
        tasks.append(self.c.load_balanced_view().apply_async(ll, w, _z, i))

    res = [task.get() for task in tasks]

    raise Exception("TODO: implementation with uncoupled logpx and logpz")
    return sum(res)
def make_minibatch(i):
    _x = ndict.getCols(x, i * n_batch, (i + 1) * n_batch)
    _eps = model.gen_eps(n_batch)
    if bernoulli_x:
        _x['x'] = np.random.binomial(n=1, p=_x['x'])
    return [i, _x, _eps]
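# A hedged usage sketch for make_minibatch above; it closes over x, n_batch, bernoulli_x and
# model from the enclosing scope. The name n_train below is introduced here for illustration
# only, using the same dict-of-columns convention seen elsewhere in this code.
n_train = next(iter(x.values())).shape[1]                         # total number of data columns
minibatches = [make_minibatch(i) for i in range(n_train // n_batch)]
# each entry: [minibatch index, column slice of x (optionally re-binarized), noise from model.gen_eps]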
def subgrad(self, i, w, z):
    _x = ndict.getCols(self.x, i * self.n_batch, (i + 1) * self.n_batch)
    _z = ndict.getCols(z, i * self.n_batch, (i + 1) * self.n_batch)
    logpx, logpz, g, _ = self.model.dlogpxz_dwz(w, _x, _z)
    return logpx, logpz, g
def subgrad(self, i, w):
    _x = ndict.getCols(self.x, i, i + 1)
    logpx, gw = self.model.dlogpxmc_dw(w, _x, self.n_mc_samples)
    return logpx, gw
def subval(self, i, w):
    _x = ndict.getCols(self.x, i, i + 1)
    return self.model.logpxmc(w, _x, self.n_mc_samples)
def getColsZX(self, w, z, i):
    _x = ndict.getCols(self.x, i * self.n_batch, (i + 1) * self.n_batch)
    _z = None
    if z is not None:
        _z = ndict.getCols(z, i * self.n_batch, (i + 1) * self.n_batch)
    return _z, _x
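# ndict.getCols is used throughout these helpers to take a column slice of every array in a
# dict of matrices (data is stored column-major, one example per column). A minimal sketch of
# that assumed behavior, not the actual anglepy implementation:
def get_cols_sketch(d, i_from, i_to):
    # slice columns i_from:i_to out of each array in the dict
    return {key: value[:, i_from:i_to] for key, value in d.items()}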
def subval(self, i, w, z):
    _x = ndict.getCols(self.x, i * self.n_batch, (i + 1) * self.n_batch)
    _z = ndict.getCols(z, i * self.n_batch, (i + 1) * self.n_batch)
    return self.model.logpxz(w, _x, _z)
def make_minibatch(i):
    _x = ndict.getCols(x, i * n_batch, (i + 1) * n_batch)
    _eps = model.gen_eps(n_batch)
    if bernoulli_x:
        _x['x'] = np.random.binomial(n=1, p=_x['x'])
    return [i, _x, _eps]
def make_minibatch(i):
    _x_labeled = ndict.getCols(x_labeled, i * n_batch_l, (i + 1) * n_batch_l)
    _x_unlabeled = ndict.getCols(x_unlabeled, i * n_batch_u, (i + 1) * n_batch_u)
    return [i, _x_labeled, _x_unlabeled]
def ll(w, z, k):
    _x = ndict.getCols(my_x, k * my_n_batch, (k + 1) * my_n_batch)  # @UndefinedVariable
    if z is None:
        return my_model.logpxmc(w, _x), None  # @UndefinedVariable
    else:
        return my_model.logpxz(w, _x, z)  # @UndefinedVariable
def make_minibatch(i):
    _x_labeled = ndict.getCols(x_labeled, i * n_batch_l, (i + 1) * n_batch_l)
    _x_unlabeled = ndict.getCols(x_unlabeled, i * n_batch_u, (i + 1) * n_batch_u)
    return [i, _x_labeled, _x_unlabeled]