def gpu_nnc_predict(trX, trY, teX, metric='cosine', batch_size=4096):
    if metric == 'cosine':
        metric_fn = cosine_dist
    else:
        metric_fn = euclid_dist
    idxs = []
    for i in range(0, len(teX), batch_size):
        mb_dists = []
        mb_idxs = []
        for j in range(0, len(trX), batch_size):
            dist = metric_fn(floatX(teX[i:i + batch_size]), floatX(trX[j:j + batch_size]))
            if metric == 'cosine':
                mb_dists.append(np.max(dist, axis=1))
                mb_idxs.append(j + np.argmax(dist, axis=1))
            else:
                mb_dists.append(np.min(dist, axis=1))
                mb_idxs.append(j + np.argmin(dist, axis=1))
        mb_idxs = np.asarray(mb_idxs)
        mb_dists = np.asarray(mb_dists)
        # pick, for each test point, the training index with the best score
        # (renamed from `i` to avoid shadowing the outer loop variable)
        if metric == 'cosine':
            best = mb_idxs[np.argmax(mb_dists, axis=0), np.arange(mb_idxs.shape[1])]
        else:
            best = mb_idxs[np.argmin(mb_dists, axis=0), np.arange(mb_idxs.shape[1])]
        idxs.append(best)
    idxs = np.concatenate(idxs, axis=0)
    nearest = trY[idxs]
    return nearest

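# Hedged usage sketch (not part of the original source): scoring a nearest-neighbour
# classifier on held-out data with gpu_nnc_predict. It assumes trY/teY are 1-D label
# arrays; the accuracy computation is illustrative only.
def example_nnc_accuracy(trX, trY, teX, teY):
    preds = gpu_nnc_predict(trX, trY, teX, metric='cosine', batch_size=4096)
    return float(np.mean(preds == teY))
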
def __init__(self, n_tensors_list, func_key_list, l2_reg, drop_list, gamma_scale, bias_scale):
    assert len(n_tensors_list) == 2
    assert len(func_key_list) == 2
    assert len(drop_list) == 2
    assert isinstance(func_key_list[0], str)
    super(Rcn2layer_hidden_bn, self).__init__(n_tensors_list, func_key_list, l2_reg, drop_list)
    self.w1, self.b1 = _make_weight_bias(
        self.n_tensors_list[0], self.n_tensors_list[1],
        layer_number=1, bias_scale=bias_scale)
    self.gamma1 = theano.shared(
        floatX(np.abs(gamma_scale * np.random.normal(size=(self.n_tensors_list[1],)))),
        name="gamma1", borrow=False)
    self.var1 = theano.shared(
        floatX(np.zeros(self.n_tensors_list[1])),
        name="var1", borrow=False)
    self.w2, self.b2 = _make_weight_bias(
        self.n_tensors_list[1], 1, layer_number=2, bias_scale=bias_scale)
    self.param_l = [self.w1, self.b1, self.gamma1, self.w2, self.b2]

def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(floatX(0.))
    b1_t = self.b1 * self.l**t
    tp1 = t + 1.
    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        value = p.get_value() * 0.
        if p.dtype == theano.config.floatX:
            value = floatX(value)
        m = theano.shared(value)
        v = theano.shared(value)
        m_t = b1_t * m + (1 - b1_t) * g
        v_t = self.b2 * v + (1 - self.b2) * g**2
        m_c = m_t / (1 - self.b1**tp1)
        v_c = v_t / (1 - self.b2**tp1)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, tp1))
    return updates

def svgd(x0, score_q, max_iter=2000, kernel='rbf', n_features=-1,
         fixed_weights=True, optimizer=None, progressbar=True, trace=False,
         **model_params):
    theta = theano.shared(floatX(x0))
    epsilon = theano.shared(floatX(np.zeros(x0.shape[1])))

    svgd_grad = svgd_gradient(theta, score_q, kernel, n_features, fixed_weights, **model_params)

    # Initialize optimizer
    if optimizer is None:
        # TODO. works better with regularizer for high dimension data
        optimizer = Adagrad(lr=1e-3, alpha=.5)

    svgd_updates = optimizer([theta], [-1 * svgd_grad])
    _svgd_step = theano.function([], [], updates=svgd_updates)

    # Run svgd optimization
    if progressbar:
        progress = tqdm(np.arange(max_iter))
    else:
        progress = np.arange(max_iter)

    xx, grad_err = [], []
    for iter in progress:
        _svgd_step()
        if trace:
            xx.append(theta.get_value())

    theta_val = theta.get_value()
    return theta_val, xx

def _make_weight_bias(n_input_tensors, n_output_tensors, layer_number, bias_scale):
    w_mat = theano.shared(
        floatX(0.01 * np.random.normal(size=(n_output_tensors, n_input_tensors))),
        name="w{}".format(layer_number), borrow=False)
    bias_vec = theano.shared(
        floatX(bias_scale * np.ones(shape=(n_output_tensors,))),
        name="bias{}".format(layer_number), borrow=False)
    return w_mat, bias_vec

def make_weight_bias(n_input_tensors, n_output_tensors):
    w_mat = theano.shared(
        floatX(0.01 * np.random.normal(size=(n_output_tensors, n_input_tensors, 1, 1))),
        name="w_mat", borrow=False)
    bias_vec = theano.shared(
        floatX(0.01 * np.ones(shape=(n_output_tensors,))),
        name="bias_vec", borrow=False)
    return w_mat, bias_vec

def share_data_sets(feature_vec, gt_vec, test_feature_vec, test_gt_vec):
    s_input = theano.shared(floatX(feature_vec), "feature_vec", borrow=True)
    s_target = theano.shared(floatX(gt_vec), "gt2", borrow=True)
    s_test_input = theano.shared(floatX(test_feature_vec), "test_feature_vec", borrow=True)
    s_test_target = theano.shared(floatX(test_gt_vec), "test_gt_vec", borrow=True)
    return s_input, s_target, s_test_input, s_test_target

def get_hog(self, x_o):
    use_bin = self.use_bin
    NO = self.NO
    BS = self.BS
    nc = self.nc
    x = (x_o + sharedX(1)) / (sharedX(2))
    Gx = np.array([[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]]) / 4.0
    Gy = Gx.T
    f1_w = []
    for i in range(NO):
        t = np.pi / NO * i
        g = np.cos(t) * Gx + np.sin(t) * Gy
        gg = np.tile(g[np.newaxis, np.newaxis, :, :], [1, 1, 1, 1])
        f1_w.append(gg)
    f1_w = np.concatenate(f1_w, axis=0)
    G = np.concatenate([Gx[np.newaxis, np.newaxis, :, :],
                        Gy[np.newaxis, np.newaxis, :, :]], axis=0)
    G_f = sharedX(floatX(G))

    a = np.cos(np.pi / NO)
    l1 = sharedX(floatX(1 / (1 - a)))
    l2 = sharedX(floatX(a / (1 - a)))
    eps = sharedX(1e-3)
    if nc == 3:
        x_gray = T.mean(x, axis=1).dimshuffle(0, 'x', 1, 2)
    else:
        x_gray = x
    f1 = sharedX(floatX(f1_w))
    h0 = T.abs_(dnn_conv(x_gray, f1, subsample=(1, 1), border_mode=(1, 1)))
    g = dnn_conv(x_gray, G_f, subsample=(1, 1), border_mode=(1, 1))

    if use_bin:
        gx = g[:, [0], :, :]
        gy = g[:, [1], :, :]
        gg = T.sqrt(gx * gx + gy * gy + eps)
        hk = T.maximum(0, l1 * h0 - l2 * gg)

        bf_w = np.zeros((NO, NO, 2 * BS, 2 * BS))
        b = 1 - np.abs((np.arange(1, 2 * BS + 1) - (2 * BS + 1.0) / 2.0) / BS)
        b = b[np.newaxis, :]
        bb = b.T.dot(b)
        for n in range(NO):
            bf_w[n, n] = bb
        bf = sharedX(floatX(bf_w))
        # integer division keeps border_mode integral under Python 3 as well
        h_f = dnn_conv(hk, bf, subsample=(BS, BS), border_mode=(BS // 2, BS // 2))
        return h_f
    else:
        return g

def get_batch(X, index, batch_size):
    """ iterate through data set """
    size = X.shape[0]
    n1 = (index * batch_size) % size
    n2 = ((index + 1) * batch_size) % size
    if n1 > n2:
        return floatX(np.concatenate((X[n1:], X[:n2])))
    else:
        return floatX(X[n1:n2])

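# Hedged usage sketch (not part of the original source): driving get_batch from a
# training loop. X, n_epochs and train_fn are illustrative stand-ins for whatever
# the surrounding script provides.
def example_epoch_loop(X, batch_size, n_epochs, train_fn):
    n_batches = X.shape[0] // batch_size
    for epoch in range(n_epochs):
        for index in range(n_batches):
            xmb = get_batch(X, index, batch_size)
            train_fn(xmb)
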
def __call__(self, params, cost, consider_constant=None):
    updates = []
    # if self.clipnorm > 0:
    #     print 'clipping grads', self.clipnorm
    #     grads = T.grad(theano.gradient.grad_clip(cost, 0, self.clipnorm), params)
    grads = T.grad(cost, params, consider_constant=consider_constant)
    grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(floatX(1.))
    b1_t = self.b1 * self.l**(t - 1)
    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = b1_t * m + (1 - b1_t) * g
        v_t = self.b2 * v + (1 - self.b2) * g**2
        m_c = m_t / (1 - self.b1**t)
        v_c = v_t / (1 - self.b2**t)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.))
    return updates

def __init__(self, inp, shape, act=T.nnet.sigmoid):
    self.shape = shape
    print(shape)
    self.W = theano.shared(
        value=floatX(nprng.randn(shape[0], shape[1]) * np.sqrt(2 / shape[1])),
        # value=floatX(nprng.randn(shape[0], shape[1])*np.sqrt(2/(shape[1] + shape[0]))),
        name='W',
        borrow=True)
    # self.b = theano.shared(
    #     value=floatX(nprng.randn(shape[0])*np.sqrt(2/shape[0])),
    #     name='b',
    #     borrow=True
    # )
    # self.s = T.dot(self.W, inp.T).T + self.b
    self.s = T.dot(self.W, inp.T).T
    self.a = act(self.s)
    # self.params = [self.W, self.b]
    self.params = [self.W]
    self.inp = inp

def __call__(self, params, cost, return_grads=False):
    updates = []
    grads_pre_clip = T.grad(cost, params)
    grads = clip_norms(grads_pre_clip, self.clipnorm)
    t = theano.shared(floatX(1.))
    for p, g in zip(params, grads):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = self.b1 * m + (1. - self.b1) * g
        v_t = self.b2 * v + (1. - self.b2) * (g**2.)
        if type(p) == type(self.n):
            step_t = (m_t / (T.sqrt(v_t) + self.e)) + \
                     (self.n[0] * (cu_rng.uniform(size=p.shape) - 0.5))
        else:
            step_t = m_t / (T.sqrt(v_t) + self.e)
        p_t = p - (self.lr * step_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.))
    if return_grads:
        result = [updates, grads_pre_clip]
    else:
        result = updates
    return result

def __call__(self, params, cost, return_grads=False):
    updates = []
    grads_pre_clip = T.grad(cost, params)
    grads = clip_norms(grads_pre_clip, self.clipnorm)
    t = theano.shared(floatX(1.))
    b1_t = self.b1 * self.l**(t - 1)
    for p, g in zip(params, grads):
        # g = self.regularizer.gradient_regularize(p, g)
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = b1_t * m + (1 - b1_t) * g
        v_t = self.b2 * v + (1 - self.b2) * g**2
        m_c = m_t / (1 - self.b1**t)
        v_c = v_t / (1 - self.b2**t)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        # p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.))
    if return_grads:
        result = [updates, grads_pre_clip]
    else:
        result = updates
    return result

def connect(self, l_in):
    self.l_in = l_in
    self.n_in = l_in.size

    self.w_i = self.init((self.n_in, self.size))
    self.w_f = self.init((self.n_in, self.size))
    self.w_o = self.init((self.n_in, self.size))
    self.w_c = self.init((self.n_in, self.size))

    self.b_i = shared0s((self.size))
    self.b_f = shared0s((self.size))
    self.b_o = shared0s((self.size))
    self.b_c = shared0s((self.size))

    self.u_i = self.init((self.size, self.size))
    self.u_f = self.init((self.size, self.size))
    self.u_o = self.init((self.size, self.size))
    self.u_c = self.init((self.size, self.size))

    self.params = [self.w_i, self.w_f, self.w_o, self.w_c,
                   self.u_i, self.u_f, self.u_o, self.u_c,
                   self.b_i, self.b_f, self.b_o, self.b_c]

    if self.weights is not None:
        for param, weight in zip(self.params, self.weights):
            param.set_value(floatX(weight))

def connect(self, l_in):
    self.l_in = l_in
    self.n_in = l_in.size
    self.h0 = shared0s((1, self.size))

    self.w_z = self.init((self.n_in, self.size))
    self.w_r = self.init((self.n_in, self.size))

    self.u_z = self.init((self.size, self.size))
    self.u_r = self.init((self.size, self.size))

    self.b_z = shared0s((self.size))
    self.b_r = shared0s((self.size))

    if 'maxout' in self.activation_str:
        self.w_h = self.init((self.n_in, self.size * 2))
        self.u_h = self.init((self.size, self.size * 2))
        self.b_h = shared0s((self.size * 2))
    else:
        self.w_h = self.init((self.n_in, self.size))
        self.u_h = self.init((self.size, self.size))
        self.b_h = shared0s((self.size))

    self.params = [self.h0, self.w_z, self.w_r, self.w_h,
                   self.u_z, self.u_r, self.u_h,
                   self.b_z, self.b_r, self.b_h]

    if self.weights is not None:
        for param, weight in zip(self.params, self.weights):
            param.set_value(floatX(weight))

def __call__(self, params, grads):
    updates = []
    t_prev = theano.shared(floatX(0.))
    # Using theano constant to prevent upcasting of float32
    one = T.constant(1)
    t = t_prev + 1
    a_t = self.lr * T.sqrt(one - self.b2**t) / (one - self.b1**t)
    for param, g_t in zip(params, grads):
        value = param.get_value(borrow=True)
        m_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        v_prev = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                               broadcastable=param.broadcastable)
        m_t = self.b1 * m_prev + (one - self.b1) * g_t
        v_t = self.b2 * v_prev + (one - self.b2) * g_t**2
        step = a_t * m_t / (T.sqrt(v_t) + self.e)
        updates.append((m_prev, m_t))
        updates.append((v_prev, v_t))
        updates.append((param, param - step))
    updates.append((t_prev, t))
    return updates

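# Hedged usage sketch (not part of the original source): compiling the update list
# returned by an optimizer instance into a Theano training step for a toy
# least-squares model. `opt` stands in for one of the update-rule objects defined
# in this file and assumes the (params, grads) calling convention of the variant
# directly above (other variants here take (params, cost) instead). numpy (np),
# theano and theano.tensor (T) are assumed to be imported at module level, as
# everywhere else in this file.
def example_train_step(opt, n_in=5):
    x = T.matrix('x')
    y = T.vector('y')
    w = theano.shared(floatX(np.zeros(n_in)), name='w')
    cost = T.mean((T.dot(x, w) - y) ** 2)
    grads = T.grad(cost, [w])
    updates = opt([w], grads)  # list of (shared_variable, new_value) pairs
    return theano.function([x, y], cost, updates=updates)
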
def langevin(x0, score_q, lr=1e-2, max_iter=500, progressbar=True, trace=False, **model_params):
    theta = theano.shared(x0)
    i = theano.shared(floatX(0))

    stepsize = T.cast(lr * (i + 1)**(-0.55), theano.config.floatX)
    grad = score_q(theta, **model_params)
    update = stepsize * grad / 2. + T.sqrt(stepsize) * t_rng.normal(size=theta.shape)
    cov_grad = T.sum(update**2, axis=1).mean()

    langevin_step = theano.function([], [], updates=[(theta, theta + update), (i, i + 1)])

    if progressbar:
        progress = tqdm(np.arange(max_iter))
    else:
        progress = np.arange(max_iter)

    xx = []
    for _ in progress:
        langevin_step()
        if trace:
            xx.append(theta.get_value())

    theta_val = theta.get_value()
    return theta_val, xx

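# Hedged usage sketch (not part of the original source): running the Langevin
# sampler above on a standard normal target, whose score is simply -theta. The
# sample count and dimensionality are illustrative; floatX and the module-level
# RNG t_rng used inside langevin are assumed to be defined as elsewhere in this file.
def example_langevin_standard_normal(n_samples=256, dim=2):
    score_q = lambda theta: -theta  # grad log N(0, I)
    x0 = floatX(np.random.uniform(-3, 3, size=(n_samples, dim)))
    samples, _ = langevin(x0, score_q, lr=1e-2, max_iter=500, progressbar=False)
    return samples
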
def __call__(self, params, cost, grads):
    updates = []
    # grads = T.grad(cost, params, disconnected_inputs='raise')
    grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(floatX(1.))
    b1_t = self.b1 * self.l**(t - 1)
    for p, g in zip(params, grads):
        # updates_g.append(g)
        g = self.regularizer.gradient_regularize(p, g)
        # updates_g.append(g)
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = b1_t * m + (1 - b1_t) * g
        v_t = self.b2 * v + (1 - self.b2) * g**2
        m_c = m_t / (1 - self.b1**t)
        v_c = v_t / (1 - self.b2**t)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.))
    return updates

def step(self, X, y, outc):
    """Perform single train iteration.

    Args:
        X: input vectors
        y: target labels.
        outc: target vectors.

    Returns:
        Dict consisting of 'loss', 'err', 'est_loss', 'rho', 'delta_ll'
        and parameters from self.print_pls.
    """
    self.x_d.set_value(X)
    self.y_d.set_value(y)
    self.outc_d.set_value(outc)
    self.rand_outc_d.set_value(floatX(nprng.randn(self.over_sampling, *outc.shape)))

    old_params = self.get_params()
    while True:
        # reset params to saved
        for op, p in zip(old_params, self.model.params):
            p.set_value(op)

        try:
            t_r = self.train(self.c_lambd_inv)
            print_pls_vals = t_r[-len(self.print_pls):]
            self.print_pls_res = {k: v for k, v in zip(self.print_pls.keys(), print_pls_vals)}
        except numpy.linalg.linalg.LinAlgError:
            t_r = [1e20, 1e10, 10] + [None] * len(self.print_pls)
            self.print_pls_res = {k: None for k in self.print_pls.keys()}

        e_v = self.eva()
        delta_ll = t_r[1] - e_v[0]
        rho = delta_ll / float(t_r[0])

        print()
        print('lambda:', round(self.c_lambd_inv, 7), 'rho:', round(rho, 2),
              'old loss:', t_r[1], 'new loss:', e_v[0])

        if rho < 0:
            self.c_lambd_inv *= self.rate * 2
            continue
        elif rho < 0.5:
            self.c_lambd_inv *= self.rate
            # self.c_lambd_inv = min(self.c_lambd_inv, 0.02)
        elif rho > 0.5:
            self.c_lambd_inv /= self.rate
        else:
            pass
        break

    # self.train.profiler.print_summary()
    res = {'rho': rho, 'est_loss': t_r[0], 'loss': t_r[1], 'err': t_r[2], 'delta_ll': delta_ll}
    res.update(self.print_pls_res)
    return res

def def_comp_mask(self):
    BS = self.BS
    t = time()
    m = T.tensor4()
    bf_w = np.ones((1, 1, 2 * BS, 2 * BS))
    bf = sharedX(floatX(bf_w))
    # integer division keeps border_mode integral under Python 3 as well
    m_b = dnn_conv(m, bf, subsample=(BS, BS), border_mode=(BS // 2, BS // 2))
    _comp_mask = theano.function(inputs=[m], outputs=m_b)
    return _comp_mask

def def_comp_mask(self):
    BS = self.BS
    print('COMPILING')
    t = time()
    m = T.tensor4()
    bf_w = np.ones((1, 1, 2 * BS, 2 * BS))
    bf = sharedX(floatX(bf_w))
    # integer division keeps border_mode integral under Python 3 as well
    m_b = dnn_conv(m, bf, subsample=(BS, BS), border_mode=(BS // 2, BS // 2))
    _comp_mask = theano.function(inputs=[m], outputs=m_b)
    print('%.2f seconds to compile [compMask] functions' % (time() - t))
    return _comp_mask

def preprocess_dataset(X, y):
    if source == 'mnist':
        X = (floatX(X) / 255)[:, ::downscale, ::downscale].reshape(-1, 28 * 28 // (downscale**2))
    elif source == 'digits':
        X = (floatX(X) / 16).reshape(-1, 8, 8)[:, ::downscale, ::downscale].reshape(-1, 64 // (downscale**2))

    outc = floatX(np.zeros((len(y), 10)))
    for i in range(len(y)):
        outc[i, y[i]] = 1.

    if data_type == 'test':
        X, y, outc = X[-size:], y[-size:], outc[-size:]
    else:
        X, y, outc = X[:size], y[:size], outc[:size]

    y = y.astype('int32')
    return X, y, outc

def iterXY(self, X, Y):
    if self.shuffle:
        X, Y = shuffle(X, Y)

    self.loader = Loader(X, self.train_load, self.train_transform, self.size)
    self.proc = Process(target=self.loader.load)
    self.proc.start()

    for ymb in iter_data(Y, size=self.size):
        xmb = self.loader.get()
        yield xmb, floatX(ymb)

def __init__(self, size=128, n_features=256, init='uniform', weights=None):
    self.settings = locals()
    del self.settings['self']
    self.init = getattr(inits, init)
    self.size = size
    self.n_features = n_features
    self.input = T.imatrix()
    self.wv = self.init((self.n_features, self.size))
    self.params = [self.wv]
    if weights is not None:
        for param, weight in zip(self.params, weights):
            param.set_value(floatX(weight))

def gpu_nnd_score(trX, teX, metric='cosine', batch_size=4096):
    if metric == 'cosine':
        metric_fn = cosine_dist
    else:
        metric_fn = euclid_dist
    dists = []
    for i in range(0, len(teX), batch_size):
        mb_dists = []
        for j in range(0, len(trX), batch_size):
            dist = metric_fn(floatX(teX[i:i + batch_size]), floatX(trX[j:j + batch_size]))
            if metric == 'cosine':
                mb_dists.append(np.max(dist, axis=1))
            else:
                mb_dists.append(np.min(dist, axis=1))
        mb_dists = np.asarray(mb_dists)
        if metric == 'cosine':
            d = np.max(mb_dists, axis=0)
        else:
            d = np.min(mb_dists, axis=0)
        dists.append(d)
    dists = np.concatenate(dists, axis=0)
    return float(np.mean(dists))

def iterXY(self, X, Y):
    """Yield (xmb, ymb) minibatches, loading X asynchronously in a background process."""
    if self.shuffle:
        X, Y = utils.shuffle(X, Y)
    self.loader = Loader(X, self.train_load, self.train_transform, self.size)
    self.proc = multiprocessing.Process(target=self.loader.load)
    self.proc.start()
    for ymb in utils.iter_data(Y, size=self.size):
        xmb = self.loader.get()
        yield xmb, theano_utils.floatX(ymb)

def connect(self, l_in):
    self.l_in = l_in
    self.n_in = l_in.size
    if 'maxout' in self.activation_str:
        self.w = self.init((self.n_in, self.size * 2))
        self.b = shared0s((self.size * 2))
    else:
        self.w = self.init((self.n_in, self.size))
        self.b = shared0s((self.size))
    self.params = [self.w, self.b]
    if self.weights is not None:
        for param, weight in zip(self.params, self.weights):
            param.set_value(floatX(weight))

def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)
    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        value = p.get_value() * 0.
        if p.dtype == theano.config.floatX:
            value = floatX(value)
        m = theano.shared(value)
        v = (self.momentum * m) - (self.lr * g)
        updates.append((m, v))

        updated_p = p + v
        updated_p = self.regularizer.weight_regularize(updated_p)
        updates.append((p, updated_p))
    return updates

def svgd(x0, score_q, max_iter=2000, alg='svgd', N0=None, optimizer=None,
         progressbar=True, trace=False, **model_params):

    if alg == 'graphical' and N0 is None:
        raise NotImplementedError

    theta = theano.shared(floatX(np.copy(x0).reshape((len(x0), -1))))  # initialization

    if alg == 'graphical':
        N = theano.shared(N0.astype('int32'))  # adjacency matrix
        svgd_grad = -1 * graphical_svgd_gradient(theta, score_q, N, **model_params)
    elif alg == 'svgd':
        svgd_grad = -1 * svgd_gradient(theta, score_q, **model_params)
    else:
        raise NotImplementedError

    # Initialize optimizer
    if optimizer is None:
        optimizer = Adagrad(lr=1e-2, alpha=.5)

    svgd_updates = optimizer([theta], [svgd_grad])
    svgd_step = theano.function([], [], updates=svgd_updates)

    # Run svgd optimization
    if progressbar:
        progress = tqdm(np.arange(max_iter))
    else:
        progress = np.arange(max_iter)

    xx = []
    for ii in progress:
        svgd_step()
        if trace:
            xx.append(theta.get_value())

    theta_val = theta.get_value().reshape(x0.shape)

    if trace:
        return theta_val, np.asarray(xx)
    else:
        return theta_val

def __call__(self, params, cost, consider_constant=None):
    updates = list()
    grads = theano.tensor.grad(cost, params, consider_constant=consider_constant)
    grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(theano_utils.floatX(1.0))
    b1_t = self.b1 * self.l**(t - 1)
    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        m = theano.shared(p.get_value() * 0.0)
        v = theano.shared(p.get_value() * 0.0)
        m_t = b1_t * m + (1 - b1_t) * g
        v_t = self.b2 * v + (1 - self.b2) * g**2
        m_c = m_t / (1 - self.b1**t)
        v_c = v_t / (1 - self.b2**t)
        p_t = p - (self.lr * m_c) / (theano.tensor.sqrt(v_c) + self.e)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.0))
    return updates

def __call__(self, params, grads):
    updates = []
    t_prev = theano.shared(floatX(0.))
    t = t_prev + 1
    for p, g in zip(params, grads):
        value = p.get_value(borrow=True)
        # note: velocity is never written back via updates below, so as written
        # this behaves as plain SGD with the (optionally decayed) learning rate
        velocity = theano.shared(np.zeros(value.shape, dtype=value.dtype),
                                 broadcastable=p.broadcastable)
        if self.decay:
            curr_lr = self.lr * T.cast((1 + t)**(-.55), theano.config.floatX)
        else:
            curr_lr = self.lr
        step = self.alpha * velocity + curr_lr * g
        updated_p = p - step
        updates.append((p, updated_p))
    updates.append((t_prev, t))
    return updates

def get_updates(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    grads = clip_norms(grads, self.clipnorm)
    i = theano.shared(floatX(0.))
    i_t = i + 1.
    fix1 = 1. - self.b1**(i_t)
    fix2 = 1. - self.b2**(i_t)
    lr_t = self.lr * (T.sqrt(fix2) / fix1)
    for p, g in zip(params, grads):
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        m_t = (self.b1 * g) + ((1. - self.b1) * m)
        v_t = (self.b2 * T.sqr(g)) + ((1. - self.b2) * v)
        g_t = m_t / (T.sqrt(v_t) + self.e)
        g_t = self.regularizer.gradient_regularize(p, g_t)
        p_t = p - (lr_t * g_t)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((i, i_t))
    return updates

def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params, disconnected_inputs="ignore")
    grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(floatX(1.0))
    b1_t = self.b1 * self.l ** (t - 1)
    for p, g in zip(params, grads):
        g = self.regularizer.gradient_regularize(p, g)
        m = theano.shared(p.get_value() * 0.0)
        v = theano.shared(p.get_value() * 0.0)
        m_t = b1_t * m + (1 - b1_t) * g
        v_t = self.b2 * v + (1 - self.b2) * g ** 2
        m_c = m_t / (1 - self.b1 ** t)
        v_c = v_t / (1 - self.b2 ** t)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.0))
    return updates

def __call__(self, params, cost):
    updates = []
    grads = T.grad(cost, params)
    # grads = clip_norms(grads, self.clipnorm)
    t = theano.shared(floatX(1.))
    b1_t = self.b1 * self.l**(t - 1)
    for p, g in zip(params, grads):
        # g = self.regularizer.gradient_regularize(p, g)
        m = theano.shared(p.get_value() * 0.)
        v = theano.shared(p.get_value() * 0.)
        # gg = g*sharedX(floatX(4))
        gg = g * self.batch_size
        m_t = b1_t * m + (1 - b1_t) * gg
        v_t = self.b2 * v + (1 - self.b2) * gg**2
        m_c = m_t / (1 - self.b1**t)
        v_c = v_t / (1 - self.b2**t)
        p_t = p - (self.lr * m_c) / (T.sqrt(v_c) + self.e)
        # p_t = self.regularizer.weight_regularize(p_t)
        updates.append((m, m_t))
        updates.append((v, v_t))
        updates.append((p, p_t))
    updates.append((t, t + 1.))
    return updates

    M_line = theano.gradient.disconnected_grad(M_line)
    # compute Huberized regression loss, with linear/quadratic switch at "t"
    loss = (M_quad * abs_res**2.) + (M_line * (2. * t * abs_res - t**2.))
    return loss


cce = CCE = CategoricalCrossEntropy
bce = BCE = BinaryCrossEntropy
mse = MSE = MeanSquaredError
mae = MAE = MeanAbsoluteError


############################
# Probability stuff, yeah? #
############################

# library with theano PDF functions
PI = floatX(np.pi)
C = floatX(-0.5 * np.log(2 * PI))


def normal(x, mean, logvar):
    return C - logvar / 2 - (x - mean)**2 / (2 * T.exp(logvar))


def laplace(x, mean, logvar):
    sd = T.exp(0.5 * logvar)
    return -(abs(x - mean) / sd) - (0.5 * logvar) - np.log(2)


# Centered student-t distribution
# v>0 is degrees of freedom
# See: http://en.wikipedia.org/wiki/Student's_t-distribution
def studentt(x, v):
    gamma1 = log_gamma_lanczos((v + 1) / 2.)
def transform(X):
    """ shift data from [0,255] to [-1, 1] """
    return floatX(X) / 127.5 - 1.

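# Hedged companion sketch (not part of the original source): the inverse mapping
# back to [0, 255], handy when visualising samples. The name inverse_transform is
# an assumption, not taken from the source.
def inverse_transform(X):
    """ shift data from [-1, 1] back to [0, 255] """
    return (X + 1.) * 127.5
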
                                        check_valid='raise')

    ## adjacency matrix
    W = np.zeros(A.shape).astype(int)
    W[A != 0] = 1
    assert np.all(np.sum(W, axis=1) > 0), 'illegal inputs'
    assert np.sum((W - W.T)**2) < 1e-8, 'illegal inputs'

    return model_params, score_q, gt, W


all_algorithms = ['graphical', 'svgd']
max_iter = 5000

model_params, score_q, gt0, N0 = init_model()
x0 = floatX(np.random.uniform(-5, 5, [args.n_samples, gt0.shape[1]]))

for alg in all_algorithms:
    optimizer = Adagrad(lr=5e-3, alpha=0.9)
    xc = svgd(x0, score_q, max_iter=max_iter, alg=alg, N0=N0,
              optimizer=optimizer, trace=False, **model_params)
    print(alg, comm_func_eval(xc, gt0))

plt.plot(xs, preal, lw=2)
plt.xlim([-5., 5.])
plt.ylim([0., 1.])
plt.ylabel('Prob')
plt.xlabel('x')
plt.legend(['P(data)', 'G(z)', 'D(x)'])
plt.title('GAN learning gaussian')
fig.canvas.draw()
plt.show(block=False)
show()

# Train both networks
for i in range(10001):
    # The zmb (z mini batch) is randomly drawn from a uniform distribution.
    zmb = np.random.uniform(-1, 1, size=(batch_size, 1)).astype('float32')
    # The xmb are randomly drawn from a gaussian distribution; these are the target
    # values we want the generator to learn to produce from the uniform inputs.
    xmb = np.random.normal(1., 1, size=(batch_size, 1)).astype('float32')
    # Alternate training between the generator and the discriminator
    if i % 2 == 0:
        print(i)
        _train_g(xmb, zmb)
    else:
        _train_d(xmb, zmb)
    if i % 100 == 0:
        print(i)
        vis(i)
    lrt.set_value(floatX(lrt.get_value() * 0.9999))

rbf: svgd with rbf kernel
combine: combine linear kernel and random feature kernel
'''
all_algorithms = ['poly', 'random_feature', 'rbf', 'combine', 'mc']

for ii in range(1, n_iter + 1):
    Q = np.random.normal(size=(d0, d0))
    # var_n_samples = np.sort(np.concatenate((np.exp(np.linspace(np.log(10), np.log(500), 10)), [d0])).astype('int32'))
    model_params, score_q, log_prob, gt0 = init_model(d0, Q, cond_num)

    from scipy.spatial.distance import cdist
    H = cdist(gt0[:1000], gt0[:1000])**2
    h0 = np.median(H.flatten())

    n_features = n_samples
    x0 = floatX(np.random.uniform(-5, 5, [n_samples, d0]))

    for alg in all_algorithms:
        if alg == 'mc':
            xc = gt0[-n_samples:]
        else:
            optimizer = Adagrad(lr=5e-3, alpha=0.9)
            xc, _ = svgd(x0, score_q, max_iter=max_iter, kernel=alg,
                         n_features=n_features, fixed_weights=True,
                         optimizer=optimizer, trace=False,
def __init__(self, lr=0.001, b1=0.9, b2=0.999, e=1e-8, n=0.0, *args, **kwargs):
    Update.__init__(self, *args, **kwargs)
    self.__dict__.update(locals())
    self.n = theano.shared(floatX(n + np.zeros((1,))))
    return