def pqr_gbrbm_perturb(to_perturb_Bp, to_perturb_Bq, dx=50, dh=10):
    """
    Get a Gaussian-Bernoulli RBM problem where the first entry of the B matrix
    (the matrix linking the latent and the observation) is perturbed.

    - to_perturb_Bp: constant to add to the entry of B in p to perturb it
    - to_perturb_Bq: constant to add to the entry of B in q to perturb it
    - dx: observed dimension
    - dh: latent dimension

    Return P (model.Model), Q (model.Model), data source (representing
    distribution R)
    """
    with util.NumpySeedContext(seed=11):
        B = np.random.randint(0, 2, (dx, dh)) * 2 - 1.0
        b = np.random.randn(dx)
        c = np.random.randn(dh)
        r = density.GaussBernRBM(B, b, c)

        # for p
        Bp_perturb = np.copy(B)
        Bp_perturb[0, 0] = Bp_perturb[0, 0] + to_perturb_Bp

        # for q
        Bq_perturb = np.copy(B)
        Bq_perturb[0, 0] = Bq_perturb[0, 0] + to_perturb_Bq

        p = density.GaussBernRBM(Bp_perturb, b, c)
        q = density.GaussBernRBM(Bq_perturb, b, c)
        ds = r.get_datasource(burnin=2000)

    return (model.ComposedModel(p=p), model.ComposedModel(p=q), ds)
def __init__(self, train_x, train_y, shared_nn, non_shared_nns,
             max_iter=100, l1=0, l2=0, debug=False):
    # validate inputs before deriving shapes from them
    if train_x.ndim != 2 or train_y.ndim != 2:
        print("train_x.ndim != 2 or train_y.ndim != 2")
        sys.exit(1)
    if train_x.shape[1] != train_y.shape[0]:
        print("train_x.shape[1] != train_y.shape[0]")
        sys.exit(1)
    self.train_x = np.copy(train_x)
    self.train_y = np.copy(train_y)
    self.dim = self.train_x.shape[0]
    self.num_train = self.train_x.shape[1]
    self.num_obj = self.train_y.shape[1]
    if len(non_shared_nns) != self.num_obj:
        print("len(non_shared_nns) != self.num_obj")
        sys.exit(1)
    self.means = np.mean(self.train_y, axis=0)
    self.stds = np.std(self.train_y, axis=0)
    self.train_y = (self.train_y - self.means) / self.stds  # standardize output
    self.debug = debug
    self.max_iter = max_iter  # max iter for the L-BFGS optimization
    self.l1 = l1
    self.l2 = l2
    self.shared_nn = shared_nn
    self.non_shared_nns = non_shared_nns
    self.num_param = self.calc_num_params()
def __init__(self, train_x, train_y, layer_sizes, activations,
             bfgs_iter=100, l1=0, l2=0, debug=False):
    self.train_x = np.copy(train_x)
    self.train_y = np.copy(train_y)
    self.dim = train_x.shape[0]
    self.num_train = train_x.shape[1]
    self.nn = NN(layer_sizes, activations)
    self.num_param = 2 + self.dim + self.nn.num_param(self.dim)
    self.bfgs_iter = bfgs_iter
    self.l1 = l1
    self.l2 = l2
    self.debug = debug
    self.m = layer_sizes[-1]
    # standardize inputs and outputs
    self.in_mean = np.mean(self.train_x, axis=1)
    self.in_std = np.std(self.train_x, axis=1)
    self.train_x = ((self.train_x.T - self.in_mean) / self.in_std).T
    self.out_mean = np.mean(self.train_y)
    self.out_std = np.std(self.train_y)
    self.train_y = (self.train_y - self.out_mean) / self.out_std
    self.loss = np.inf
def __init__(self, dataset, gamma, scale, bounds, bfgs_iter, debug=True):
    self.dataset = {}
    self.dataset['low_x'] = np.copy(dataset['low_x'])
    self.dataset['low_y'] = np.copy(dataset['low_y'])
    self.dataset['high_x'] = np.copy(dataset['high_x'])
    self.dataset['high_y'] = np.copy(dataset['high_y'])
    self.gamma = self.dataset['high_y'].shape[0] * gamma * (
        self.dataset['low_y'].max(axis=1) - self.dataset['low_y'].min(axis=1))
    self.scale = scale
    self.bounds = np.copy(bounds)
    self.bfgs_iter = bfgs_iter
    self.debug = debug
    self.dim = self.dataset['low_x'].shape[0]
    self.outdim = self.dataset['low_y'].shape[0]
    self.num_low = self.dataset['low_y'].shape[1]
    self.num_high = self.dataset['high_y'].shape[1]
    self.construct_model()
    self.best_constr = np.array([np.inf, np.inf])
    self.best_y = np.zeros((2, self.outdim))
    self.best_y[:, 0] = np.inf
    self.best_x = np.zeros((2, self.dim))
    self.get_best_y(self.dataset['low_x'], self.dataset['low_y'], is_high=0)
    self.get_best_y(self.dataset['high_x'], self.dataset['high_y'], is_high=1)
def leapfrog(q, p, dVdq, path_len, step_size):
    """Leapfrog integrator for Hamiltonian Monte Carlo.

    Parameters
    ----------
    q : np.floatX
        Initial position
    p : np.floatX
        Initial momentum
    dVdq : callable
        Gradient of the potential energy with respect to the position
    path_len : float
        How long to integrate for
    step_size : float
        How long each integration step should be

    Returns
    -------
    q, p : np.floatX, np.floatX
        New position and momentum
    """
    q, p = np.copy(q), np.copy(p)

    p -= step_size * dVdq(q) / 2  # half step
    for _ in range(int(path_len / step_size) - 1):
        q += step_size * p  # whole step
        p -= step_size * dVdq(q)  # whole step
    q += step_size * p  # whole step
    p -= step_size * dVdq(q) / 2  # half step

    # momentum flip at end
    return q, -p
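# A minimal usage sketch (not from the original source): one Metropolis-adjusted
# HMC step for a standard-normal target, where V(q) = 0.5 * q**2 so dVdq(q) = q.
# The helper name `hmc_step` is hypothetical.
def hmc_step(q0, path_len=1.0, step_size=0.1):
    dVdq = lambda q: q                       # gradient of V(q) = 0.5 * q**2
    p0 = np.random.randn(*np.shape(q0))      # resample momentum
    q_new, p_new = leapfrog(np.copy(q0), p0, dVdq, path_len, step_size)
    # Metropolis acceptance on the joint energy H(q, p) = V(q) + 0.5 * p**2
    start_h = 0.5 * np.sum(q0 ** 2) + 0.5 * np.sum(p0 ** 2)
    new_h = 0.5 * np.sum(q_new ** 2) + 0.5 * np.sum(p_new ** 2)
    if np.log(np.random.rand()) < start_h - new_h:
        return q_new
    return q0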
def calc_update(self, x, p, trust_radius, trust_radius_max, obj,
                quality_required=0.2, quality_low=0.25, quality_high=0.75):
    # Parameter checks
    if not quality_required < quality_low < quality_high:
        raise ValueError(
            'Invalid quality parameters, must be: '
            'quality_required < quality_low < quality_high')

    # actual reduction vs. reduction predicted by the local model
    df = obj.function(x) - obj.function(x + p)
    dm = self.model(x, np.zeros_like(x), obj) - self.model(x, p, obj)
    quality = df / dm

    if quality < quality_low:
        trust_radius_new = quality_low * trust_radius
    else:
        if quality > quality_high and np.isclose(la.norm(p), trust_radius):
            trust_radius_new = min(2 * trust_radius, trust_radius_max)
        else:
            trust_radius_new = np.copy(trust_radius)

    if quality > quality_required:
        x_new = x + p
    else:
        x_new = np.copy(x)

    return x_new, trust_radius_new
def kinetic_fd(self, param, pos, eps=1E-6):
    """Compute the action of the kinetic operator on the wave function points.

    Args:
        param : variational parameters
        pos : position of the electrons
        eps (float) : finite-difference step used for the derivatives

    Returns:
        value of K * psi
    """
    nwalk = pos.shape[0]
    ndim = pos.shape[1]
    out = np.zeros(nwalk)
    for icol in range(ndim):
        # central second-difference stencil: psi(x+eps) - 2 psi(x) + psi(x-eps)
        pos_tmp = np.copy(pos)
        feps = -2 * self.values(param, pos_tmp)

        pos_tmp = np.copy(pos)
        pos_tmp[:, icol] += eps
        feps += self.values(param, pos_tmp)

        pos_tmp = np.copy(pos)
        pos_tmp[:, icol] -= eps
        feps += self.values(param, pos_tmp)

        out += feps / (eps ** 2)
    return out
def fullSample( self, stabilize=False, **kwargs ):
    if( stabilize == True ):
        # temporarily replace the natural parameters with a stabilized version
        J11 = np.copy( self.J11 )
        J12 = np.copy( self.J12 )
        J22 = np.copy( self.J22 )
        A, sigma = Regression.natToStandard( -0.5 * self.J11, -0.5 * self.J22, -self.J12.T )
        A = stab( A )
        n1, n2, n3 = Regression.standardToNat( A, sigma )
        self.J11 = -2 * n1
        self.J12 = -n3.T
        self.J22 = -2 * n2
        self.log_Z = 0.5 * np.linalg.slogdet( np.linalg.inv( self.J11 ) )[ 1 ]

    ans = super( LDSState, self ).fullSample( **kwargs )

    if( stabilize == True ):
        # restore the original parameters
        self.J11 = J11
        self.J12 = J12
        self.J22 = J22
        self.log_Z = 0.5 * np.linalg.slogdet( np.linalg.inv( self.J11 ) )[ 1 ]

    return ans
def loss_augmented_inference(self, x, y, w, relaxed=False, return_energy=False):
    """Loss-augmented inference for x relative to y using parameters w.

    Finds (approximately)
        argmax_y_hat np.dot(w, joint_feature(x, y_hat)) + loss(y, y_hat)
    using self.inference_method.
    """
    self.inference_calls += 1
    self._check_size_w(w)  # check if size of w is equal to size of joint feature

    # need to compute the joint feature and hinge loss
    # return self.loss_augmented_inference_graph_iht(x, y, w, relaxed=relaxed)

    # y_hat = x
    # y_hat = np.random.rand(len(y))
    y_hat = np.zeros_like(y)
    yt = np.copy(y_hat)
    max_iter = 1000
    # projected gradient ascent on the relaxed labeling yt in [0, 1]
    for iter in range(max_iter):
        print("iter {}".format(iter))
        print("yt {}\n{}".format(yt, np.nonzero(yt)))
        y_prev = np.copy(yt)
        gradient = self._get_cost_augment_grad(x, y, w, yt)
        print("gradient {}".format(gradient))
        yt = yt + 0.001 * gradient
        yt[yt <= 0.] = 0.
        yt[yt >= 1.0] = 1.0
        gap_y = np.linalg.norm(yt - y_prev) ** 2
        if gap_y < 1e-6:
            break
    return yt
def __init__(self, name, num_models, dataset, bfgs_iter=100, debug=False,
             scale=[], num_layers=[], layer_sizes=[], activations=[], l1=0, l2=0):
    self.name = name
    self.num_models = num_models
    self.dataset = dataset
    self.bfgs_iter = bfgs_iter
    self.debug = debug
    self.num_layers = np.copy(num_layers)
    self.layer_sizes = np.copy(layer_sizes)
    self.activations = np.copy(activations)
    self.l1 = l1
    self.l2 = l2
    self.scale = np.copy(scale)
    self.construct_model()
    self.best_constr = np.inf
    self.best_y = np.zeros(self.outdim)
    self.best_y[0] = np.inf
    # get best_y from the dataset
    if 'train_x' in self.dataset:  # dict.has_key() was removed in Python 3
        self.get_best_y(self.dataset['train_x'], self.dataset['train_y'])
    else:
        self.get_best_y(self.dataset['high_x'], self.dataset['high_y'])
def _cost_batched(self, inputs, targets, hprev, Cprev, weights, disable_tqdm=True):
    W_1, b_1, W_f, b_f, W_i, b_i, W_c, b_c, W_o, b_o, W_2, b_2 = weights

    h = np.copy(hprev)
    C = np.copy(Cprev)
    h = h.reshape((self.batch_size, self.h_size, 1))
    C = C.reshape((self.batch_size, self.h_size, 1))

    loss = 0

    # W_sth_dropout = get_dropout_function((self.h_size, self.h_size + self.x_size), self.keep_prob)
    # b_sth_dropout = get_dropout_function((self.h_size,), self.keep_prob)
    # W_dropout = get_dropout_function((self.y_size, self.h_size), self.keep_prob)
    # b_dropout = get_dropout_function((self.y_size,), self.keep_prob)
    cell_dropout = get_dropout_function((self.batch_size, self.h_size, 1), self.keep_prob)
    y_dropout = get_dropout_function((self.batch_size, self.y_size, 1), self.keep_prob)

    for t in tqdm(range(len(inputs)), disable=disable_tqdm):
        x = np.array([char_to_one_hot(c) for c in inputs[:, t]])
        x = x.reshape((self.batch_size, -1, 1))
        x = np.matmul(W_1, x) + np.reshape(b_1, (-1, 1))
        x = cell_dropout(x)

        # standard LSTM cell: forget, input, candidate, and output gates
        f = sigmoid(np.matmul(W_f, np.concatenate((h, x), axis=1)) + np.reshape(b_f, (-1, 1)))
        f = cell_dropout(f)
        i = sigmoid(np.matmul(W_i, np.concatenate((h, x), axis=1)) + np.reshape(b_i, (-1, 1)))
        i = cell_dropout(i)
        C_hat = np.tanh(np.matmul(W_c, np.concatenate((h, x), axis=1)) + np.reshape(b_c, (-1, 1)))
        C_hat = cell_dropout(C_hat)
        C = f * C + i * C_hat
        C = cell_dropout(C)
        o = sigmoid(np.matmul(W_o, np.concatenate((h, x), axis=1)) + np.reshape(b_o, (-1, 1)))
        o = cell_dropout(o)
        h = o * np.tanh(C)
        h = cell_dropout(h)

        ys = np.matmul(W_2, h) + np.reshape(b_2, (-1, 1))
        ys = y_dropout(ys)

        target_indices = np.array([char_to_index[c] for c in targets[:, t]])
        # ps_target[t] = np.exp(ys[t][target_index])/np.sum(np.exp(ys[t]))  # probability for next chars being target
        # loss += -np.log(ps_target[t])
        loss += np.sum([
            -(y[target_index] - logsumexp(y))
            for y, target_index in zip(ys, target_indices)
        ]) / (self.number_of_steps * self.batch_size)

    return loss
def _cost(self, inputs, targets, hprev, Cprev, weights, disable_tqdm=True):
    W_1, b_1, W_f, b_f, W_i, b_i, W_c, b_c, W_o, b_o, W_2, b_2 = weights

    h = np.copy(hprev)
    C = np.copy(Cprev)
    loss = 0

    for t in tqdm(range(len(inputs)), disable=disable_tqdm):
        x = char_to_one_hot(inputs[t])
        x = np.matmul(W_1, x) + b_1

        f = sigmoid(np.matmul(W_f, np.concatenate((h, x))) + b_f)
        i = sigmoid(np.matmul(W_i, np.concatenate((h, x))) + b_i)
        C_hat = np.tanh(np.matmul(W_c, np.concatenate((h, x))) + b_c)
        C = f * C + i * C_hat
        o = sigmoid(np.matmul(W_o, np.concatenate((h, x))) + b_o)
        h = o * np.tanh(C)
        y = np.matmul(W_2, h) + b_2

        target_index = char_to_index[targets[t]]
        # ps_target[t] = np.exp(ys[t][target_index])/np.sum(np.exp(ys[t]))  # probability for next chars being target
        # loss += -np.log(ps_target[t])
        loss += -(y[target_index] - logsumexp(y))

    loss = loss / len(inputs)
    return loss
def validation_points_error(Xi, Xo, Hestimated):
    Xi = np.copy(Xi)
    Xo = np.copy(Xo)
    total = 0  # renamed from `sum` to avoid shadowing the builtin
    for i in range(Xo.shape[1]):
        total += geometric_distance(Xo[:, i], Xi[:, i], Hestimated)
    return total / Xo.shape[1]
def train(self, scale=1.0):
    theta = self.rand_theta(scale)
    self.loss = np.inf
    theta0 = np.copy(theta)
    self.theta = np.copy(theta)

    def loss(theta):
        nlz = self.neg_likelihood(theta)
        return nlz

    gloss = grad(loss)

    try:
        fmin_l_bfgs_b(loss, theta0, gloss,
                      maxiter=self.bfgs_iter, m=100, iprint=self.debug)
    except np.linalg.LinAlgError:
        print('Increase noise term and re-optimization')
        theta0 = np.copy(self.theta)
        theta0[1] += np.log(10)
        theta0[2] += np.log(10)
        try:
            fmin_l_bfgs_b(loss, theta0, gloss,
                          maxiter=self.bfgs_iter, m=10, iprint=self.debug)
        except:
            print('Exception caught, L-BFGS early stopping...')
            if self.debug:
                print(traceback.format_exc())
    except:
        print('Exception caught, L-BFGS early stopping...')
        if self.debug:
            print(traceback.format_exc())

    if np.isnan(self.loss) or np.isinf(self.loss):
        print('Fail to build GP model')
        sys.exit(1)
    self.alpha = chol_inv(self.L, self.y.T)
def __init__(self, dimension, inputs, obs, mini_batch=False):
    """These functions implement a standard multi-layer perceptron,
    vectorized over both training examples and weight samples."""
    self.dimension = dimension
    self.prior = FiniteDimensionalPrior(self.dimension)
    self.inputs = inputs
    self.inputs_size = len(inputs)
    self.obs = obs
    self.mini_batch = mini_batch
    if self.mini_batch:
        self.it = 0
        self.mini_batch_size = 32
        self.number_batchs = int(
            np.ceil(self.inputs_size / self.mini_batch_size))  # np.int is deprecated
        self.inputs_all = np.copy(inputs)
        self.obs_all = np.copy(obs)
        self.inputs = inputs[:self.mini_batch_size]
        self.obs = obs[:self.mini_batch_size]
    self.gx = grad(self.cost)
    self.J = jacobian(self.forward)
    self.hx = hessian_vector_product(self.cost)
    self.hvp = hvp(self.hx)
def leapfrog(M, q, p, dVdq, path_len, step_size):
    """Leapfrog integrator for standard HMC and naive SGHMC.

    Parameters
    ----------
    M : np.matrix
        Mass of the Euclidean-Gaussian kinetic energy of shape D x D
    q : np.floatX
        Initial position
    p : np.floatX
        Initial momentum
    dVdq : callable
        Gradient of the potential energy with respect to the position
    path_len : float
        How long to integrate for
    step_size : float
        How long each integration step should be

    Returns
    -------
    q, p : np.floatX, np.floatX
        New position and momentum
    """
    q, p = np.copy(q), np.copy(p)
    Minv = np.linalg.inv(M)

    p -= step_size * dVdq(q) / 2  # half step
    for _ in range(int(path_len / step_size) - 1):
        q += step_size * np.dot(Minv, p)  # whole step
        p -= step_size * dVdq(q)  # whole step
    q += step_size * np.dot(Minv, p)  # whole step
    p -= step_size * dVdq(q) / 2  # half step

    # momentum flip at end
    return q, -p
def drift_fd(self, param, pos, eps=1E-6):
    """Compute the drift force on the points.

    Args:
        param : variational parameters
        pos : position of the electrons
        eps (float) : finite-difference step used for the derivatives

    Returns:
        value of 2 * Grad Psi / Psi
    """
    ndim = pos.shape[1]
    out = np.zeros_like(pos)
    for icol in range(ndim):
        # central first-difference stencil
        pos_tmp = np.copy(pos)
        pos_tmp[:, icol] += eps
        feps = self.values(param, pos_tmp)

        pos_tmp = np.copy(pos)
        pos_tmp[:, icol] -= eps
        feps -= self.values(param, pos_tmp)

        out[:, icol] = feps.reshape(-1) / (2 * eps)

    if self.ndim_tot == 1:
        return 2 * out.reshape(-1, 1) / self.values(param, pos)
    return 2 * out / self.values(param, pos)
def optimize_constr(self, x):
    # starting point for fmin_l_bfgs_b should be a one-dimensional vector
    x0 = np.copy(x).reshape(-1)
    best_x = np.copy(x)
    best_loss = np.inf
    tmp_loss = np.inf

    def loss(x0):
        nonlocal tmp_loss
        x0 = x0.reshape(self.dim, -1)
        py, ps2 = self.models[0].predict(x0)
        tmp_loss = py.sum()
        for i in range(1, self.outdim):
            py, ps2 = self.models[i].predict(x0)
            tmp_loss += np.maximum(0, py).sum()
        return tmp_loss

    def callback(x):
        nonlocal best_x
        nonlocal best_loss
        if tmp_loss < best_loss:
            best_loss = tmp_loss
            best_x = np.copy(x)

    gloss = value_and_grad(loss)

    try:
        fmin_l_bfgs_b(gloss, x0, bounds=[[-0.5, 0.5]] * x.size,
                      maxiter=2000, m=100, iprint=self.debug, callback=callback)
    except np.linalg.LinAlgError:
        print('Optimizing constraints. Increase noise term and re-optimize')
        x0 = np.copy(best_x).reshape(-1)
        x0[0] += 0.01
        try:
            fmin_l_bfgs_b(gloss, x0, bounds=[[-0.5, 0.5]] * x.size,
                          maxiter=2000, m=10, iprint=self.debug, callback=callback)
        except:
            print('Optimizing constraints. Exception caught, L-BFGS early stopping...')
            print(traceback.format_exc())
    except:
        print('Optimizing constraints. Exception caught, L-BFGS early stopping...')
        print(traceback.format_exc())

    best_x = best_x.reshape(self.dim, -1)
    return best_x
def backtracking(weights, levels, steps, loss, gradients, l_rate, x,
                 idx_to_skip, learn_levels, learn_steps, variables,
                 n_layers_learn, loss_init=None, cst_mul=2., n_tries=30,
                 l_rate_min=1e-6):
    if loss_init is None:
        # `variables` is assumed to hold the current optimization variables
        loss_init = loss(variables, x)
    l_rate *= cst_mul
    for _ in range(n_tries):
        new_vars = change_params(deepcopy(weights), np.copy(levels),
                                 np.copy(steps), l_rate, gradients,
                                 idx_to_skip, learn_levels, learn_steps,
                                 variables, n_layers_learn)
        new_loss = loss(new_vars, x)
        if new_loss < loss_init or l_rate < l_rate_min:
            break
        l_rate /= cst_mul
    return new_vars, new_loss, l_rate
def sample(self, n_samples=2000, observed_states=None, random_state=None):
    """Generate random samples from the model.

    Parameters
    ----------
    n_samples : int
        Number of samples to generate.
    observed_states : array
        If provided, states are not sampled.
    random_state : RandomState or an int seed
        A random number generator instance. If None is given, the
        object's random_state is used.

    Returns
    -------
    samples : array_like, length (``n_samples``)
        List of samples
    states : array_like, shape (``n_samples``)
        List of hidden states (accounting for tied states by giving
        them the same index)
    """
    if random_state is None:
        random_state = self.random_state
    random_state = check_random_state(random_state)

    samples = np.zeros(n_samples)
    states = np.zeros(n_samples, dtype=int)  # integer dtype so states can index arrays

    if observed_states is None:
        startprob_pdf = np.exp(np.copy(self._log_startprob))
        startdist = stats.rv_discrete(
            name='custm',
            values=(np.arange(startprob_pdf.shape[0]), startprob_pdf),
            seed=random_state)
        states[0] = startdist.rvs(size=1)[0]

        transmat_pdf = np.exp(np.copy(self._log_transmat))
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        nrand = random_state.rand(n_samples)
        for idx in range(1, n_samples):
            newstate = (transmat_cdf[states[idx - 1]] > nrand[idx - 1]).argmax()
            states[idx] = newstate
    else:
        states = observed_states

    mu = np.copy(self._mu_)
    precision = np.copy(self._precision_)
    for idx in range(n_samples):
        mean_ = self._mu_[states[idx]]
        var_ = np.sqrt(1 / precision[states[idx]])
        samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                random_state=random_state)
    states = self._process_sequence(states)
    return samples, states
def optimize_wEI(self, x):
    x0 = np.copy(x).reshape(-1)
    best_x = np.copy(x)
    best_loss = np.inf
    tmp_loss = np.inf

    def loss(x0):
        nonlocal tmp_loss
        x0 = x0.reshape(self.dim, -1)
        tmp_loss = -self.calc_log_wEI_approx(x0)
        tmp_loss = tmp_loss.sum()
        return tmp_loss

    def callback(x):
        nonlocal best_x
        nonlocal best_loss
        if tmp_loss < best_loss:
            best_loss = tmp_loss
            best_x = np.copy(x)

    gloss = value_and_grad(loss)

    try:
        fmin_l_bfgs_b(gloss, x0, bounds=[[-0.5, 0.5]] * x.size,
                      maxiter=2000, m=100, iprint=self.debug, callback=callback)
    except np.linalg.LinAlgError:
        print('Acquisition func optimization error, increase noise term and re-optimize')
        x0 = np.copy(best_x).reshape(-1)
        x0[0] += 0.01
        try:
            # gloss returns (value, gradient), so it is the objective here,
            # matching the first call above
            fmin_l_bfgs_b(gloss, x0, bounds=[[-0.5, 0.5]] * x.size,
                          maxiter=2000, m=10, iprint=self.debug, callback=callback)
        except:
            print('Optimizing acquisition function, exception caught, L-BFGS early stopping...')
            print(traceback.format_exc())
    except:
        print('Optimizing acquisition function, exception caught, L-BFGS early stopping...')
        print(traceback.format_exc())

    best_x = best_x.reshape(self.dim, -1)
    return best_x
def transfer_error(Xo, Xi, H):
    """Transfer error including normalization.

    Xo: Object points in 2D Homogeneous Coordinates (3xn)
    Xi: Image points in 2D Homogeneous Coordinates (3xn)
    """
    Xo = np.copy(Xo)
    Xi = np.copy(Xi)
    H = np.copy(H)
    return d(Xi, np.dot(H, Xo))
def __init__(self, ll, lu, path):
    self.ll = np.copy(ll)
    self.lu = np.copy(lu)
    self.path1 = path
    self.num_layers = self.lu.size
    self.zeta = 0.1
    self.calc_lm()
    self.calc_path2()
    self.path_lengths()
def _init_params(self, data, lengths=None, params='stmp'):
    X = data['obs']

    if 's' in params:
        self.startprob_.fill(1.0 / self.n_components)

    if 't' in params or 'm' in params or 'p' in params:
        kmmod = cluster.KMeans(n_clusters=self.n_unique,
                               random_state=self.random_state).fit(X)
        kmeans = kmmod.cluster_centers_

    if 't' in params:
        # TODO: estimate transitions from data (!) / consider n_tied=1
        if self.n_tied == 0:
            transmat = np.ones([self.n_components, self.n_components])
            np.fill_diagonal(transmat, 10.0)
            self.transmat_ = transmat  # .90 for self-transition
        else:
            transmat = np.zeros((self.n_components, self.n_components))
            transmat[range(self.n_components),
                     range(self.n_components)] = 100.0  # diagonal
            transmat[range(self.n_components - 1),
                     range(1, self.n_components)] = 1.0  # diagonal + 1
            transmat[[r * (self.n_chain) - 1
                      for r in range(1, self.n_unique + 1)
                      for c in range(self.n_unique - 1)],
                     [c * (self.n_chain)
                      for r in range(self.n_unique)
                      for c in range(self.n_unique) if c != r]] = 1.0
            self.transmat_ = np.copy(transmat)

    if 'm' in params:
        mu_init = np.zeros((self.n_unique, self.n_features))
        for u in range(self.n_unique):
            for f in range(self.n_features):
                mu_init[u][f] = kmeans[u, f]
        self.mu_ = np.copy(mu_init)

    if 'p' in params:
        precision_init = np.zeros((self.n_unique, self.n_features,
                                   self.n_features))
        for u in range(self.n_unique):
            if self.n_features == 1:
                precision_init[u] = np.linalg.inv(
                    np.cov(X[kmmod.labels_ == u], bias=1))
            else:
                precision_init[u] = np.linalg.inv(
                    np.cov(np.transpose(X[kmmod.labels_ == u])))
        self.precision_ = np.copy(precision_init)
def train(self):
    theta0 = self.get_default_theta()
    self.loss = np.inf
    self.theta = np.copy(theta0)
    nlz = self.neg_log_likelihood(theta0)

    def loss(theta):
        nlz = self.neg_log_likelihood(theta)
        return nlz

    def callback(theta):
        if self.nlz < self.loss:
            self.loss = self.nlz
            self.theta = np.copy(theta)

    gloss = value_and_grad(loss)

    try:
        fmin_l_bfgs_b(gloss, theta0, maxiter=self.bfgs_iter,
                      m=100, iprint=self.debug, callback=callback)
    except np.linalg.LinAlgError:
        print('GP. Increase noise term and re-optimization.')
        theta0 = np.copy(self.theta)
        theta0[0] += np.log(10)
        try:
            fmin_l_bfgs_b(gloss, theta0, maxiter=self.bfgs_iter,
                          m=10, iprint=self.debug, callback=callback)
        except:
            print('GP. Exception caught, L-BFGS early stopping...')
            if self.debug:
                print(traceback.format_exc())
    except:
        print('GP. Exception caught, L-BFGS early stopping...')
        if self.debug:
            print(traceback.format_exc())

    sn2 = np.exp(self.theta[0])
    hyp = self.theta[1:]
    K = self.kernel(self.train_x, self.train_x, hyp) \
        + sn2 * np.eye(self.num_train) + self.jitter * np.eye(self.num_train)
    self.L = np.linalg.cholesky(K)
    self.alpha = chol_inv(self.L, self.train_y.T)
    if self.k:
        self.for_diag = np.exp(self.theta[1]) * np.exp(self.theta[3]) \
            + np.exp(self.theta[3 + self.dim])
    else:
        self.for_diag = np.exp(self.theta[1])
    print('GP. Finished training process.')
def dlyap(A, Q):
    """
    Solve the discrete-time Lyapunov equation.
    Wrapper around scipy.linalg.solve_discrete_lyapunov.
    Pass a copy of input matrices to protect them from modification.
    """
    try:
        return solve_discrete_lyapunov(np.copy(A), np.copy(Q))
    except ValueError:
        return np.full_like(Q, np.inf)
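# A minimal usage sketch (not from the original source): for A = 0.5 * I and
# Q = I, the solution of A X A^T - X + Q = 0 is X = (4/3) * I, so the residual
# of the discrete Lyapunov equation should be ~0.
import numpy as np
from scipy.linalg import solve_discrete_lyapunov

A = 0.5 * np.eye(2)
Q = np.eye(2)
X = dlyap(A, Q)
print(np.allclose(A @ X @ A.T - X + Q, 0))  # True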
def sym_transfer_error(Xo, Xi, H):
    """Symmetric transfer error.

    Xo: Object points in 2D Homogeneous Coordinates (3xn)
    Xi: Image points in 2D Homogeneous Coordinates (3xn)
    """
    Xo = np.copy(Xo)
    Xi = np.copy(Xi)
    H = np.copy(H)
    error1 = d(Xi, np.dot(H, Xo))
    error2 = d(Xo, np.dot(np.linalg.inv(H), Xi))
    return error1 + error2
def fit_new_py(x, model):
    x0 = np.copy(x).reshape(-1)
    best_x = np.copy(x)
    best_loss = np.inf
    tmp_loss = np.inf

    def loss(x0):
        nonlocal tmp_loss
        x0 = x0.reshape(model.dim, -1)
        py, ps2 = model.models[0].predict(x0)
        tmp_loss = py.sum()
        for i in range(1, model.outdim):
            py, ps2 = model.models[i].predict(x0)  # constraint model i (was models[0])
            tmp_loss += np.maximum(0, py).sum()
        return tmp_loss

    def callback(x):
        nonlocal best_loss
        nonlocal best_x
        if tmp_loss < best_loss:
            best_loss = tmp_loss
            best_x = np.copy(x)

    gloss = value_and_grad(loss)

    try:
        fmin_l_bfgs_b(gloss, x0, bounds=[[-0.5, 0.5]] * x.size,
                      maxiter=2000, m=100, iprint=model.debug, callback=callback)
    except np.linalg.LinAlgError:
        print('Fit_new_py. Increase noise term and re-optimize')
        x0 = np.copy(best_x).reshape(-1)
        x0[0] += 0.01
        try:
            fmin_l_bfgs_b(gloss, x0, bounds=[[-0.5, 0.5]] * x.size,
                          maxiter=2000, m=10, iprint=model.debug, callback=callback)
        except:
            print('Fit_new_py. Exception caught, L-BFGS early stopping...')
            print(traceback.format_exc())
    except:
        print('Fit_new_py. Exception caught, L-BFGS early stopping...')
        print(traceback.format_exc())

    return best_x
def fit_new_py(x, model):
    x0 = np.copy(x).reshape(-1)
    best_x = np.copy(x)
    best_loss = np.inf

    def loss(x0):
        nonlocal best_x
        nonlocal best_loss
        x0 = x0.reshape(model.dim, -1)
        py, ps2 = model.models[0].predict(x0)
        tmp_loss = py.sum()
        for i in range(1, model.outdim):
            py, ps2 = model.models[i].predict(x0)  # constraint model i (was models[0])
            tmp_loss += np.maximum(0, py).sum()
        if tmp_loss < best_loss:
            best_loss = tmp_loss
            best_x = np.copy(x0)
        return tmp_loss

    gloss = grad(loss)

    try:
        fmin_l_bfgs_b(loss, x0, gloss, bounds=[[-0.5, 0.5]] * x.size,
                      maxiter=2000, m=100, iprint=model.debug)
    except np.linalg.LinAlgError:
        print('Fit_new_py. Increase noise term and re-optimize')
        x0 = np.copy(best_x).reshape(-1)
        x0[0] += 0.01
        try:
            fmin_l_bfgs_b(loss, x0, gloss, bounds=[[-0.5, 0.5]] * x.size,
                          maxiter=2000, m=10, iprint=model.debug)
        except:
            print('Fit_new_py. Exception caught, L-BFGS early stopping...')
            print(traceback.format_exc())
    except:
        print('Fit_new_py. Exception caught, L-BFGS early stopping...')
        print(traceback.format_exc())

    if np.isnan(best_loss) or np.isinf(best_loss):
        print('Fit_new_py. Fail to build GP model')
        sys.exit(1)
    return best_x
def get_best_y(self, x, y):
    for i in range(y.shape[1]):
        constr = np.maximum(y[1:, i], 0).sum()
        if constr < self.best_constr and self.best_constr > 0:
            self.best_constr = constr
            self.best_y = np.copy(y[:, i])
            self.best_x = np.copy(x[:, i])
        elif constr <= 0 and self.best_constr <= 0 \
                and y[0, i] < self.best_y[0]:
            self.best_constr = constr
            self.best_y = np.copy(y[:, i])
            self.best_x = np.copy(x[:, i])
def geometric_distance(Xo, Xi, H):
    """
    Xi: point measured in the image
    Xo: real value of the model point
    H: an estimated homography, as defined in Multiple View Geometry
       in Computer Vision
    """
    Xo = np.copy(Xo)
    Xi = np.copy(Xi)
    H = np.copy(H)
    Xio = np.dot(H, Xo)
    return np.sqrt((Xi[0] / Xi[2] - Xio[0] / Xio[2]) ** 2
                   + (Xi[1] / Xi[2] - Xio[1] / Xio[2]) ** 2)
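# A minimal usage sketch (not from the original source): for a pure-translation
# homography, a point and its image shifted by (1, 2) should give zero
# geometric distance. The names `H_shift`, `Xo_pt`, `Xi_pt` are illustrative.
import numpy as np

H_shift = np.array([[1., 0., 1.],
                    [0., 1., 2.],
                    [0., 0., 1.]])
Xo_pt = np.array([3., 4., 1.])   # model point, homogeneous coordinates
Xi_pt = np.array([4., 6., 1.])   # image point = model point + (1, 2)
print(geometric_distance(Xo_pt, Xi_pt, H_shift))  # ~0.0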
def _set_startprob(self, startprob):
    if startprob is None:
        startprob = np.tile(1.0 / self.n_components, self.n_components)
    else:
        startprob = np.asarray(startprob, dtype=float)  # np.float is deprecated

        if not np.all(startprob <= 1.0):  # np.alltrue is deprecated
            normalize(startprob)

        if len(startprob) != self.n_components:
            if len(startprob) == self.n_unique:
                startprob_split = np.copy(startprob) / (1.0 + self.n_tied)
                startprob = np.zeros(self.n_components)
                for u in range(self.n_unique):
                    for t in range(self.n_chain):
                        startprob[u * (self.n_chain) + t] = \
                            startprob_split[u].copy()
            else:
                raise ValueError("cannot match shape of startprob")

    if not np.allclose(np.sum(startprob), 1.0):
        raise ValueError('startprob must sum to 1.0')

    self._log_startprob = np.log(np.asarray(startprob).copy())
def _obj_grad(self, wrt, m, p, a, xn, xln, gn, entries='all', **kwargs):
    m = m.reshape(self.n_unique, self.n_features, 1)  # tm

    if wrt == 'm':
        wrt_num = 0
    elif wrt == 'p':
        wrt_num = 1
    elif wrt == 'a':
        wrt_num = 2
    else:
        raise ValueError('unknown parameter')

    res = grad(self._obj, wrt_num)(m, p, a, xn, xln, gn)

    if wrt == 'p' and self.n_features > 1:
        if entries == 'diag':
            res_new = \
                np.zeros((self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    res_new[u, f, f] = res[u, f, f]
            res = np.copy(res_new)
        elif entries == 'offdiag':
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    res[u, f, f] = 0.

    res = np.array([res])
    return res
def hard_thr(x, lambdaPar, lower=None, upper=None):
    out = np.copy(x)
    out[np.abs(x) < lambdaPar] = 0.0
    if lower is not None:
        out[out < lower] = 0.0
    if upper is not None:
        out[out > upper] = 0.0
    return out
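# A minimal usage sketch (not from the original source): hard-threshold a small
# vector at lambdaPar=1.0, additionally zeroing anything above upper=3.0.
import numpy as np

v = np.array([-2.5, -0.4, 0.0, 0.9, 1.5, 3.5])
print(hard_thr(v, 1.0))             # [-2.5  0.   0.   0.   1.5  3.5]
print(hard_thr(v, 1.0, upper=3.0))  # [-2.5  0.   0.   0.   1.5  0. ]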
def draw(linePoints, points):
    linePoints = np.copy(linePoints)
    points = np.copy(points)
    # assert False  # not implemented
    canvasSize = (256, 256, 3)
    scale = 20
    offset = (5, 5)
    linePoints += offset
    linePoints *= scale
    p1 = linePoints[0]
    p2 = linePoints[1]
    p1 = (int(p1[0]), int(p1[1]))
    p2 = (int(p2[0]), int(p2[1]))
    canvas = np.zeros(canvasSize, dtype=np.uint8)
    print(p1, p2)  # Python 3 print
    # cv2.circle(canvas, p1, 2, (0, 255, 0), 2)
    # cv2.circle(canvas, p2, 2, (0, 255, 0), 2)
    cv2.line(canvas, p1, p2, (0, 255, 0))
    cv2.imshow('', canvas)
    cv2.waitKey()
def _set_transmat_prior(self, transmat_prior_val):
    # new val needs to be n_unique x n_unique
    # internally, n_components x n_components
    # _ntied_transmat_prior is called to get n_components x n_components
    transmat_prior_new = np.zeros((self.n_components, self.n_components))

    if transmat_prior_val is not None:
        if transmat_prior_val.shape == (self.n_unique, self.n_unique):
            transmat_prior_new = \
                np.copy(self._ntied_transmat_prior(transmat_prior_val))
        else:
            raise ValueError("cannot match shape of transmat_prior")

    self.transmat_prior = transmat_prior_new
def _set_startprob_prior(self, startprob_prior):
    if startprob_prior is None or startprob_prior == 1.0:
        startprob_prior = np.zeros(self.n_components)
    else:
        startprob_prior = np.asarray(startprob_prior, dtype=float)  # np.float is deprecated

        if len(startprob_prior) != self.n_components:
            if len(startprob_prior) == self.n_unique:
                startprob_prior_split = np.copy(startprob_prior) / \
                    (1.0 + self.n_tied)
                startprob_prior = np.zeros(self.n_components)
                for u in range(self.n_unique):
                    for t in range(self.n_chain):
                        startprob_prior[u * (self.n_chain) + t] = \
                            startprob_prior_split[u].copy()
            else:
                raise ValueError("cannot match shape of startprob")

    self.startprob_prior = np.asarray(startprob_prior).copy()
def _set_transmat(self, transmat_val):
    if transmat_val is None:
        transmat = np.tile(1.0 / self.n_components,
                           (self.n_components, self.n_components))
    else:
        transmat_val[np.isnan(transmat_val)] = 0.0
        normalize(transmat_val, axis=1)

        if (np.asarray(transmat_val).shape == (self.n_components,
                                               self.n_components)):
            transmat = np.copy(transmat_val)
        elif transmat_val.shape[0] == self.n_unique:
            transmat = self._ntied_transmat(transmat_val)
        else:
            raise ValueError("cannot match shape of transmat")

    if not np.all(np.allclose(np.sum(transmat, axis=1), 1.0)):
        raise ValueError('Rows of transmat must sum to 1.0')

    self._log_transmat = np.log(np.asarray(transmat).copy())
    underflow_idx = np.isnan(self._log_transmat)
    self._log_transmat[underflow_idx] = NEGINF
def _do_mstep_grad(self, gn, data):
    wrt = [str(p) for p in self.wrt if str(p) in self.params]
    for update_idx in range(self.n_iter_update):
        for p in wrt:
            if p in 'm':
                optim_x0 = self.mu_
                newv = self._do_optim(p, optim_x0, gn, data)
                self.mu_ = newv
            elif p in 'a':
                optim_x0 = self.alpha_
                newv = self._do_optim(p, optim_x0, gn, data)
                self.alpha_ = newv
            elif p == 'p':
                optim_x0 = self.precision_
                # update just diagonal
                newv = self._do_optim(p, optim_x0, gn, data, entries='diag')
                template = np.copy(self.precision_)
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        template[u, f, f] = newv[u, f, f]
                self.precision_ = template

                optim_x0 = self.precision_
                # update just off diagonal
                newv = self._do_optim(p, optim_x0, gn, data,
                                      entries='offdiag')
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        newv[u, f, f] = self.precision_[u, f, f] + 0.

                # ensure that precision matrix is symmetric
                for u in range(self.n_unique):
                    newv[u, :, :] = (newv[u, :, :] + newv[u, :, :].T) / 2.0
                self.precision_ = newv
def finite_difference_second_order_(self, func, x):
    n_dim = len(x)
    func_x = func(x)
    hessian = np.zeros((n_dim, n_dim))
    for i in range(n_dim):
        for j in range(n_dim):
            # central-difference stencil for the mixed partial d2f/dxi dxj
            x_copy = np.copy(x)
            x_copy[i] += self.finite_diff_eps
            x_copy[j] += self.finite_diff_eps
            fpp = func(x_copy)

            x_copy = np.copy(x)
            x_copy[i] += self.finite_diff_eps
            fp_ = func(x_copy)

            x_copy = np.copy(x)
            x_copy[j] += self.finite_diff_eps
            f_p = func(x_copy)

            x_copy = np.copy(x)
            x_copy[i] -= self.finite_diff_eps
            fn_ = func(x_copy)

            x_copy = np.copy(x)
            x_copy[j] -= self.finite_diff_eps
            f_n = func(x_copy)

            x_copy = np.copy(x)
            x_copy[i] -= self.finite_diff_eps
            x_copy[j] -= self.finite_diff_eps
            fnn = func(x_copy)

            hessian[i, j] = fpp - fp_ - f_p - f_n - fn_ + fnn
    hessian = (hessian + 2 * func_x) / (2 * self.finite_diff_eps ** 2)
    return hessian
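# A minimal standalone check of the same mixed-partial stencil (not from the
# original source, which is a method): for f(x) = x0^2 + 3*x0*x1 the exact
# Hessian is [[2, 3], [3, 0]], which the estimate should approximate.
import numpy as np

def fd_hessian(func, x, eps=1e-4):
    n = len(x)
    fx = func(x)
    H = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            e_i = np.zeros(n)
            e_i[i] = eps
            e_j = np.zeros(n)
            e_j[j] = eps
            H[i, j] = (func(x + e_i + e_j) - func(x + e_i) - func(x + e_j)
                       - func(x - e_i) - func(x - e_j) + func(x - e_i - e_j)
                       + 2 * fx) / (2 * eps ** 2)
    return H

f = lambda x: x[0] ** 2 + 3 * x[0] * x[1]
print(fd_hessian(f, np.array([1.0, 2.0])))  # approx [[2., 3.], [3., 0.]]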
def build_ilqr_tracking_solver(self, ref_pnts, weight_mats):
    # figure out dimension
    self.T_ = len(ref_pnts)
    self.n_dims_ = len(ref_pnts[0])

    self.ref_array = np.copy(ref_pnts)
    self.weight_array = [mat for mat in weight_mats]
    # clone weight mats if there are not enough weight mats
    for i in range(self.T_ - len(self.weight_array)):
        self.weight_array.append(self.weight_array[-1])

    # build dynamics, second-order linear dynamical system
    self.A_ = np.eye(self.n_dims_ * 2)
    self.A_[0:self.n_dims_, self.n_dims_:] = np.eye(self.n_dims_) * self.dt_
    self.B_ = np.zeros((self.n_dims_ * 2, self.n_dims_))
    self.B_[self.n_dims_:, :] = np.eye(self.n_dims_) * self.dt_

    self.plant_dyn_ = lambda x, u, t, aux: np.dot(self.A_, x) + np.dot(self.B_, u)

    # build cost functions, quadratic ones
    def tmp_cost_func(x, u, t, aux):
        err = x[0:self.n_dims_] - self.ref_array[t]
        # autograd does not allow A.dot(B)
        cost = np.dot(np.dot(err, self.weight_array[t]), err) + np.sum(u ** 2) * self.R_
        if t > self.T_ - 1:
            # regularize velocity for the termination point
            # autograd does not allow self increment
            cost = cost + np.sum(x[self.n_dims_:] ** 2) * self.R_ * self.Q_vel_ratio_
        return cost

    self.cost_ = tmp_cost_func
    self.ilqr_ = pylqr.PyLQR_iLQRSolver(T=self.T_ - 1,
                                        plant_dyn=self.plant_dyn_,
                                        cost=self.cost_,
                                        use_autograd=self.use_autograd)

    if not self.use_autograd:
        self.plant_dyn_dx_ = lambda x, u, t, aux: self.A_
        self.plant_dyn_du_ = lambda x, u, t, aux: self.B_

        def tmp_cost_func_dx(x, u, t, aux):
            err = x[0:self.n_dims_] - self.ref_array[t]
            grad = np.concatenate([2 * err.dot(self.weight_array[t]),
                                   np.zeros(self.n_dims_)])
            if t > self.T_ - 1:
                # velocity block of the 1-D state vector
                grad[self.n_dims_:] = grad[self.n_dims_:] \
                    + 2 * self.R_ * self.Q_vel_ratio_ * x[self.n_dims_:]
            return grad

        self.cost_dx_ = tmp_cost_func_dx
        self.cost_du_ = lambda x, u, t, aux: 2 * self.R_ * u

        def tmp_cost_func_dxx(x, u, t, aux):
            hessian = np.zeros((2 * self.n_dims_, 2 * self.n_dims_))
            hessian[0:self.n_dims_, 0:self.n_dims_] = 2 * self.weight_array[t]
            if t > self.T_ - 1:
                hessian[self.n_dims_:, self.n_dims_:] = \
                    2 * np.eye(self.n_dims_) * self.R_ * self.Q_vel_ratio_
            return hessian

        self.cost_dxx_ = tmp_cost_func_dxx
        self.cost_duu_ = lambda x, u, t, aux: 2 * self.R_ * np.eye(self.n_dims_)
        self.cost_dux_ = lambda x, u, t, aux: np.zeros((self.n_dims_, 2 * self.n_dims_))

        # build an iLQR solver based on given functions...
        self.ilqr_.plant_dyn_dx = self.plant_dyn_dx_
        self.ilqr_.plant_dyn_du = self.plant_dyn_du_
        self.ilqr_.cost_dx = self.cost_dx_
        self.ilqr_.cost_du = self.cost_du_
        self.ilqr_.cost_dxx = self.cost_dxx_
        self.ilqr_.cost_duu = self.cost_duu_
        self.ilqr_.cost_dux = self.cost_dux_

    return
def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')
        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                count = 0
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(
                            X[kmmod.labels_ == u, f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[count].params[1:])
                        ar_resid.append(ar_mod[count].resid)
                        count += 1
            else:
                # run one AR model on most part of time series
                # that has most points assigned after clustering
                mf = np.argmax(np.bincount(kmmod.labels_))
                for f in range(self.n_features):
                    ar_mod.append(smapi.tsa.AR(
                        X[kmmod.labels_ == mf, f]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[f].params[1:])
                    ar_resid.append(ar_mod[f].resid)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    mu_init[u, f] = kmeans[u, f] - np.dot(
                        np.repeat(kmeans[u, f], self.n_lags),
                        ar_alpha[ar_idx])
            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = \
                np.zeros((self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = 1.0 / (np.var(X[kmmod.labels_ == u]))
                else:
                    precision_init[u] = np.linalg.inv(
                        np.cov(np.transpose(X[kmmod.labels_ == u])))
                    # Alternative: initialization using ar_resid
                    # for f in range(self.n_features):
                    #     if not self.shared_alpha:
                    #         precision_init[u, f, f] = 1. / np.var(ar_resid[count])
                    #         count += 1
                    #     else:
                    #         precision_init[u, f, f] = 1. / np.var(ar_resid[f])
            self.precision_ = np.copy(precision_init)

        if 'a' in params:
            if self.shared_alpha:
                alpha_init = ar_alpha[0].reshape((1, self.n_lags))
            else:
                alpha_init = np.zeros((self.n_unique, self.n_lags))
                ar_idx = 0  # initialized before the loop so each state gets its own AR fit
                for u in range(self.n_unique):
                    alpha_init[u] = ar_alpha[ar_idx]
                    ar_idx += self.n_features
            self.alpha_ = np.copy(alpha_init)
def _accumulate_sufficient_statistics(self, stats, X, framelogprob,
                                      posteriors, fwdlattice, bwdlattice):
    """Updates sufficient statistics from a given sample.

    Parameters
    ----------
    stats : dict
        Sufficient statistics as returned by
        :meth:`~base._BaseHMM._initialize_sufficient_statistics`.
    X : array, shape (n_samples, n_features)
        Sample sequence.
    framelogprob : array, shape (n_samples, n_components)
        Log-probabilities of each sample under each of the model states.
    posteriors : array, shape (n_samples, n_components)
        Posterior probabilities of each sample being generated by each
        of the model states.
    fwdlattice, bwdlattice : array, shape (n_samples, n_components)
        Log-forward and log-backward probabilities.
    """
    # Based on hmmlearn's _BaseHMM
    safe_transmat = self.transmat_ + np.finfo(float).eps

    stats['nobs'] += 1
    if 's' in self.params:
        stats['start'] += posteriors[0]
    if 't' in self.params:
        n_samples, n_components = framelogprob.shape
        # when the sample is of length 1, it contains no transitions
        # so there is no reason to update our trans. matrix estimate
        if n_samples <= 1:
            return

        lneta = np.zeros((n_samples - 1, n_components, n_components))
        _hmmc._compute_lneta(n_samples, n_components, fwdlattice,
                             np.log(safe_transmat),
                             bwdlattice, framelogprob, lneta)
        stats['trans'] += np.exp(logsumexp(lneta, axis=0))

        # stats['trans'] = np.round(stats['trans'])
        # if np.sum(stats['trans']) != X.shape[0]-1:
        #     warnings.warn("transmat counts != n_samples", RuntimeWarning)
        #     import pdb; pdb.set_trace()

        template = np.zeros((self.n_components, self.n_components))

        for u in range(self.n_components):
            template[u, u] = stats['trans'][u, u] + 0.

        for l in range(self.n_components - 1):
            template[l, (l + 1)] = stats['trans'][l, (l + 1)] + 0.

        for b in range(self.n_unique):
            transition_index = \
                [i * self.n_chain for i in range(self.n_unique)]
            transition_index.remove(b * self.n_chain)

            block = \
                stats['trans'][self.n_chain * b: self.n_chain * (b + 1)][:] + 0.

            template_block = \
                template[self.n_chain * b: self.n_chain * (b + 1)][:] + 0.

            for i in transition_index:
                template_block[(self.n_chain - 1), i] = \
                    block[(self.n_chain - 1), i]

            template[self.n_chain * b: self.n_chain * (b + 1)][:] = \
                template_block

        stats['trans'] = np.copy(template)
def _do_mstep(self, stats, params):
    # M-Step for startprob and transmat
    if 's' in params:
        startprob_ = self.startprob_prior + stats['start']
        normalize(startprob_)
        self.startprob_ = np.where(self.startprob_ <= np.finfo(float).eps,
                                   self.startprob_, startprob_)
    if 't' in params:
        if self.n_tied == 0:
            transmat_ = self.transmat_prior + stats['trans']
            normalize(transmat_, axis=1)
            self.transmat_ = np.where(self.transmat_ <= np.finfo(float).eps,
                                      self.transmat_, transmat_)
        else:
            transmat_ = np.zeros((self.n_components, self.n_components))
            transitionCnts = stats['trans'] + self.transmat_prior
            transition_index = [i * self.n_chain for i in range(self.n_unique)]

            for b in range(self.n_unique):
                block = \
                    transitionCnts[self.n_chain * b: self.n_chain * (b + 1)][:] + 0.

                denominator_diagonal = np.sum(block)
                diagonal = 0.0

                index_line = range(0, self.n_chain)
                index_row = range(self.n_chain * b, self.n_chain * (b + 1))

                for l, r in zip(index_line, index_row):
                    diagonal += block[l][r]

                for l, r in zip(index_line, index_row):
                    block[l][r] = diagonal / denominator_diagonal

                self_transition = block[0][self.n_chain * b]
                denominator_off_diagonal = \
                    (np.sum(block[self.n_chain - 1])) - self_transition
                template = block[self.n_chain - 1] + 0.

                for entry in range(len(template)):
                    template[entry] = (template[entry] * (1 - self_transition)) \
                        / float(denominator_off_diagonal)

                template[(self.n_chain * (b + 1)) - 1] = 0.
                line_value = 1 - self_transition

                for entry in range(len(template)):
                    line_value = line_value - template[entry]

                for index in transition_index:
                    if index != (b * self.n_chain):
                        block[self.n_chain - 1][index] = \
                            line_value + template[index]

                line = range(self.n_chain - 1)
                row = [b * self.n_chain + i for i in range(1, self.n_chain)]

                for x, y in zip(line, row):
                    block[x][y] = 1 - self_transition

                transmat_[self.n_chain * b: self.n_chain * (b + 1)][:] = block

            self.transmat_ = np.copy(transmat_)
def sample(self, n_samples=2000, observed_states=None, init_samples=None,
           init_state=None, random_state=None):
    """Generate random samples from the model.

    Parameters
    ----------
    n_samples : int
        Number of samples to generate.
    observed_states : array
        If provided, states are not sampled.
    random_state : RandomState or an int seed
        A random number generator instance. If None is given, the
        object's random_state is used.
    init_state : int
        If provided, initial state is not sampled.
    init_samples : array, default: None
        If provided, initial samples (for AR) are not sampled.
    E : array-like, shape (n_samples, n_inputs)
        Feature matrix of individual inputs.

    Returns
    -------
    samples : array_like, length (``n_samples``)
        List of samples
    states : array_like, shape (``n_samples``)
        List of hidden states (accounting for tied states by giving
        them the same index)
    """
    if random_state is None:
        random_state = self.random_state
    random_state = check_random_state(random_state)

    samples = np.zeros(n_samples)
    states = np.zeros(n_samples, dtype=int)  # integer dtype so states can index arrays

    order = self.n_lags

    if init_state is None:
        startprob_pdf = np.exp(np.copy(self._log_startprob))
        start_dist = stats.rv_discrete(
            name='custm',
            values=(np.arange(startprob_pdf.shape[0]), startprob_pdf),
            seed=random_state)
        start_state = start_dist.rvs(size=1)[0]
    else:
        start_state = init_state

    if self.n_lags > 0:
        if init_samples is None:
            """
            n_init_samples = order + 10
            noise = np.sqrt(1.0/self._precision_[start_state]) * \
                random_state.randn(n_init_samples)

            pad_after = n_init_samples - order - 1
            col = np.pad(1*self._alpha_[start_state, :], (1, pad_after),
                         mode='constant')
            row = np.zeros(n_init_samples)
            col[0] = row[0] = 1

            A = toeplitz(col, row)
            init_samples = np.dot(pinv(A), noise + self._mu_[start_state])
            # TODO: fix bug with n_lags > 1, blows up
            """
            init_samples = 0.01 * np.ones((self.n_lags, self.n_features))  # temporary fix

    if observed_states is None:
        transmat_pdf = np.exp(np.copy(self._log_transmat))
        transmat_cdf = np.cumsum(transmat_pdf, 1)
        states[0] = (transmat_cdf[start_state] > random_state.rand()).argmax()

        transmat_pdf = np.exp(self._log_transmat)
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        nrand = random_state.rand(n_samples)
        for idx in range(1, n_samples):
            newstate = (transmat_cdf[states[idx - 1]] > nrand[idx - 1]).argmax()
            states[idx] = newstate
    else:
        states = observed_states

    precision = np.copy(self._precision_)
    for idx in range(n_samples):
        state_ = int(states[idx])
        var_ = np.sqrt(1 / precision[state_])

        if self.n_lags == 0:
            mean_ = np.copy(self._mu_[state_])
        else:
            mean_ = np.copy(self._mu_[state_])

            for lag in range(1, order + 1):
                if idx < lag:
                    prev_ = init_samples[len(init_samples) - lag]
                else:
                    prev_ = samples[idx - lag]

                mean_ += np.copy(self._alpha_[state_, lag - 1]) * prev_

        samples[idx] = norm.rvs(loc=mean_, scale=var_, size=1,
                                random_state=random_state)
    states = self._process_sequence(states)
    return samples, states
def manual_grads(params):
    """
    Compute the gradient of the loss WRT the parameters
    Ordering of the operations is reverse of that in fprop()
    """
    deltas = {}
    for key, val in params.items():  # iteritems() was removed in Python 3
        deltas[key] = np.zeros_like(val)

    [loss, mems, ps, ys, os, zos, hs, zhs, xs, rs, w_rs, w_ws, adds, erases,
     k_rs, k_ws, g_rs, g_ws, wc_rs, wc_ws, zbeta_rs, zbeta_ws, zs_rs, zs_ws,
     wg_rs, wg_ws] = self.stats

    dd = {}
    drs = {}
    dzh = {}
    dmem = {}  # might not need this, since we have dmemtilde
    dmemtilde = {}
    du_r = {}
    du_w = {}
    dwg_r = {}
    dwg_w = {}
    for t in reversed(range(len(targets))):  # xrange was removed in Python 3
        dy = np.copy(ps[t])
        dy -= targets[t].T  # backprop into y

        deltas['oy'] += np.dot(dy, os[t].T)
        deltas['by'] += dy

        if t < len(targets) - 1:
            # r[t] affects cost through zh[t+1] via Wrh
            drs[t] = np.dot(self.W['rh'].T, dzh[t + 1])

            # right now, mems[t] influences cost through rs[t+1], via w_rs[t+1]
            dmem[t] = np.dot(w_rs[t + 1], drs[t + 1].reshape((self.M, 1)).T)
            # and also through mems at next step
            W = np.reshape(w_ws[t + 1], (w_ws[t + 1].shape[0], 1))
            E = np.reshape(erases[t + 1], (erases[t + 1].shape[0], 1))
            WTE = np.dot(W, E.T)
            KEEP = np.ones(mems[0].shape) - WTE
            dmem[t] += np.multiply(dmemtilde[t + 1], KEEP)
            # and also through its influence on the content weighting next step
            dmem[t] += du_r[t + 1] + du_w[t + 1]

            dmemtilde[t] = dmem[t]

            # erases[t] affects cost through mems[t], via w_ws[t]
            derase = np.dot(np.multiply(dmemtilde[t], -mems[t - 1]).T, w_ws[t])

            # zerase affects just erases through a sigmoid
            dzerase = derase * (erases[t] * (1 - erases[t]))

            # adds[t] affects costs through mems[t], via w_ws
            dadd = np.dot(dmem[t].T, w_ws[t])

            # zadds affects just adds through a tanh
            dzadd = dadd * (1 - adds[t] * adds[t])

            # dbadds is just dzadds
            deltas['badds'] += dzadd
            deltas['oadds'] += np.dot(dzadd, os[t].T)
            deltas['berases'] += dzerase
            deltas['oerases'] += np.dot(dzerase, os[t].T)

            # # read weights affect what is read, via what's in mems[t-1]
            # dwc_r = np.dot(mems[t-1], drs[t])
            # # write weights affect mem[t] through adding
            # dwc_w = np.dot(dmem[t], adds[t])
            # # they also affect memtilde[t] through erasing
            # dwc_w += np.dot(np.multiply(dmemtilde[t], -mems[t-1]), erases[t])

            dw_r = np.dot(mems[t - 1], drs[t])
            dw_r += dwg_r[t + 1] * (1 - g_rs[t + 1])

            # write weights affect mem[t] through adding
            dw_w = np.dot(dmem[t], adds[t])
            # they also affect memtilde[t] through erasing
            dw_w += np.dot(np.multiply(dmemtilde[t], -mems[t - 1]), erases[t])
            dw_w += dwg_w[t + 1] * (1 - g_ws[t + 1])

            sgwr = np.zeros((self.N, self.N))
            sgww = np.zeros((self.N, self.N))
            for i in range(self.N):
                sgwr[i, i] = softmax(zs_rs[t])[0]
                sgwr[i, (i + 1) % self.N] = softmax(zs_rs[t])[2]
                sgwr[i, (i - 1) % self.N] = softmax(zs_rs[t])[1]

                sgww[i, i] = softmax(zs_ws[t])[0]
                sgww[i, (i + 1) % self.N] = softmax(zs_ws[t])[2]
                sgww[i, (i - 1) % self.N] = softmax(zs_ws[t])[1]

            # right now, shifted weights are final weight
            dws_r = dw_r
            dws_w = dw_w

            dwg_r[t] = np.dot(sgwr.T, dws_r)
            dwg_w[t] = np.dot(sgww.T, dws_w)

            dwc_r = dwg_r[t] * g_rs[t]
            dwc_w = dwg_w[t] * g_ws[t]

            """
            We need dw/dK now
            w has N elts and K has N elts
            and we want, for every elt of W, the grad of that elt w.r.t.
            each of the N elts of K. that gives us N * N things
            """
            # first, we must build up the K values (should be taken from fprop)
            K_rs = []
            K_ws = []
            for i in range(self.N):
                K_rs.append(cosine_sim(mems[t - 1][i, :], k_rs[t]))
                K_ws.append(cosine_sim(mems[t - 1][i, :], k_ws[t]))

            # then, we populate the grads
            dwdK_r = np.zeros((self.N, self.N))
            dwdK_w = np.zeros((self.N, self.N))
            # for every row in the memory
            for i in range(self.N):
                # for every element in the weighting
                for j in range(self.N):
                    dwdK_r[i, j] += softmax_grads(K_rs, softplus(zbeta_rs[t]), i, j)
                    dwdK_w[i, j] += softmax_grads(K_ws, softplus(zbeta_ws[t]), i, j)

            # compute dK for all i in N
            # K is the evaluated cosine similarity for the i-th row of mem matrix
            dK_r = np.zeros_like(w_rs[0])
            dK_w = np.zeros_like(w_ws[0])
            # for all i in N (for every row that we've simmed)
            for i in range(self.N):
                # for every j in N (for every elt of the weighting)
                for j in range(self.N):
                    # specifically, dwdK_r will change, and for write as well
                    dK_r[i] += dwc_r[j] * dwdK_r[i, j]
                    dK_w[i] += dwc_w[j] * dwdK_w[i, j]

            """
            dK_r_dk_rs is a list of N things
            each elt of the list corresponds to grads of K_idx w.r.t. the key k_t
            so it should be a length N list of M by 1 vectors
            """
            dK_r_dk_rs = []
            dK_r_dmem = []
            for i in range(self.N):
                # let k_rs be u, Mem[i] be v
                u = np.reshape(k_rs[t], (self.M,))
                v = mems[t - 1][i, :]
                dK_r_dk_rs.append(dKdu(u, v))
                dK_r_dmem.append(dKdu(v, u))

            dK_w_dk_ws = []
            dK_w_dmem = []
            for i in range(self.N):
                # let k_ws be u, Mem[i] be v
                u = np.reshape(k_ws[t], (self.M,))
                v = mems[t - 1][i, :]
                dK_w_dk_ws.append(dKdu(u, v))
                dK_w_dmem.append(dKdu(v, u))

            # compute delta for keys
            dk_r = np.zeros_like(k_rs[0])
            dk_w = np.zeros_like(k_ws[0])
            # for every one of M elt of dk_r
            for i in range(self.M):
                # for every one of the N Ks
                for j in range(self.N):
                    # add delta K_r[j] * dK_r[j] / dk_r[i]
                    # add influence on through K_r[j]
                    dk_r[i] += dK_r[j] * dK_r_dk_rs[j][i]
                    dk_w[i] += dK_w[j] * dK_w_dk_ws[j][i]

            # these represent influence of mem on next K
            """
            Let's let du_r[t] represent the influence of mems[t-1] on the cost
            through the K values
            this is analogous to dk_w, but k only ever affects that,
            whereas mems[t-1] will also affect what is read at time t+1
            and through memtilde at time t+1
            """
            du_r[t] = np.zeros_like(mems[0])
            du_w[t] = np.zeros_like(mems[0])
            # for every row in mems[t-1]
            for i in range(self.N):
                # for every elt of this row (one of M)
                for j in range(self.M):
                    du_r[t][i, j] = dK_r[i] * dK_r_dmem[i][j]
                    du_w[t][i, j] = dK_w[i] * dK_w_dmem[i][j]

            # key values are activated as tanh
            dzk_r = dk_r * (1 - k_rs[t] * k_rs[t])
            dzk_w = dk_w * (1 - k_ws[t] * k_ws[t])

            deltas['ok_r'] += np.dot(dzk_r, os[t].T)
            deltas['ok_w'] += np.dot(dzk_w, os[t].T)
            deltas['bk_r'] += dzk_r
            deltas['bk_w'] += dzk_w

            dg_r = np.dot(dwg_r[t].T, (wc_rs[t] - w_rs[t - 1]))
            dg_w = np.dot(dwg_w[t].T, (wc_ws[t] - w_ws[t - 1]))

            # compute dzg_r, dzg_w
            dzg_r = dg_r * (g_rs[t] * (1 - g_rs[t]))
            dzg_w = dg_w * (g_ws[t] * (1 - g_ws[t]))

            deltas['og_r'] += np.dot(dzg_r, os[t].T)
            deltas['og_w'] += np.dot(dzg_w, os[t].T)
            deltas['bg_r'] += dzg_r
            deltas['bg_w'] += dzg_w

            # compute dbeta, which affects w_content through interaction with Ks
            dwcdbeta_r = np.zeros_like(w_rs[0])
            dwcdbeta_w = np.zeros_like(w_ws[0])
            for i in range(self.N):
                dwcdbeta_r[i] = beta_grads(K_rs, softplus(zbeta_rs[t]), i)
                dwcdbeta_w[i] = beta_grads(K_ws, softplus(zbeta_ws[t]), i)

            dbeta_r = np.zeros_like(zbeta_rs[0])
            dbeta_w = np.zeros_like(zbeta_ws[0])
            for i in range(self.N):
                dbeta_r[0] += dwc_r[i] * dwcdbeta_r[i]
                dbeta_w[0] += dwc_w[i] * dwcdbeta_w[i]

            # beta is activated from zbeta by softplus, grad of which is sigmoid
            dzbeta_r = dbeta_r * sigmoid(zbeta_rs[t])
            dzbeta_w = dbeta_w * sigmoid(zbeta_ws[t])

            deltas['obeta_r'] += np.dot(dzbeta_r, os[t].T)
            deltas['obeta_w'] += np.dot(dzbeta_w, os[t].T)
            deltas['bbeta_r'] += dzbeta_r
            deltas['bbeta_w'] += dzbeta_w

            sgsr = np.zeros((self.N, 3))
            sgsw = np.zeros((self.N, 3))
            for i in range(self.N):
                sgsr[i, 1] = wg_rs[t][(i - 1) % self.N]
                sgsr[i, 0] = wg_rs[t][i]
                sgsr[i, 2] = wg_rs[t][(i + 1) % self.N]

                sgsw[i, 1] = wg_ws[t][(i - 1) % self.N]
                sgsw[i, 0] = wg_ws[t][i]
                sgsw[i, 2] = wg_ws[t][(i + 1) % self.N]

            ds_r = np.dot(sgsr.T, dws_r)
            ds_w = np.dot(sgsw.T, dws_w)

            shift_act_jac_r = np.zeros((3, 3))
            shift_act_jac_w = np.zeros((3, 3))
            bf = np.array([[1.0]])
            for i in range(3):
                for j in range(3):
                    shift_act_jac_r[i, j] = softmax_grads(zs_rs[t], bf, i, j)
                    shift_act_jac_w[i, j] = softmax_grads(zs_ws[t], bf, i, j)

            dzs_r = np.dot(shift_act_jac_r.T, ds_r)
            dzs_w = np.dot(shift_act_jac_w.T, ds_w)

            deltas['os_r'] += np.dot(dzs_r, os[t].T)
            deltas['os_w'] += np.dot(dzs_w, os[t].T)
            deltas['bs_r'] += dzs_r
            deltas['bs_w'] += dzs_w
        else:
            drs[t] = np.zeros_like(rs[0])
            dmemtilde[t] = np.zeros_like(mems[0])
            du_r[t] = np.zeros_like(mems[0])
            du_w[t] = np.zeros_like(mems[0])
            dwg_r[t] = np.zeros_like(w_rs[0])
            dwg_w[t] = np.zeros_like(w_ws[0])

        # o affects y through Woy
        do = np.dot(params['oy'].T, dy)
        if t < len(targets) - 1:
            # and also zadd through Woadds
            do += np.dot(params['oadds'].T, dzadd)
            do += np.dot(params['oerases'].T, dzerase)
            # and also through the keys
            do += np.dot(params['ok_r'].T, dzk_r)
            do += np.dot(params['ok_w'].T, dzk_w)
            # and also through the interpolators
            do += np.dot(params['og_r'].T, dzg_r)
            do += np.dot(params['og_w'].T, dzg_w)
            # and also through beta
            do += np.dot(params['obeta_r'].T, dzbeta_r)
            do += np.dot(params['obeta_w'].T, dzbeta_w)
            # and also through the shift values
            do += np.dot(params['os_r'].T, dzs_r)
            do += np.dot(params['os_w'].T, dzs_w)

        # compute deriv w.r.t. pre-activation of o
        dzo = do * (1 - os[t] * os[t])

        deltas['ho'] += np.dot(dzo, hs[t].T)
        deltas['bo'] += dzo

        # compute hidden dh
        dh = np.dot(params['ho'].T, dzo)

        # compute deriv w.r.t. pre-activation of h
        dzh[t] = dh * (1 - hs[t] * hs[t])

        deltas['xh'] += np.dot(dzh[t], xs[t].T)
        deltas['bh'] += dzh[t]

        # Wrh affects zh via rs[t-1]
        deltas['rh'] += np.dot(dzh[t], rs[t - 1].reshape((self.M, 1)).T)

    return deltas
def sample(self, n_samples=2000, observed_states=None, init_samples=None,
           init_state=None, random_state=None):
    """Generate random samples from the model.

    Parameters
    ----------
    n_samples : int
        Number of samples to generate.
    observed_states : array
        If provided, states are not sampled.
    random_state : RandomState or an int seed
        A random number generator instance. If None is given, the
        object's random_state is used.
    init_state : int
        If provided, initial state is not sampled.
    init_samples : array, default: None
        If provided, initial samples (for AR) are not sampled.
    E : array-like, shape (n_samples, n_inputs)
        Feature matrix of individual inputs.

    Returns
    -------
    samples : array_like, shape (``n_samples``, ``n_features``)
        List of samples
    states : array_like, shape (``n_samples``)
        List of hidden states (accounting for tied states by giving
        them the same index)
    """
    if random_state is None:
        random_state = self.random_state
    random_state = check_random_state(random_state)

    samples = np.zeros((n_samples, self.n_features))
    states = np.zeros(n_samples, dtype=int)  # integer dtype so states can index arrays

    order = self.n_lags

    if init_state is None:
        startprob_pdf = np.exp(np.copy(self._log_startprob))
        start_dist = stats.rv_discrete(
            name='custm',
            values=(np.arange(startprob_pdf.shape[0]), startprob_pdf),
            seed=random_state)
        start_state = start_dist.rvs(size=1)[0]
    else:
        start_state = init_state

    if self.n_lags > 0:
        if init_samples is None:
            init_samples = 0.01 * np.ones((self.n_lags, self.n_features))  # TODO: better init

    if observed_states is None:
        transmat_pdf = np.exp(np.copy(self._log_transmat))
        transmat_cdf = np.cumsum(transmat_pdf, 1)
        states[0] = (transmat_cdf[start_state] > random_state.rand()).argmax()

        transmat_pdf = np.exp(self._log_transmat)
        transmat_cdf = np.cumsum(transmat_pdf, 1)

        nrand = random_state.rand(n_samples)
        for idx in range(1, n_samples):
            newstate = (transmat_cdf[states[idx - 1]] > nrand[idx - 1]).argmax()
            states[idx] = newstate
    else:
        states = observed_states

    precision = np.copy(self._precision_)
    for idx in range(n_samples):
        state_ = int(states[idx])
        covar_ = np.linalg.inv(precision[state_])

        if self.n_lags == 0:
            mean_ = np.copy(self._mu_[state_])
        else:
            mean_ = np.copy(self._mu_[state_])

            for lag in range(1, order + 1):
                if idx < lag:
                    prev_ = init_samples[len(init_samples) - lag]
                else:
                    prev_ = samples[idx - lag]

                mean_ += np.copy(self._alpha_[state_, lag - 1]) * prev_

        samples[idx] = self.multivariate_t_rvs(mean_, covar_, random_state)
    states = self._process_sequence(states)
    return samples, states
def reporter(p):
    """Reporter function to capture intermediate states of optimization."""
    global ps
    # ps.append(p)
    ps.append(np.copy(p))  # copy, so later mutation of p does not overwrite history
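# A minimal usage sketch (not from the original source): `reporter` fits the
# callback signature of scipy.optimize.minimize, so each iterate is recorded
# in the global list `ps`.
import numpy as np
from scipy.optimize import minimize

ps = []
rosen = lambda x: (1 - x[0]) ** 2 + 100 * (x[1] - x[0] ** 2) ** 2
minimize(rosen, np.array([-1.0, 1.0]), callback=reporter)
print(len(ps), ps[-1])  # number of recorded iterates and the final one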
def ilqr_iterate(self, x0, u_init, n_itrs=50, tol=1e-6, verbose=True):
    # initialize the regularization term
    self.reg = 1

    # derive the initial guess trajectory from the initial guess of u
    x_array = self.forward_propagation(x0, u_init)
    u_array = np.copy(u_init)

    # initialize current trajectory cost
    J_opt = self.evaluate_trajectory_cost(x_array, u_init)
    J_hist = [J_opt]

    # iterates...
    converged = False
    for i in range(n_itrs):
        k_array, K_array = self.back_propagation(x_array, u_array)
        norm_k = np.mean(np.linalg.norm(k_array, axis=1))
        # apply the control to update the trajectory by trying different alpha
        accept = False
        for alpha in self.alpha_array:
            x_array_new, u_array_new = self.apply_control(
                x_array, u_array, k_array, K_array, alpha)
            # evaluate the cost of this trial
            J_new = self.evaluate_trajectory_cost(x_array_new, u_array_new)

            if J_new < J_opt:
                # see if it is converged
                if np.abs((J_opt - J_new) / J_opt) < tol:
                    # replacement for the next iteration
                    J_opt = J_new
                    x_array = x_array_new
                    u_array = u_array_new
                    converged = True
                    break
                else:
                    # replacement for the next iteration
                    J_opt = J_new
                    x_array = x_array_new
                    u_array = u_array_new
                    # successful step, decrease the regularization term
                    # momentum-like adaptive regularization
                    self.reg = np.max([self.reg_min, self.reg / self.reg_factor])
                    accept = True
                    print('Iteration {0}:\tJ = {1};\tnorm_k = {2};\treg = {3}'.format(
                        i + 1, J_opt, norm_k, np.log10(self.reg)))
                    break
            else:
                # don't accept this
                accept = False

        J_hist.append(J_opt)

        # exit if converged...
        if converged:
            print('Converged at iteration {0}; J = {1}; reg = {2}'.format(
                i + 1, J_opt, self.reg))
            break

        # see if all the trials are rejected
        if not accept:
            # need to increase regularization
            # check if the regularization term is too large
            if self.reg > self.reg_max:
                print('Exceeds regularization limit at iteration {0}; '
                      'terminate the iterations'.format(i + 1))
                break

            self.reg = self.reg * self.reg_factor
            if verbose:
                print('Reject the control perturbation. '
                      'Increase the regularization term.')

    # prepare result dictionary
    res_dict = {
        'J_hist': np.array(J_hist),
        'x_array_opt': np.array(x_array),
        'u_array_opt': np.array(u_array),
        'k_array_opt': np.array(k_array),
        'K_array_opt': np.array(K_array),
    }
    return res_dict
def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')
        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                for u in range(self.n_unique):
                    ar_mod.append(smapi.tsa.AR(
                        X[kmmod.labels_ == u]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[u].params[1:])
                    ar_resid.append(ar_mod[u].resid)
            else:
                # run one AR model on most part of time series
                # that has most points assigned after clustering
                mf = np.argmax(np.bincount(kmmod.labels_))
                ar_mod.append(smapi.tsa.AR(
                    X[kmmod.labels_ == mf]).fit(self.n_lags))
                ar_alpha.append(ar_mod[0].params[1:])
                ar_resid.append(ar_mod[0].resid)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                ar_idx = u
                if self.shared_alpha:
                    ar_idx = 0
                mu_init[u] = kmeans[u, 0] - np.dot(
                    np.repeat(kmeans[u, 0], self.n_lags), ar_alpha[ar_idx])
            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                if not self.shared_alpha:
                    maxVar = np.max([np.var(ar_resid[i])
                                     for i in range(self.n_unique)])
                else:
                    maxVar = np.var(ar_resid[0])
                precision_init[u] = 1.0 / maxVar
            self.precision_ = np.copy(precision_init)

        if 'a' in params:
            alpha_init = np.zeros((self.n_unique, self.n_lags))
            for u in range(self.n_unique):
                ar_idx = u
                if self.shared_alpha:
                    ar_idx = 0
                alpha_init[u, :] = ar_alpha[ar_idx]
            self.alpha_ = alpha_init