def sample(self, T, input=None, tag=None):
    K, D = self.K, self.D
    input = np.zeros((T, self.M)) if input is None else input
    mask = np.ones((T, D), dtype=bool)

    # Initialize outputs
    z = np.zeros(T, dtype=int)
    x = np.zeros((T, D))

    # Sample discrete and continuous latent states
    pi0 = np.exp(self.init_state_distn.log_initial_state_distn(x, input, mask, tag))
    z[0] = npr.choice(self.K, p=pi0)
    x[0] = self.dynamics.sample_x(z[0], x[:0], tag=tag)

    for t in range(1, T):
        Pt = np.exp(self.transitions.log_transition_matrices(
            x[t - 1:t + 1], input[t - 1:t + 1], mask=mask[t - 1:t + 1], tag=tag))[0]
        z[t] = npr.choice(self.K, p=Pt[z[t - 1]])
        x[t] = self.dynamics.sample_x(z[t], x[:t], input=input[t], tag=tag)

    # Sample observations given latent states
    y = self.emissions.sample_y(z, x, input=input, tag=tag)
    return z, x, y
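# A minimal, self-contained sketch (not from the original source) of the core
# pattern shared by the sample() methods above and below: draw z[0] from an
# initial distribution, then draw each z[t] from the row of a transition matrix
# indexed by z[t-1]. All names here are illustrative.
import numpy as np
import numpy.random as npr

def sample_markov_chain(pi0, P, T):
    """Sample a length-T discrete state sequence from (pi0, P)."""
    z = np.zeros(T, dtype=int)
    z[0] = npr.choice(len(pi0), p=pi0)
    for t in range(1, T):
        z[t] = npr.choice(P.shape[1], p=P[z[t - 1]])
    return z

# Example: a "sticky" 2-state chain.
pi0 = np.array([0.5, 0.5])
P = np.array([[0.95, 0.05],
              [0.05, 0.95]])
z = sample_markov_chain(pi0, P, T=100)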
def sample(self, T, prefix=None, input=None, tag=None, with_noise=True):
    K, D = self.K, self.D

    # If prefix is given, pad the output with it
    if prefix is None:
        pad = 1
        z = np.zeros(T + 1, dtype=int)
        data = np.zeros((T + 1, D))
        input = np.zeros((T + 1, self.M)) if input is None else input
        mask = np.ones((T + 1, D), dtype=bool)

        # Sample the first state from the initial distribution
        pi0 = np.exp(self.init_state_distn.log_initial_state_distn(data, input, mask, tag))
        z[0] = npr.choice(self.K, p=pi0)
        data[0] = self.observations.sample_x(z[0], data[:0], with_noise=with_noise)
    else:
        zhist, xhist = prefix
        pad = len(zhist)
        assert zhist.dtype == int and zhist.min() >= 0 and zhist.max() < K
        assert xhist.shape == (pad, D)

        z = np.concatenate((zhist, np.zeros(T, dtype=int)))
        data = np.concatenate((xhist, np.zeros((T, D))))
        input = np.zeros((T + pad, self.M)) if input is None else input
        mask = np.ones((T + pad, D), dtype=bool)

    # Fill in the rest of the data
    for t in range(pad, pad + T):
        Pt = np.exp(self.transitions.log_transition_matrices(
            data[t - 1:t + 1], input[t - 1:t + 1], mask=mask[t - 1:t + 1], tag=tag))[0]
        z[t] = npr.choice(self.K, p=Pt[z[t - 1]])
        data[t] = self.observations.sample_x(z[t], data[:t], input=input[t],
                                             tag=tag, with_noise=with_noise)

    return z[pad:], data[pad:]
def sample(self, T, input=None, tag=None, prefix=None, with_noise=True):
    K = self.K
    D = (self.D,) if isinstance(self.D, int) else self.D
    M = (self.M,) if isinstance(self.M, int) else self.M
    assert isinstance(D, tuple)
    assert isinstance(M, tuple)

    # If prefix is given, pad the output with it
    if prefix is None:
        pad = 1
        z = np.zeros(T + 1, dtype=int)
        x = np.zeros((T + 1,) + D)
        data = np.zeros((T + 1,) + D)
        input = np.zeros((T + 1,) + M) if input is None else input
        xmask = np.ones((T + 1,) + D, dtype=bool)

        # Sample the first state from the initial distribution
        pi0 = np.exp(self.init_state_distn.log_initial_state_distn(data, input, xmask, tag))
        z[0] = npr.choice(self.K, p=pi0)
        x[0] = self.dynamics.sample_x(z[0], x[:0], tag=tag, with_noise=with_noise)
    else:
        zhist, xhist, yhist = prefix
        pad = len(zhist)
        assert zhist.dtype == int and zhist.min() >= 0 and zhist.max() < K
        assert xhist.shape == (pad,) + D      # D is a tuple here; (pad, D) was wrong
        assert yhist.shape == (pad, self.N)   # `N` was undefined; use the emission dim

        z = np.concatenate((zhist, np.zeros(T, dtype=int)))
        x = np.concatenate((xhist, np.zeros((T,) + D)))
        input = np.zeros((T + pad,) + M) if input is None else input
        xmask = np.ones((T + pad,) + D, dtype=bool)

    # Sample z and x
    for t in range(pad, T + pad):
        Pt = np.exp(self.transitions.log_transition_matrices(
            x[t - 1:t + 1], input[t - 1:t + 1], mask=xmask[t - 1:t + 1], tag=tag))[0]
        z[t] = npr.choice(self.K, p=Pt[z[t - 1]])
        x[t] = self.dynamics.sample_x(z[t], x[:t], input=input[t], tag=tag,
                                      with_noise=with_noise)

    # Sample observations given latent states
    # TODO: sample in the loop above?
    y = self.emissions.sample(z, x, input=input, tag=tag)
    return z[pad:], x[pad:], y[pad:]
def action_selection_mlp(params, model, X, actions, epsilon):
    X = np.atleast_2d(X)
    n, _ = X.shape
    nactions = len(actions)

    R = np.empty((nactions, n))
    for ai, a in enumerate(actions):
        X[:, :nactions] = a  # note: overwrites the action slots of X in place
        R[ai] = model.forward(params, X).reshape(-1)

    print()
    print('rewards (action_selection):', R.mean(axis=1))

    # Epsilon-greedy: with probability epsilon take a random action,
    # otherwise take the greedy (highest predicted reward) action.
    A = np.argmax(R, axis=0)
    A_ = rnd.choice(nactions, size=n)  # was hard-coded to 2
    G = rnd.choice([False, True], p=[epsilon, 1 - epsilon], size=n)
    return np.where(G, A, A_)
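# A small standalone illustration (hypothetical values, not from the source) of
# the epsilon-greedy selection used above: np.where(G, greedy, random) picks the
# greedy action wherever the Bernoulli(1 - epsilon) draw G is True.
import numpy as np
import numpy.random as rnd

R = np.array([[1.0, 0.2, 0.5],   # predicted reward of action 0 per state
              [0.3, 0.9, 0.4]])  # predicted reward of action 1 per state
epsilon = 0.1
n = R.shape[1]
greedy = R.argmax(axis=0)
random_actions = rnd.choice(R.shape[0], size=n)
G = rnd.choice([False, True], p=[epsilon, 1 - epsilon], size=n)
actions = np.where(G, greedy, random_actions)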
def load_csv_test_split(filename, test_size, rand_seed, input_name,
                        target_name, conditions):
    data = ([], [])
    with open(filename) as file:
        reader = csv.DictReader(file)
        next(reader)  # skips the first data row (DictReader has already consumed the header)
        for row in reader:
            add_row = True
            for key in conditions.keys():
                if row[key] != conditions[key]:
                    add_row = False
            if add_row:
                data[0].append(row[input_name])
                data[1].append(float(row[target_name]))
    data = np.array(data)

    rand.seed(rand_seed)
    sequence = rand.choice(data[0].size, data[0].size, replace=False)
    testset = (data[0][sequence[:int(data[0].size * test_size)]],
               data[1][sequence[:int(data[0].size * test_size)]])
    trainset = (data[0][sequence[int(data[0].size * test_size):]],
                data[1][sequence[int(data[0].size * test_size):]])
    rand.seed()

    print('Loaded', trainset[0].size, 'training points;',
          testset[0].size, 'test points.')
    return trainset, testset
def kernel_regression(train_data, kernel_pairs, ltrewards):
    obs_data = train_data['obs_set']
    dim_obs = obs_data.shape[1]
    data_count = obs_data.shape[0]
    k = 4  # number of nearest neighbors

    pred_obs = np.zeros((data_count, dim_obs))
    pred_ltreward = np.zeros(data_count)
    for i in range(data_count):
        pairs_vec = kernel_pairs[i, :]
        NN = np.argsort(pairs_vec)[0:k]
        kernel_sum = np.sum(pairs_vec[NN])
        if kernel_sum != 0:
            # Kernel-weighted average of the neighbors' rewards;
            # the weights must be pairs_vec[NN], not pairs_vec[0:k]
            pred_ltreward[i] = np.sum(pairs_vec[NN] * ltrewards[NN]) / kernel_sum
        else:
            pred_ltreward[i] = 0
        obs_ind = npr.choice(NN, 1)[0]
        pred_obs[i, :] = get_next_observation(obs_ind,
                                              train_data['dataindex_set'][i],
                                              train_data)

    loss = calculate_loss(pred_ltreward, ltrewards)
    history_lengths = get_history_length(train_data)
    return pred_obs, history_lengths
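# A toy, self-contained example (values invented) of the kernel-weighted
# average used above: the prediction is a convex combination of the neighbors'
# targets, weighted by their kernel similarity.
import numpy as np

weights = np.array([0.5, 0.3, 0.2])   # kernel values of the k nearest neighbors
targets = np.array([1.0, 2.0, 4.0])   # their long-term rewards
prediction = np.sum(weights * targets) / np.sum(weights)  # -> 1.9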
def initialize(self, x, u, **kwargs):
    localize = kwargs.get('localize', True)

    Ts = [_x.shape[0] for _x in x]
    if localize:
        from sklearn.cluster import KMeans
        km = KMeans(self.nb_states, random_state=1)
        km.fit(np.vstack(x))
        zs = np.split(km.labels_, np.cumsum(Ts)[:-1])
        zs = [z[:-1] for z in zs]
    else:
        zs = [npr.choice(self.nb_states, size=T - 1) for T in Ts]

    _cov = np.zeros((self.nb_states, self.dm_obs, self.dm_obs))
    for k in range(self.nb_states):
        ts = [np.where(z == k)[0] for z in zs]
        xs = [np.hstack((_x[t, :], _u[t, :])) for t, _x, _u in zip(ts, x, u)]
        ys = [_x[t + 1, :] for t, _x in zip(ts, x)]

        coef_, intercept_, sigma = linear_regression(xs, ys)
        self.A[k, ...] = coef_[:, :self.dm_obs]
        self.B[k, ...] = coef_[:, self.dm_obs:]
        self.c[k, :] = intercept_
        _cov[k, ...] = sigma

    self.cov = _cov
def initialize(self, datas, inputs=None, masks=None, tags=None):
    # Initialize with linear regressions
    from sklearn.linear_model import LinearRegression
    data = np.concatenate(datas)
    input = np.concatenate(inputs)
    T = data.shape[0]

    for k in range(self.K):
        for d in range(self.D):
            ts = npr.choice(T - self.lags, replace=False,
                            size=(T - self.lags) // self.K)
            x = np.column_stack([data[ts + l, d:d + 1] for l in range(self.lags)]
                                + [input[ts, :self.M]])
            y = data[ts + self.lags, d:d + 1]
            lr = LinearRegression().fit(x, y)

            self.As[k, d] = lr.coef_[:, :self.lags]
            self.Vs[k, d] = lr.coef_[:, self.lags:self.lags + self.M]
            self.bs[k, d] = lr.intercept_

            resid = y - lr.predict(x)
            sigmas = np.var(resid, axis=0)
            self.inv_sigmas[k, d] = np.log(sigmas + 1e-16)
def action_selection_bnn(params, model, X, actions, nsamples):
    X = np.atleast_2d(X)
    n, _ = X.shape
    nactions = len(actions)

    R = np.empty((nactions, nsamples, n))
    for ai, a in enumerate(actions):
        X[:, :nactions] = a  # note: overwrites the action slots of X in place
        for si in range(nsamples):
            R[ai, si] = model.forward(params, X).reshape(-1)

    print()
    print('rewards (action_selection):', R.mean(axis=(1, 2)))

    # Average predicted reward over posterior samples, then pick greedily.
    # Exploration here is a fixed 50/50 coin flip, since no epsilon is passed.
    A = R.mean(axis=1).argmax(axis=0)
    A_ = rnd.choice(nactions, size=n)  # was hard-coded to 2
    G = rnd.choice([False, True], size=n)
    return np.where(G, A, A_)
def initialize(self, datas, inputs=None, masks=None, tags=None, init_method="random"):
    Ts = [data.shape[0] for data in datas]

    # Get initial discrete states
    if init_method.lower() == 'kmeans':
        # KMeans clustering
        from sklearn.cluster import KMeans
        km = KMeans(self.K)
        km.fit(np.vstack(datas))
        zs = np.split(km.labels_, np.cumsum(Ts)[:-1])
    elif init_method.lower() == 'random':
        # Random assignment
        zs = [npr.choice(self.K, size=T) for T in Ts]
    else:
        raise Exception('Not an accepted initialization type: {}'.format(init_method))

    # Make a one-hot encoding of z and treat it as HMM expectations
    Ezs = [one_hot(z, self.K) for z in zs]
    expectations = [(Ez, None, None) for Ez in Ezs]

    # Set the variances all at once to use the setter
    self.m_step(expectations, datas, inputs, masks, tags)
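# A minimal sketch (assumption: this matches the behavior of the one_hot helper
# used above) of turning integer labels into a one-hot matrix that can stand in
# for HMM posterior expectations E[z_t = k].
import numpy as np

def one_hot(z, K):
    """(T,) int labels -> (T, K) one-hot matrix."""
    E = np.zeros((len(z), K))
    E[np.arange(len(z)), z] = 1.0
    return E

# one_hot(np.array([0, 2, 1]), K=3) ->
# [[1, 0, 0],
#  [0, 0, 1],
#  [0, 1, 0]]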
def rand_unif_sample(self, n):
    """Returns a random uniform sample of n experiences.

    Arguments:
    n -- number of transitions to sample
    """
    indices = npr.choice(self.size, replace=False, size=n)
    exp_batch = np.array(self.exp_buffer)[indices]
    return np.reshape(exp_batch, (n, -1))
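# A compact, self-contained sketch (assumed structure, not the original class)
# of the replay buffer surrounding rand_unif_sample: a list of flat transition
# vectors, sampled uniformly without replacement.
import numpy as np
import numpy.random as npr

class UniformReplayBuffer:
    def __init__(self, capacity):
        self.capacity = capacity
        self.exp_buffer = []

    @property
    def size(self):
        return len(self.exp_buffer)

    def add(self, transition):
        # Drop the oldest experience once capacity is reached.
        if self.size >= self.capacity:
            self.exp_buffer.pop(0)
        self.exp_buffer.append(transition)

    def rand_unif_sample(self, n):
        indices = npr.choice(self.size, replace=False, size=n)
        return np.array(self.exp_buffer)[indices].reshape(n, -1)

buf = UniformReplayBuffer(capacity=1000)
for t in range(100):
    buf.add(npr.randn(6))          # e.g. (s, a, r, s') flattened
batch = buf.rand_unif_sample(32)   # -> shape (32, 6)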
def main(argv):
    del argv

    n_clusters = FLAGS.num_clusters
    n_dimensions = FLAGS.num_dimensions
    n_observations = FLAGS.num_observations

    alpha = 3.3 * np.ones(n_clusters)
    a = 1.
    b = 1.
    kappa = 0.1

    npr.seed(10001)

    # generate true latents and data
    pi = npr.gamma(alpha)
    pi /= pi.sum()
    mu = npr.normal(0, 1.5, [n_clusters, n_dimensions])
    z = npr.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = npr.normal(mu[z, :], 0.5**2)

    # points used for initialization
    pi_est = np.ones(n_clusters) / n_clusters
    z_est = npr.choice(np.arange(n_clusters), size=n_observations, p=pi_est)
    mu_est = npr.normal(0., 0.01, [n_clusters, n_dimensions])
    tau_est = 1.
    init_vals = pi_est, z_est, mu_est, tau_est

    # instantiate the model log joint
    log_joint = make_log_joint(x, alpha, a, b, kappa)

    # run mean field on variational mean parameters
    def callback(meanparams, itr):  # note: unused below; `itr` was a free (undefined) variable
        fig = plot(meanparams, x)
        plt.savefig('/tmp/gmm_{:04d}.png'.format(itr))
        plt.close(fig.number)

    start = time.time()
    cavi(log_joint, init_vals, (SIMPLEX, INTEGER, REAL, NONNEGATIVE),
         FLAGS.num_iterations, callback=lambda *args: None)
    runtime = time.time() - start
    print("CAVI Runtime (s): ", runtime)
def sample(self, total_steps=0):
    """Sample from the experience buffer by rank prioritization if specified.
    Otherwise sampling is done uniformly.

    Keyword arguments:
    total_steps -- number of steps taken in experiment (default: 0)
    """
    N = self.size
    num_samples = np.min((self.batch_size * self.num_strata_samples, self.size))

    # Perform uniform sampling of experience buffer
    if not self.mem_priority:
        indices = npr.choice(N, replace=False, size=num_samples)
        exp_batch = np.array(self.exp_buffer)[indices]
        weights = np.ones(len(indices)) / (len(indices) * 1.0)
        return np.reshape(exp_batch, (num_samples, -1)), weights, indices

    # Perform prioritized sampling of experience buffer
    else:
        # Find the closest precomputed distribution by size
        dist_idx = math.floor(N / float(self.capacity) * self.num_partitions)
        distribution = self.distributions[int(dist_idx)]
        N = int(dist_idx) * 100

        rank_indices_set = set()
        # Perform stratified sampling of priority queue
        for i_exp in range(num_samples)[::-1]:
            # To increase the training batch size we sample several times from
            # each stratum; repeated indices are eliminated by the set.
            # (// replaces /, which yields floats under Python 3)
            rank_indices_set.add(npr.randint(
                distribution['strata_ends'][i_exp // self.num_strata_samples],
                distribution['strata_ends'][(i_exp // self.num_strata_samples) + 1]))
        rank_indices = list(rank_indices_set)
        exp_indices = self.pq.get_values_by_val(rank_indices)
        exp_batch = [self.exp_buffer[int(exp_idx)] for exp_idx in exp_indices]

        # Compute importance sampling weights
        beta = np.min([self.beta_zero
                       + (total_steps - self.num_init_train - 1) * self.beta_grad, 1])
        IS_weights = np.power(N * distribution['pdf'][rank_indices], -1 * beta)

        # Normalize IS_weights by the maximum weight, which guarantees that
        # IS weights only scale downwards
        w_max = np.max(IS_weights)
        IS_weights = IS_weights / float(w_max)

        return np.reshape(exp_batch, (len(exp_indices), -1)), IS_weights, exp_indices
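# A small numeric illustration (values invented) of the importance-sampling
# correction above: w_i = (N * P(i))^(-beta), normalized by the maximum weight
# so all weights are <= 1.
import numpy as np

N = 1000
P = np.array([0.01, 0.005, 0.001])  # sampling probabilities of the chosen ranks
beta = 0.5
w = np.power(N * P, -beta)
w /= w.max()  # -> [0.316..., 0.447..., 1.0]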
def val_split(data, val_fraction, seed=np.array([])):
    if seed.any():
        npr.seed(seed)
    sequence = npr.choice(data[0].size, data[0].size, replace=False)
    val_lim = int(val_fraction * data[0].size)

    val_inputs = data[0][sequence[:val_lim]]
    val_targets = data[1][sequence[:val_lim]].astype('double')
    train_inputs = data[0][sequence[val_lim:]]
    train_targets = data[1][sequence[val_lim:]].astype('double')

    if seed.any():
        npr.seed()  # re-seed from entropy so later draws are not deterministic
    return train_inputs, train_targets, val_inputs, val_targets
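# Hypothetical usage (data invented, assuming the val_split above is in scope):
# split 20% of the points off for validation, reproducibly.
import numpy as np

inputs = np.arange(10)
targets = np.arange(10) * 2.0
train_x, train_y, val_x, val_y = val_split((inputs, targets), 0.2,
                                           seed=np.array([42]))
# len(val_x) == 2, len(train_x) == 8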
def run_experiment(train_inputs, train_targets, val_inputs, val_targets,
                   model_params, train_params, vanilla_net_params, filename=''):
    val_size = 1000

    conv_layer_sizes = [model_params['conv_width']] * model_params['fp_depth']
    conv_arch_params = {'num_hidden_features': conv_layer_sizes,
                        'fp_length': model_params['fp_length'],
                        'normalize': 1}
    loss_fun, pred_fun, conv_parser = \
        build_conv_deep_net(conv_arch_params, vanilla_net_params,
                            model_params['L2_reg'])
    num_weights = len(conv_parser)

    predict_func, trained_weights, conv_training_curve = \
        train_nn(pred_fun, loss_fun, num_weights, train_inputs, train_targets,
                 train_params, validation_aa=val_inputs,
                 validation_raw_targets=val_targets)

    if filename != '':
        with open(filename + '.pkl', 'wb') as f:  # binary mode for pickle
            pickle.dump(trained_weights, f)

    train_selection = npr.choice(train_inputs.size, val_size)
    train_predictions = predict_func(train_inputs[train_selection])
    val_selection = npr.choice(val_inputs.size, val_size)
    val_predictions = predict_func(val_inputs[val_selection])
    plot_training(conv_training_curve)

    return predict_func, \
        pearsonr(train_predictions, train_targets[train_selection])[0], \
        pearsonr(val_predictions, val_targets[val_selection])[0]
def forcast(self, hist_obs=None, hist_act=None, nxt_act=None,
            horizon=None, stoch=True, infer='viterbi'):
    # (sic: "forcast" matches the parent-class method it overrides)
    if self.learn_ctl:
        nxt_state = []
        nxt_obs = []
        nxt_act = []
        for n in range(len(horizon)):
            _hist_obs = hist_obs[n]
            _hist_act = hist_act[n]

            _nxt_act = np.zeros((horizon[n] + 1, self.dm_act))
            _nxt_obs = np.zeros((horizon[n] + 1, self.dm_obs))
            _nxt_state = np.zeros((horizon[n] + 1,), np.int64)

            if infer == 'viterbi':
                _, _state_seq = self.viterbi(_hist_obs, _hist_act)
                _state = _state_seq[0][-1]
            else:
                _belief = self.filter(_hist_obs, _hist_act)
                _state = npr.choice(self.nb_states, p=_belief[0][-1, ...])

            _nxt_state[0] = _state
            _nxt_obs[0, :] = _hist_obs[-1, ...]
            _nxt_act[0, :] = _hist_act[-1, ...]

            for t in range(horizon[n]):
                _nxt_state[t + 1] = self.transitions.sample(
                    _nxt_state[t], _nxt_obs[t, :], _nxt_act[t, :])
                _nxt_obs[t + 1, :] = self.observations.sample(
                    _nxt_state[t + 1], _nxt_obs[t, :], _nxt_act[t, :], stoch=stoch)
                _nxt_act[t + 1, :] = self.controls.sample(
                    _nxt_state[t + 1], _nxt_obs[t + 1, :], stoch=stoch)

            nxt_state.append(_nxt_state)
            nxt_obs.append(_nxt_obs)
            nxt_act.append(_nxt_act)

        return nxt_state, nxt_obs, nxt_act
    else:
        return super(erARHMM, self).forcast(hist_obs, hist_act, nxt_act,
                                            horizon, stoch, infer)
def callback(weights, iter):
    if iter % 10 == 0:
        print("max of weights", np.max(np.abs(weights)))
        selection = npr.choice(train_aa.size, size=num_print_examples)
        train_preds = undo_norm(pred_fun(weights, train_aa[selection]))
        cur_loss = loss_fun(weights, train_aa[selection], train_targets[selection])
        training_curve[0].append(cur_loss)
        train_RMSE = rmse(train_preds, train_raw_targets[selection])
        training_curve[1].append(train_RMSE)
        print("Iteration", iter, "loss", cur_loss,
              "train RMSE", train_RMSE, end=' ')

        if validation_aa is not None:
            selection = npr.choice(validation_aa.size, size=num_print_examples)
            validation_preds = undo_norm(pred_fun(weights, validation_aa[selection]))
            val_RMSE = rmse(validation_preds, validation_raw_targets[selection])
            training_curve[2].append(val_RMSE)
            print("Validation RMSE", iter, ":", val_RMSE, end=' ')
def step(self, hist_obs=None, hist_act=None, stoch=True, infer='viterbi'):
    if infer == 'viterbi':
        _, _state_seq = self.viterbi(hist_obs, hist_act)
        _state = _state_seq[0][-1]
    else:
        _belief = self.filter(hist_obs, hist_act)
        _state = npr.choice(self.nb_states, p=_belief[0][-1, ...])

    _act = hist_act[-1, :]
    _obs = hist_obs[-1, :]

    nxt_state = self.transitions.sample(_state, _obs, _act)
    nxt_obs = self.observations.sample(nxt_state, _obs, _act, stoch=stoch)

    return nxt_state, nxt_obs
def initialize(self, x, u, **kwargs):
    localize = kwargs.get('localize', False)

    Ts = [_x.shape[0] for _x in x]
    if localize:
        from sklearn.cluster import KMeans
        km = KMeans(self.nb_states, random_state=1)
        km.fit(np.vstack(x))
        zs = np.split(km.labels_, np.cumsum(Ts)[:-1])
        zs = [z[:-1] for z in zs]
    else:
        zs = [npr.choice(self.nb_states, size=T - 1) for T in Ts]

    _cov = np.zeros((self.nb_states, self.dm_obs, self.dm_obs))
    for k in range(self.nb_states):
        # Select the transformation for this state
        si = int(self.rot_lds[k, 0])
        sj = int(self.rot_lds[k, 1])
        T = self.T[sj, ...]

        ts = [np.where(z == k)[0] for z in zs]
        xs = []
        ys = []
        for i in range(len(ts)):
            _x = x[i][ts[i], :]
            _x = np.dot(T, _x.T).T
            _y = x[i][ts[i] + 1, :]
            _y = np.dot(T, _y.T).T
            xs.append(_x)
            ys.append(_y)

        # FIXME: this overwrites the base state's parameters once per rotated
        # copy, which is only correct when each base state has a single
        # transformation.
        coef_, intercept_, sigma = linear_regression(xs, ys)
        self.A[si, ...] = coef_[:, :self.dm_obs]
        # self.B[k, ...] = coef_[:, self.dm_obs:]
        self.c[si, :] = intercept_
        _cov[si, ...] = sigma

    self.cov = _cov

    self.covt = np.zeros([self.nb_states, self.dm_obs, self.dm_obs])
    for k in range(self.nb_states):
        i = int(self.rot_lds[k, 0])
        j = int(self.rot_lds[k, 1])
        T_inv = self.T_inv[j, ...]
        self.covt[k, ...] = np.dot(T_inv, self.cov[i, ...])
def initialize(self, datas, inputs=None, masks=None, tags=None):
    # Initialize with linear regressions
    Ts = [data.shape[0] for data in datas]
    for k in range(self.K):
        ts = [npr.choice(T - self.lags, replace=False,
                         size=(T - self.lags) // self.K) for T in Ts]
        Xs = [np.column_stack([data[t + l] for l in range(self.lags)] + [input[t]])
              for t, data, input in zip(ts, datas, inputs)]
        ys = [data[t + self.lags] for t, data in zip(ts, datas)]

        # Solve the linear regression
        coef_, intercept_, sigmas = fit_linear_regression(Xs, ys)
        self.As[k] = coef_[:, :self.D * self.lags]
        self.Vs[k] = coef_[:, self.D * self.lags:]
        self.bs[k] = intercept_
        self.inv_sigmas[k] = np.log(sigmas + 1e-16)
def _naive_mh_step(Pm, Ym, A, W, Cm, etasq, curr_ll=None):
    # note: etasq was referenced but missing from the original signature
    # Randomly choose two neurons to swap
    unknowns = np.where(Cm.sum(axis=1) > 1)[0]
    n1, n2 = npr.choice(unknowns, 2, replace=False)
    v1 = np.where(Pm[n1])[0][0]
    v2 = np.where(Pm[n2])[0][0]
    if not Cm[n1, v2] or not Cm[n2, v1]:
        return Pm, curr_ll

    # Forward and backward proposal probabilities are the same,
    # so we just need to evaluate the log likelihoods
    curr_ll = curr_ll if curr_ll is not None else \
        log_likelihood_single_worm(Ym, A, W, Pm, etasq)

    P_prop = Pm.copy()
    P_prop[n1] = Pm[n2]
    P_prop[n2] = Pm[n1]
    prop_ll = log_likelihood_single_worm(Ym, A, W, P_prop, etasq)

    # Randomly accept or reject
    if np.log(npr.rand()) < prop_ll - curr_ll:
        return P_prop, prop_ll
    else:
        return Pm.copy(), curr_ll
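# A tiny standalone illustration (invented numbers) of the symmetric-proposal
# Metropolis accept rule used above: accept with probability
# min(1, exp(prop_ll - curr_ll)), implemented as log(u) < prop_ll - curr_ll.
import numpy as np
import numpy.random as npr

curr_ll, prop_ll = -10.0, -9.0
accept = np.log(npr.rand()) < prop_ll - curr_ll
# P(accept) = min(1, e^1) = 1 here; with prop_ll = -11.0 it would be e^-1 ~ 0.37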
def sample_x(self, z, xhist, input=None, tag=None, with_noise=True):
    ps = np.exp(self.logits - logsumexp(self.logits, axis=2, keepdims=True))
    return np.array([npr.choice(self.C, p=ps[z, d]) for d in range(self.D)])
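# An equivalent vectorized sketch (illustrative, not the library's code): draw
# all D categorical samples at once via the inverse-CDF trick instead of a
# Python loop over dimensions.
import numpy as np
import numpy.random as npr

ps = npr.dirichlet(np.ones(5), size=3)         # (D=3, C=5) row-normalized probs
u = npr.rand(ps.shape[0], 1)
samples = (ps.cumsum(axis=1) < u).sum(axis=1)  # (D,) ints in [0, C)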
def simulate_celegans(A, posx, M, T, num_given, dthresh=0.01,
                      sigmasq_W=None, etasq=0.1, spectral_factor=1.0):
    N = A.shape[0]
    rho = np.mean(A.sum(0))

    # Set sigmasq_W for stability
    # (note: sigmasq_W is computed but unused; the spectral normalization
    # below sets the scale of W)
    sigmasq_W = sigmasq_W if sigmasq_W is not None else 1. / (1.1 * N * rho)
    W = npr.randn(N, N) * A
    W = (W - W.T) / 2
    eigmax = np.max(abs(np.linalg.eig(W)[0]))
    W = W / (spectral_factor * eigmax)
    # paren was misplaced: the original asserted np.max of a boolean array
    assert np.max(abs(np.linalg.eigvals(A * W))) <= 1.00001

    # Make a global constraint matrix based on x-position
    if type(dthresh) is not str:
        C = np.eye(N, dtype=bool)
        dpos = abs(posx[:, None] - posx[None, :])
        C[dpos < dthresh] = True
    else:
        C = np.ones((N, N), dtype=bool)

    # Sample permutations for each worm
    perms = []
    Ps = np.zeros((M, N, N))
    for m in range(M):
        # perm[i] = index of neuron i in worm m's neurons
        perm = npr.permutation(N)
        perms.append(perm)
        Ps[m, np.arange(N), perm] = 1

    # Make constraint matrices for each worm
    Cs = np.zeros((M, N, N), dtype=bool)
    for m, (Cm, Pm, permm) in enumerate(zip(Cs, Ps, perms)):
        # C is in canonical x canonical; make it canonical x worm[m] order
        Cm = C.dot(Pm)

        # Randomly choose a handful of given neurons
        given = npr.choice(N, replace=False, size=num_given)
        Cm[given, :] = 0
        Cm[:, permm[given]] = 0
        Cm[given, permm[given]] = 1
        Cs[m] = Cm

        assert np.sum(Pm * Cm) == N

    # Sample some data!
    Ys = np.zeros((M, T, N))
    for m in range(M):
        Ys[m, 0, :] = np.ones(N)
        Wm = Ps[m].T.dot((W * A).dot(Ps[m]))
        for t in range(1, T):
            mu_mt = np.dot(Wm, Ys[m, t - 1, :])
            Ys[m, t, :] = mu_mt + np.sqrt(etasq) * npr.randn(N)

    return Ys, A, W, Ps, Cs
def callback(weights):
    print("Train loss:", training_loss(weights))
    print_training_prediction(weights)

# Build gradient of loss function using autograd.
training_loss_and_grad = value_and_grad(training_loss)

init_weights = npr.randn(num_weights) * param_scale
# Check the gradients numerically, just to be safe
quick_grad_check(training_loss, init_weights)

print("Training LSTM...")
result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                  options={'maxiter': train_iters}, callback=callback)
trained_weights = result.x

print("\nGenerating text from LSTM model...")
num_letters = 30
for t in range(20):
    text = ""
    for i in range(num_letters):
        seqs = string_to_one_hot(text, output_size)[:, np.newaxis, :]
        logprobs = pred_fun(trained_weights, seqs)[-1].ravel()
        text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
    print(text)
def sample(self, T, prefix=None, input=None, tag=None, with_noise=True):
    """
    Sample synthetic data from the model. Optionally, condition on a given
    prefix (preceding discrete states and data).

    Parameters
    ----------
    T : int
        number of time steps to sample

    prefix : (zpre, xpre)
        Optional prefix of discrete states (zpre) and continuous states (xpre).
        zpre must be an array of integers taking values 0...num_states-1.
        xpre must be an array of the same length that has preceding observations.

    input : (T, input_dim) array_like
        Optional inputs to specify for sampling

    tag : object
        Optional tag indicating which "type" of sampled data

    with_noise : bool
        Whether or not to sample data with noise.

    Returns
    -------
    z_sample : array_like of type int
        Sequence of sampled discrete states

    x_sample : (T x observation_dim) array_like
        Array of sampled data
    """
    K = self.K
    D = (self.D,) if isinstance(self.D, int) else self.D
    M = (self.M,) if isinstance(self.M, int) else self.M
    assert isinstance(D, tuple)
    assert isinstance(M, tuple)
    assert T > 0

    # Check the inputs
    if input is not None:
        assert input.shape == (T,) + M

    # Get the type of the observations
    # (the empty array must have shape (0,) + D; `np.empty(0,) + D` was a bug)
    dummy_data = self.observations.sample_x(0, np.empty((0,) + D))
    dtype = dummy_data.dtype

    # Initialize the data array
    if prefix is None:
        # No prefix is given. Sample the initial state as the prefix.
        pad = 1
        z = np.zeros(T, dtype=int)
        data = np.zeros((T,) + D, dtype=dtype)
        input = np.zeros((T,) + M) if input is None else input
        mask = np.ones((T,) + D, dtype=bool)

        # Sample the first state from the initial distribution
        pi0 = np.exp(self.init_state_distn.log_initial_state_distn(data, input, mask, tag))
        z[0] = npr.choice(self.K, p=pi0)
        data[0] = self.observations.sample_x(z[0], data[:0], input=input[0],
                                             with_noise=with_noise)

        # We only need to sample T-1 datapoints now
        T = T - 1
    else:
        # Check that the prefix is of the right type
        zpre, xpre = prefix
        pad = len(zpre)
        assert zpre.dtype == int and zpre.min() >= 0 and zpre.max() < K
        assert xpre.shape == (pad,) + D

        # Construct the states, data, inputs, and mask arrays
        z = np.concatenate((zpre, np.zeros(T, dtype=int)))
        data = np.concatenate((xpre, np.zeros((T,) + D, dtype)))
        input = np.zeros((T + pad,) + M) if input is None else \
            np.concatenate((np.zeros((pad,) + M), input))
        mask = np.ones((T + pad,) + D, dtype=bool)

    # Convert the discrete states to the range (1, ..., K_total)
    m = self.state_map
    K_total = len(m)
    _, starts = np.unique(m, return_index=True)
    z = starts[z]

    # Fill in the rest of the data
    for t in range(pad, pad + T):
        Pt = np.exp(self.transitions.log_transition_matrices(
            data[t - 1:t + 1], input[t - 1:t + 1], mask=mask[t - 1:t + 1], tag=tag))[0]
        z[t] = npr.choice(K_total, p=Pt[z[t - 1]])
        data[t] = self.observations.sample_x(m[z[t]], data[:t], input=input[t],
                                             tag=tag, with_noise=with_noise)

    # Collapse the states
    z = m[z]

    # Return the whole data if no prefix is given.
    # Otherwise, just return the simulated part.
    if prefix is None:
        return z, data
    else:
        return z[pad:], data[pad:]
def main(argv):
    del argv

    n_clusters = FLAGS.num_clusters
    n_dimensions = FLAGS.num_dimensions
    n_observations = FLAGS.num_observations

    alpha = 3.3 * np.ones(n_clusters)
    a = 1.
    b = 1.
    kappa = 0.1

    npr.seed(10001)

    # generate true latents and data
    pi = npr.gamma(alpha)
    pi /= pi.sum()
    mu = npr.normal(0, 1.5, [n_clusters, n_dimensions])
    z = npr.choice(np.arange(n_clusters), size=n_observations, p=pi)
    x = npr.normal(mu[z, :], 0.5**2)

    # points used for initialization
    pi_est = np.ones(n_clusters) / n_clusters
    z_est = npr.choice(np.arange(n_clusters), size=n_observations, p=pi_est)
    mu_est = npr.normal(0., 0.01, [n_clusters, n_dimensions])
    tau_est = 1.
    init_vals = pi_est, z_est, mu_est, tau_est

    # instantiate the model log joint
    log_joint = make_log_joint(x, alpha, a, b, kappa)

    # run mean field on variational mean parameters
    def callback(meanparams, itr):  # note: unused below; `itr` was a free (undefined) variable
        fig = plot(meanparams, x)
        plt.savefig('/tmp/gmm_{:04d}.png'.format(itr))
        plt.close(fig.number)

    supports = (SIMPLEX, INTEGER, REAL, NONNEGATIVE)
    neg_energy, normalizers, _, initializers, _, _ = \
        multilinear_representation(log_joint, init_vals, supports)

    np_natparams = [initializer(10.) for initializer in initializers]
    np_meanparams = [grad(normalizer)(natparam)
                     for normalizer, natparam in zip(normalizers, np_natparams)]

    # TODO(trandustin) try using feed_dict's to debug
    def tf_get_variable(inputs):
        return tf.get_variable(str(id(inputs)),
                               initializer=tf.constant_initializer(inputs),
                               dtype=tf.float32,
                               shape=inputs.shape)

    tf_meanparams = container_fmap(tf_get_variable, np_meanparams)
    tf_natparams = container_fmap(tf_get_variable, np_natparams)

    # Represent the set of natural/mean parameters for each coordinate update.
    # ([[]] * n would alias a single list; use a comprehension instead.)
    all_tf_natparams = [None] * len(normalizers)
    all_tf_natparams_assign_ops = [[] for _ in normalizers]
    # all_tf_meanparams = [None] * len(normalizers)
    all_tf_meanparams_assign_ops = [[] for _ in normalizers]
    for i in range(len(normalizers)):
        cast = lambda inputs: tf.cast(inputs, dtype=tf.float32)

        tf_update = make_tffun(grad(neg_energy, i), *np_meanparams)
        values = container_fmap(cast, tf_update(*tf_meanparams))
        for variable, value in zip(tf_meanparams[i], values):
            assign_op = variable.assign(value)
            all_tf_natparams_assign_ops[i].append(assign_op)
        all_tf_natparams[i] = values

        tf_update = make_tffun(grad(normalizers[i]), np_natparams[i])
        values = container_fmap(cast, tf_update(all_tf_natparams[i]))
        # values = container_fmap(cast, tf_update(tf_natparams))
        for variable, value in zip(tf_natparams[i], values):
            assign_op = variable.assign(value)
            all_tf_meanparams_assign_ops[i].append(assign_op)
        # all_tf_meanparams[i] = values

    # Set config for 1 CPU, 1 core, 1 thread(?).
    config = tf.ConfigProto(intra_op_parallelism_threads=1,
                            inter_op_parallelism_threads=1,
                            device_count={'CPU': 1})
    # Find out device placement.
    # config = tf.ConfigProto(log_device_placement=True)
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    natparams = sess.run(tf_natparams)
    print("ELBO ", elbo_fn(neg_energy, normalizers, natparams))

    start = time.time()
    for _ in range(FLAGS.num_iterations):
        for i in range(len(normalizers)):
            _ = sess.run(all_tf_natparams_assign_ops[i])
            _ = sess.run(all_tf_meanparams_assign_ops[i])
    runtime = time.time() - start
    print("CAVI Runtime (s): ", runtime)

    natparams = sess.run(tf_natparams)
    print("ELBO ", elbo_fn(neg_energy, normalizers, natparams))
# 2D Accumulator with Poisson observations
D = 2   # number of accumulation dimensions
K = 3   # number of discrete states
M = 2   # number of input dimensions
N = 10  # number of observations
bin_size = 0.01
latent_acc = LatentAccumulation(N, K, D, M=M,
                                transitions="race",
                                emissions="poisson",
                                emission_kwargs={"bin_size": bin_size})

# set params
betas = 0.075 * np.ones((D,))
sigmas = np.log(1e-3) * np.ones((D,))
latent_acc.dynamics.params = (betas, sigmas, latent_acc.dynamics.params[2])
latent_acc.emissions.Cs[0] = 4 * npr.randn(N, D) + npr.choice([-15, 15], (N, D))
latent_acc.emissions.ds[0] = 40 + 4.0 * npr.randn(N)

# Sample state trajectories
T = 100           # number of time bins
trial_time = 1.0  # trial length in seconds
dt = 0.01         # bin size in seconds
N_samples = 100

# input statistics
total_rate = 40  # the sum of the right and left Poisson process rates is 40

us = []
zs = []
xs = []
ys = []
def callback(weights):
    print("Train loss:", loss_fun(weights, train_inputs, train_targets))
    print_training_prediction(weights, train_inputs, train_targets)

# Build gradient of loss function using autograd.
# (value_and_grad replaces the old grad(..., return_function_value=True) API)
loss_and_grad = value_and_grad(loss_fun)

# Wrap function to only have one argument, for scipy.minimize.
def training_loss_and_grad(weights):
    return loss_and_grad(weights, train_inputs, train_targets)

init_weights = npr.randn(num_weights) * param_scale
# Check the gradients numerically, just to be safe
quick_grad_check(loss_fun, init_weights, (train_inputs, train_targets))

print("Training LSTM...")
result = minimize(training_loss_and_grad, init_weights, jac=True, method='CG',
                  options={'maxiter': train_iters}, callback=callback)
trained_weights = result.x

print("\nGenerating text from LSTM model...")
num_letters = 30
for t in range(20):
    text = " "
    for i in range(num_letters):
        seqs = string_to_one_hot(text, output_size)[:, np.newaxis, :]
        logprobs = pred_fun(trained_weights, seqs)[-1].ravel()
        text += chr(npr.choice(len(logprobs), p=np.exp(logprobs)))
    print(text)
                dm_obs=2, trans_type='poly',
                obs_prior=obs_prior, trans_kwargs=trans_kwargs)
rarhmm.initialize(x)

lls = rarhmm.em(x, nb_iter=100, prec=0., verbose=True)
print("true_ll=", true_ll, "hmm_ll=", lls[-1])

plt.figure(figsize=(5, 5))
plt.plot(np.ones(len(lls)) * true_ll, '-r')
plt.plot(lls)
plt.show()

_, rarhmm_z = rarhmm.viterbi(x)
_seq = npr.choice(len(x))
rarhmm.permute(permutation(true_z[_seq], rarhmm_z[_seq], K1=3, K2=3))
_, rarhmm_z = rarhmm.viterbi(x[_seq])

plt.figure(figsize=(8, 4))
plt.subplot(211)
plt.imshow(true_z[_seq][None, :], aspect="auto", cmap=cmap,
           vmin=0, vmax=len(colors) - 1)
plt.xlim(0, len(x[_seq]))
plt.ylabel("$z_{\\mathrm{true}}$")
plt.yticks([])
M = 1   # number of input dimensions
N = 10  # number of observations
bin_size = 0.01
latent_acc = LatentAccumulation(N, K, D, M=M,
                                transitions="ddmhard",
                                emissions="poisson",
                                emission_kwargs={"bin_size": bin_size})

# latent_acc.dynamics.Vs[0] = 0.05*np.ones((D,))
beta = 0.075 * np.ones((D,))
log_sigmasq = np.log(2e-3) * np.ones((D,))
A = np.ones((D, D))
latent_acc.dynamics.params = (beta, log_sigmasq, A)
latent_acc.emissions.Cs[0] = 4 * npr.randn(N, D) + npr.choice([-15, 15], (N, D))
latent_acc.emissions.ds[0] = 40 + 5 * npr.randn(N)

# simulate data
# Sample state trajectories
T = 100           # number of time bins
trial_time = 1.0  # trial length in seconds
dt = 0.01         # bin size in seconds
N_samples = 200

# input statistics
total_rate = 40  # the sum of the right and left Poisson process rates is 40

us = []
zs = []
xs = []
def observations_init_func_extra(self, datas, **kwargs):
    init = kwargs.get('init', 'rand')  # default

    # Sample time bins for each discrete state.
    # Use the data to cluster the time bins if specified.
    K, D, M, lags = self.K, self.D, self.M, self.lags
    Ts = [data.shape[0] for data in datas]

    # Get the size of windows and the gap between window starts for some
    # methods (in units of time bins)
    if init == 'window' or init == 'ar_clust':
        t_win = kwargs.get('t_win', 10)
        t_gap = kwargs.get('t_gap', int(np.ceil(t_win / 3)))
        # print('t_win:', t_win)
        # print('t_gap:', t_gap)

    # KMeans clustering
    if init == 'kmeans':
        km = KMeans(self.K)
        km.fit(np.vstack(datas))
        zs = np.split(km.labels_, np.cumsum(Ts)[:-1])

    # Random assignment
    elif init == 'rand' or init == 'random':
        zs = [npr.choice(self.K, size=T) for T in Ts]

    # Fit a dynamics matrix on sliding segments of data, see how well those
    # dynamics fit the other segments, then cluster the matrix of model errors
    elif init == 'ar_clust':
        num_trials = len(datas)

        # Each element of segs is a triplet of 1) trial, 2) time point of the
        # beginning of the segment, 3) time point of the end of the segment
        segs = []

        # Get all segments based on the predefined t_win and t_gap
        for tr in range(num_trials):
            T = Ts[tr]
            n_steps = int((T - t_win) / t_gap) + 1
            for k in range(n_steps):
                segs.append([tr, k * t_gap, k * t_gap + t_win])

        # Fit a regression (solve for the dynamics matrix) within each segment
        num_segs = len(segs)
        sse_mat = np.zeros([num_segs, num_segs])
        for j, seg in enumerate(segs):
            [tr, t_st, t_end] = seg
            X = datas[tr][t_st:t_end + 1, :]
            rr = Ridge(alpha=1, fit_intercept=True)
            rr.fit(X[:-1], X[1:] - X[:-1])

            # Then see how well the dynamics from segment j predict segment k
            # (measured by the sum of squared prediction errors)
            for k, seg2 in enumerate(segs):
                [tr, t_st, t_end] = seg2
                X = datas[tr][t_st:t_end + 1, :]
                sse_mat[j, k] = sse(X[1:] - X[:-1], rr.predict(X[:-1]))

        # Make sse_mat into a proper, symmetric distance matrix for clustering
        tmp = sse_mat - np.diag(sse_mat)
        dist_mat = tmp + tmp.T

        # Cluster!
        clustering = SpectralClustering(n_clusters=self.K,
                                        affinity='precomputed').fit(
                                            1 / (1 + dist_mat / t_win))
        # clustering = AgglomerativeClustering(n_clusters=K, affinity='precomputed',
        #                                      linkage='average').fit(dist_mat / t_win)

        # Now take the clustered segments and use them to determine the cluster
        # of the individual time points. If the segments are nonoverlapping, we
        # can simply assign each time point the cluster of its segment. If the
        # segments are overlapping, a time point's cluster is the cluster to
        # which the majority of its segments belong. Below, zs_init holds the
        # assigned discrete state of each time point for a trial; zs_init2
        # tracks the clusters of each time point across all the segments it is
        # part of.
        zs = []
        for tr in range(num_trials):
            xhat = datas[tr]
            T = xhat.shape[0]
            n_steps = int((T - t_win) / t_gap) + 1

            t_st = 0
            zs_init = np.zeros(T)
            zs_init2 = np.zeros([T, K])
            for k in range(n_steps):
                # note: labels_ is indexed per trial here, but segs span all
                # trials, so multi-trial indexing may need a per-trial offset
                t_end = t_st + t_win
                t_idx = np.arange(t_st, t_end)
                if t_gap == t_win:
                    zs_init[t_idx] = clustering.labels_[k]
                else:
                    zs_init2[t_idx, clustering.labels_[k]] += 1
                t_st = t_st + t_gap

            if t_gap != t_win:
                max_els = zs_init2.max(axis=1)
                for t in range(T):
                    if np.sum(zs_init2[t] == max_els[t]) == 1:
                        zs_init[t] = np.where(zs_init2[t] == max_els[t])[0]
                    else:
                        if zs_init[t - 1] in np.where(zs_init2[t] == max_els[t])[0]:
                            zs_init[t] = zs_init[t - 1]
                        else:
                            zs_init[t] = np.where(zs_init2[t] == max_els[t])[0][0]

            # I think this offset is correct rather than just using zs_init,
            # but it should be double-checked.
            zs.append(np.hstack([0, zs_init[:-1]]))

    # Cluster based on the means and mean absolute differences of segments
    elif init == 'window':
        num_trials = len(datas)
        n_steps_all = []

        # Get values to cluster
        vals = []
        for tr in range(num_trials):
            t_st = 0
            T = Ts[tr]
            xhat = datas[tr]
            n_steps = int((T - t_win) / t_gap) + 1
            n_steps_all.append(n_steps)
            for k in range(n_steps):
                if k == n_steps - 1:
                    t_end = T - 1
                else:
                    t_end = t_st + t_win
                t_idx = np.arange(t_st, t_end)
                X1 = xhat[t_st:t_end, :]
                X2 = xhat[t_st + 1:t_end + 1, :]

                # Cluster by diffs: mean absolute difference within the segment
                tmp = np.mean(np.abs(X2 - X1), axis=0)
                # mean value within the segment
                tmp2 = np.mean(X1, axis=0)
                # concatenate the above for clustering
                vals.append(np.hstack([tmp, tmp2]))
                t_st = t_st + t_gap

        vals_all = np.vstack(vals)  # combine across all trials

        ## Cluster ##
        km = KMeans(n_clusters=K)
        km.fit(vals_all)

        # As above: assign each time point the majority cluster of the
        # (possibly overlapping) segments it belongs to.
        tr_st_idxs = np.hstack([0, np.cumsum(n_steps_all)])
        zs = []
        for tr in range(num_trials):
            xhat = datas[tr]
            T = xhat.shape[0]
            n_steps = int((T - t_win) / t_gap) + 1

            t_st = 0
            zs_init = np.zeros(T)
            zs_init2 = np.zeros([T, K])
            for k in range(n_steps):
                # note: for multiple trials this should likely index
                # km.labels_ at tr_st_idxs[tr] + k; tr_st_idxs is unused
                t_end = t_st + t_win
                t_idx = np.arange(t_st, t_end)
                if t_gap == t_win:
                    zs_init[t_idx] = km.labels_[k]
                else:
                    zs_init2[t_idx, km.labels_[k]] += 1
                t_st = t_st + t_gap

            if t_gap != t_win:
                # zs_init = np.argmax(zs_init2, axis=1)
                max_els = zs_init2.max(axis=1)
                for t in range(T):
                    if np.sum(zs_init2[t] == max_els[t]) == 1:
                        zs_init[t] = np.where(zs_init2[t] == max_els[t])[0]
                    else:
                        if zs_init[t - 1] in np.where(zs_init2[t] == max_els[t])[0]:
                            zs_init[t] = zs_init[t - 1]
                        else:
                            zs_init[t] = np.where(zs_init2[t] == max_els[t])[0][0]

            zs.append(np.hstack([0, zs_init[:-1]]))
            self.zs_init = zs_init
    else:
        print('Not an accepted initialization type')

    return zs