def G4(positions, cell, numbers, elements, params):
    cutoff_radius = params['cutoff_radius']
    eta = params['eta']
    zeta = params['zeta']
    lbda = params['lbda']
    cos, Rij, Rik, Rjk, i, jk = atomsAngle(positions, cell, cutoff_radius)
    g4 = (1 + lbda * cos)**zeta * np.exp(
        -eta * (Rij**2 + Rik**2 + Rjk**2) / cutoff_radius**2)
    g4 = g4 * cutoff(cutoff_radius, Rij) * cutoff(cutoff_radius, Rik) * cutoff(
        cutoff_radius, Rjk)
    g4 *= 2**(1 - zeta)
    atoms_mask = np.arange(len(positions))[:, None] == i[None, :]
    # The shape of g4 becomes (#atoms, len(g4)); multiplying by atoms_mask keeps
    # only the terms that belong to each center atom's fingerprint.
    g4 = np.repeat(g4[None, :], len(positions), axis=0)
    g4 *= atoms_mask
    index = np.indices((len(elements), len(elements))).reshape(2, -1)
    mask = index[1] >= index[0]
    index = index[:, mask].T
    pairs = np.repeat(np.sort(numbers[jk])[:, None], len(index), axis=1)
    elements = np.sort(elements)[index]
    pairs_mask = np.sum(pairs == elements, axis=2)
    pairs_mask = np.where(pairs_mask == 2, 1, 0)
    g4 = np.dot(g4, pairs_mask)
    return g4

def estimates(self, log_p, ref):
    n = ref.shape[0]
    d = ref.shape[1]
    if n % 2 == 1:
        # make it even by removing the last row
        ref = np.delete(ref, -1, axis=0)
        n = n - 1
    refOdd = ref[::2, :]
    refEven = ref[1::2, :]
    estimates = np.zeros((self.n_estimates(n)))
    dlog_px = log_p.grad_log(ref)
    Kxx = self.kernel.eval(ref, ref)
    ddk = self.kernel.gradXY_sum(ref, ref)
    mat2 = np.zeros((n, n))
    mat3 = np.zeros((n, n))
    mat1 = (np.matmul(dlog_px, dlog_px.T) * Kxx.T)
    ## TODO: Eigensum
    for k in range(d):
        dk_dX = self.kernel.gradX_Y(ref, ref, k)
        dk_dY = self.kernel.gradY_X(ref, ref, k)
        mat2 = mat2 + (np.repeat(dlog_px[:, k, np.newaxis], n, axis=1) * dk_dY)
        mat3 = mat3 + (np.repeat(dlog_px[:, k, np.newaxis], n, axis=1) * dk_dX.T).T
    mat4 = mat1 + mat2 + mat3 + ddk
    e = 2 * np.array(range(self.n_estimates(n)))
    o = 2 * np.array(range(self.n_estimates(n))) + 1
    return mat4[e, o]

def periodic_kernel(x, xstar, hyp):
    """
    Implements the periodic kernel function for a Gaussian process.

    x: input data with shape (N, d)
    xstar: input data with shape (Nstar, d)
    hyp: (log(sigma_f), log(l1), log(l2), ..., log(period)) with shape (d + 2,)

    returns: a covariance matrix with shape (N, Nstar)
    """
    sigma_f = np.exp(hyp[0])
    N = x.shape[0]
    Nstar = xstar.shape[0]
    l = np.exp(hyp[1:-1])  # shape (d,)
    l = np.repeat(np.repeat(l[np.newaxis, :], Nstar, axis=0)[np.newaxis, :],
                  N, axis=0)  # shape (N, Nstar, d)
    period = np.exp(hyp[-1])
    diff = np.sin(
        np.pi * np.abs(np.expand_dims(x, 1) - np.expand_dims(xstar, 0)) /
        period) / l  # shape (N, Nstar, d)
    K = sigma_f * np.exp(-2 * (diff**2).sum(axis=2))  # shape (N, Nstar)
    return K

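# Minimal usage sketch for periodic_kernel with made-up data and hyperparameters
# (d = 2 length scales plus a signal variance and a period, all passed on log scale).
import numpy as np

x = np.random.randn(5, 2)                      # N = 5, d = 2
xstar = np.random.randn(3, 2)                  # Nstar = 3
hyp = np.log(np.array([1.0, 0.5, 2.0, 1.0]))   # (sigma_f, l1, l2, period)
K = periodic_kernel(x, xstar, hyp)
print(K.shape)                                 # (5, 3)
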
def index2d(channel, stride, kshape, xshape):
    k_h, k_w = kshape
    x_h, x_w = xshape
    c_idx = np.repeat(np.arange(channel), k_h * k_w)
    c_idx = c_idx.reshape(-1, 1)
    res_h = int((x_h - k_h) / stride) + 1
    res_w = int((x_w - k_w) / stride) + 1
    size = channel * k_h * k_w
    h_idx = np.tile(np.repeat(stride * np.arange(res_h), res_w), size)
    h_idx = h_idx.reshape(size, -1)
    h_off = np.tile(np.repeat(np.arange(k_h), k_w), channel)
    h_off = h_off.reshape(size, -1)
    h_idx = h_idx + h_off
    w_idx = np.tile(np.tile(stride * np.arange(res_w), res_h), size)
    w_idx = w_idx.reshape(size, -1)
    w_off = np.tile(np.arange(k_w), channel * k_h)
    w_off = w_off.reshape(size, -1)
    w_idx = w_idx + w_off
    return c_idx, h_idx, w_idx

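# Usage sketch (assumed, not from the original source): im2col-style indices for a
# 3-channel 4x4 input with a 2x2 kernel and stride 1, then gathering all patches.
import numpy as np

c_idx, h_idx, w_idx = index2d(channel=3, stride=1, kshape=(2, 2), xshape=(4, 4))
print(c_idx.shape, h_idx.shape, w_idx.shape)   # (12, 1) (12, 9) (12, 9)

x = np.arange(3 * 4 * 4).reshape(3, 4, 4)
patches = x[c_idx, h_idx, w_idx]               # (12, 9): one column per output position
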
def lstm_predict(params, inputs):
    def update_lstm(input, hiddens, cells):
        change = np.tanh(concat_and_multiply(params['change'], input, hiddens))
        forget = sigmoid(concat_and_multiply(params['forget'], input, hiddens))
        ingate = sigmoid(concat_and_multiply(params['ingate'], input, hiddens))
        outgate = sigmoid(
            concat_and_multiply(params['outgate'], input, hiddens))
        cells = cells * forget + ingate * change
        hiddens = outgate * np.tanh(cells)
        return hiddens, cells

    def hiddens_to_output_probs(hiddens):
        output = concat_and_multiply(params['predict'], hiddens)
        # Normalize log-probs.
        return output - logsumexp(output, axis=1, keepdims=True)

    num_sequences = inputs.shape[1]
    hiddens = np.repeat(params['init hiddens'], num_sequences, axis=0)
    cells = np.repeat(params['init cells'], num_sequences, axis=0)

    output = [hiddens_to_output_probs(hiddens)]
    for input in inputs:  # Iterate over time steps.
        hiddens, cells = update_lstm(input, hiddens, cells)
        output.append(hiddens_to_output_probs(hiddens))
    return output

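# The recurrent snippets in this collection (lstm_predict, rnn_predict, the GRU
# transition functions, the `outputs` variants) rely on helpers that are not shown
# here. Below is one plausible sketch of `sigmoid` and `concat_and_multiply` in the
# style of the autograd RNN/LSTM examples these functions resemble; treat the exact
# definitions as an assumption rather than the original implementation.
import autograd.numpy as np


def sigmoid(x):
    # Logistic function written via tanh for numerical convenience.
    return 0.5 * (np.tanh(x / 2.0) + 1.0)


def concat_and_multiply(weights, *args):
    # Concatenate all inputs plus a bias column of ones, then apply the weight matrix.
    cat_state = np.hstack(args + (np.ones((args[0].shape[0], 1)),))
    return np.dot(cat_state, weights)
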
def Bwham_NLL_eq(fi, Ni, Ml, Wil):
    """
    Args:
        fi: shape (S,)
        Ni: Number of data counts in simulation i, shape (S,)
        Ml: Number of data points from simulations i = 1, ..., S in bin l, shape (M,)
        Wil: 0.5 * k * beta * (n - nstar)**2, shape (S, M)

    Returns:
        the value of the negative log-likelihood function
    """
    S = Wil.shape[0]
    M = Wil.shape[1]

    fi = fi - fi[-1]
    first_term = -(Ni * fi).sum()
    log_pl = nup.log(Ml) - \
        alogsumexp(nup.repeat(fi[:, nup.newaxis], M, axis=1) - Wil,
                   b=nup.repeat(Ni[:, nup.newaxis], M, axis=1), axis=0)
    second_term = (Ml * log_pl).sum(axis=0)

    return first_term - second_term

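# Note on the function above (added for clarity): it evaluates the binned WHAM
# negative log-likelihood
#     NLL(f) = -sum_i N_i f_i - sum_l M_l * log p_l,
#     with  log p_l = log M_l - log sum_i N_i exp(f_i - W_il),
# which corresponds to first_term and second_term in the code. `nup` and `alogsumexp`
# are assumed to be the module's numpy alias and a weighted log-sum-exp helper.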
def differentiate(self):
    "get gradient values using finite difference"
    # NOTE: np.repeat is called with a list of repeats and no axis, which repeats
    # elements of the flattened array; np.tile(..., (self.T, 1, 1)) may have been
    # the intended broadcast over the horizon T.
    C = np.repeat(np.expand_dims(self.cost_fn.C, axis=0), [self.T, 1, 1])
    F = np.repeat(np.expand_dims(self.dyn_model.F, axis=0), [self.T, 1, 1])
    c = np.repeat(np.expand_dims(self.cost_fn.c, axis=0), (self.T, 1))
    f = np.repeat(np.expand_dims(self.dyn_model.f, axis=0), (self.T, 1))
    return C, F, c, f

def gp0(self, m, s):
    """
    Compute joint predictions for MGP with uncertain inputs.
    """
    assert hasattr(self, "hyp")
    if not hasattr(self, "K"):
        self.cache()

    x = np.atleast_2d(self.inputs)
    y = np.atleast_2d(self.targets)
    n, D = x.shape
    n, E = y.shape
    X = self.hyp
    iK = self.iK
    beta = self.alpha

    m = np.atleast_2d(m)
    inp = x - m

    # Compute the predicted mean and IO covariance.
    iL = np.stack([np.diag(exp(-X[i, :D])) for i in range(E)])
    iN = np.matmul(inp, iL)
    B = iL @ s @ iL + np.eye(D)
    t = np.stack([solve(B[i].T, iN[i].T).T for i in range(E)])
    q = exp(-np.sum(iN * t, 2) / 2)
    qb = q * beta.T
    tiL = np.matmul(t, iL)
    c = exp(2 * X[:, D]) / sqrt(det(B))

    M = np.sum(qb, 1) * c
    V = (np.transpose(tiL, [0, 2, 1]) @ np.expand_dims(qb, 2)).reshape(E, D).T * c
    k = 2 * X[:, D].reshape(E, 1) - np.sum(iN**2, 2) / 2

    # Compute the predicted covariance.
    inp = np.expand_dims(inp, 0) / np.expand_dims(exp(2 * X[:, :D]), 1)
    ii = np.repeat(inp[:, newaxis, :, :], E, 1)
    ij = np.repeat(inp[newaxis, :, :, :], E, 0)

    iL = np.stack([np.diag(exp(-2 * X[i, :D])) for i in range(E)])
    siL = np.expand_dims(iL, 0) + np.expand_dims(iL, 1)
    R = np.matmul(s, siL) + np.eye(D)
    t = 1 / sqrt(det(R))
    iRs = np.stack([solve(R.reshape(-1, D, D)[i], s) for i in range(E * E)])
    iRs = iRs.reshape(E, E, D, D)

    Q = exp(k[:, newaxis, :, newaxis] + k[newaxis, :, newaxis, :]
            + maha(ii, -ij, iRs / 2))

    S = np.einsum('ji,iljk,kl->il', beta, Q, beta)
    tr = np.hstack([np.sum(Q[i, i] * iK[i]) for i in range(E)])
    S = (S - np.diag(tr)) * t + np.diag(exp(2 * X[:, D]))
    S = S - np.matmul(M[:, newaxis], M[newaxis, :])

    return M, S, V

def compute_path_params(eta, H, psi):
    '''
    Compute the Gaussian parameters for each path

    H (list of nb_layers elements of shape (K_l x r_{l-1}, r_l)): Lambda parameters
        for each layer
    psi (list of nb_layers elements of shape (K_l x r_{l-1}, r_{l-1})): Psi parameters
        for each layer
    eta (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu parameters
        for each layer
    ------------------------------------------------------------------------------
    returns (tuple of len 2): The updated parameters mu_s and sigma for all s in Omega
    '''
    #=====================================================================
    # Retrieving model parameters
    #=====================================================================
    L = len(H)
    k = [len(h) for h in H]
    k_aug = k + [1]  # Integrating the number of components of the last layer, i.e. 1

    r1 = H[0].shape[1]
    r2_L = [h.shape[2] for h in H]  # r[2:L]
    r = [r1] + r2_L  # r augmented

    #=====================================================================
    # Initiating the parameters for all layers
    #=====================================================================
    mu_s = [0 for i in range(L + 1)]
    sigma_s = [0 for i in range(L + 1)]

    # Initialization with the parameters of the last layer
    mu_s[-1] = np.zeros((1, r[-1], 1))  # Swap k and r later
    sigma_s[-1] = np.eye(r[-1])[n_axis]

    #==================================================================================
    # Compute Gaussian parameters from top to bottom for each path
    #==================================================================================
    for l in reversed(range(0, L)):
        H_repeat = np.repeat(H[l], np.prod(k_aug[l + 1:]), axis=0)
        eta_repeat = np.repeat(eta[l], np.prod(k_aug[l + 1:]), axis=0)
        psi_repeat = np.repeat(psi[l], np.prod(k_aug[l + 1:]), axis=0)

        mu_s[l] = eta_repeat + H_repeat @ np.tile(mu_s[l + 1], (k[l], 1, 1))

        sigma_s[l] = H_repeat @ np.tile(sigma_s[l + 1], (k[l], 1, 1)) @ t(H_repeat, (0, 2, 1)) \
            + psi_repeat

    return mu_s, sigma_s

def prep_opt(y_train, N, coeffs):
    # NOTE: `scale`, `D` and `n_neurons` are assumed to be module-level constants,
    # and `gammaln` a log-gamma helper (e.g. from scipy.special).
    summedy_mat = np.sum(y_train, axis=0)
    summedy = np.reshape(summedy_mat, [np.size(summedy_mat), -1])
    a1 = np.reshape([np.repeat(coeffs.T[1], N)], [np.size(summedy), -1])
    a0 = np.reshape([np.repeat(coeffs.T[0], N)], [np.size(summedy), -1])
    a1y = np.multiply(a1, summedy)
    a0y = np.multiply(a0, summedy)
    consts = np.sum(gammaln(y_train + scale)) \
        - D * n_neurons * N * gammaln(scale) \
        - np.sum(coeffs.T[0] * (D * scale * N)) \
        - np.sum(a0y) - np.sum(summedy * np.log(scale))
    return summedy, a1y, a0y, a1, consts

def getGRUTranstionDist(params, data):
    try:
        inputs = np.concatenate([data[k] for k in ['a', 'u']], axis=2)
    except:
        inputs = data['a']

    def update_gru(input, hiddens):
        update = sigmoid(
            concat_and_multiply(params['transion']['update'], input, hiddens))
        reset = sigmoid(
            concat_and_multiply(params['transion']['reset'], input, hiddens))
        hiddens = (1 - update) * hiddens + update * sigmoid(
            concat_and_multiply(params['transion']['hiddenOut'], input,
                                hiddens * reset))
        return hiddens

    num_sequences = inputs.shape[1]
    hiddens = np.repeat(params['transion']['init hiddens'], num_sequences,
                        axis=0)
    # Split the hidden state in half: (mean, variance)-style pairs at each step.
    output = [(hiddens[:, :hiddens.shape[1] // 2],
               hiddens[:, hiddens.shape[1] // 2:])]
    for input in inputs:  # Iterate over time steps.
        hiddens = update_gru(input, hiddens)
        output.append((hiddens[:, :hiddens.shape[1] // 2],
                       hiddens[:, hiddens.shape[1] // 2:]))
    return list(zip(*output))

def get_k(stiffness, ke):
    # Constructs a sparse stiffness matrix, k, for use in the displace function.
    nely, nelx = stiffness.shape

    # get position of the nodes of each element in the stiffness matrix
    ely, elx = np.meshgrid(range(nely), range(nelx))  # x, y coords
    ely, elx = ely.reshape(-1, 1), elx.reshape(-1, 1)

    n1 = (nely + 1) * (elx + 0) + (ely + 0)
    n2 = (nely + 1) * (elx + 1) + (ely + 0)
    n3 = (nely + 1) * (elx + 1) + (ely + 1)
    n4 = (nely + 1) * (elx + 0) + (ely + 1)
    edof = np.array([
        2 * n1, 2 * n1 + 1, 2 * n2, 2 * n2 + 1,
        2 * n3, 2 * n3 + 1, 2 * n4, 2 * n4 + 1
    ])
    edof = edof.T[0]
    x_list = np.repeat(edof, 8)  # flat list pointer of each node in an element
    y_list = np.tile(edof, 8).flatten()  # flat list pointer of each node in elem

    # make the stiffness matrix
    kd = stiffness.T.reshape(nelx * nely, 1, 1)
    value_list = (kd * np.tile(ke, kd.shape)).flatten()
    return value_list, y_list, x_list

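# Usage sketch with a made-up 2x3 element grid and a placeholder 8x8 element
# stiffness matrix `ke`; only the shapes of the returned COO-style lists matter here.
import numpy as np

stiffness = np.ones((2, 3))   # (nely, nelx) element densities
ke = np.eye(8)                # placeholder element stiffness matrix
value_list, y_list, x_list = get_k(stiffness, ke)
print(value_list.shape, y_list.shape, x_list.shape)  # (384,) (384,) (384,): 6 elements x 64 entries
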
def optimize(self, curb=None):
    assert hasattr(self, "inputs")
    assert hasattr(self, "targets")

    x = np.atleast_2d(self.inputs)
    y = np.atleast_2d(self.targets)
    assert len(x) == len(y)

    n, D = x.shape
    n, E = y.shape

    if curb is not None:
        self.curb = curb
    elif not hasattr(self, "curb"):
        self.curb = Empty()
        self.curb.snr = 500
        self.curb.ls = 100
        self.curb.std = std(x, 0)

    if not hasattr(self, "hyp"):
        self.hyp = np.zeros([E, D + 2])
        self.hyp[:, :D] = np.repeat(log(std(x, 0)).reshape(1, D), E, 0)
        self.hyp[:, D] = log(std(y, 0))
        self.hyp[:, -1] = log(std(y, 0) / 10)

    print("Train hyperparameters of full GP...")
    try:
        self.result = minimize(
            value_and_grad(self.hyp_crub), self.hyp, jac=True)
    except Exception:
        self.result = minimize(
            value_and_grad(self.hyp_crub), self.hyp, jac=True, method='CG')

    self.hyp = self.result.get('x').reshape(E, -1)
    self.cache()

def generate(si, theta, seed=0):
    np.random.seed(seed)
    n, p, r = len(si), 6, 5
    X = np.random.normal(0, 1, n * p).reshape(n, p)
    X_kar = np.kron(np.eye(r), X)
    beta_star = np.random.uniform(0, 1, p * r).reshape(p * r, 1)
    A = np.random.uniform(0, 1, 25).reshape(5, 5)
    Gamma = PCA().fit(A).components_
    Gamma1 = Gamma[:, :2]
    Gamma0 = Gamma[:, 2:]
    out1, out0 = list(), list()
    for i in range(2):
        out1.append([(-.9)**abs(i - j) for j in range(2)])
    for i in range(3):
        out0.append([(-.5)**abs(i - j) for j in range(3)])
    Omega1 = np.array(out1)
    Omega0 = np.array(out0)
    Sigma = np.matmul(np.matmul(Gamma1, Omega1), Gamma1.T) + np.matmul(
        np.matmul(Gamma0, Omega0), Gamma0.T)
    if (theta[0] == 0) & (theta[1] == 0):
        h = np.eye(len(si))
    else:
        h = np.array(rho(si, theta))
    Sigma_kr = np.kron(Sigma, h)
    Err_kr = np.random.multivariate_normal(np.repeat(0, n * r),
                                           Sigma_kr).reshape(n * r, 1)
    Y_kr = np.matmul(X_kar, beta_star) + np.array(Err_kr)
    Y = Y_kr.reshape(n, r)
    return (X, Y)

def log_py_zM_bin_j(lambda_bin_j, y_bin_j, zM, k, nj_bin_j):
    '''
    Compute log p(y_j | zM, s1 = k1) for the jth binary/count variable

    lambda_bin_j ((r + 1) 1darray): Coefficients of the binomial distributions
        in the GLLVM layer
    y_bin_j (numobs 1darray): The subset containing only the binary/count
        variables in the dataset
    zM (M x r x k ndarray): M Monte Carlo copies of z for each component k1
        of the mixture
    k (int): The number of components of the mixture
    nj_bin_j (int): The number of possible values/maximum values of the jth
        binary/count variable
    --------------------------------------------------------------
    returns (ndarray): p(y_j | zM, s1 = k1)
    '''
    M = zM.shape[0]
    r = zM.shape[1]
    numobs = len(y_bin_j)

    yg = np.repeat(y_bin_j[np.newaxis], axis=0, repeats=M)
    yg = yg.astype(float)

    nj_bin_j = float(nj_bin_j)

    coeff_binom = binom(nj_bin_j, yg).reshape(M, 1, numobs)

    eta = np.transpose(zM, (0, 2, 1)) @ lambda_bin_j[1:].reshape(1, r, 1)
    eta = eta + lambda_bin_j[0].reshape(1, 1, 1)  # Add the constant

    den = nj_bin_j * log_1plusexp(eta)
    num = eta @ y_bin_j[np.newaxis, np.newaxis]

    log_p_y_z = num - den + np.log(coeff_binom)

    return np.transpose(log_p_y_z, (0, 2, 1)).astype(float)

def draw_z2_z1s(chsi, rho, M, r):
    '''
    Draw from f(z^{l+1} | z^{l}, s, Theta)

    chsi (list of nd-arrays): The chsi parameters for all paths starting at each layer
    rho (list of ndarrays): The rho parameters (covariance matrices) for all paths
        starting at each layer
    M (list of int): The number of MC draws on each layer
    r (list of int): The dimension of each layer
    ---------------------------------------------------------------------------
    returns (list of nd-arrays): z^{l+1} | z^{l}, s, Theta for all (l, s)
    '''
    L = len(chsi)
    S = [chsi[l].shape[0] for l in range(L)]

    z2_z1s = []
    for l in range(L):
        z2_z1s_l = np.zeros((M[l + 1], M[l], S[l], r[l + 1]))
        for s in range(S[l]):
            z2_z1s_kl = multivariate_normal(
                size=M[l + 1],
                mean=rho[l][:, s].flatten(order='C'),
                cov=block_diag(*np.repeat(chsi[l][s][n_axis], M[l], axis=0)))

            z2_z1s_l[:, :, s] = z2_z1s_kl.reshape(M[l + 1], M[l], r[l + 1],
                                                  order='C')

        z2_z1s_l = t(z2_z1s_l, (1, 0, 2, 3))
        z2_z1s.append(z2_z1s_l)

    return z2_z1s

def forward_step(params, X=None, cell_state_0=None, hid_state_0=None):
    hid_state = np.repeat(hid_state_0,
                          X.shape[0] - hid_state_0.shape[0] + 1,
                          axis=0)
    cell_state_1 = np.add(
        np.multiply(  # <-- forget old info
            cell_state_0,
            sigmoid(c([X, hid_state]) @ params['forget']['w'] +
                    params['forget']['b']),  # <-- forget gate
        ),
        np.multiply(  # <-- write new info
            sigmoid(c([X, hid_state]) @ params['ingate']['w'] +
                    params['ingate']['b']),  # <-- input gate
            np.tanh(c([X, hid_state]) @ params['change']['w'] +
                    params['change']['b']),  # <-- change gate
        ))
    hid_state_1 = np.multiply(
        sigmoid(c([X, hid_state]) @ params['outgate']['w']),  # 1,
        np.tanh(cell_state_1))
    return cell_state_1, hid_state_1

def get_states_and_transitions(self):
    num_acts, num_states = self.num_acts, self.batch_size
    if isinstance(self.env.observation_space, spaces.Discrete):
        if num_states is None:
            states = np.arange(self.env.observation_space.n)
        else:
            states = np.random.randint(0, self.env.action_space.n,
                                       size=(num_states, ))
    else:
        assert num_states is not None
        state_low, state_high = (self.env.observation_space.low,
                                 self.env.observation_space.high)
        states = np.random.uniform(state_low, state_high,
                                   size=(num_states, len(state_low)))

    if isinstance(self.env.action_space, spaces.Discrete):
        num_acts = self.env.action_space.n
        actions = np.arange(num_acts)
    else:
        assert num_acts is not None
        act_low, act_high = self.env.action_space.low, self.env.action_space.high
        actions = np.random.uniform(act_low, act_high,
                                    size=(num_acts, len(act_low)))

    states = np.tile(states.T, num_acts).T
    actions = np.repeat(actions, num_states, axis=0)
    self.env.vec_set_state(states)
    next_states, rewards, dones, _ = self.env.vec_step(actions)
    return states, next_states, rewards, dones

def draw_z_s(mu_s, sigma_s, eta, M):
    '''
    Draw from f(z^{l} | s) for all s in Omega and return the centered and
    non-centered draws

    mu_s (list of nd-arrays): The means of the Gaussians starting at each layer
    sigma_s (list of nd-arrays): The covariance matrices of the Gaussians starting
        at each layer
    eta (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu parameters
        for each layer
    M (list of int): The number of MC draws on each layer
    -------------------------------------------------------------------------
    returns (list of ndarrays): z^{l} | s for all s in Omega and all l in L
    '''
    L = len(mu_s) - 1
    r = [mu_s[l].shape[1] for l in range(L + 1)]
    S = [mu_s[l].shape[0] for l in range(L + 1)]

    z_s = []
    zc_s = []  # z centered (denoted c) for all l

    for l in range(L + 1):
        zl_s = multivariate_normal(
            size=(M[l], 1),
            mean=mu_s[l].flatten(order='C'),
            cov=block_diag(*sigma_s[l]))
        zl_s = zl_s.reshape(M[l], S[l], r[l], order='C')
        z_s.append(t(zl_s, (0, 2, 1)))

        if l < L:  # The last layer is already centered
            eta_ = np.repeat(t(eta[l], (2, 0, 1)), S[l + 1], axis=1)
            zc_s.append(zl_s - eta_)

    return z_s, zc_s

def getGRUTranstionDist(params, data, latents):
    inputs = np.concatenate([np.expand_dims(x, axis=0) for x in latents], axis=0)
    if ('a' in data and 'u' in data):
        inputs = np.concatenate([data[k] for k in ['a', 'u']] + [inputs], axis=2)

    def update_gru(input, hiddens):
        update = sigmoid(
            concat_and_multiply(params['transion']['update'], input, hiddens))
        reset = sigmoid(
            concat_and_multiply(params['transion']['reset'], input, hiddens))
        hiddens = (1 - update) * hiddens + update * sigmoid(
            concat_and_multiply(params['transion']['hiddenOut'], input,
                                hiddens * reset))
        return hiddens

    num_sequences = inputs.shape[1]
    hiddens = np.repeat(params['transion']['init hiddens'], num_sequences,
                        axis=0)
    output = []
    for input in inputs:  # Iterate over time steps.
        hiddens = update_gru(input, hiddens)
        output.append((hiddens[:, :hiddens.shape[1] // 2],
                       hiddens[:, hiddens.shape[1] // 2:]))
    return list(zip(*output))

def compute_rho(eta, H, psi, mu_s, sigma_s, z_c, chsi):
    '''
    Compute rho as defined in equation (8) of the DGMM paper

    eta (list of nb_layers elements of shape (K_l x r_{l-1}, 1)): mu parameters
        for each layer
    H (list of nb_layers elements of shape (K_l x r_{l-1}, r_l)): Lambda parameters
        for each layer
    psi (list of nb_layers elements of shape (K_l x r_{l-1}, r_{l-1})): Psi parameters
        for each layer
    z_c (list of nd-arrays): z^{(l)} - eta^{(l)} for each layer
    chsi (list of nd-arrays): The chsi parameters for each layer
    -----------------------------------------------------------------------
    returns (list of ndarrays): The rho parameters (covariance matrices) for all
        paths starting at each layer
    '''
    L = len(H)
    rho = [0 for i in range(L)]
    k = [len(h) for h in H]
    k_aug = k + [1]

    for l in range(0, L):
        sigma_next_l = np.tile(sigma_s[l + 1], (k[l], 1, 1))
        mu_next_l = np.tile(mu_s[l + 1], (k[l], 1, 1))

        HxPsi_inv = t(H[l], (0, 2, 1)) @ pinv(psi[l])
        HxPsi_inv = np.repeat(HxPsi_inv, np.prod(k_aug[l + 1:]), axis=0)

        rho[l] = chsi[l][n_axis] @ (HxPsi_inv[n_axis] @ z_c[l][..., n_axis]
                                    + (pinv(sigma_next_l) @ mu_next_l)[n_axis])

    return rho

def get_causal_effect(params, do_A, w):
    "to be called within experiment function."
    np.random.seed(4)
    random.seed(4)

    al, bl = params
    L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
    if nystr:
        alpha = EYEN - eig_vec_K @ np.linalg.inv(
            eig_vec_K.T @ L @ eig_vec_K / N2 +
            np.diag(1 / eig_val_K / N2)) @ eig_vec_K.T @ L / N2
        alpha = alpha @ W_nystr @ Y * N2
    else:
        LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
        alpha = LWL_inv @ L @ W @ Y
        # L_W_inv = chol_inv(W*N2+L_inv)

    EYhat_do_A = []
    for a in do_A:
        a = np.repeat(a, [w.shape[0]]).reshape(-1, 1)
        w = w.reshape(-1, 1)
        aw = np.concatenate([a, w], axis=-1)
        ate_L0 = _sqdist(aw, X)
        ate_L = bl * bl * np.exp(-ate_L0 / al / al / 2)
        h_out = ate_L @ alpha

        mean_h = np.mean(h_out).reshape(-1, 1)
        EYhat_do_A.append(mean_h)
        print('a = {}, beta_a = {}'.format(np.mean(a), mean_h))

    return np.concatenate(EYhat_do_A)

def compute_chsi(H, psi, mu_s, sigma_s):
    '''
    Compute chsi as defined in equation (8) of the DGMM paper

    H (list of nb_layers elements of shape (K_l x r_l-1, r_l)): Lambda parameters
        for each layer
    psi (list of nb_layers elements of shape (K_l x r_l-1, r_l-1)): Psi parameters
        for each layer
    mu_s (list of nd-arrays): The means of the Gaussians starting at each layer
    sigma_s (list of nd-arrays): The covariance matrices of the Gaussians starting
        at each layer
    ------------------------------------------------------------------------------
    returns (list of ndarray): The chsi parameters for all paths starting at each layer
    '''
    L = len(H)
    k = [len(h) for h in H]

    #=====================================================================
    # Initiating the parameters for all layers
    #=====================================================================

    # Initialization with the parameters of the last layer
    chsi = [0 for i in range(L)]
    chsi[-1] = pinv(pinv(sigma_s[-1]) + t(H[-1], (0, 2, 1)) @ pinv(psi[-1]) @ H[-1])

    #==================================================================================
    # Compute chsi from top to bottom
    #==================================================================================
    for l in range(L - 1):
        Ht_psi_H = t(H[l], (0, 2, 1)) @ pinv(psi[l]) @ H[l]
        Ht_psi_H = np.repeat(Ht_psi_H, np.prod(k[l + 1:]), axis=0)

        sigma_next_l = np.tile(sigma_s[l + 1], (k[l], 1, 1))
        chsi[l] = pinv(pinv(sigma_next_l) + Ht_psi_H)

    return chsi

def _initialize_variational_params(self, data, input, mask, tag):
    T = data.shape[0]
    D = self.D

    # Initialize the mean with the linear model, if applicable
    ms = self.model.emissions.invert(data, input=input, mask=mask, tag=tag)

    # Initialize with no covariance between adjacent time steps
    # NOTE: it's important to initialize A and Q to be nonzero,
    # otherwise the gradients wrt them are zero and they never
    # change during optimization!
    As = np.repeat(np.eye(D)[None, :, :], T - 1, axis=0)
    bs = np.zeros((T - 1, D))
    Qi_sqrts = np.repeat(np.eye(D)[None, :, :], T - 1, axis=0)
    Ri_sqrts = 1. / np.sqrt(self.initial_variance) * np.repeat(np.eye(D)[None, :, :], T, axis=0)
    return As, bs, Qi_sqrts, ms, Ri_sqrts

def _parameter_initialiser(self, x, c=None, n=None):
    # Use a short-circuiting `and` so the all-zeros check is only evaluated when
    # a censoring array `c` is actually provided.
    if (c is not None) and ((c == 0).all()):
        x = np.repeat(x, n)
        p = self._mom(x)
    else:
        p = 1., 1.
    return p

def calc_result(self, x, y, fun_args, all_things, exp_kappa_int,
                exp_kappa_bdy, grad_kappa_x, grad_kappa_y):
    result = 0
    A_1, A_2, A_3, B = self.__op_cache__.operators
    A_1_bar, A_2_bar, A_3_bar, B_bar = self.__op_cache__.operators_bar

    def printer(*args):
        if self.__verbosity__ > 0:
            print(*args)

    for item in all_things:
        try:
            function = self.__op_cache__[item]
        except Exception as ex:
            printer('Failed to get {}'.format(item))
            raise ex
        new_mat = function(x, y, fun_args)

        # unbarred
        if A_1 in item:
            printer('Transforming A_1')
            multiplier = np.repeat(grad_kappa_x * exp_kappa_int, y.shape[0], 1)
            new_mat = multiplier * new_mat
        elif A_2 in item:
            printer('Transforming A_2')
            multiplier = np.repeat(grad_kappa_y * exp_kappa_int, y.shape[0], 1)
            new_mat = multiplier * new_mat
        elif A_3 in item:
            printer('Transforming A_3')
            multiplier = np.repeat(exp_kappa_int, y.shape[0], 1)
            new_mat = multiplier * new_mat

        # barred
        if A_1_bar in item:
            printer('Transforming A_1_bar')
            new_mat = np.repeat(grad_kappa_x.T * exp_kappa_int.T, x.shape[0], 0) * new_mat
        elif A_2_bar in item:
            printer('Transforming A_2_bar')
            new_mat = np.repeat(grad_kappa_y.T * exp_kappa_int.T, x.shape[0], 0) * new_mat
        elif A_3_bar in item:
            printer('Transforming A_3_bar')
            new_mat = np.repeat(exp_kappa_int.T, x.shape[0], 0) * new_mat

        # boundary
        if B in item:
            printer('Transforming B')
            new_mat = np.repeat(exp_kappa_bdy, y.shape[0], 1) * new_mat
        if B_bar in item:
            printer('Transforming B_bar')
            new_mat = np.repeat(exp_kappa_bdy.T, x.shape[0], 0) * new_mat

        result += new_mat
    return result

def getSigSeriesG(sts, nt, a, mu, sig):
    # sts has shape T x M; the remaining arguments are scalars.
    gaus = a * np.exp(-(np.arange(nt) - mu)**2 / sig**2)
    nper = int(nt / sts.shape[0])
    # NOTE: the zero-padding below hard-codes 6 rows, i.e. it assumes sts has T = 6.
    stsRepeated = np.vstack([
        np.repeat(sts, nper, axis=0),
        np.zeros((nt - nper * 6, sts.shape[1]))
    ])
    return (stsRepeated.T * gaus).T

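# Minimal usage sketch (assumed parameters): 6 time bins with 2 signals, upsampled to
# nt = 60 samples and modulated by a Gaussian bump centered at mu = 30.
import numpy as np

sts = np.random.rand(6, 2)   # T x M with T = 6
out = getSigSeriesG(sts, nt=60, a=1.0, mu=30.0, sig=10.0)
print(out.shape)             # (60, 2)
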
def sample(self, n, mu, seed):
    rstate = np.random.get_state()
    np.random.seed(seed)
    # x = 11 * np.random.random(200) - 6.0  # x lies in [-6, 5]
    x1 = np.random.normal(np.repeat(0, self.d), 1.0, size=n)
    y1 = x1 + x1**2 + np.random.random(200)
    # x = 2 * np.random.random(200) - 2.0  # x lies in [-2, 0]
    x2 = np.random.normal(np.repeat(mu, self.d), 1.0, size=n)
    y2 = x2 + x2**2 + np.random.random(200)
    return x1[:, np.newaxis], x2[:, np.newaxis], y1[:, np.newaxis], y2[:, np.newaxis]

def compute_log_prob(enc_w, dec_w, encode, decode_log_like, base_data,
                     conditional_data, samples_per_image, latent_dimensions, rs):
    (mus, log_sigs) = encode(enc_w, conditional_data)
    sigs = np.exp(log_sigs)
    noise = rs.randn(samples_per_image, conditional_data.shape[0],
                     latent_dimensions)
    Z_samples = mus + sigs * noise
    Z_samples = np.reshape(
        Z_samples,
        (conditional_data.shape[0] * samples_per_image, latent_dimensions),
        order='F')
    conditional_repeat = np.repeat(conditional_data, samples_per_image, axis=0)
    base_repeat = np.repeat(base_data, samples_per_image, axis=0)
    decoder_input = np.concatenate((Z_samples, base_repeat), axis=1)
    mean_log_prob = decode_log_like(dec_w, decoder_input, conditional_repeat)
    return mean_log_prob

def outputs(weights, inputs):
    """Goes from right to left, updating the state."""
    forget_weights = parser.get(weights, 'forget')
    change_weights = parser.get(weights, 'change')
    ingate_weights = parser.get(weights, 'ingate')
    outgate_weights = parser.get(weights, 'outgate')
    predict_weights = parser.get(weights, 'predict')
    num_sequences = inputs.shape[1]
    hiddens = np.repeat(parser.get(weights, 'init_hiddens'), num_sequences, axis=0)
    cells = np.repeat(parser.get(weights, 'init_cells'), num_sequences, axis=0)
    output = []
    for input in inputs:  # Iterate over time steps.
        hiddens, cells = update_lstm(input, hiddens, cells, forget_weights,
                                     change_weights, ingate_weights,
                                     outgate_weights)
        cur_output = activations(predict_weights, hiddens)
        output.append(cur_output - logsumexp(cur_output))
    return output  # Output normalized log-probabilities.

def _compute_sigmas(self, data, input, mask, tag):
    T, D = data.shape
    sigma_init = np.exp(self.inv_sigma_init) * np.ones((self.lags, self.K, self.D))
    sigma_ar = np.repeat(np.exp(self.inv_sigmas)[None, :, :], T - self.lags, axis=0)
    sigmas = np.concatenate((sigma_init, sigma_ar))
    assert sigmas.shape == (T, self.K, D)
    return sigmas

def outputs(weights, inputs):
    """Outputs normalized log-probabilities of each character,
    plus an extra one at the end."""
    forget_weights = parser.get(weights, 'forget')
    change_weights = parser.get(weights, 'change')
    ingate_weights = parser.get(weights, 'ingate')
    outgate_weights = parser.get(weights, 'outgate')
    predict_weights = parser.get(weights, 'predict')
    num_sequences = inputs.shape[1]
    hiddens = np.repeat(parser.get(weights, 'init_hiddens'), num_sequences, axis=0)
    cells = np.repeat(parser.get(weights, 'init_cells'), num_sequences, axis=0)
    output = [hiddens_to_output_probs(predict_weights, hiddens)]
    for input in inputs:  # Iterate over time steps.
        hiddens, cells = update_lstm(input, hiddens, cells, forget_weights,
                                     change_weights, ingate_weights,
                                     outgate_weights)
        output.append(hiddens_to_output_probs(predict_weights, hiddens))
    return output

def calc_result(self, x, y, fun_args, all_things, exp_kappa_int,
                exp_kappa_bdy, grad_kappa_x, grad_kappa_y):
    result = 0
    A_1, A_2, A_3, B = self.__op_cache__.operators
    A_1_bar, A_2_bar, A_3_bar, B_bar = self.__op_cache__.operators_bar

    def printer(*args):
        if self.__verbosity__ > 0:
            print(*args)

    for item in all_things:
        try:
            function = self.__op_cache__[item]
        except Exception as ex:
            printer('Failed to get {}'.format(item))
            raise ex
        new_mat = function(x, y, fun_args)

        # unbarred
        if A_1 in item:
            printer('Transforming A_1')
            multiplier = np.repeat(grad_kappa_x * exp_kappa_int, y.shape[0], 1)
            new_mat = multiplier * new_mat
        elif A_2 in item:
            printer('Transforming A_2')
            multiplier = np.repeat(grad_kappa_y * exp_kappa_int, y.shape[0], 1)
            new_mat = multiplier * new_mat
        elif A_3 in item:
            printer('Transforming A_3')
            multiplier = np.repeat(exp_kappa_int, y.shape[0], 1)
            new_mat = multiplier * new_mat

        # barred
        if A_1_bar in item:
            printer('Transforming A_1_bar')
            new_mat = np.repeat(grad_kappa_x.T * exp_kappa_int.T, x.shape[0], 0) * new_mat
        elif A_2_bar in item:
            printer('Transforming A_2_bar')
            new_mat = np.repeat(grad_kappa_y.T * exp_kappa_int.T, x.shape[0], 0) * new_mat
        elif A_3_bar in item:
            printer('Transforming A_3_bar')
            new_mat = np.repeat(exp_kappa_int.T, x.shape[0], 0) * new_mat

        # boundary
        if B in item:
            printer('Transforming B')
            new_mat = np.repeat(exp_kappa_bdy, y.shape[0], 1) * new_mat
        if B_bar in item:
            printer('Transforming B_bar')
            new_mat = np.repeat(exp_kappa_bdy.T, x.shape[0], 0) * new_mat

        result += new_mat
    return result

def outputs(weights, inputs):
    """Goes from right to left, updating the state."""
    num_sequences = inputs.shape[1]
    hiddens = np.repeat(parser.get(weights, 'init_hiddens'), num_sequences, axis=0)
    change_weights = parser.get(weights, 'change')
    predict_weights = parser.get(weights, 'predict')
    output = [hiddens_to_output_probs(predict_weights, hiddens)]
    for input in inputs:  # Iterate over time steps.
        hiddens = update(input, hiddens, change_weights)
        output.append(hiddens_to_output_probs(predict_weights, hiddens))
    return output

def make_pinwheel_data(radial_std, tangential_std, num_classes, num_per_class, rate):
    rads = np.linspace(0, 2 * np.pi, num_classes, endpoint=False)

    features = npr.randn(num_classes * num_per_class, 2) \
        * np.array([radial_std, tangential_std])
    features[:, 0] += 1.
    labels = np.repeat(np.arange(num_classes), num_per_class)

    angles = rads[labels] + rate * np.exp(features[:, 0])
    rotations = np.stack([np.cos(angles), -np.sin(angles),
                          np.sin(angles), np.cos(angles)])
    rotations = np.reshape(rotations.T, (-1, 2, 2))

    return 10 * npr.permutation(np.einsum('ti,tij->tj', features, rotations))

def lstm_predict(params, inputs):
    def update_lstm(input, hiddens, cells):
        change = np.tanh(concat_and_multiply(params['change'], input, hiddens))
        forget = sigmoid(concat_and_multiply(params['forget'], input, hiddens))
        ingate = sigmoid(concat_and_multiply(params['ingate'], input, hiddens))
        outgate = sigmoid(concat_and_multiply(params['outgate'], input, hiddens))
        cells = cells * forget + ingate * change
        hiddens = outgate * np.tanh(cells)
        return hiddens, cells

    def hiddens_to_output_probs(hiddens):
        output = concat_and_multiply(params['predict'], hiddens)
        return output - logsumexp(output, axis=1, keepdims=True)  # Normalize log-probs.

    num_sequences = inputs.shape[1]
    hiddens = np.repeat(params['init hiddens'], num_sequences, axis=0)
    cells = np.repeat(params['init cells'], num_sequences, axis=0)

    output = [hiddens_to_output_probs(hiddens)]
    for input in inputs:  # Iterate over time steps.
        hiddens, cells = update_lstm(input, hiddens, cells)
        output.append(hiddens_to_output_probs(hiddens))
    return output

def make_pinwheel(radial_std, tangential_std, num_classes, num_per_class, rate,
                  rs=npr.RandomState(0)):
    """Based on code by Ryan P. Adams."""
    rads = np.linspace(0, 2 * np.pi, num_classes, endpoint=False)

    features = rs.randn(num_classes * num_per_class, 2) \
        * np.array([radial_std, tangential_std])
    features[:, 0] += 1
    labels = np.repeat(np.arange(num_classes), num_per_class)

    angles = rads[labels] + rate * np.exp(features[:, 0])
    rotations = np.stack([np.cos(angles), -np.sin(angles),
                          np.sin(angles), np.cos(angles)])
    rotations = np.reshape(rotations.T, (-1, 2, 2))

    return np.einsum('ti,tij->tj', features, rotations)

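# Minimal usage sketch with made-up parameters: five spiral arms of 100 points each.
# Assumes numpy / numpy.random are imported as np / npr, as the generator above requires.
data = make_pinwheel(radial_std=0.3, tangential_std=0.05, num_classes=5,
                     num_per_class=100, rate=0.25)
print(data.shape)  # (500, 2)
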
def rnn_predict(params, inputs):
    def update_rnn(input, hiddens):
        return np.tanh(concat_and_multiply(params['change'], input, hiddens))

    def hiddens_to_output_probs(hiddens):
        output = concat_and_multiply(params['predict'], hiddens)
        return output - logsumexp(output, axis=1, keepdims=True)

    num_sequences = inputs.shape[1]
    hiddens = np.repeat(params['init hiddens'], num_sequences, axis=0)
    output = [hiddens_to_output_probs(hiddens)]

    for input in inputs:  # Iterate over time steps.
        hiddens = update_rnn(input, hiddens)
        output.append(hiddens_to_output_probs(hiddens))
    return output

def process_one_batch(inputs, weights):
    """Process one batch of image sets

    Recurrent network process:
        h1_out = H( W[x_h1]*X(t) + W[h2_h1]*h2_out + bias[h1] )
        h2_out = H( W[h1_h2]*h1_out + bias[h2] )
        output = H( W[h2_out]*h2_out + bias[out] )
    """
    batch_size = inputs.shape[1]
    w_in_2_h1 = parser.get(weights, "input_2_h1")
    w_h1_2_h2 = parser.get(weights, "h1_2_h2")
    w_h2_2_out = parser.get(weights, "h2_2_output")
    h2_out = np.repeat(parser.get(weights, "pre_h2_out"), batch_size, axis=0)
    outputs = []
    for sub_input in inputs:
        input_x = x_with_bias(np.concatenate((sub_input, h2_out), axis=1))
        h1_in = w_act_on_x(input_x, w_in_2_h1)
        h1_out = hidden_actfun(h1_in)
        h2_in = w_act_on_x(x_with_bias(h1_out), w_h1_2_h2)
        h2_out = hidden_actfun(h2_in)
        output_in = w_act_on_x(x_with_bias(h2_out), w_h2_2_out)
        output_out = output_actfun(output_in)
        outputs.append(output_out)
    return outputs

def fun(x):
    # to_scalar and grad are assumed to come from autograd (test helpers).
    return to_scalar(np.repeat(x, 1, axis=0))

d_fun = lambda x: to_scalar(grad(fun)(x))

def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')

        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                for u in range(self.n_unique):
                    ar_mod.append(
                        smapi.tsa.AR(X[kmmod.labels_ == u]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[u].params[1:])
                    ar_resid.append(ar_mod[u].resid)
            else:
                # run one AR model on most part of time series
                # that has most points assigned after clustering
                mf = np.argmax(np.bincount(kmmod.labels_))
                ar_mod.append(
                    smapi.tsa.AR(X[kmmod.labels_ == mf]).fit(self.n_lags))
                ar_alpha.append(ar_mod[0].params[1:])
                ar_resid.append(ar_mod[0].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    mu_init[u] = kmeans[u, 0] - np.dot(
                        np.repeat(kmeans[u, 0], self.n_lags), ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    if not self.shared_alpha:
                        maxVar = np.max([np.var(ar_resid[i])
                                         for i in range(self.n_unique)])
                    else:
                        maxVar = np.var(ar_resid[0])
                    precision_init[u] = 1.0 / maxVar
                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                alpha_init = np.zeros((self.n_unique, self.n_lags))
                for u in range(self.n_unique):
                    ar_idx = u
                    if self.shared_alpha:
                        ar_idx = 0
                    alpha_init[u, :] = ar_alpha[ar_idx]
                self.alpha_ = alpha_init

def backward_pass(self, delta):
    return np.repeat(delta[:, np.newaxis, :], 2, 1)

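# Shape sketch only (the surrounding layer class is not shown): the incoming gradient
# is repeated twice along a new middle axis, e.g. (4, 3) -> (4, 2, 3).
import numpy as np

delta = np.arange(12.0).reshape(4, 3)
repeated = np.repeat(delta[:, np.newaxis, :], 2, 1)
print(repeated.shape)  # (4, 2, 3)
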
def _init_params(self, data, lengths=None, params='stmpaw'):
    X = data['obs']

    if self.n_lags == 0:
        super(ARTHMM, self)._init_params(data, lengths, params)
    else:
        if 's' in params:
            super(ARTHMM, self)._init_params(data, lengths, 's')

        if 't' in params:
            super(ARTHMM, self)._init_params(data, lengths, 't')

        if 'm' in params or 'a' in params or 'p' in params:
            kmmod = cluster.KMeans(
                n_clusters=self.n_unique,
                random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_
            ar_mod = []
            ar_alpha = []
            ar_resid = []

            if not self.shared_alpha:
                count = 0
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_mod.append(
                            smapi.tsa.AR(X[kmmod.labels_ == u, f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[count].params[1:])
                        ar_resid.append(ar_mod[count].resid)
                        count += 1
            else:
                # run one AR model on most part of time series
                # that has most points assigned after clustering
                mf = np.argmax(np.bincount(kmmod.labels_))
                for f in range(self.n_features):
                    ar_mod.append(
                        smapi.tsa.AR(X[kmmod.labels_ == mf, f]).fit(self.n_lags))
                    ar_alpha.append(ar_mod[f].params[1:])
                    ar_resid.append(ar_mod[f].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u, f] = kmeans[u, f] - np.dot(
                            np.repeat(kmeans[u, f], self.n_lags), ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:
                precision_init = \
                    np.zeros((self.n_unique, self.n_features, self.n_features))
                for u in range(self.n_unique):
                    if self.n_features == 1:
                        precision_init[u] = 1.0 / (np.var(X[kmmod.labels_ == u]))
                    else:
                        precision_init[u] = np.linalg.inv(
                            np.cov(np.transpose(X[kmmod.labels_ == u])))
                    # Alternative: Initialization using ar_resid
                    # for f in range(self.n_features):
                    #     if not self.shared_alpha:
                    #         precision_init[u, f, f] = 1. / np.var(ar_resid[count])
                    #         count += 1
                    #     else:
                    #         precision_init[u, f, f] = 1. / np.var(ar_resid[f])
                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = np.zeros((1, self.n_lags))
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        ar_idx = 0
                        alpha_init[u] = ar_alpha[ar_idx]
                        ar_idx += self.n_features
                self.alpha_ = np.copy(alpha_init)

def get_pi(beta, X, alpha):
    linear_pred = np.dot(X, beta[1:])
    linear_pred = np.tile(linear_pred, len(alpha))
    alpha = np.repeat(alpha, len(np.dot(X, beta[1:])))
    return logistic(beta[0] + linear_pred + 0 * alpha)

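# Minimal usage sketch; `logistic` is assumed to be the standard sigmoid and is defined
# here only for illustration, in case it is not already available in the module.
import numpy as np

def logistic(z):
    return 1.0 / (1.0 + np.exp(-z))

X = np.random.randn(10, 3)               # 10 observations, 3 covariates
beta = np.array([0.5, 1.0, -2.0, 0.3])   # intercept followed by 3 slopes
alpha = np.array([-1.0, 0.0, 1.0])       # 3 group effects
pi = get_pi(beta, X, alpha)
print(pi.shape)                          # (30,) = len(X) * len(alpha)
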
def _compute_log_likelihood(self, data, from_=0, to_=-1):
    ll = self._ll(self.mu_, self.precision_, data['obs'][from_:to_])
    rep = self.n_chain
    return np.repeat(ll, rep).reshape(-1, self.n_unique * rep)