def interpolate(t, y, num_obs=50):
    """
    Interpolates each trajectory such that observation times coincide for each one.

    Note: initially cubic interpolation gave great power, but this happens as an
    artifact of the interpolation, as both trajectories have the same number of
    observations. Type I error was increased as a result. To avoid this we
    settled for a linear interpolation between observations. Splines were also
    tried but gave very bad interpolations.
    """
    t = np.array([np.sort(row) for row in t])
    t = np.insert(t, 0, 0, axis=1)
    t = np.insert(t, len(t[0]), 1, axis=1)
    y = np.insert(y, 0, y[:, 0], axis=1)
    y = np.insert(y, len(y[0]), y[:, -1], axis=1)
    new_t = np.zeros(num_obs)
    new_y = np.zeros(num_obs)
    for i in range(len(t)):
        f = interp1d(t[i], y[i], kind='linear')
        # f = splrep(t[i], y[i])
        t_temp = np.random.uniform(low=0.0, high=1.0, size=num_obs)
        # t_temp = np.linspace(0.1, 0.9, num_obs)
        y_temp = f(t_temp)
        # y_temp = splev(t_temp, f, der=0)
        new_y = np.vstack((new_y, y_temp))
        new_t = np.vstack((new_t, t_temp))
    return new_t[1:], new_y[1:]
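# A minimal usage sketch for interpolate(), assuming numpy is imported as np
# and interp1d comes from scipy.interpolate; the trajectories and observation
# times below are made up for illustration.
import numpy as np
from scipy.interpolate import interp1d

# Two trajectories observed at different irregular times in (0, 1).
t = np.array([[0.12, 0.40, 0.55, 0.83],
              [0.20, 0.31, 0.67, 0.90]])
y = np.sin(2 * np.pi * t)  # hypothetical observations at those times

new_t, new_y = interpolate(t, y, num_obs=50)
print(new_t.shape, new_y.shape)  # (2, 50) (2, 50)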
def DEL(new_q, cur_q, prev_q):
    # SUPER hacky way of adding constrained points
    for i in pinned_points:
        new_q = numpy.insert(new_q, i * d, q_initial[i * d])
        new_q = numpy.insert(new_q, i * d + 1, q_initial[i * d + 1])
    res = D1_Ld(cur_q, new_q) + D2_Ld(prev_q, cur_q) + mass_matrix @ external_forces
    # SUPER hacky way of dropping the constrained points back out of the residual
    return res[q_mask]
def pre_DEL(new_q, cur_q, prev_q):
    # SUPER hacky way of adding constrained points
    for i in pinned_points:
        new_q = numpy.insert(new_q, i * d, q_initial[i * d])
        new_q = numpy.insert(new_q, i * d + 1, q_initial[i * d + 1])
    # res = D1_Ld(cur_q, new_q) + D2_Ld(prev_q, cur_q) + mass_matrix @ external_forces
    res = discrete_lagrangian(cur_q, new_q) + discrete_lagrangian(prev_q, cur_q)  # + mass_matrix @ external_forces
    return res
def DEL(new_q, cur_q, prev_q):
    # SUPER hacky way of adding constrained points
    for i in pinned_points:
        x, y = pinned_postions(i)
        new_q = numpy.insert(new_q, i * d, x)
        new_q = numpy.insert(new_q, i * d + 1, y)
    res = D1_Ld(cur_q, new_q) + D2_Ld(prev_q, cur_q) + mass_matrix @ external_forces
    # SUPER hacky way of dropping the constrained points back out of the residual
    return res[q_mask]
def predict(self, X):
    if self.fit_intercept:
        bias = np.ones(len(X))
        X_new = np.insert(X, 0, bias, axis=1)
    else:
        X_new = X
    Z = -X_new @ self.weights
    P = softmax(Z, axis=1)
    return np.argmax(P, axis=1)


# Example usage:
# (X, Y) = load_digits(return_X_y=True)  # or sklearn.datasets.load_breast_cancer(return_X_y=True)
# model = Multiclass()
# model.fit_logistic(X, Y)
# y_hat = model.predict(X)
# print(accuracy(y_hat, Y))  # compare the predicted and actual values
def fit_L2_regularized(self, X, y, lamda, n_iter=10000, lr=0.0001, lr_type='constant'):
    # checking whether X and y have the same number of samples
    assert len(X) == len(y)
    self.lambda2 = lamda
    self.y = y
    # update X based on the intercept term
    X_ = X.copy()
    if self.fit_intercept:
        bias = np.ones(len(X_))
        X_ = np.insert(X_, 0, bias, axis=1)
    self.X_ = X_
    self.theta = np.ones(self.X_.shape[1]) / 2
    n_samples = self.X_.shape[0]
    gradient = grad(self.L2_regularised_loss)
    for iter in range(1, n_iter + 1):
        # updating the learning rate based on lr_type
        if lr_type == 'inverse':
            curr_lr = lr / iter
        else:
            curr_lr = lr
        # updating the coefficients
        self.theta = self.theta - curr_lr * gradient(self.theta)
def fit(self, X, y, n_iter=2000, lr=np.e**-5, lr_type='constant'):
    assert len(X) == len(y)
    # update X based on the intercept term
    X_ = X.copy()
    if self.fit_intercept:
        bias = np.ones(len(X_))
        X_ = np.insert(X_, 0, bias, axis=1)
    theta = np.zeros(X_.shape[1])
    n_samples = X_.shape[0]
    for iter in range(1, n_iter + 1):
        # updating the learning rate based on lr_type
        if lr_type == 'inverse':
            curr_lr = lr / iter
        else:
            curr_lr = lr
        y_pred = sigmoid(np.dot(X_, theta))
        # updating the coefficients
        theta -= curr_lr * np.dot(X_.T, y_pred - y)
    self.theta = theta
    self.X_ = X_
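# Several of the fit()/predict() methods in this collection call a
# free-standing sigmoid() helper that is never shown; a minimal sketch of
# what it presumably looks like:
import numpy as np

def sigmoid(z):
    # Standard logistic function, applied elementwise.
    return 1.0 / (1.0 + np.exp(-z))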
def view(self):
    '''Return the estimates already computed.'''
    if len(self.results) == 0:
        raise ValueError("no results yet")
    inter = pd.DataFrame()
    for key, elt in self.results.items():
        inter[key] = elt[0]
    inter = inter.T
    if self.size > len(self.names_pred):
        inter.columns = np.insert(self.names_pred, 0, "error prior")
    elif self.cond_model.name == "Multilogistic":
        inter.columns = np.insert(self.names_pred, 0, "intercept")
    else:
        inter.columns = self.names_pred
    return inter.T
def fit_unregularized_autograd(self, X, y, n_iter=4000, lr=np.e**-5, lr_type='constant'):
    assert len(X) == len(y)
    # update X based on the intercept term
    self.y = y
    X_ = X.copy()
    if self.fit_intercept:
        bias = np.ones(len(X_))
        X_ = np.insert(X_, 0, bias, axis=1)
    theta = np.ones(X_.shape[1]) / 2
    n_samples = X_.shape[0]
    self.X_ = X_
    gradient = grad(self.unreguralised_loss)
    for iter in range(1, n_iter + 1):
        # updating the learning rate based on lr_type
        if lr_type == 'inverse':
            curr_lr = lr / iter
        else:
            curr_lr = lr
        # updating the coefficients
        theta -= curr_lr * gradient(theta)
    self.theta = theta
def fit(self, x, y, solver="SGD"): x_ = np.insert(x, 0, values=np.ones(x.shape[0]), axis=1) # 在特征空间的最前面一列添加一列1 y_ = np.reshape(y, [-1, 1]) # self.N = x_.shape[0] # 保存数据长度 self.Weights = np.ones([x_.shape[1], 1]) # 初始化参数列表 if solver == 'SGD': self._stocGradAscent(x_, y_)
def predict(self, X):
    X_ = X.copy()
    if self.fit_intercept:
        bias = np.ones(len(X_))
        X_ = np.insert(X_, 0, bias, axis=1)
    z = -np.dot(X_, self.weights)
    p = softmax(z)
    return np.argmax(p, axis=1)
def fit(self, X, Y, solver="SGD"): X_ = np.insert(X, 0, values=np.ones(X.shape[0]), axis=1) Y_ = np.reshape(Y, [-1, 1]) self.N = X_.shape[0] self.Weights = np.ones([X_.shape[1], 1]) if solver == "SGD": self._stocGradAscent(X_, Y_) else: self._gradAscent(X_, Y_)
def predict(self, X):
    X_ = np.insert(X, 0, values=np.ones(X.shape[0]), axis=1)
    y_pred = np.zeros(X_.shape[0])
    h = 1.0 / (1 + np.exp(-(X_ @ self.Weights)))
    for i in range(h.shape[0]):
        y_pred[i] = 1 if h[i] >= 0.5 else 0
    return y_pred
def predict(self, x):
    x_ = np.insert(x, 0, values=np.ones(x.shape[0]), axis=1)
    y_pred = np.zeros(x_.shape[0])
    # Sigmoid written as exp(z) / (1 + exp(z)), equivalent to 1 / (1 + exp(-z)).
    h = np.exp(x_ @ self.Weights) / (1 + np.exp(x_ @ self.Weights))
    for i in range(h.shape[0]):
        y_pred[i] = 1 if h[i] >= 0.5 else 0
    return y_pred
def setUp(self, X, y):
    X_train = X.copy()
    y_train = y.copy()
    self.k = len(np.unique(y_train))
    self.X = X_train
    self.y = y_train
    self.X = np.insert(self.X, 0, values=1, axis=1)
    self.n, self.p = self.X.shape
def predict(self, X_test):
    X = X_test.copy()
    if self.fit_intercept:
        bias = np.ones(len(X))
        X = np.insert(X, 0, bias, axis=1)
    y_hat = sigmoid(np.dot(X, self.theta))
    for i in range(len(y_hat)):
        y_hat[i] = 1 if y_hat[i] >= 0.5 else 0
    return y_hat
def resize_traj(tc, tk, xk, yk, thetak):
    modified = True
    i = 0
    while modified and i < 100:
        modified = False
        n = 0
        while n < tk.shape[0]:
            # Split any step whose duration exceeds the target tc by more than 10%
            if (tk[n] > tc * 1.1) and (tk.shape[0] < 300):
                new_dt = 0.5 * tk[n]
                tk[n] = new_dt
                new_x = 0.5 * (xk[n + 1] + xk[n])
                new_y = 0.5 * (yk[n + 1] + yk[n])
                new_theta = avg_angle(thetak[n], thetak[n + 1])
                tk = np.insert(tk, n, new_dt)
                xk = np.insert(xk, n + 1, new_x)
                yk = np.insert(yk, n + 1, new_y)
                thetak = np.insert(thetak, n + 1, new_theta)
                modified = True
                n += 1
            n += 1
        i += 1
    return tk, xk, yk, thetak
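# resize_traj() relies on an avg_angle() helper that is not shown; a plausible
# sketch (an assumption, not the original) that averages two angles through
# their unit vectors so that wrap-around at +/-pi is handled correctly:
def avg_angle(a, b):
    return np.arctan2(0.5 * (np.sin(a) + np.sin(b)),
                      0.5 * (np.cos(a) + np.cos(b)))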
def interpolate(t, y, num_obs=5):
    """
    Interpolates each trajectory with a cubic function such that observation
    times coincide for each one.
    """
    if isinstance(t, list):
        t, y = np.array(t), np.array(y)
    t = [np.insert(t[i], 0, 0, axis=0) for i in range(len(t))]
    t = [np.insert(t[i], len(t[i]), 1, axis=0) for i in range(len(t))]
    # Pad each trajectory with its own first and last observed values
    y = [np.insert(y[i], 0, y[i][0], axis=0) for i in range(len(y))]
    y = [np.insert(y[i], len(y[i]), y[i][-1], axis=0) for i in range(len(y))]
    new_t = np.zeros(num_obs)
    new_y = np.zeros(num_obs)
    for i in range(len(t)):
        f = interp1d(t[i], y[i], kind='cubic')
        t_temp = np.linspace(0.1, 0.9, num=num_obs, endpoint=True)
        y_temp = f(t_temp)
        new_y = np.vstack((new_y, y_temp))
        new_t = np.vstack((new_t, t_temp))
    return new_t[1:], new_y[1:]
def wavelet(df):
    # Gaussian process regression followed by a stationary wavelet transform
    result = pd.DataFrame()
    mjds = df['mjd'].unique()
    # Two observations per unique mjd value
    t = np.arange(np.min(mjds), np.max(mjds), 0.5)
    if (len(t) % 2) == 0:
        t = np.insert(t, len(t), t[len(t) - 1] + 0.5)
    for obj, agg_df in df.groupby('object_id'):
        agg_df = agg_df.sort_values(by=['mjd'])
        X = agg_df['mjd']
        Y = agg_df['flux']
        Yerr = agg_df['flux_err']
        # Start by setting the hyperparameters to unity.
        log_sigma = 0
        log_rho = 0
        # Following Narayan et al. (2018), use the Matern 3/2 kernel.
        kernel = celerite.terms.Matern32Term(log_sigma, log_rho)
        gp = celerite.GP(kernel, mean=0.0)
        gp.compute(X, Yerr)
        # Extract the initial parameter guess from the celerite kernel and put
        # it in a vector, then run the optimization. Remember that celerite
        # keeps these parameters in ln() form.
        p0 = gp.get_parameter_vector()
        results = minimize(nll, p0, method='L-BFGS-B', jac=grad_nll, args=(Y, gp))
        gp.set_parameter_vector(np.abs(results.x))
        # Predict the posterior mean and variance
        mu, var = gp.predict(Y, t, return_var=True)
        if sum(np.isnan(mu)) != 0:
            print('NANs exist in mu vector')
            return [obj, results.x, mu]
        # Wavelet transform: calculate the SWT using an even-length array
        (cA2, cD2), (cA1, cD1) = pywt.swt(mu[1:, ], 'sym2', level=2)
        obj_df = pd.DataFrame(list(cA2) + list(cA1) + list(cD2) + list(cD1)).transpose()
        obj_df['object_id'] = obj
        result = pd.concat([result, obj_df])
    result.reset_index(inplace=True)
    result.drop("index", axis=1, inplace=True)
    return result
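# wavelet() optimizes the GP hyperparameters through nll and grad_nll helpers
# that are not included here; a minimal sketch following the convention in the
# celerite documentation (assumed, since the originals are not shown):
def nll(params, y, gp):
    # Negative log-likelihood of the GP for the given kernel parameters.
    gp.set_parameter_vector(params)
    return -gp.log_likelihood(y)

def grad_nll(params, y, gp):
    # Gradient of the negative log-likelihood w.r.t. the parameters.
    gp.set_parameter_vector(params)
    return -gp.grad_log_likelihood(y)[1]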
def fit_multiclass_autograd(self, X, y, n_iter, lr):
    N = len(X.index)
    Nf = len(X.columns)
    X = np.array(X)
    y = np.array(y)
    self.classes = np.unique(y)
    num_classes = len(self.classes)
    curr_coeff = np.zeros((num_classes, Nf + 1))
    self.X_auto = np.insert(X, 0, 1, axis=1)
    self.y_auto = y
    mse_auto = grad(self.error_function_multiclass)
    for i in range(n_iter):
        dmse = mse_auto(curr_coeff)
        curr_coeff -= lr * dmse
    self.coef = curr_coeff
def fit_logistic(self, X, Y, n_iters=100, lr=0.1):
    self.Y_onehot = onehot_encoder.fit_transform(Y.reshape(-1, 1))
    if self.fit_intercept:
        bias = np.ones(len(X))
        X_new = np.insert(X, 0, bias, axis=1)
    else:
        X_new = X
    self.X = X_new
    weights = np.zeros((self.X.shape[1], self.Y_onehot.shape[1]))
    for i in range(n_iters):
        weights -= lr * self.gradient(self.X, self.Y_onehot, weights)
    self.weights = weights
    return weights
def train(self, x, y):
    """
    Train the model with mini-batch gradient descent, early stopping, and
    learning-rate decay.
    :param x: feature matrix
    :param y: labels
    :return:
    """
    x, self.x_avg, self.x_std = data_standardization(x)
    y, self.y_avg, self.y_std = label_standardization(y)
    x = np.insert(x, 0, values=1, axis=1)
    y = y.reshape((1, -1))[0]
    self.w = np.random.rand(1, len(x[0]))
    # register the model and loss with the optimizer
    self.optimizer.register_model(self.fit)
    self.optimizer.register_loss(self.loss)
    n_batch = int(np.ceil(len(x) / self.batch_size))
    best_err = sys.maxsize
    for i in range(self.num_iterations):
        # early stopping once the learning rate has decayed far enough
        if self.early_stopping and self.optimizer.learning_rate < 1e-8:
            break
        randomize = list(range(len(x)))
        np.random.shuffle(randomize)
        x = x[randomize]
        y = y[randomize]
        # iterate over the mini-batches
        for j_batch in range(n_batch):
            end = min(j_batch * self.batch_size + self.batch_size, len(x))
            x_batch = x[j_batch * self.batch_size:end]
            y_batch = y[j_batch * self.batch_size:end]
            self.w = self.optimizer.step(self.w, x_batch, y_batch)
        y_ = self.fit(self.w, x)
        err = self.loss.errors(y, y_)
        print('Epoch {} err={}'.format(i, err))
        if err < best_err - 0.05:
            best_err = err
        else:
            # decay the learning rate when the error stops improving
            self.optimizer.learning_rate /= self.learning_rate_decay
            print('learning rate decayed from {} to {}'.format(
                self.optimizer.learning_rate * self.learning_rate_decay,
                self.optimizer.learning_rate))
def fit(self, X, y, n_iter, lr):
    N = len(X.index)
    Nf = len(X.columns)
    X = np.array(X)
    y = np.array(y)
    curr_coeff = np.zeros(Nf + 1)
    for i in range(n_iter):
        for j in range(N):
            # Adding an extra 1 for the ease of bias calculation of theta
            curr_X = np.insert(np.transpose(X[j]), 0, 1)
            curr_y = y[j]
            X_theta = np.dot(curr_coeff, curr_X)
            X_diff = self.sigmoid(X_theta) - curr_y
            errors = X_diff * curr_X
            curr_coeff -= lr * errors
    self.coef = curr_coeff
def fit_autograd(self, X, y, n_iter, lr):
    N = len(X.index)
    Nf = len(X.columns)
    X = np.array(X)
    y = np.array(y)
    curr_coeff = np.zeros(Nf + 1)
    self.coef = curr_coeff
    self.X_auto = np.transpose(np.insert(X, 0, 1, axis=1))
    self.y_auto = y
    # Pick the gradient of the appropriate (regularized) error function
    if self.reg == "L1":
        mse_auto = elementwise_grad(self.error_function_L1)
    elif self.reg == "L2":
        mse_auto = grad(self.error_function_L2)
    else:
        mse_auto = grad(self.error_function)
    for i in range(n_iter):
        dmse = mse_auto(curr_coeff)
        curr_coeff -= lr * dmse
    self.coef = curr_coeff
def add_missing_paths(k, init_paths, init_nb_paths):
    '''
    Add the paths that were given zero probability during init.

    k (dict of list): the number of components on each layer of each head and tail
    init_paths (ndarray): the already existing non-zero probability paths
    init_nb_paths (list of Bool): takes the value 1 if the path existed, 0 otherwise
    ---------------------------------------------------------------------------------
    returns (tuple of size 2): the completed list of paths (ndarray) and the
    total number of paths (1d array)
    '''
    L = len(k)
    all_possible_paths = list(product(*[np.arange(k[l]) for l in range(L)]))
    # Turn the existing paths into a list of tuples
    existing_paths = [tuple(path.astype(int)) for path in init_paths]
    nb_existing_paths = deepcopy(init_nb_paths)
    for idx, path in enumerate(all_possible_paths):
        if path not in existing_paths:
            existing_paths.insert(idx, path)
            nb_existing_paths = np.insert(nb_existing_paths, idx, 0, axis=0)
    return existing_paths, nb_existing_paths
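# A small usage sketch for add_missing_paths(), with made-up inputs: two
# layers with 2 components each give 4 possible paths, of which only 3 were
# observed during init.
from copy import deepcopy
from itertools import product

import numpy as np

k = {0: 2, 1: 2}
init_paths = np.array([[0, 0], [0, 1], [1, 1]])
init_nb_paths = np.array([5, 3, 2])

paths, nb_paths = add_missing_paths(k, init_paths, init_nb_paths)
print(paths)     # [(0, 0), (0, 1), (1, 0), (1, 1)]
print(nb_paths)  # [5 3 0 2]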
def simmed_ps(Sigma, x, n, K):
    nm = max(n)
    p = np.clip(x / n, 1 / float(nm + 1), float(nm - 1) / float(nm + 1))
    Omega = np.diag([pi * (1 - pi) / ni for ni, pi in zip(n, p)])
    Omegai = np.linalg.inv(Omega)
    Omega_s = Omega[1:, 1:]
    Omegai_s = Omegai[1:, 1:]
    obs = np.array(p[1:])
    Sigmai = np.linalg.inv(Sigma)
    Var = np.linalg.inv(Omegai_s + Sigmai)
    print(Var)
    res = np.zeros((K, len(n)))
    qs = []
    for i in range(K):
        # Draw the first component, then the rest conditionally on it
        p0 = norm.rvs(p[0], scale=Omega[0, 0])
        q = norm.pdf(p0, p[0], Omega[0, 0])
        p0_vec = np.array([p0] * (len(x) - 1))
        m = np.dot(Var, np.dot(Omegai_s, obs) + np.dot(Sigmai, p0_vec))
        p_rest = multivariate_normal.rvs(mean=m, cov=Var)
        q *= multivariate_normal.pdf(p_rest, mean=m, cov=Var)
        res[i, :] = np.insert(p_rest, 0, p0)
        qs.append(q)
    return res, qs
def fit_multiclass(self, X, y, n_iter, lr):
    N = len(X.index)
    Nf = len(X.columns)
    X = np.array(X)
    y = np.array(y)
    self.classes = np.unique(y)
    num_classes = len(self.classes)
    curr_coeff = np.zeros((num_classes, Nf + 1))
    for i in range(n_iter):
        for j in range(N):
            # Adding an extra 1 for the ease of bias calculation of theta
            curr_X = np.insert(np.transpose(X[j]), 0, 1)
            curr_y = y[j]
            X_theta_sum = np.sum(np.exp(np.dot(curr_coeff, curr_X)))
            for k in range(num_classes):
                X_theta_exp = np.exp(np.dot(curr_coeff[k], curr_X))
                P_k = X_theta_exp / X_theta_sum
                X_diff = (1 if curr_y == self.classes[k] else 0) - P_k
                errors = X_diff * curr_X
                curr_coeff[k] += lr * errors
    self.coef = curr_coeff
def fit(self, X, y, n_iter=1000, lr=0.001):
    X_ = X.copy()
    if self.fit_intercept:
        bias = np.ones(len(X_))
        X_ = np.insert(X_, 0, bias, axis=1)
    self.X_ = X_
    self.y_encoded = onehotencoder.fit_transform(y.reshape(-1, 1))
    self.n_samples = len(X_)
    # initializing the weights
    weights = np.zeros((X_.shape[1], self.y_encoded.shape[1]))
    for i in range(n_iter):
        if self.opt == 'autograd':
            gradient = grad(self.loss)
            weights -= lr * gradient(weights)
        elif self.opt == 'grad_desc':
            z = -np.dot(X_, weights)
            p = softmax(z)
            weights -= (lr / self.n_samples) * (X_.T @ (self.y_encoded - p))
    self.weights = weights
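# The autograd branch above differentiates a self.loss method that is not
# shown; a sketch of a cross-entropy loss consistent with the z = -X @ W
# convention used in the grad_desc branch (an assumption, since the original
# is not included), written with autograd-friendly operations and assuming
# self.y_encoded is a dense one-hot array:
import autograd.numpy as anp

def loss(self, weights):
    z = -anp.dot(self.X_, weights)
    z = z - anp.max(z, axis=1, keepdims=True)  # numerical stability
    log_p = z - anp.log(anp.sum(anp.exp(z), axis=1, keepdims=True))
    return -anp.sum(self.y_encoded * log_p) / self.n_samples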
def hom_2d_to_3d(pts):
    # Insert a row of zeros as the z-coordinate, lifting 2D homogeneous
    # points (columns [x, y, w]) to 3D homogeneous points ([x, y, 0, w]).
    return np.insert(pts, 2, np.zeros(pts.shape[1]), 0)
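# For example (hypothetical input): three 2D points in homogeneous
# coordinates, stacked as columns.
pts2d = np.array([[1.0, 2.0, 3.0],
                  [4.0, 5.0, 6.0],
                  [1.0, 1.0, 1.0]])
pts3d = hom_2d_to_3d(pts2d)  # rows are now [x, y, 0, w]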
image_id = np.array(b.core_data['visual_stimuli']['image_name'])
# Omit "ignore" trials (aborted trials when the mouse licked too early, or
# catch trials when the image didn't actually change)
selectedTrials = (b.hit | b.miss) & (~b.ignore)
# Add one here to correct for a one-frame shift in frame times from camstim
active_changeTimes = b.frameAppearTimes[
    np.array(b.trials['change_frame'][selectedTrials]).astype(int) + 1]
binned_activeChangeTimes = binVariable(active_changeTimes, binwidth)
if restrictToChange:
    changeBins = getChangeBins(binned_activeChangeTimes, binwidth)
else:
    changeBins = np.ones(binned_activeChangeTimes.size).astype(bool)
lick_times = b.lickTimes
# Keep only licks separated from the previous lick by at least 0.5 s
first_lick_times = lick_times[np.insert(np.diff(lick_times) >= 0.5, 0, True)]
reward_frames = b.core_data['rewards']['frame'].values
reward_times = b.vsyncTimes[reward_frames]
flash_times = b.frameAppearTimes[np.array(b.core_data['visual_stimuli']['frame'])]
eventsToInclude = [
    ('change', [active_changeTimes, 8, 0.8, 0.1, -0.2]),
    ('licks', [lick_times, 5, 0.6, 0.1, -0.3]),
    ('first_licks', [first_lick_times, 10, 2, 0.2, -1]),
    ('running', [[b.behaviorRunSpeed.values, b.behaviorRunTime], 5, 2, 0.4, 0]),
    ('reward', [reward_times, 10, 2, 0.2, -1]),
]
for img in np.unique(image_id):
def get_pi(beta, xi, alpha_i):
    xi = np.insert(xi, 0, 1)
    return logistic(np.dot(beta, xi))  # + alpha_i