def tryout_lams(self, lams, **kwargs):
    # choose number of rounds
    self.lams = lams
    num_rounds = len(lams)

    # containers for costs and weights
    self.cost_vals = []
    self.weights = []

    # reset initialization
    self.w_init = 0.1 * np.random.randn(self.x.shape[0] + 1, 1)

    # loop over lams and try out each
    for i in range(num_rounds):
        # set lambda
        lam = self.lams[i]
        self.cost.set_lambda(lam)

        # run the optimizer on the current model
        w_hist, c_hist = self.optimizer(self.cost.cost, self.x, self.w_init)

        # determine smallest cost value attained
        ind = np.argmin(c_hist)
        weight = w_hist[ind]
        cost_val = c_hist[ind]
        self.weights.append(weight)
        self.cost_vals.append(cost_val)

    # determine best value of lambda from the above runs
    ind = np.argmin(self.cost_vals)
    self.best_lam = self.lams[ind]
    self.best_weights = self.weights[ind]
def compute_sample_LLs(self, NNs, zs, w_m, w_v, z_m, z_v, e_v, x, y, N):
    # Compute likelihood factors
    f_ws, f_zs = self.compute_LL_factors(NNs, zs, w_m, w_v, z_m, z_v, N)

    # Calculate likelihoods for every data pair in the batch
    lls = []
    denom = 2 * e_v
    for k in range(self.K):
        # Append random features z to x
        x_z = np.concatenate((x, zs[k]), axis=1)
        out = NNs[k].execute(x_z)
        nom = np.square(y - out)
        ll = np.exp(-nom / denom) / np.sqrt(2 * np.pi * e_v) + 1e-10

        # Multiply over output dimensions if the output is multi-dimensional
        ll = np.prod(ll, axis=1, keepdims=True)
        if (ll == 0).any():
            print('Warning: A likelihood is zero.', np.argmin(ll))
            self.errormsg.append(('one ll is zero.', np.argmin(ll)))

        # Include alpha and divide by likelihood factors
        factored_ll = ll**self.alpha / (f_ws[k] * f_zs[k])
        if (factored_ll == 0).any():
            print('Warning: A factored likelihood is zero.')
            self.errormsg.append('one f_ll is zero.')

        lls.append(factored_ll)
    return lls
def animate(k):
    # clear panels
    ax.cla()
    lam = lams[k]

    # print rendering update
    if np.mod(k + 1, 25) == 0:
        print('rendering animation frame ' + str(k + 1) + ' of ' + str(num_frames))
    if k == num_frames - 1:
        print('animation rendering complete!')
        time.sleep(1.5)
        clear_output()

    # run optimization
    if algo == 'gradient_descent':
        weight_history, cost_history = self.gradient_descent(
            g, w, self.x, self.y, lam, alpha_choice, max_its, batch_size)
    if algo == 'RMSprop':
        weight_history, cost_history = self.RMSprop(
            g, w, self.x, self.y, lam, alpha_choice, max_its, batch_size)

    # choose set of weights to plot based on lowest cost val
    ind = np.argmin(cost_history)

    # classification? then base on accuracy
    if 'counter' in kwargs:
        # create counting cost history as well
        counts = [counter(v, self.x, self.y, lam) for v in weight_history]
        if k == 0:
            ind = np.argmin(counts)
        count = counts[ind]
        acc = 1 - count / self.y.size
        acc = np.round(acc, 2)

    # save lowest misclass weights
    w_best = weight_history[ind][1:]

    # plot
    ax.axhline(c='k', zorder=2)

    # make bar plot
    ax.bar(np.arange(0, len(w_best)), w_best, color='k', alpha=0.5)

    # dress panel
    title1 = r'$\lambda = ' + str(np.round(lam, 2)) + '$'
    costval = cost_history[ind][0]
    title2 = ', cost val = ' + str(np.round(costval, 2))
    if 'counter' in kwargs:
        title2 = ', accuracy = ' + str(acc)
    title = title1 + title2
    ax.set_title(title)
    ax.set_xlabel('learned weights')
    return artist,
def optimal_rotation_new(Y, p):
    """Choose the correct representative from each equivalence class.

    Parameters
    ----------
    Y : ndarray (d*n)
        Data, with one COLUMN for each of the n data points in
        d-dimensional space.
    p : int
        Order of cyclic group.

    Returns
    -------
    S : ndarray (n*n)
        Correct power of the rotation matrix for the p-th root of unity
        to use for each inner product. Satisfies S + S.T = 0 (mod p).

    """
    # (I'm not convinced this method is actually better, algorithmically.)
    # Convert Y to complex form:
    Ycplx = Y[0::2] + 1j * Y[1::2]
    cplx_ip = Ycplx.T @ Ycplx.conjugate()
    ip_angles = np.angle(cplx_ip)
    ip_angles[ip_angles < 0] += 2 * np.pi   # np.angle uses range [-pi, pi)
    root_angles = np.linspace(0, 2 * np.pi, p + 1)
    S = np.zeros(ip_angles.shape)
    for i in range(ip_angles.shape[0]):
        for j in range(ip_angles.shape[1]):
            S[i, j] = np.argmin(np.abs(ip_angles[i, j] - root_angles))
    S[S == p] = 0
    S = S.T   # Want the angle to act on the second component.
    return S
def proj_onto_line(self, w):
    w_c = copy.deepcopy(w)
    w_0 = -w_c[0] / w_c[2]   # amount to subtract from the vertical of each point

    # setup line to project onto
    w_1 = -w_c[1] / w_c[2]
    line_pt = np.asarray([1, w_1])
    line_pt.shape = (2, 1)
    line_hat = line_pt / np.linalg.norm(line_pt)
    line_hat.shape = (2, 1)

    # loop over points, compute distance of projections
    dists = []
    for j in range(len(self.y)):
        pt = copy.deepcopy(self.x[j])
        pt[1] -= w_0
        pt.shape = (2, 1)
        proj = np.dot(line_hat.T, pt) * line_hat
        proj.shape = (2, 1)
        d = np.linalg.norm(proj - pt)
        dists.append(d)

    # find smallest distance to class point
    ind = np.argmin(dists)
    pt_min = copy.deepcopy(self.x[ind])

    # create new intercept coefficient
    pt_min[1] -= w_0
    w_new = -w_1 * pt_min[0] + pt_min[1]
    return w_new
def figure_subspace_update(self, T, x_true=None):
    self._fig_subspace.clf()
    if x_true is not None:
        X_true = np.array([x_true[k] for k in range(0, T + 1)])
        X_true = X_true.squeeze().T
    X_pred = np.array([self._mu[k] for k in range(0, T + 1)])
    X_pred = X_pred.squeeze().T
    X_pred = X_pred.reshape(self._r, T + 1)
    used = set()
    for l in range(self._r):
        ax = self._fig_subspace.add_subplot(2, int((self._r + 1) / 2), l + 1)
        if x_true is not None:
            # match each true trajectory to the closest unused predicted one
            pl = np.argmin([
                np.linalg.norm(X_true[l] - X_pred[m]) if m not in used else np.inf
                for m in range(self._r)
            ])
            ax.plot(np.squeeze(X_true[l, :]), color="#004488", alpha=0.7)
            ax.plot(np.squeeze(X_pred[pl, :]), "--", color="#bb5566")
            used.add(pl)
        else:
            ax.plot(np.squeeze(X_pred[l, :]), "--", color="#bb5566")
        ax.axis("off")
    plt.pause(0.01)
def factorize_tuned(W, eps, Q0, iters=250, verbose=True):
    """
    Optimize the strategy matrix for the given workload and epsilon.
    Use this function if you **do not** have a good guess for the learning rate gamma.

    :param W: the workload matrix (p x n)
    :param eps: the privacy budget (scalar)
    :param Q0: the initial strategy (m x n)
    :param iters: number of iterations for the final optimization run
    :param verbose: print progress information
    """
    # halve gamma until a short trial run of factorize succeeds
    gamma = 1.0
    f = worst_variance(W, Q0, 'opt')
    while gamma > 1e-50:
        try:
            Q = factorize(W, eps, Q0, 50, gamma, verbose)
            break
        except Exception:
            pass
        gamma *= 0.5

    # compare the working gamma against two smaller ones and keep the best
    Q1 = Q
    Q2 = factorize(W, eps, Q0, 50, gamma / 2, verbose)
    Q3 = factorize(W, eps, Q0, 50, gamma / 4, verbose)
    Qs = [Q1, Q2, Q3]
    gammas = [gamma, gamma / 2, gamma / 4]
    fs = [worst_variance(W, Q, 'opt') for Q in Qs]
    i = np.argmin(fs)

    # run the full optimization from the best short-run strategy
    Q = factorize(W, eps, Qs[i], iters, gammas[i], verbose)
    return Q
def fit(self, x_train, y_train, params, reg_param=None):
    ''' Wrapper for MLE through gradient descent '''
    assert x_train.shape[0] == self.params['D_in']
    assert y_train.shape[0] == self.params['D_out']

    ### make objective function for training
    self.objective, self.gradient = self.make_objective(x_train, y_train, reg_param)

    ### set up optimization
    step_size = 0.01
    max_iteration = 5000
    check_point = 100
    weights_init = self.weights.reshape((1, -1))
    mass = None
    optimizer = 'adam'
    random_restarts = 5

    if 'step_size' in params.keys():
        step_size = params['step_size']
    if 'max_iteration' in params.keys():
        max_iteration = params['max_iteration']
    if 'check_point' in params.keys():
        check_point = params['check_point']
    if 'init' in params.keys():
        weights_init = params['init']
    if 'call_back' in params.keys():
        call_back = params['call_back']  # (overridden by the call_back defined below)
    if 'mass' in params.keys():
        mass = params['mass']
    if 'optimizer' in params.keys():
        optimizer = params['optimizer']
    if 'random_restarts' in params.keys():
        random_restarts = params['random_restarts']

    def call_back(weights, iteration, g):
        ''' Actions per optimization step '''
        objective = self.objective(weights, iteration)
        self.objective_trace = np.vstack((self.objective_trace, objective))
        self.weight_trace = np.vstack((self.weight_trace, weights))
        if iteration % check_point == 0:
            mag = np.linalg.norm(self.gradient(weights, iteration))
            # print("Iteration {} lower bound {}; gradient mag: {}".format(iteration, objective, mag))

    ### train with random restarts
    optimal_obj = 1e16
    optimal_weights = self.weights

    for i in range(random_restarts):
        if optimizer == 'adam':
            adam(self.gradient, weights_init,
                 step_size=step_size,
                 num_iters=max_iteration,
                 callback=call_back)
        local_opt = np.min(self.objective_trace[-100:])

        if local_opt < optimal_obj:
            opt_index = np.argmin(self.objective_trace[-100:])
            self.weights = self.weight_trace[-100:][opt_index].reshape((1, -1))
        weights_init = self.random.normal(0, 1, size=(1, self.D))

    self.objective_trace = self.objective_trace[1:]
    self.weight_trace = self.weight_trace[1:]
def assign_to_modal_uparams(this_uparam, modal_uparam):
    try:
        # bin each value of this_uparam to the nearest modal value
        mid_pts = 0.5 * (modal_uparam[1:] + modal_uparam[:-1])
        bins = np.concatenate(((-np.inf, ), mid_pts, (np.inf, )))
        inds_in_modal = np.digitize(this_uparam, bins) - 1
        numerical = True
    except Exception:
        print('non-numerical parameter')
        numerical = False
    if numerical:
        # for each modal bin that was hit, keep only the closest value
        uinds = np.unique(inds_in_modal)
        inds_in_this = np.zeros((0, ), dtype='int')
        for uind in uinds:
            candidates = np.where(inds_in_modal == uind)[0]
            dist_from_modal = np.abs(this_uparam[candidates] - modal_uparam[uind])
            to_keep = candidates[np.argmin(dist_from_modal)]
            inds_in_this = np.concatenate((inds_in_this, (to_keep, )))
        inds_in_modal = inds_in_modal[inds_in_this]
        bool_in_this = np.zeros((len(this_uparam), ), dtype='bool')
        bool_in_modal = np.zeros((len(modal_uparam), ), dtype='bool')
        bool_in_this[inds_in_this] = True
        bool_in_modal[inds_in_modal] = True
    else:
        assert np.all(this_uparam == modal_uparam)
        bool_in_this, bool_in_modal = [
            np.ones(this_uparam.shape, dtype='bool') for iparam in range(2)
        ]
    return bool_in_this, bool_in_modal
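# A minimal usage sketch for assign_to_modal_uparams with made-up illustrative
# values (not from the original source): keep, for each modal value, only the
# closest entry of this_uparam.
if __name__ == '__main__':
    modal = np.array([0.0, 1.0, 2.0])
    this = np.array([0.1, 0.2, 1.05, 3.0])
    in_this, in_modal = assign_to_modal_uparams(this, modal)
    print(in_this)    # [ True False  True  True] -- 0.2 loses to 0.1 for modal value 0.0
    print(in_modal)   # [ True  True  True]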
def __init__(self, name, x, y, feature_transforms, runs, **kwargs):
    # point to input/output for cost functions
    self.x = x
    self.y = y

    # make copy of feature transformation
    self.feature_transforms = feature_transforms

    # compute representation so far
    self.rep = 0
    if len(runs) == 0:
        self.rep = 0
    else:
        for i in range(len(runs)):
            # get current run
            run = runs[i]
            cost = run.cost
            model = run.model
            feat = run.feature_transforms
            normalizer = run.normalizer
            cost_history = run.train_cost_histories[0]
            weight_history = run.weight_histories[0]

            # get best weights
            win = np.argmin(cost_history)
            w = weight_history[win]
            self.rep += model(self.x, w)

    # count parameter layers of input to feature transform
    self.sig = signature(self.feature_transforms)

    ### make cost function choice ###
    # for regression
    if name == 'least_squares':
        self.cost = self.least_squares
    if name == 'least_absolute_deviations':
        self.cost = self.least_absolute_deviations

    # for two-class classification
    if name == 'softmax':
        self.cost = self.softmax
    if name == 'perceptron':
        self.cost = self.perceptron
    if name == 'twoclass_counter':
        self.cost = self.counting_cost

    # for multiclass classification
    if name == 'multiclass_perceptron':
        self.cost = self.multiclass_perceptron
    if name == 'multiclass_softmax':
        self.cost = self.multiclass_softmax
    if name == 'multiclass_counter':
        self.cost = self.multiclass_counting_cost

    # for autoencoder
    if name == 'autoencoder':
        self.feature_transforms = feature_transforms
        self.feature_transforms_2 = kwargs['feature_transforms_2']
        self.cost = self.autoencoder
def plot_model(self):
    ''' Visualization of the best-fit model '''
    ind = np.argmin(self.costs)
    least_weights = self.weights[0][ind]
    Plotter.Model(self.x, self.y, least_weights, self.costs[0],
                  self.normalizer, self.model)
def worst():
    # show the lowest-scoring test image for each digit
    ans = nnm.answer(testing_images)
    for digit in range(10):
        where = np.argmax(testing_labels, 1) == digit
        idx = np.argmin(ans[where, digit])
        plt.subplot(2, 5, digit + 1)
        plt_image(testing_images[where][idx].reshape([28, 28]))
        print(digit, ans[where][idx])
    plt.show()
def response(params, inputs=None, targets=None, channels=None, hps=None):
    if targets is None:
        targets = inputs
    # pick, for each item, the channel whose output is closest to the target
    outputs = forward(params, inputs=inputs, channels=channels, hps=hps)[-1]
    errors = np.sum(np.square(np.subtract(targets, outputs)), axis=2, keepdims=True)
    return np.argmin(errors, axis=0)[:, 0]
def draw_fit(self, ax, run, ind):
    # viewing ranges
    xmin1 = min(copy.deepcopy(self.x[0, :]))
    xmax1 = max(copy.deepcopy(self.x[0, :]))
    xgap1 = (xmax1 - xmin1) * 0.05
    xmin1 -= xgap1
    xmax1 += xgap1

    xmin2 = min(copy.deepcopy(self.x[1, :]))
    xmax2 = max(copy.deepcopy(self.x[1, :]))
    xgap2 = (xmax2 - xmin2) * 0.05
    xmin2 -= xgap2
    xmax2 += xgap2

    ymin = min(copy.deepcopy(self.y))
    ymax = max(copy.deepcopy(self.y))
    ygap = (ymax - ymin) * 0.05
    ymin -= ygap
    ymax += ygap

    # plot boundary for 2d plot
    r1 = np.linspace(xmin1, xmax1, 300)
    r2 = np.linspace(xmin2, xmax2, 300)
    s, t = np.meshgrid(r1, r2)
    s = np.reshape(s, (np.size(s), 1))
    t = np.reshape(t, (np.size(t), 1))
    h = np.concatenate((s, t), axis=1).T

    # plot total fit
    cost = run.cost
    model = run.model
    feat = run.feature_transforms
    normalizer = run.normalizer
    cost_history = run.train_cost_histories[0]
    weight_history = run.weight_histories[0]

    # get best weights
    win = np.argmin(cost_history)
    w = weight_history[win]
    model = lambda b: run.model(normalizer(b), w)
    z = model(h)
    z = np.sign(z)

    # reshape it
    s.shape = (np.size(r1), np.size(r2))
    t.shape = (np.size(r1), np.size(r2))
    z.shape = (np.size(r1), np.size(r2))

    #### plot contour, color regions ####
    ax.contour(s, t, z, colors='k', linewidths=2.5, levels=[0], zorder=2)
    ax.contourf(s, t, z, colors=[self.colors[1], self.colors[0]], alpha=0.15,
                levels=range(-1, 2))

    ### cleanup left plots, create max view ranges ###
    ax.set_xlim([xmin1, xmax1])
    ax.set_ylim([xmin2, xmax2])
    ax.set_title(str(ind + 1) + ' units fit to data', fontsize=14)
def draw_fit_trainval(self, ax, run, plot_fit):
    # set plotting limits
    xmax = np.max(copy.deepcopy(self.x))
    xmin = np.min(copy.deepcopy(self.x))
    xgap = (xmax - xmin) * 0.1
    xmin -= xgap
    xmax += xgap

    ymax = np.max(copy.deepcopy(self.y))
    ymin = np.min(copy.deepcopy(self.y))
    ygap = (ymax - ymin) * 0.3
    ymin -= ygap
    ymax += ygap

    ####### plot total model on original dataset #######
    # scatter original data - training and validation sets
    train_inds = run.train_inds
    valid_inds = run.val_inds
    ax.scatter(self.x[:, train_inds], self.y[:, train_inds],
               color=self.colors[1], s=40, edgecolor='k', linewidth=0.9)
    ax.scatter(self.x[:, valid_inds], self.y[:, valid_inds],
               color=self.colors[0], s=40, edgecolor='k', linewidth=0.9)

    if plot_fit == True:
        # plot fit on residual
        s = np.linspace(xmin, xmax, 2000)[np.newaxis, :]

        # plot total fit
        t = 0

        # get current run
        cost = run.cost
        model = run.model
        feat = run.feature_transforms
        normalizer = run.normalizer
        cost_history = run.train_cost_histories[0]
        weight_history = run.weight_histories[0]

        # get best weights
        win = np.argmin(cost_history)
        w = weight_history[win]
        t = model(normalizer(s), w)

        ax.plot(s.T, t.T, linewidth=4, c='k')
        ax.plot(s.T, t.T, linewidth=2, c='r')

        lam = run.lam
        ax.set_title('lam = ' + str(np.round(lam, 2)) + ' and fit to original',
                     fontsize=14)

    if plot_fit == False:
        ax.set_title('test', fontsize=14, color='w')

    ### clean up panels ###
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])

    # label axes
    ax.set_xlabel(r'$x$', fontsize=14)
    ax.set_ylabel(r'$y$', rotation=0, fontsize=14, labelpad=15)
def draw_boundary(self, ax, runs, ind):
    ### create boundary data ###
    # get visual boundary
    xmin1 = np.min(self.x[0, :])
    xmax1 = np.max(self.x[0, :])
    xgap1 = (xmax1 - xmin1) * 0.05
    xmin1 -= xgap1
    xmax1 += xgap1

    xmin2 = np.min(self.x[1, :])
    xmax2 = np.max(self.x[1, :])
    xgap2 = (xmax2 - xmin2) * 0.05
    xmin2 -= xgap2
    xmax2 += xgap2

    # plot boundary for 2d plot
    r1 = np.linspace(xmin1, xmax1, 300)
    r2 = np.linspace(xmin2, xmax2, 300)
    s, t = np.meshgrid(r1, r2)
    s = np.reshape(s, (np.size(s), 1))
    t = np.reshape(t, (np.size(t), 1))
    h = np.concatenate((s, t), axis=1)

    # plot total fit
    a = 0
    for i in range(ind + 1):
        # get current run
        run = runs[i]
        cost = run.cost
        model = run.model
        feat = run.feature_transforms
        normalizer = run.normalizer
        cost_history = run.train_cost_histories[0]
        weight_history = run.weight_histories[0]

        # get best weights
        win = np.argmin(cost_history)
        w = weight_history[win]
        a += model(normalizer(h.T), w)

    # compute model on train data
    z1 = np.sign(a)

    # reshape it
    s.shape = (np.size(r1), np.size(r2))
    t.shape = (np.size(r1), np.size(r2))
    z1.shape = (np.size(r1), np.size(r2))

    #### plot contour, color regions ####
    ax.contour(s, t, z1, colors='k', linewidths=2.5, levels=[0], zorder=2)
    ax.contourf(s, t, z1, colors=[self.colors[1], self.colors[0]], alpha=0.15,
                levels=range(-1, 2))
def booster(self, x, y, alpha, its):
    '''
    Greedy (boosting-style) coordinate descent for the softmax / log-loss cost
    x - the Px(N+1) data matrix
    y - the Px1 output vector
    '''
    # cost function for tol checking
    g = lambda w: np.sum(np.log(1 + np.exp(-y * np.dot(x, w))))

    # settings
    N = np.shape(x)[1]                 # length of weights
    w = np.zeros((N, 1))               # initialization
    w_history = [copy.deepcopy(w)]     # record each weight for plotting

    # outer loop - each is a sweep through every variable once
    for i in range(its):
        ### inner loop - each is a single variable update
        cost_vals = []
        w_vals = []

        # update weights
        for n in range(N):
            # compute numerator of newton update
            temp1 = x[:, n:n + 1] * y
            temp2 = y * np.dot(x, w)
            temp2 = [np.exp(v) for v in temp2]
            numer = -np.sum(
                np.asarray([v / (1 + r) for v, r in zip(temp1, temp2)]))

            # compute denominator
            temp3 = [v / (1 + v)**2 for v in temp2]
            temp4 = x[:, n:n + 1]**2
            denom = np.sum(
                np.asarray([v * r for v, r in zip(temp3, temp4)]))

            # record newton step
            w_n = w[n] - numer / denom
            w_vals.append(w_n)

            # record corresponding cost val
            w[n] += copy.deepcopy(w_n)
            g_n = g(w)
            cost_vals.append(g_n)
            w[n] -= copy.deepcopy(w_n)

        # take best
        ind = np.argmin(cost_vals)
        w[ind] += alpha * w_vals[ind]

        # record weights at each step for kicks
        w_history.append(copy.deepcopy(w))

    return w_history
def subset_cv(sub_list, test_x, test_y, train_x, train_y, samp,
              num_predictors, n_ul=0, x_ul=0):
    """ In MTL, return the best subset using cross-validation. """
    scores = []
    fold = 2
    # old sklearn.cross_validation.KFold interface: KFold(n, n_folds=...)
    kf = KFold(test_x.shape[0], n_folds=fold)
    for s in sub_list:
        scores_temp = []
        for train, test in kf:
            test_x_cv = test_x[train]
            test_y_cv = test_y[train]
            X = np.concatenate([test_x_cv, train_x], axis=0)
            Y = np.concatenate([test_y_cv, train_y], axis=0)
            app_xy = np.append(Y, X, axis=1)
            mask = np.zeros(app_xy.shape[0], dtype=bool)
            mask[0:test_x_cv.shape[0]] = True
            mask_var = np.zeros(app_xy.shape[1], dtype=bool)
            mask_var[0] = True
            if s.size > 0:
                mask_var[s + 1] = True
            app_xyt = np.concatenate([test_y_cv, test_x_cv], axis=1)
            sigma = np.cov(app_xyt.T) + \
                2 / (np.log(test_x_cv.shape[0])**2) * np.eye(app_xyt.shape[1])
            index = 0
            stay = True
            while stay:
                sigma = e_step(sigma, app_xy, mask_var, mask)
                stay = (index < 20)
                index += 1
            cov_xsh = sigma[1:, 1:]
            cov_xysh = sigma[0, 1:][:, np.newaxis]
            beta_cs = np.dot(np.linalg.inv(cov_xsh), cov_xysh)
            scores_temp.append(np.mean((test_x[test].dot(beta_cs) - test_y[test])**2))
        scores.append(np.mean(scores_temp))
    return sub_list[np.argmin(scores)]
def model_all_data(data_folder, steps):
    # Runs the whole pipeline for taking in all data and generating a linear
    # model to predict the next heart rate
    dataset = read_all_files(data_folder)

    # Save the gathered data in a .npy file for later use
    np.save(data_folder + 'dataset.npy', np.array(dataset))

    dataset_t = np.transpose(dataset, (1, 0))
    hr = dataset_t[0]
    cad = dataset_t[1]
    pwr = dataset_t[2]
    hr_next = dataset_t[3]

    # least_squares using dataset input
    def least_squares_set(w, hr, cad, pwr, hr_next):
        hr1 = model(w, hr, cad, pwr)
        cost = np.sum((hr1 - hr_next)**2)
        return cost / hr.size

    hr_normalizer, hr_inverse_normalizer = standard_normalizer(hr, 0)
    cad_normalizer, cad_inverse_normalizer = standard_normalizer(cad, 0)
    pwr_normalizer, pwr_inverse_normalizer = standard_normalizer(pwr, 0)

    hr_normalized = hr_normalizer(hr)
    cad_normalized = cad_normalizer(cad)
    pwr_normalized = pwr_normalizer(pwr)
    hr_next_normalized = hr_normalizer(hr_next)

    g = lambda w, hr=hr_normalized, cad=cad_normalized, pwr=pwr_normalized, \
        hr_next=hr_next_normalized: least_squares_set(w, hr, cad, pwr, hr_next)

    w_size = 4  # The number of weights
    w_init = 0.1 * np.random.randn(w_size, 1)
    max_its = steps
    alpha = 10**(-1)
    w_hist, train_hist = gradient_descent(g, w_init, alpha, max_its, verbose=True)

    # print out that cost function history plot
    plot_series(train_hist)

    # Get best weights and trained model
    ind = np.argmin(train_hist)
    w_best = w_hist[ind]
    g_best = train_hist[ind]
    print(w_best)

    all_data_model = lambda hr, cad, pwr, w=w_best: model(
        w, hr_normalizer(hr), cad_normalizer(cad), pwr_normalizer(pwr))

    return all_data_model, hr_inverse_normalizer
def multi_explore_optimization(objective, t_min, t_max,
                               n_samples=100, n_repeat=10, jac=None):
    """
    Repeat explore optimisation and pick the best optimum from the results.

    Parameters
    ----------
    objective : callable
        The objective function to be minimized
    t_min : numpy.ndarray
        The minimum bound on the input (d,)
    t_max : numpy.ndarray
        The maximum bound on the input (d,)
    n_samples : int, optional
        The number of sample points to use
    n_repeat : int, optional
        The number of explore optimisations to be performed
    jac : callable, optional
        The gradient of the objective, if available

    Returns
    -------
    numpy.ndarray
        The optimal input parameters (d,)
    float
        The optimal objective value
    """
    results = [
        explore_optimization(objective, t_min, t_max, n_samples=n_samples, jac=jac)
        for i in range(n_repeat)
    ]
    results = list(map(list, zip(*results)))
    t_list = results[0]
    f_list = results[1]
    i_opt = np.argmin(f_list)
    t_opt, f_opt = t_list[i_opt], f_list[i_opt]

    logging.debug('Multi-Explore Optimization Completed')
    logging.debug('List of local optimums and their objective function value: ')
    [logging.debug('%s %s', t_list[i], f_list[i]) for i in range(n_repeat)]
    logging.debug('Best Optimum:')
    logging.debug('Hyperparameters: %s | Objective: %f' % (str(t_opt), f_opt))
    return t_opt, f_opt
def match_given_alpha(diff):
    n2, n1 = diff.shape
    if n1 == n2:
        return np.eye(n1)
    P = np.zeros((n2, n1))
    am = np.argmin(diff, axis=0)
    if np.unique(am).shape[0] == n1:
        # the column-wise minima already form a one-to-one assignment
        msoln = (np.arange(n1), am)
    else:
        # otherwise solve the dense linear assignment problem
        msoln = solve_dense(diff.T)
    P[msoln[1], msoln[0]] = 1.0
    P = P[:, :n1]
    return P.copy()
def display_results_Gibbs(X, Z, Z_Gibbs, mean_A, A, manual_perm=None):
    D = np.shape(X)[1]
    N = np.shape(X)[0]
    K = np.shape(Z_Gibbs)[1]

    print('Z (unpermuted): \n', Z[0:10])

    # Find the minimizing permutation.
    accuracy_mat = [[
        np.sum(np.abs(Z[:, i] - Z_Gibbs[:, j])) / N for i in range(K)
    ] for j in range(K)]
    perm_tmp = np.argmin(accuracy_mat, 1)

    # check that we have a true permutation
    if len(perm_tmp) == len(set(perm_tmp)):
        perm = perm_tmp
    else:
        print('** procedure did not give a true permutation')
        if manual_perm is None:
            perm = np.arange(K)
        else:
            perm = manual_perm

    print('permutation: ', perm)

    # print Z (permuted) and nu
    print('Z (permuted) \n', Z[0:10, perm])
    print('round_nu \n', Z_Gibbs[0:10, :])
    print('l1 error (after permutation): ',
          [np.sum(np.abs(Z[:, perm[i]] - Z_Gibbs[:, i])) / N for i in range(K)])

    # examine phi_mu
    print('\n')
    print('true A (permuted): \n', A[perm, :])
    print('posterior mean A: \n', mean_A)

    # plot posterior predictive
    pred_x = np.dot(Z_Gibbs, mean_A)
    for col in range(D):
        plt.clf()
        plt.plot(pred_x[:, col], X[:, col], 'ko')
        diag = np.linspace(np.min(pred_x[:, col]), np.max(pred_x[:, col]))
        plt.plot(diag, diag)

        plt.title('Posterior predictive, column ' + str(col))
        plt.xlabel('predicted X')
        plt.ylabel('true X')
        plt.show()
def exact(self, w, grad_eval):
    # set parameters of linesearch at each step
    valmax = 10
    num_evals = 3000

    # set alpha range
    alpha_range = np.linspace(0, valmax, num_evals)

    # evaluate function over direction and alpha range, grab alpha giving lowest eval
    steps = [(w - alpha * grad_eval) for alpha in alpha_range]
    func_evals = np.array([self.g(s) for s in steps])
    ind = np.argmin(func_evals)
    best_alpha = alpha_range[ind]
    return best_alpha
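# A minimal standalone sketch of the same grid-search ("exact") line-search
# idea, decoupled from the class above. The function name and defaults are
# hypothetical; g is the objective and grad_eval the gradient evaluated at w.
def exact_linesearch_sketch(g, w, grad_eval, valmax=10, num_evals=3000):
    # evaluate g along the negative gradient direction over a fixed alpha grid
    alpha_range = np.linspace(0, valmax, num_evals)
    evals = np.array([g(w - alpha * grad_eval) for alpha in alpha_range])
    # return the step length with the lowest objective value
    return alpha_range[np.argmin(evals)]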
def draw_fit(self, ax, runs, ind):
    # set plotting limits
    xmax = np.max(copy.deepcopy(self.x))
    xmin = np.min(copy.deepcopy(self.x))
    xgap = (xmax - xmin) * 0.1
    xmin -= xgap
    xmax += xgap

    ymax = np.max(copy.deepcopy(self.y))
    ymin = np.min(copy.deepcopy(self.y))
    ygap = (ymax - ymin) * 0.1
    ymin -= ygap
    ymax += ygap

    # scatter points or plot continuous version
    ax.scatter(self.x.flatten(), self.y.flatten(), color='k', s=40,
               edgecolor='w', linewidth=0.9)

    # clean up panel
    ax.set_xlim([xmin, xmax])
    ax.set_ylim([ymin, ymax])

    # label axes
    ax.set_xlabel(r'$x$', fontsize=16)
    ax.set_ylabel(r'$y$', rotation=0, fontsize=16, labelpad=15)

    # plot total fit
    s = np.linspace(xmin, xmax, 2000)[np.newaxis, :]
    t = 0
    for i in range(ind):
        # get current run
        run = runs[i]
        cost = run.cost
        predict = run.model
        feat = run.feature_transforms
        normalizer = run.normalizer

        # get best weights
        b = np.argmin(run.train_cost_histories[0])
        w_best = run.weight_histories[0][b]
        t += predict(normalizer(s), w_best)

    ax.plot(s.T, t.T, linewidth=4, c='k')
    ax.plot(s.T, t.T, linewidth=2, c='r')
def diva_response(params, inputs, targets=None, channels_indexed=None, hps=None):
    if targets is None:
        targets = inputs
    # pick, for each item, the channel whose output is closest to the target
    outputs = forward(params, inputs=inputs, channels_indexed=channels_indexed, hps=hps)[-2]
    errors = np.sum(np.square(np.subtract(targets, outputs)), axis=2, keepdims=True)
    return np.argmin(errors, axis=0)[:, 0]
def diagnose(self):
    idx_min = np.argmin(self.loss_history)
    plt.plot(self.loss_history, label='Training Loss')
    plt.xlabel('Iteration')
    if idx_min != (len(self.loss_history) - 1):
        print("WARNING - Potential convergence issues")
        self.params = self.par_history[idx_min]
        plt.axvline(idx_min, color='red', label=f'Min at {idx_min}')
    plt.legend()
    plt.show()
    return
def Gardner_Krauth_Mezard(N, patterns, weights, biases, sc, lr, k, maxiter):
    '''
    Gardner rule proposed in Krauth (1987), "Learning algorithms with optimal
    stability in neural networks", + Krauth-Mezard update strategy
    '''
    Z = np.array(patterns).T
    M = 0
    p = Z.shape[-1]
    Z_ = np.vstack([Z, np.ones(p)])
    w_and_b = deepcopy(np.hstack([weights, biases.reshape(N, 1)]))
    y_global = ((w_and_b @ Z_).T / (np.sqrt(np.sum(w_and_b**2, axis=1)))) * Z.T
    while (np.any(y_global < k) and M < maxiter):
        for i in range(N):  # for each neuron independently
            # compute normalised stability measure (h_i, sigma_i)/|w_i|^2_2
            sum_of_squares = np.sum(weights[i, :]**2 + biases[i]**2)
            ys = ((weights[i, :] @ Z + biases[i]) / (np.sqrt(sum_of_squares))) * Z[i, :]

            # pick the pattern with the weakest y
            ind_min = np.argmin(ys)
            weakest_pattern = np.array(deepcopy(patterns[ind_min].reshape(1, N)))
            h_i = (weights[i, :].reshape(1, N) @ weakest_pattern.T + biases[i]).squeeze()

            # if the new weakest pattern is not yet stable with the margin k
            y = (h_i * weakest_pattern[0, i]) / (np.sqrt(sum_of_squares))
            while (y < k):
                weights[i, :] = deepcopy(
                    weights[i, :] +
                    lr * (weakest_pattern[0, i] * weakest_pattern).squeeze())
                # set diagonal elements to zero
                if sc == True:
                    weights[i, i] = 0
                biases[i] = biases[i] + lr * weakest_pattern[0, i]
                sum_of_squares = np.sum(weights[i, :]**2 + biases[i]**2)
                h_i = (weights[i, :].reshape(1, N) @ weakest_pattern.T + biases[i]).squeeze()
                y = (h_i * weakest_pattern[0, i]) / (np.sqrt(sum_of_squares))

        # recompute the global stabilities after sweeping through all neurons
        w_and_b = deepcopy(np.hstack([weights, biases.reshape(N, 1)]))
        y_global = ((w_and_b @ Z_).T / (np.sqrt(np.sum(w_and_b**2, axis=1)))) * Z.T
        M += 1

    if M >= maxiter:
        print('Maximum number of iterations has been exceeded')
    return weights, biases
def __init__(self, vec, other_vec, whole, otherwhole, actInd, **kwargs):
    actInd = (int(actInd[0]), int(actInd[1]))
    DistanceMat = np.array([[distance(i, j) for j in otherwhole] for i in whole])
    # print("Mindistance:", np.min(DistanceMat))
    # print("len(whole),len(otherwhole)", len(whole), len(otherwhole))

    insectPT = np.nonzero(DistanceMat < INTERSECTLIM)  # The index of the intersection point
    try:
        insectPT = (insectPT[0][0], insectPT[1][0])
    except IndexError:
        # the case where no pair of points falls within the threshold;
        # use the least-distance pair instead
        insectPT = np.unravel_index(np.argmin(DistanceMat, axis=None),
                                    DistanceMat.shape)

    egopath = whole[actInd[0]:insectPT[0]]
    otherpath = otherwhole[actInd[1]:insectPT[1]]

    # pass a None in just to call the lambda function
    egoPTs = np.sum(
        (self._distance(self._diff(lambda x: egopath[:, 0]),
                        self._diff(lambda y: egopath[:, 1])))(None))
    otherPTs = np.sum(
        self._distance(self._diff(lambda x: otherpath[:, 0]),
                       self._diff(lambda y: otherpath[:, 1]))(None))
    otherDistance = self._cumsum(
        self._distance(
            self._diff(lambda x: otherwhole[actInd[1]:, 0]),
            self._diff(lambda y: otherwhole[actInd[1]:, 1])))(None)

    self.other_vec = other_vec
    self.whole = whole
    self.otherwhole = otherwhole
    self.actInd = actInd
    self.insectPT = insectPT
    self.egoPTs = egoPTs
    self.otherPTs = otherPTs
    self.otherDistance = otherDistance
    super().__init__(vec,
                     other_vec,
                     whole=whole,
                     otherwhole=otherwhole,
                     actInd=actInd,
                     intersectInd=insectPT,
                     intersectPTs=(self.egoPTs, self.otherPTs),
                     otherDist=otherDistance,
                     **kwargs)
def coordinate_descent_zero_order(g, alpha_choice, max_its, w):
    # run coordinate search
    N = np.size(w)
    weight_history = []   # container for weight history
    cost_history = []     # container for corresponding cost function history
    alpha = 0
    for k in range(1, max_its + 1):
        # check if diminishing steplength rule used
        if alpha_choice == 'diminishing':
            alpha = 1 / float(k)
        else:
            alpha = alpha_choice

        # random shuffle of coordinates
        c = np.random.permutation(N)

        # form the direction matrix outside of the loop
        DIRECTION = np.eye(N)
        cost = g(w)

        # loop over each coordinate direction
        for n in range(N):
            # direction = np.zeros((N,1))
            # direction[c[n]] = 1
            direction = DIRECTION[:, [c[n]]]

            # record weights and cost evaluation
            weight_history.append(w)
            cost_history.append(cost)

            # evaluate all candidates
            evals = [g(w + alpha * direction)]
            evals.append(g(w - alpha * direction))
            evals = np.array(evals)

            # if we find a real descent direction take the step in its direction
            ind = np.argmin(evals)
            if evals[ind] < cost_history[-1]:
                # take step
                w = w + ((-1)**(ind)) * alpha * direction
                cost = evals[ind]

    # record weights and cost evaluation
    weight_history.append(w)
    cost_history.append(g(w))
    return weight_history, cost_history
def coordinate_descent_zero_order(g, alpha_choice, max_its, w):
    # run coordinate search
    N = np.size(w)
    weight_history = []   # container for weight history
    cost_history = []     # container for corresponding cost function history
    alpha = 0
    for k in range(1, max_its + 1):
        # check if diminishing steplength rule used
        if alpha_choice == 'diminishing':
            alpha = 1 / float(k)
        else:
            alpha = alpha_choice

        # loop over each coordinate direction
        for n in range(N):
            direction = np.zeros((1, N))
            direction[0][n] = 1
            directions = np.concatenate((direction, -direction), axis=0)

            # record weights and cost evaluation
            weight_history.append(w)
            cost_history.append(g(w))

            ### pick best descent direction
            # compute all new candidate points
            w_candidates = w + alpha * directions

            # evaluate all candidates
            evals = np.array([g(w_val) for w_val in w_candidates])

            # if we find a real descent direction take the step in its direction
            ind = np.argmin(evals)
            if g(w_candidates[ind]) < g(w):
                # pluck out best descent direction
                d = directions[ind, :]

                # take step
                w = w + alpha * d

    # record weights and cost evaluation
    weight_history.append(w)
    cost_history.append(g(w))
    return weight_history, cost_history
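# A minimal usage sketch for coordinate_descent_zero_order above; the quadratic
# objective and the starting point are made-up illustrative values, not from
# the original source.
if __name__ == '__main__':
    g = lambda w: np.sum(w**2)                 # simple convex test objective
    w0 = np.array([3.0, -4.0])                 # arbitrary starting point
    w_hist, c_hist = coordinate_descent_zero_order(g, 'diminishing', 50, w0)
    print('best cost found:', min(c_hist))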
def polyinterp(points, doPlot=None, xminBound=None, xmaxBound=None):
    """
    polynomial interpolation

    Parameters
    ----------
    points: shape(pointNum, 3), three columns represent x, f, g
    doPlot: set to 1 to plot, default 0
    xminBound: min value that brackets minimum (default: min of points)
    xmaxBound: max value that brackets maximum (default: max of points)

    set f or g to sqrt(-1)=1j if they are not known
    the order of the polynomial is the number of known f and g values minus 1

    Returns
    -------
    minPos:
    fmin:
    """
    if doPlot is None:
        doPlot = 0

    nPoints = points.shape[0]
    order = np.sum(np.imag(points[:, 1:3]) == 0) - 1

    # code for most common case: cubic interpolation of 2 points
    if nPoints == 2 and order == 3 and doPlot == 0:
        [minVal, minPos] = [np.min(points[:, 0]), np.argmin(points[:, 0])]
        notMinPos = 1 - minPos
        d1 = points[minPos, 2] + points[notMinPos, 2] - \
            3 * (points[minPos, 1] - points[notMinPos, 1]) / \
            (points[minPos, 0] - points[notMinPos, 0])
        t_d2 = d1**2 - points[minPos, 2] * points[notMinPos, 2]
        if t_d2 > 0:
            d2 = np.sqrt(t_d2)
        else:
            d2 = np.sqrt(-t_d2) * 1j
        if np.isreal(d2):
            t = points[notMinPos, 0] - (points[notMinPos, 0] - points[minPos, 0]) * \
                ((points[notMinPos, 2] + d2 - d1) /
                 (points[notMinPos, 2] - points[minPos, 2] + 2 * d2))
            minPos = np.min([np.max([t, points[minPos, 0]]), points[notMinPos, 0]])
        else:
            minPos = np.mean(points[:, 0])
        fmin = minVal
        return (minPos, fmin)

    xmin = np.min(points[:, 0])
    xmax = np.max(points[:, 0])

    # compute bounds of interpolation area
    if xminBound is None:
        xminBound = xmin
    if xmaxBound is None:
        xmaxBound = xmax

    # constraints based on available function values
    A = np.zeros((0, order + 1))
    b = np.zeros((0, 1))
    for i in range(nPoints):
        if np.imag(points[i, 1]) == 0:
            constraint = np.zeros(order + 1)
            for j in np.arange(order, -1, -1):
                constraint[order - j] = points[i, 0]**j
            A = np.vstack((A, constraint))
            b = np.append(b, points[i, 1])

    # constraints based on available derivatives
    for i in range(nPoints):
        if np.isreal(points[i, 2]):
            constraint = np.zeros(order + 1)
            for j in range(1, order + 1):
                constraint[j - 1] = (order - j + 1) * points[i, 0]**(order - j)
            A = np.vstack((A, constraint))
            b = np.append(b, points[i, 2])

    # find interpolating polynomial
    params = np.linalg.solve(A, b)

    # compute critical points
    dParams = np.zeros(order)
    for i in range(params.size - 1):
        dParams[i] = params[i] * (order - i)

    if np.any(np.isinf(dParams)):
        cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:, 0]))
    else:
        cp = np.concatenate((np.array([xminBound, xmaxBound]), points[:, 0],
                             np.roots(dParams)))

    # test critical points
    fmin = np.inf
    minPos = (xminBound + xmaxBound) / 2.
    for xCP in cp:
        if np.imag(xCP) == 0 and xCP >= xminBound and xCP <= xmaxBound:
            fCP = np.polyval(params, xCP)
            if np.imag(fCP) == 0 and fCP < fmin:
                minPos = np.double(np.real(xCP))
                fmin = np.double(np.real(fCP))

    # plot situation (omitted for now since we are not going to use it anyway)
    return (minPos, fmin)
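# A minimal usage sketch for polyinterp: cubic interpolation between two
# points given as rows of (x, f(x), f'(x)). The numeric values are made-up
# illustrative inputs, not from the original source.
if __name__ == '__main__':
    pts = np.array([[0.0, 1.0, -1.0],
                    [1.0, 0.5, 0.2]])
    minPos, fmin = polyinterp(pts)
    print('estimated minimizer of the cubic interpolant:', minPos)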