def plot_data(self):
    """Scatter two-class data with two input features in a 3D centre panel.

    Builds a figure with three side-by-side panels (width ratios 1:2:1)
    whose outer panels are switched off and act only as spacers.  When
    ``self.x`` has exactly two columns, the points labelled ``+1`` and ``-1``
    in ``self.y`` are drawn in a 3D centre panel, with the two columns of
    ``self.x`` on the horizontal axes and the label on the vertical axis,
    coloured by ``self.colors[0]`` and ``self.colors[1]`` respectively.  For
    any other number of columns the centre panel is created but left empty.
    """
    # A bare figure is used instead of plt.subplots(1, 3): the gridspec
    # panels below would otherwise be stacked on top of three unused default
    # axes, which recent Matplotlib releases no longer remove automatically.
    plt.figure(figsize=(8, 3))
    gs = gridspec.GridSpec(1, 3, width_ratios=[1, 2, 1])

    has_two_inputs = np.shape(self.x)[1] == 2

    ax1 = plt.subplot(gs[0])
    ax1.axis('off')

    # Create the centre panel once, with the projection it actually needs.
    if has_two_inputs:
        ax2 = plt.subplot(gs[1], projection='3d')
    else:
        ax2 = plt.subplot(gs[1])

    ax3 = plt.subplot(gs[2])
    ax3.axis('off')

    if not has_two_inputs:
        return

    for label, color in ((+1, self.colors[0]), (-1, self.colors[1])):
        ind = np.argwhere(self.y == label)
        ax2.scatter(self.x[ind, 0], self.x[ind, 1], self.y[ind],
                    s=55, color=color, edgecolor='k')
def plot_subproblem_data(self):
    """Draw one "class c versus all" scatter panel per class.

    The number of panels equals the number of distinct values in ``self.y``.
    For panel ``c`` a copy of the labels is recoded to ``+1`` on the rows
    whose integer label equals ``c`` and ``-1`` on all other rows, and the
    recoded data is passed to ``self.scatter_pts`` together with ``self.x``.
    """
    num_classes = len(np.unique(self.y))

    # one row of panels, one per class
    plt.figure(figsize=(9, 2.5))
    panels = gridspec.GridSpec(1, num_classes)

    for c in range(num_classes):
        # build the two-class labels for this subproblem
        labels = copy.deepcopy(self.y)
        as_int = labels.astype(int)
        member_rows = np.argwhere(as_int == c)[:, 0]
        other_rows = np.argwhere(as_int != c)[:, 0]
        labels[member_rows] = 1
        labels[other_rows] = -1

        # scatter the recoded data in its own panel
        ax = plt.subplot(panels[c])
        self.scatter_pts(ax, self.x, labels)

        # panel numbering in the title is one-based
        ax.set_title('class ' + str(c + 1) + ' versus all', fontsize=14)
def standard_normalizer(self, x):
    """Build per-row standardisation functions for ``x``.

    Means and standard deviations are computed along axis 1 (one value per
    row of ``x``), ignoring NaN entries.

    Side effect: NaN entries of ``x`` are overwritten IN PLACE with the mean
    of their row, so the caller's array is modified.

    Parameters
    ----------
    x : 2-D float numpy array
        Data to derive the statistics from; indexed as ``x[row, column]``.

    Returns
    -------
    normalizer : callable
        ``data -> (data - row_means) / row_stds``
    inverse_normalizer : callable
        ``data -> data * row_stds + row_means``
    """
    # NaN-aware statistics, kept as column vectors so they broadcast per row
    x_means = np.nanmean(x, axis=1)[:, np.newaxis]
    x_stds = np.nanstd(x, axis=1)[:, np.newaxis]

    # Rows that are (nearly) constant would blow up on division: add 1 to
    # their tiny standard deviation so they are divided by roughly 1 instead.
    x_stds[x_stds < 10**(-2)] += 1.0

    # Fill missing values with the mean of their row (vectorised; mutates x).
    nan_rows, nan_cols = np.nonzero(np.isnan(x))
    x[nan_rows, nan_cols] = x_means[nan_rows, 0]

    def normalizer(data):
        return (data - x_means) / x_stds

    def inverse_normalizer(data):
        return data * x_stds + x_means

    return normalizer, inverse_normalizer
def plot_classif(self, id_1, id_2, labels):
    """Scatter two rows of ``self.x_orig`` against each other, coloured by class.

    Points whose label in ``self.y`` is ``-1`` are drawn in red and points
    labelled ``+1`` in blue.  The legend names the red class ``labels[0]``
    and the blue class ``labels[1]``.

    Parameters
    ----------
    id_1, id_2 : int
        Row indices of ``self.x_orig`` used for the horizontal and vertical
        coordinates respectively.
    labels : sequence of str
        Legend text for the red (``-1``) and blue (``+1``) classes.
    """
    plt.figure(figsize=(5, 5))

    # legend entries are built by hand so they do not depend on scatter order
    red_patch = mpatches.Patch(color='red', label=labels[0])
    blue_patch = mpatches.Patch(color='blue', label=labels[1])
    # The legend used to be created twice; the second call replaced the
    # first, so only the upper-left (loc=2) legend is kept.
    plt.legend(handles=[red_patch, blue_patch], loc=2)

    # self.y is indexed as (row, column); the column picks the data point
    for label, color in ((-1, 'r'), (+1, 'b')):
        cols = [v[1] for v in np.argwhere(self.y == label)]
        plt.scatter(self.x_orig[id_1, cols], self.x_orig[id_2, cols],
                    color=color, s=30)

    plt.show()
def plot_all(X, y, w0, w1, w2):
    """Scatter three-class data and overlay three linear separators.

    Parameters
    ----------
    X : 2-D numpy array
        Data matrix; columns 1 and 2 are used as the plotted coordinates.
    y : numpy array
        Labels; rows labelled 0, 1 and 2 are drawn in blue, red and green.
    w0, w1, w2 : indexable of length >= 3
        Weights of each separator, drawn as the line
        ``w[0] + w[1] * s + w[2] * t = 0`` for ``s`` in ``[0, 1]``.
    """
    plt.figure(figsize=(4, 4))

    # scatter each class in its own colour
    for label, color in ((0, 'blue'), (1, 'red'), (2, 'green')):
        rows = [pos[0] for pos in np.argwhere(y == label)]
        plt.scatter(X[rows, 1], X[rows, 2], color=color, edgecolor='k', s=25)

    # plt.grid('off') passes a non-empty (truthy) string, which current
    # Matplotlib treats as "show the grid"; False is the explicit off switch.
    plt.grid(False)

    # plot separators
    s = np.linspace(0, 1, 100)
    for w in (w0, w1, w2):
        plt.plot(s, (-w[0] - w[1] * s) / w[2], color='k', linewidth=2)

    # clean up plot
    plt.xlim([-0.1, 1.1])
    plt.ylim([-0.1, 1.1])
    plt.show()
def train(x, y, feature_transforms, **kwargs):
    """Train one-versus-all classifiers and count misclassifications per step.

    For every class ``c`` the labels are recoded to ``+1`` (class ``c``) and
    ``-1`` (everything else) and a ``basic_runner.Setup`` run is fitted.  The
    per-class weight histories are then merged step by step into weight
    matrices with one column per class, and each matrix is scored with the
    ``'multiclass_counter'`` cost.

    Parameters
    ----------
    x : numpy array
        Input data; ``np.shape(x)[0]`` sets the size of the default weights.
    y : 2-D numpy array
        Labels, indexed as ``y[0, column]``.
    feature_transforms : passed through to ``basic_runner.Setup``.
    **kwargs
        Optional overrides: ``max_its`` (default 100), ``alpha_choice``
        (default 1), ``cost_name`` (default ``'softmax'``), ``w`` (default
        random ``(N + 1, 1)`` weights) and ``normalize`` (default
        ``'standard'``).

    Returns
    -------
    combined_weights : list of numpy arrays
        One weight matrix per optimisation step.
    count_history : list
        ``multiclass_counter`` cost of each combined weight matrix.
    """
    N = np.shape(x)[0]
    C = np.size(np.unique(y))

    # The random default is always drawn, even when the caller supplies `w`,
    # so the global random state advances the same way in both cases.
    default_w = 0.1 * np.random.randn(N + 1, 1)

    # user choices, falling back to the defaults
    max_its = kwargs.get('max_its', 100)
    alpha_choice = kwargs.get('alpha_choice', 1)
    cost_name = kwargs.get('cost_name', 'softmax')
    w = kwargs.get('w', default_w)
    normalize = kwargs.get('normalize', 'standard')

    # solve each "class c versus the rest" subproblem
    weight_histories = []
    for c in range(C):
        y_temp = copy.deepcopy(y)
        y_int = y_temp.astype(int)
        member_cols = np.argwhere(y_int == c)[:, 1]
        other_cols = np.argwhere(y_int != c)[:, 1]
        y_temp[0, member_cols] = 1
        y_temp[0, other_cols] = -1

        run = basic_runner.Setup(x, y_temp, feature_transforms, cost_name,
                                 normalize=normalize)
        run.fit(w=w, alpha_choice=alpha_choice, max_its=max_its)
        weight_histories.append(run.weight_history)

    # Stack the c-th classifier's weights at step r as column c of one
    # matrix: stack -> transpose -> drop the leading singleton axis.
    num_steps = len(weight_histories[0])
    combined_weights = [
        np.array([history[r] for history in weight_histories]).T[0, :, :]
        for r in range(num_steps)
    ]

    # score every step with the multi-class misclassification counter
    counter = basic_runner.Setup(x, y, feature_transforms,
                                 'multiclass_counter',
                                 normalize=normalize).cost_func
    count_history = [counter(v) for v in combined_weights]

    return combined_weights, count_history
def two_input_contour_plot(self, weight_history, x, y, **kwargs):
    """Draw one cost contour panel, with weight path, per class.

    The number of classes is taken from the number of columns of
    ``weight_history[0]``.  For class ``c`` the labels are recoded to ``+1``
    (class ``c``) / ``-1`` (rest), the matching cost function is obtained
    from ``cost_lib.choose_cost``, and column ``c`` of every entry of
    ``weight_history`` is drawn as the path on that contour plot.

    Parameters
    ----------
    weight_history : sequence of 2-D numpy arrays
        One weight matrix per step, one column per class.
    x : passed through to ``cost_lib.choose_cost``.
    y : 2-D numpy array
        Labels, indexed as ``y[0, column]``.
    **kwargs
        ``cost_name`` (default ``'softmax'``) selects the cost; all keyword
        arguments are also forwarded to ``self.contour_plot_setup`` and
        ``self.draw_weight_path``.
    """
    cost_name = kwargs.get('cost_name', 'softmax')

    # compute number of classes
    C = np.shape(weight_history[0])[1]

    fig = plt.figure(figsize=(10, 6))

    # Two panels per row.  The grid keeps its previous 2x2 size for up to
    # four classes and grows by rows beyond that, where the fixed 2x2 grid
    # used to raise an IndexError.
    num_rows = max(2, -(-C // 2))
    gs = gridspec.GridSpec(num_rows, 2)

    for c in range(C):
        # prepare temporary "c versus not-c" subproblem labels
        y_temp = copy.deepcopy(y)
        y_int = y_temp.astype(int)
        member_cols = np.argwhere(y_int == c)[:, 1]
        other_cols = np.argwhere(y_int != c)[:, 1]
        y_temp[0, member_cols] = 1
        y_temp[0, other_cols] = -1
        g = cost_lib.choose_cost(x, y_temp, cost_name)

        # create panel
        ax = plt.subplot(gs[c])
        ax.set_aspect('equal')

        # weight path of the c-th classifier, one column vector per step
        w_hist = [weights[:, c][:, np.newaxis] for weights in weight_history]
        self.contour_plot_setup(c, C, g, ax, **kwargs)  # draw contour plot
        self.draw_weight_path(ax, w_hist, **kwargs)     # draw path on it

        # label axes (class numbering in the labels is one-based)
        ax.set_xlabel(r'$w_0^{(' + str(c + 1) + ')}$', fontsize=15)
        ax.set_ylabel(r'$w_1^{(' + str(c + 1) + ')}$', fontsize=15,
                      labelpad=15, rotation=0)

    # remove whitespace between panels
    fig.subplots_adjust(wspace=0.001, hspace=0.001)

    plt.show()
def animate(k):
    """Render frame ``k`` of the three-panel model comparison animation.

    Relies on names from the enclosing scope: the axes ``ax1``/``ax2``/
    ``ax3``, ``self``, ``num_frames``, ``pt_size``, the viewing limits
    ``xmin1``/``xmax1``/``xmin2``/``xmax2``, the frame-to-run index lists
    ``inds1``/``inds2``/``inds3``, the run containers ``runs1``/``runs2``/
    ``runs3`` and ``artist``.

    Frame 0 shows only the data; later frames add the fit selected by
    ``inds*[k - 1]`` to each panel.

    Returns
    -------
    tuple
        ``(artist,)``, as expected from an animation frame function.
    """
    panels = (ax1, ax2, ax3)

    # clear panels
    for panel in panels:
        panel.cla()

    # print rendering update
    if np.mod(k+1,25) == 0:
        print ('rendering animation frame ' + str(k+1) + ' of ' + str(num_frames))
    if k == num_frames - 1:
        print ('animation rendering complete!')
        time.sleep(1.5)
        clear_output()

    # scatter data in every panel; self.y is indexed as (row, column)
    ind0 = [e[1] for e in np.argwhere(self.y == +1)]
    ind1 = [e[1] for e in np.argwhere(self.y == -1)]
    for panel in panels:
        panel.scatter(self.x[0,ind0],self.x[1,ind0],s = pt_size, color = self.colors[0], edgecolor = 'k',antialiased=True)
        panel.scatter(self.x[0,ind1],self.x[1,ind1],s = pt_size, color = self.colors[1], edgecolor = 'k',antialiased=True)

    if k == 0:
        # White title on the data-only frame.  It was previously set three
        # times on ax1 (copy-paste slip); it now goes on each panel once.
        for panel in panels:
            panel.set_title(str(0) + ' units fit to data',fontsize = 14,color = 'w')

    # NOTE(review): the original indentation was lost, so it is unclear
    # whether the limits were set only on frame 0.  They are applied on every
    # frame here, since cla() above resets them - confirm against the source.
    for panel in panels:
        panel.set_xlim([xmin1,xmax1])
        panel.set_ylim([xmin2,xmax2])

    # plot fit
    if k > 0:
        # get current run for each panel
        a1 = inds1[k-1]
        a2 = inds2[k-1]
        a3 = inds3[k-1]

        run1 = runs1[a1]
        # the first panel is labelled from the size of the run's initial
        # weights rather than from its position in runs1
        a1 = len(run1.w_init) - 1
        run2 = runs2[a2]
        steps = runs3.best_steps[:a3+1]

        # plot models to data
        self.draw_fit(ax1,run1,a1)
        self.draw_fit(ax2,run2,a2 + 1)
        self.draw_boosting_fit(ax3,steps,a3)

    return artist,
def animate(k):
    """Render frame ``k`` of the single-panel boosting animation.

    Relies on names from the enclosing scope: the axes ``ax``, ``self``,
    ``num_frames``, the frame-to-step index list ``inds``, the boosting
    ``run`` (its ``best_steps`` attribute is read) and ``artist``.

    Frame 0 shows only the data; for later frames the boosting steps up to
    and including index ``inds[k - 1]`` are drawn via
    ``self.draw_boosting_fit``.

    Returns
    -------
    tuple
        ``(artist,)``, as expected from an animation frame function.
    """
    # clear panel
    ax.cla()

    # print rendering update
    if np.mod(k + 1, 25) == 0:
        print('rendering animation frame ' + str(k + 1) + ' of ' + str(num_frames))
    if k == num_frames - 1:
        print('animation rendering complete!')
        time.sleep(1.5)
        clear_output()

    # scatter both classes; self.y is indexed as (row, column)
    for label, color in ((+1, self.colors[0]), (-1, self.colors[1])):
        cols = [e[1] for e in np.argwhere(self.y == label)]
        ax.scatter(self.x[0, cols], self.x[1, cols], s=55, color=color,
                   edgecolor='k')

    # plot boundary
    if k > 0:
        a = inds[k - 1]
        # the unused lookup of run.models[a] was dropped; only the steps
        # are needed to draw the fit
        steps = run.best_steps[:a + 1]
        self.draw_boosting_fit(ax, steps, a)

    # cleanup panel
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_xlabel(r'$x_1$', fontsize=15)
    ax.set_ylabel(r'$x_2$', fontsize=15, rotation=0, labelpad=20)

    return artist,
def animate(k):
    """Render frame ``k`` of the fit / cost-history animation.

    Relies on names from the enclosing scope: the axes ``ax1`` (data and
    fit) and ``ax2`` (cost history), ``self``, ``num_frames``, ``inds``,
    ``runs``, ``num_elements``, ``cost_evals``, the cost panel limits
    ``minxc``/``maxxc``/``ymin``/``ymax`` and ``artist``.

    Frame 0 shows only the data.  Later frames draw the run selected by
    ``inds[k - 1]`` and mark its cost value on the cost curve.

    Returns
    -------
    tuple
        ``(artist,)``, as expected from an animation frame function.
    """
    # start every frame from empty panels
    for panel in (ax1, ax2):
        panel.cla()

    # progress messages
    frame_number = k + 1
    if np.mod(frame_number, 25) == 0:
        print ('rendering animation frame ' + str(frame_number) + ' of ' + str(num_frames))
    if k == num_frames - 1:
        print ('animation rendering complete!')
        time.sleep(1.5)
        clear_output()

    # data points, one colour per class; self.y is indexed as (row, column)
    for label, color in ((+1, self.colors[0]), (-1, self.colors[1])):
        cols = [e[1] for e in np.argwhere(self.y == label)]
        ax1.scatter(self.x[0, cols], self.x[1, cols], s=55, color=color,
                    edgecolor='k')

    if k > 0:
        # fit belonging to this frame
        a = inds[k - 1]
        run = runs[a]
        self.draw_fit(ax1, run, a)

        # full cost curve, with the current run highlighted on top
        ax2.plot(np.arange(1, num_elements + 1), cost_evals, color='k',
                 linewidth=2.5, zorder=1)
        ax2.scatter(a + 1, cost_evals[a], color=self.colors[0], s=70,
                    edgecolor='w', linewidth=1.5, zorder=3)

    # data panel: no ticks, just axis names
    ax1.set_yticklabels([])
    ax1.set_xticklabels([])
    ax1.set_xticks([])
    ax1.set_yticks([])
    ax1.set_xlabel(r'$x_1$', fontsize=15)
    ax1.set_ylabel(r'$x_2$', fontsize=15, rotation=0, labelpad=20)

    # cost panel: fixed limits so the curve does not rescale between frames
    ax2.set_xlabel('number of units', fontsize=12)
    ax2.set_title('cost function plot', fontsize=14)
    ax2.set_xlim([minxc, maxxc])
    ax2.set_ylim([ymin, ymax])

    return artist,
def ZCA_sphere(x,**kwargs):
    '''
    Build a ZCA sphereing function from an input dataset x.

    The data is mean-centered along axis 1, passed to PCA (which returns the
    eigenvalues d and the matrix V), and the returned function maps data
    through V.T, divides by the square roots of d, and rotates back with V.

    Parameters
    ----------
    x : numpy array
        Dataset the transform is derived from; means are taken along axis 1.
    **kwargs
        Forwarded unchanged to PCA.

    Returns
    -------
    normalizer : callable
        data -> V . ((V.T . (data - x_means)) / stds)
    '''
    # Step 1: mean-center the data
    x_means = np.mean(x,axis = 1)[:,np.newaxis]
    x_centered = x - x_means

    # Step 2: compute pca transform on mean-centered data
    d,V = PCA(x_centered,**kwargs)

    # Step 3: scaling factors are the square roots of the values in 'd'
    stds = (d[:,np.newaxis])**(0.5)

    # Directions with a tiny spread would blow up on division: add 1 to those
    # entries so they are divided by roughly 1 instead.  Previously stds was
    # recomputed after this guard, which silently discarded it.
    stds[stds < 10**(-2)] += 1.0

    # sphere in the PCA basis, then rotate back to the original orientation
    def normalizer(data):
        return np.dot(V,np.dot(V.T,data - x_means)/stds)

    return normalizer
def PCA_sphereing(self, x, **kwargs):
    """Build PCA-sphereing and inverse functions from the dataset ``x``.

    ``x`` is mean-centered along axis 1 and handed to ``self.PCA``, which
    returns the values ``d`` and the matrix ``V``.  Scaling factors are the
    square roots of ``d``; factors below ``1e-2`` get 1 added so that the
    division stays well-behaved.

    Parameters
    ----------
    x : numpy array
        Dataset the transform is derived from; means are taken along axis 1.
    **kwargs
        Forwarded unchanged to ``self.PCA``.

    Returns
    -------
    normalizer : callable
        ``data -> (V.T . (data - center)) / scale``
    inverse_normalizer : callable
        ``data -> V . (data * scale) + center``
    """
    # centre the data
    center = np.mean(x, axis=1)[:, np.newaxis]

    # PCA of the centred data
    d, V = self.PCA(x - center, **kwargs)

    # per-direction scale, with tiny values pushed up by one
    scale = (d[:, np.newaxis])**(0.5)
    scale = scale + np.where(scale < 10**(-2), 1.0, 0.0)

    def normalizer(data):
        return np.dot(V.T, data - center) / scale

    def inverse_normalizer(data):
        return np.dot(V, data * scale) + center

    return normalizer, inverse_normalizer
def plot_data(self,ax,special_class,special_size):
    """Scatter every class of the dataset, enlarging one chosen class.

    Parameters
    ----------
    ax : axes-like object
        Target panel; ``scatter``, ``set_xlim``, ``set_ylim`` and ``axis``
        are called on it.
    special_class : label value
        Class whose points are drawn with marker size ``special_size``;
        all other classes use size 80.
    special_size : number
        Marker size for the special class.
    """
    # one scatter call per distinct label, coloured by position in the
    # sorted list of labels; self.y is indexed as (row, column)
    for c, label in enumerate(np.unique(self.y)):
        cols = [v[1] for v in np.argwhere(self.y == label)]
        size = special_size if label == special_class else 80
        ax.scatter(self.x[0,cols],self.x[1,cols],s = size,color = self.color_opts[c],edgecolor = 'k',linewidth = 1.5)

    def padded_limits(values):
        # data range widened by 10% on each side
        lo, hi = min(values), max(values)
        gap = (hi - lo)*0.1
        return [lo - gap, hi + gap]

    # control viewing limits
    ax.set_xlim(padded_limits(self.x[0,:]))
    ax.set_ylim(padded_limits(self.x[1,:]))
    ax.axis('off')
def animate_weightings(self,csvname,**kwargs):
    """Animate how up-weighting one class moves the decision boundary.

    The data is loaded with ``self.load_data``.  Frame ``k`` assigns the
    per-point weight ``weight_range[k]`` (from 1 to 10) to the points of the
    special class and 1 to all others, refits with ``bits.newtons_method``
    on ``bits.softmax``, and redraws the data (special class drawn with
    marker size ``size_range[k]``) together with the new boundary.

    Parameters
    ----------
    csvname : passed to ``self.load_data``.
    **kwargs
        ``num_slides`` (default 2): number of animation frames.

    Returns
    -------
    The ``animation.FuncAnimation`` object.
    """
    self.x,self.y,special_class = self.load_data(csvname)
    self.color_opts = np.array([[1,0,0.4], [ 0, 0.4, 1],[0, 1, 0.5],[1, 0.7, 0.5],[0.7, 0.6, 0.5]])

    # user-defined number of frames
    num_slides = kwargs.get('num_slides', 2)

    # marker sizes and point weights swept over the frames
    base_size = 100
    size_range = np.linspace(base_size, 20*base_size, num_slides)
    weight_range = np.linspace(1,10,num_slides)

    # figure to draw onto
    fig = plt.figure(figsize=(5,5))
    artist = fig
    ax = plt.subplot(111)

    # columns of the points that belong to the special class
    special_cols = [v[1] for v in np.argwhere(self.y == special_class)]

    # optimiser settings shared by every frame
    max_its = 5
    w = 0.1*np.random.randn(3,1)
    g = bits.softmax

    def animate(k):
        ax.cla()

        # print rendering update
        if np.mod(k+1,25) == 0:
            print ('rendering animation frame ' + str(k+1) + ' of ' + str(num_slides))
        if k == num_slides - 1:
            print ('animation rendering complete!')
            time.sleep(1.5)
            clear_output()

        # per-point weights: 1 everywhere, larger on the special class
        beta = np.ones((1,self.y.size))
        beta[:,special_cols] = weight_range[k]

        # refit and keep the final weights as the current model
        w_hist,g_hist = bits.newtons_method(g,w,self.x,self.y,beta,max_its)
        w_best = w_hist[-1]
        self.model = lambda data: bits.model(data,w_best)

        # redraw data and decision boundary
        self.plot_data(ax,special_class,size_range[k])
        self.draw_decision_boundary(ax)
        return artist,

    anim = animation.FuncAnimation(fig, animate ,frames=num_slides, interval=num_slides, blit=True)
    return(anim)
def pad_tensor(self, tensor, kernel_size):
    """Zero-pad the last two axes of a 3-D tensor for an odd-sized kernel.

    Each of the last two axes grows by ``(kernel_size - 1) // 2`` zeros on
    both sides; the first axis is left untouched.

    Parameters
    ----------
    tensor : 3-D numpy array
        Input whose last two axes are padded.
    kernel_size : int
        Positive odd kernel width.

    Returns
    -------
    numpy array of float64
        The padded copy, with the original values in the centre.

    Raises
    ------
    ValueError
        If ``kernel_size`` is not a positive odd integer.  (The previous
        table lookup failed with an IndexError for such sizes and for any
        size above 199.)
    """
    if kernel_size < 1 or kernel_size % 2 != 1:
        raise ValueError(
            'kernel_size must be a positive odd integer, got %r' % (kernel_size,))
    pad_val = (int(kernel_size) - 1) // 2

    depth, height, width = np.shape(tensor)[:3]
    tensor_padded = np.zeros(
        (depth, height + 2 * pad_val, width + 2 * pad_val))

    # Explicit end indices instead of `pad_val:-pad_val`: with pad_val == 0
    # (kernel_size == 1) the negative form selects an empty slice.
    tensor_padded[:, pad_val:pad_val + height, pad_val:pad_val + width] = tensor
    return tensor_padded
def measureED(x, y, yerr, tpeak, fwhm, num_fwhm=10):
    '''
    Measure the equivalent duration of a flare in a smoothed light curve.

    FINDflare typically doesn't identify the entire flare, so integrate
    num_fwhm/2*fwhm away from the peak. As long as the light curve is flat
    away from the flare, the region around the flare should not
    significantly contribute.

    The last sample of the light curve is dropped before integrating so that
    the arrays line up with the sample spacings np.diff(x), and the window
    boundaries themselves are excluded (strict inequalities).

    Parameters
    ----------
    x : numpy array
        time values from the entire light curve (1-D)
    y : numpy array
        flux values from the entire light curve
    yerr : numpy array
        error in the flux values
    tpeak : float
        Peak time of the flare detection
    fwhm : float
        Full-width half maximum of the flare
    num_fwhm : float, optional
        Size of the integration window in units of fwhm

    Returns
    -------
    ED - Equivalent duration of the flare
    ED_err - The uncertainty in the equivalent duration

    (-1, -1) is returned when the window cannot be located, i.e. no sample
    lies beyond one of its edges or its upper edge falls on the final sample.
    '''
    # np.trapz was renamed np.trapezoid in NumPy 2.0; use whichever exists.
    integrate = getattr(np, 'trapezoid', None) or np.trapz

    width = fwhm * num_fwhm

    try:
        # first samples strictly past the lower / upper edge of the window
        istart = np.argwhere(x > tpeak - width / 2)[0][0]
        istop = np.argwhere(x > tpeak + width / 2)[0][0]

        # spacing to the next sample; has one element fewer than x
        dx = np.diff(x)
        x = x[:-1]
        y = y[:-1]
        yerr = yerr[:-1]

        # x[istop] raises IndexError when istop was the (now dropped) last
        # sample; that case is reported as a failed measurement below
        mask = (x > x[istart]) & (x < x[istop])

        ED = integrate(y[mask], x[mask])
        ED_err = np.sqrt(np.sum((dx[mask] * yerr[mask])**2))

    except IndexError:
        return -1, -1

    return ED, ED_err
def d_x_d_t_numpy_batchs(y, x, t, rrpc, delta_t):
    """Evaluate ``alpha * x - omega * y`` for a batch of states.

    ``alpha`` is ``1 - sqrt(x**2 + y**2)``.  ``omega`` is ``2*pi`` divided by
    the entry of ``rrpc`` selected for each sample at index
    ``(1 + int(t / delta_t)) % len(rrpc)``; where that entry is exactly 0,
    ``1e-3`` is used as the divisor instead.

    Parameters
    ----------
    y, x : numpy arrays
        Current state components.
    t : numpy array
        Time of each sample; truncated to an integer step via ``delta_t``.
    rrpc : numpy array
        Lookup table of divisors for ``omega``.
    delta_t : float
        Step size used to turn ``t`` into a table index.

    Returns
    -------
    numpy array
        ``alpha * x - omega * y``.
    """
    alpha = 1 - (x * x + y * y) ** 0.5

    # table position one step ahead of the current time, wrapping around
    step_index = (1 + (t / delta_t).astype(int)) % len(rrpc)
    rr_values = rrpc[step_index]

    # angular velocity, with a fixed fallback divisor for zero entries
    omega = np.zeros_like(x).astype(float)
    zero_rows = np.argwhere(rr_values == 0)[:, 0]
    nonzero_rows = np.argwhere(rr_values != 0)[:, 0]
    omega[zero_rows] = 2.0 * math.pi / 1e-3
    omega[nonzero_rows] = 2.0 * math.pi / rr_values[nonzero_rows]

    return alpha * x - omega * y
def make_matrix_full_row_rank(x, min_ev=1e-8):
    """Return a full-row-rank matrix with the same Gram matrix as ``x``.

    The result ``new_x`` satisfies ``x.T @ x == new_x.T @ new_x`` up to the
    singular values that are discarded.

    Parameters
    ----------
    x : 2-D numpy array
    min_ev : float, optional
        Singular values not strictly greater than this are dropped.

    Returns
    -------
    numpy array
        One row per retained singular value: the matching right singular
        vector scaled by that singular value.
    """
    _, singular_values, vh = np.linalg.svd(x, full_matrices=False)
    keep = np.flatnonzero(singular_values > min_ev)
    # scaling each kept row of vh by its singular value equals diag(s) @ vh
    return singular_values[keep][:, np.newaxis] * vh[keep, :]
def plot_data_and_subproblem_separators(self):
    """Plot each class-versus-rest separator, then all of them together.

    The top row of the figure holds one panel per class: all points in
    grey, the points labelled ``a`` highlighted in ``self.colors[a]``, and
    the line ``w[0] + w[1] * r + w[2] * z = 0`` for ``w = self.W[a]``.  A
    final panel in the second row shows the output of ``self.plot_data``
    and ``self.plot_all_separators``.  All panels share one square viewing
    window derived from the first two columns of ``self.x``.
    """
    first, second = self.x[:, 0], self.x[:, 1]

    # common range over both coordinates, widened by 10% on each side
    lo = min(min(first), min(second))
    hi = max(max(first), max(second))
    margin = (hi - lo) * 0.1
    lo, hi = lo - margin, hi + margin

    num_classes = np.size(np.unique(self.y))

    # two rows: subproblem panels on top, combined panel below
    plt.figure(figsize=(9, 5))
    gs = gridspec.GridSpec(2, num_classes)

    r = np.linspace(lo, hi, 400)
    for a in range(num_classes):
        ax = plt.subplot(gs[a], aspect='equal')
        w = self.W[a]

        # every point in grey, current class highlighted on top
        ax.scatter(first, second, s=30, color='0.75')
        members = np.argwhere(self.y == a)[:, 0]
        ax.scatter(self.x[members, 0], self.x[members, 1], s=50,
                   color=self.colors[a], edgecolor='k', linewidth=1.5)

        # separator, drawn over a slightly wider black copy as an outline
        z = -w[0] / w[2] - w[1] / w[2] * r
        ax.plot(r, z, linewidth=2, color=self.colors[a], zorder=3)
        ax.plot(r, z, linewidth=2.75, color='k', zorder=2)

        ax.set_xlim(lo, hi)
        ax.set_ylim(lo, hi)
        ax.axis('off')

    # combined panel sits in the second row, second column of the grid
    ax4 = plt.subplot(gs[num_classes + 1], aspect='equal')
    self.plot_data(ax4)
    self.plot_all_separators(ax4)

    ax4.set_xlim(lo, hi)
    ax4.set_ylim(lo, hi)
    ax4.axis('off')

    plt.show()
def plot_data(self,ax):
    """Scatter the dataset on ``ax``, one colour per class.

    Classes are expected to be labelled ``1 .. C`` in ``self.y``, where ``C``
    is the number of distinct labels; class ``a + 1`` is drawn with
    ``self.colors[a]`` using the first two columns of ``self.x``.
    """
    num_classes = np.size(np.unique(self.y))

    for a, label in enumerate(range(1, num_classes + 1)):
        rows = np.argwhere(self.y == label)[:,0]
        ax.scatter(self.x[rows,0],self.x[rows,1], s = 50,color = self.colors[a],edgecolor = 'k',linewidth = 1.5)
def naive_fitting_demo(self, **kwargs):
    """Show the data next to a linear fit and its sign-thresholded version.

    Two stacked panels are created and filled with ``self.scatter_pts``.
    The lower panel additionally shows the line defined by the final entry
    of ``self.w_hist`` (lime) and, as dashed red segments, the sign of that
    line where it equals ``-1`` and where it equals ``+1``.
    """
    # figure with two stacked, equally tall panels
    plt.figure(figsize=(8, 4))
    gs = gridspec.GridSpec(2, 1, height_ratios=[1, 1])
    ax1 = plt.subplot(gs[0], aspect='equal')
    ax2 = plt.subplot(gs[1], aspect='equal')

    # same data in both panels
    self.scatter_pts(ax1)
    self.scatter_pts(ax2)

    # plotting range: data range widened by 40% on each side; computed
    # without in-place updates so self.x is never touched
    lo = min(self.x)
    hi = max(self.x)
    pad = (hi - lo) * 0.4
    x_fit = np.linspace(lo - pad, hi + pad, 300)

    # linear fit from the last recorded weights
    w = self.w_hist[-1]
    y_fit = w[0] + x_fit * w[1]
    ax2.plot(x_fit, y_fit, color='lime', linewidth=1.5)

    # sign of the fit, drawn separately below and above zero so the two
    # levels are not joined by a vertical line
    f = np.sign(y_fit)
    for level in (-1, +1):
        rows = [pos[0] for pos in np.argwhere(f == level)]
        ax2.plot(x_fit[rows], f[rows], color='r', linewidth=1.5,
                 linestyle='--')
def counting_cost(self,w):
    """Count the points whose predicted sign differs from the true label.

    Parameters
    ----------
    w : weights passed to ``self.model`` together with ``self.x``.

    Returns
    -------
    Number of entries of ``self.y`` that differ from
    ``sign(self.model(self.x, w))``.
    """
    predicted = np.sign(self.model(self.x,w))

    # labels are indexed as (row, column); one entry per mismatch
    mismatch_cols = [pos[1] for pos in np.argwhere(self.y != predicted)]
    return np.sum(len(mismatch_cols))
def scatter_2d_classification_data(self, ax, scatter, **kwargs):
    """Scatter two-input classification data on ``ax``, coloured by class.

    With exactly two distinct labels the classes are taken to be ``+1``
    (``self.colors[0]``) and ``-1`` (``self.colors[1]``); otherwise classes
    ``0 .. C-1`` are drawn with ``self.colors[c]``.

    Parameters
    ----------
    ax : matplotlib axes to draw on.
    scatter : str
        ``'on'`` draws black marker edges in the two-class case; any other
        value draws the markers without an explicit edge colour.  The
        multi-class case always uses black edges.
    **kwargs
        Accepted but not used.
    """
    # axis labels and one-decimal tick formatting
    ax.set_xlabel(r'$x_1$', fontsize=15)
    ax.set_ylabel(r'$x_2$', fontsize=15, rotation=0, labelpad=20)
    ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
    ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

    C = len(np.unique(self.y))
    if C == 2:
        edge = {'edgecolor': 'k'} if scatter == 'on' else {}
        for label, color in ((+1, self.colors[0]), (-1, self.colors[1])):
            rows = np.argwhere(self.y == label)[:, 0]
            ax.scatter(self.x[rows, 0], self.x[rows, 1], s=55, color=color,
                       **edge)
    else:
        for c in range(C):
            # Keep only the row index, as the two-class branch does.  Using
            # the raw argwhere result as an index also selects by its other
            # columns, which adds spurious points when self.y is 2-D.
            rows = np.argwhere(self.y == c)[:, 0]
            ax.scatter(self.x[rows, 0], self.x[rows, 1], s=55,
                       color=self.colors[c], edgecolor='k')
def surface_plot(self,g,ax,wmax,view):
    """Draw the surface of a two-weight cost function on a 3D axes.

    ``g`` is evaluated on a 300 x 300 grid covering ``[-wmax, wmax]`` in
    both weights.  If the cost takes fewer than 30 distinct values it is
    treated as a piecewise-constant (counting) cost and every level is drawn
    as its own flat grey surface; otherwise a single white wireframe-style
    surface is drawn.

    Parameters
    ----------
    g : callable
        Cost function; called with one length-2 weight vector at a time.
    ax : 3D matplotlib axes to draw on.
    wmax : float
        Half-width of the plotted weight range.
    view : pair of numbers
        Elevation and azimuth handed to ``ax.view_init``.
    """
    ##### Produce cost function surface #####
    r = np.linspace(-wmax,wmax,300)
    n = len(r)

    # grid of weight pairs, flattened to one pair per row for evaluation
    w1_vals,w2_vals = np.meshgrid(r,r)
    w_ = np.concatenate((w1_vals.reshape(-1,1),w2_vals.reshape(-1,1)),axis = 1)

    # Evaluate point by point (g is not assumed to be vectorised).  The
    # values are stored as floats: a counting cost returns integers, and an
    # integer array cannot hold the NaN markers written below.
    g_vals = np.asarray([g(w_[i,:]) for i in range(n**2)],dtype = float)

    ### is this a counting cost? if so plot level by level ###
    levels = np.unique(g_vals)
    if np.size(levels) < 30:
        for u in levels:
            # keep only the entries on the current level, blank out the rest
            z = np.where(g_vals == u,g_vals,np.nan).reshape(n,n)
            ax.plot_surface(w1_vals,w2_vals,z,alpha = 1,color = '#696969',zorder = 0,shade = True,linewidth=0)
    else:
        # smooth cost function: one surface with a coarse black mesh
        ax.plot_surface(w1_vals,w2_vals,g_vals.reshape(n,n),alpha = 1,color = 'w',rstride=25, cstride=25,linewidth=1,edgecolor = 'k',zorder = 2)

    ### clean up panel: transparent panes, white edges, invisible grid ###
    for axis in (ax.xaxis,ax.yaxis,ax.zaxis):
        axis.pane.fill = False
    for axis in (ax.xaxis,ax.yaxis,ax.zaxis):
        axis.pane.set_edgecolor('white')
    for axis in (ax.xaxis,ax.yaxis,ax.zaxis):
        axis._axinfo["grid"]['color'] = (1,1,1,0)

    ax.view_init(view[0],view[1])
def load_data(self,csvname):
    """Load a two-class dataset and thin out the ``-1`` class.

    The comma-separated file is read with one data point per column.  The
    first two rows are the inputs and the last row holds the labels.  Only
    the last five points labelled ``-1`` are kept, followed by every point
    labelled ``+1``.  The full table is stored on ``self.data``.

    Returns
    -------
    x : numpy array, the two input rows of the kept points
    y : numpy array with a single row, the labels of the kept points
    special_class : int
        Always ``-1``, the class that was thinned out.
    """
    table = np.loadtxt(csvname,delimiter = ',')
    self.data = table

    inputs = table[0:2,:]
    labels = table[-1,:][np.newaxis,:]

    # remove points from one class for illustrative purposes: keep the
    # last five -1 columns, then all +1 columns
    minority_cols = np.argwhere(labels == -1)[:,1][-5:]
    majority_cols = np.argwhere(labels == +1)[:,1]
    keep = np.concatenate((minority_cols,majority_cols))

    special_class = -1
    return inputs[:,keep],labels[:,keep],special_class
def pred(self, xt, x1, ykdt, params):
    """Predict mean and standard deviation at ``x1`` given observations at ``xt``.

    Observations whose entry in the first row of the flattened data is NaN
    are excluded from the conditioning set.

    Parameters
    ----------
    xt : array
        Inputs of the observations; ``xt.shape[0]`` is used as their count.
    x1 : array
        Inputs to predict at; ``x1.shape[0]`` is used as their count.
    ykdt : 3-D array
        Observations; unpacked as ``k, d, t = ykdt.shape``.
    params : passed to ``self.unpack_params``, ``self.mean`` and
        ``self.build_Kxx``.

    Returns
    -------
    mu_pred : array reshaped to ``[k, d, -1]``
    std_pred : array reshaped to ``[d, -1]``; the square root of the
        predictive covariance diagonal plus ``self.fudge``.
    """
    # only wnoise is used below; kern_params and mean_params are unpacked but unused
    kern_params, wnoise, mean_params = self.unpack_params(params, fudge=self.fudge)
    # t is unpacked but not used afterwards
    k, d, t = ykdt.shape
    if self.mean:
        # NOTE(review): this branch reads self.xt and self.k rather than the
        # xt argument / local k - the original comment flags it as to be changed.
        mu = self.mean(self.xt, params)[None]  # D x T ### TO BE CHANGED
        yc = (ykdt - mu).reshape([self.k, -1])
    else:
        yc = ykdt.reshape([k, -1])
    KXX = self.build_Kxx(xt, xt, params, prior=True)
    # select points to condition on: validity is decided from the first row of yc only
    # NOTE(review): squeeze() yields a 0-d array when exactly one index matches,
    # which would drop an axis in the indexing below - confirm this cannot occur.
    val_inds = np.argwhere(np.isnan(yc[0]) == False).squeeze()
    nval_inds = np.argwhere(np.isnan(yc[0]) == True).squeeze()
    # keep only rows/columns of the valid observations
    KXX = KXX[:, val_inds]
    KXX = KXX[val_inds]
    yc = yc[:, val_inds]
    # inverse of KXX via its Cholesky factor: (L L^T)^-1 = L^-T L^-1
    # (assumes `inv` is a matrix inverse imported elsewhere - TODO confirm)
    L = np.linalg.cholesky(KXX)
    iL = inv(L)
    Kinv = iL.T @ iL
    KXx = self.build_Kxx(xt, x1, params, prior=False)
    t0 = xt.shape[0]
    t1 = x1.shape[0]
    KXx = KXx.reshape([t0, d, t1, d]).reshape([t0 * d, -1])
    # block-diagonal noise term; diagonal entries of invalid points are zeroed
    noise = np.kron(np.diag(wnoise), np.eye(t0))
    noise[nval_inds, nval_inds] = 0
    # NOTE(review): adds the noise to the leading (t0*d x t0*d) block of the
    # cross-covariance, which presumes KXx has at least t0*d columns - confirm.
    KXx[:t0 * d, :t0 * d] += noise
    # reshapes through the same 4-D layout back to the same 2-D layout as above
    KXx = KXx.reshape([t0, d, t1, d]).reshape([d * t0, -1])
    KXx = KXx[val_inds]
    Kxx = self.build_Kxx(x1, x1, params, prior=True)
    # conditional mean and covariance given the valid observations
    mu_pred = KXx.T.dot(Kinv).dot(yc.T).T
    cov_pred = Kxx - KXx.T.dot(Kinv).dot(KXx)
    mu_pred = mu_pred.reshape([k, d, -1])
    # self.fudge is added to the variances before taking the square root
    return mu_pred, np.sqrt(
        np.diag(cov_pred).reshape([d, -1]) + self.fudge)
def inds_to_effect_change(leverage, desired_delta):
    """Pick the points to remove so the total change reaches ``desired_delta``.

    Removing point ``i`` changes the quantity of interest by
    ``-leverage[i]``.  Points are removed in the order that moves the total
    fastest in the direction of ``desired_delta``, and the shortest such
    prefix whose cumulative change reaches or passes ``desired_delta`` is
    returned.

    Parameters
    ----------
    leverage : 1-D numpy array
    desired_delta : float

    Returns
    -------
    numpy array of indices into ``leverage``, or ``None`` if the target is
    not reached even after removing every point.
    """
    direction = np.sign(desired_delta)

    # argsort is ascending; multiplying by the sign puts the most helpful
    # removals first for either direction
    order = np.argsort(leverage * direction)

    # cumulative change after removing the first 1, 2, ... points
    cumulative = -1 * np.cumsum(leverage[order])

    # positions at which the target has been reached or overshot
    reached = np.flatnonzero(direction * (desired_delta - cumulative) <= 0.)
    if len(reached) == 0:
        return None
    return order[:(reached[0] + 1)]
def generate_mixture_data(num_obs, true_centroids, true_probs, x_covs):
    """Draw observations from a Gaussian mixture using the global NumPy RNG.

    Parameters
    ----------
    num_obs : int
        Number of observations to draw.
    true_centroids : numpy array
        Row ``j`` is the mean of component ``j``.
    true_probs : sequence of float
        Mixing probabilities of the components.
    x_covs : numpy array
        ``x_covs[n]`` is the covariance used for observation ``n``.

    Returns
    -------
    x : numpy array, one drawn observation per row
    true_z : numpy array, one-hot component indicators (one row per draw)
    true_z_ind : numpy array of int, the component index of each draw
    """
    # one single-trial multinomial draw per observation gives one-hot rows
    true_z = np.random.multinomial(1, true_probs, num_obs)

    # turn the one-hot rows into component indices (-1 where no entry is set)
    true_z_ind = np.full(num_obs, -1)
    obs_idx, comp_idx = np.nonzero(true_z)
    true_z_ind[obs_idx] = comp_idx

    # draw each observation from its component, in order
    draws = []
    for n in range(num_obs):
        mean = np.squeeze(true_centroids[true_z_ind[n], :])
        cov = np.squeeze(x_covs[n, :])
        draws.append(np.random.multivariate_normal(mean=mean, cov=cov))
    x = np.array(draws)

    return x, true_z, true_z_ind
def plot_subproblem_fits(self,weights,**kwargs):
    """Draw each "class c versus all" dataset with its thresholded fit.

    For panel ``c`` the labels are recoded to ``+1`` on the rows whose
    integer label equals ``c`` and ``-1`` elsewhere, the data is scattered
    with ``self.scatter_pts``, and the sign of
    ``self.model(transformer(s), weights[:, c])`` is plotted over 300 points
    spanning the range returned by ``self.scatter_pts``.

    Parameters
    ----------
    weights : 2-D numpy array
        One column of weights per class.
    **kwargs
        ``transformer``: optional callable applied to the plotting inputs
        before they reach ``self.model`` (identity by default).
    """
    num_classes = len(np.unique(self.y))

    # one row of panels, one per class
    plt.figure(figsize=(9,2.5))
    panels = gridspec.GridSpec(1, num_classes)

    # feature map applied to the plotting inputs
    transformer = kwargs.get('transformer', lambda a: a)

    for c in range(num_classes):
        # build the two-class labels for this subproblem
        labels = copy.deepcopy(self.y)
        as_int = labels.astype(int)
        member_rows = np.argwhere(as_int == c)[:,0]
        other_rows = np.argwhere(as_int != c)[:,0]
        labels[member_rows] = 1
        labels[other_rows] = -1

        # scatter the recoded data; the helper reports the plotted range
        ax = plt.subplot(panels[c])
        xmin,xmax = self.scatter_pts(ax,self.x,labels)

        # evaluate the c-th classifier over that range and threshold it
        s = np.linspace(xmin,xmax,300)[np.newaxis,:]
        fit = self.model(transformer(s),weights[:,c])
        ax.plot(s.flatten(),np.sign(fit).flatten(),linewidth = 4,color = 'b',zorder = 2)

        # panel numbering in the title is one-based
        ax.set_title('class ' + str(c+1) + ' versus all',fontsize = 14)
def confusion_matrix(self,w):
    """Count the four actual/predicted label combinations for weights ``w``.

    Predictions are ``sign(self.model(self.x, w))``.

    Returns
    -------
    A, B, C, D : int
        ``A``: actual ``+1`` and predicted ``+1``;
        ``B``: actual ``+1`` and predicted ``-1``;
        ``C``: actual ``-1`` and predicted ``+1``;
        ``D``: actual ``-1`` and predicted ``-1``.
    """
    y_hat = np.sign(self.model(self.x,w))

    def columns_where(mask):
        # labels are indexed as (row, column); collect the matching columns
        return {pos[1] for pos in np.argwhere(mask)}

    actual_pos = columns_where(self.y == +1)
    actual_neg = columns_where(self.y == -1)
    predicted_pos = columns_where(y_hat == +1)
    predicted_neg = columns_where(y_hat == -1)

    # each cell is the overlap of one actual set with one predicted set
    A = len(actual_pos & predicted_pos)
    B = len(actual_pos & predicted_neg)
    C = len(actual_neg & predicted_pos)
    D = len(actual_neg & predicted_neg)
    return A,B,C,D