def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
    plt.clf()
    plt.scatter(x, y, s=10)
    plt.title("Web traffic over the last month")
    plt.xlabel("Time")
    plt.ylabel("Hits/hour")
    plt.xticks([w * 7 * 24 for w in range(10)],
               ["week %i" % w for w in range(10)])

    if models:
        if mx is None:
            mx = sp.linspace(0, x[-1], 1000)
        for model, style, color in zip(models, linestyles, colors):
            # print("Model:", model)
            # print("Coeffs:", model.coeffs)
            plt.plot(mx, model(mx), linestyle=style, linewidth=2, c=color)
        plt.legend(["d=%i" % m.order for m in models], loc="upper left")

    plt.autoscale(tight=True)
    plt.ylim(bottom=0)  # the ymin=/ymax= keywords were removed in matplotlib 3.3
    if ymax:
        plt.ylim(top=ymax)
    if xmin:
        plt.xlim(left=xmin)
    plt.grid(True, linestyle="-", color="0.75")
    plt.savefig(fname)  # fname was otherwise unused
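# Usage sketch for plot_models (illustrative only -- the data, the
# `linestyles`/`colors` globals, and the np.poly1d models below are
# assumptions, not the original script's; mx is passed explicitly so the
# scipy `sp.linspace` fallback is not needed).
import numpy as np
import matplotlib.pyplot as plt

linestyles = ["-", "--", ":"]
colors = ["g", "k", "b"]
hours = np.arange(1, 7 * 24 * 4 + 1)                         # four weeks of hourly ticks
hits = 100 + 0.5 * hours + np.random.randn(len(hours)) * 25  # synthetic hits/hour
models = [np.poly1d(np.polyfit(hours, hits, d)) for d in (1, 2, 3)]
plot_models(hours, hits, models, "traffic.png",
            mx=np.linspace(0, hours[-1], 1000))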
def show_plot(X, y, n_neighbors=10, h=0.2):
    # Create color maps
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF', '#FFAAAA',
                                 '#AAFFAA', '#AAAAFF', '#FFAAAA', '#AAFFAA',
                                 '#AAAAFF', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF', '#FF0000',
                                '#FF0000', '#FF0000', '#FF0000', '#FF0000',
                                '#FF0000', '#FF0000'])

    for weights in ['uniform', 'distance']:
        # we create an instance of Neighbours Classifier and fit the data.
        clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
        clf.fit(X, y)
        clf.n_neighbors = n_neighbors

        # Plot the decision boundary. For that, we will assign a color to each
        # point in the mesh [x_min, x_max]x[y_min, y_max].
        x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
        y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                             np.arange(y_min, y_max, h))
        Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

        # Put the result into a color plot
        Z = Z.reshape(xx.shape)
        plt.figure()
        plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

        # Plot also the training points
        plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
        plt.xlim(xx.min(), xx.max())
        plt.ylim(yy.min(), yy.max())
        plt.title("3-Class classification (k = %i, weights = '%s')"
                  % (n_neighbors, weights))

    plt.show()
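# Usage sketch for show_plot on the first two iris features (illustrative;
# assumes the same imports as above: numpy as np, matplotlib.pyplot as plt,
# ListedColormap, and sklearn's `neighbors` module).
from sklearn import datasets

iris_knn = datasets.load_iris()
show_plot(iris_knn.data[:, :2], iris_knn.target, n_neighbors=15)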
def work(self, **kwargs):
    self.__dict__.update(kwargs)
    self.worked = True
    samples = LGMM1(rng=self.rng,
                    size=(self.n_samples,),
                    **self.LGMM1_kwargs)
    samples = np.sort(samples)
    edges = samples[::self.samples_per_bin]
    centers = .5 * edges[:-1] + .5 * edges[1:]
    print(edges)

    pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs))
    dx = edges[1:] - edges[:-1]
    y = 1 / dx / len(dx)

    if self.show:
        plt.scatter(centers, y)
        plt.plot(centers, pdf)
        plt.show()

    err = (pdf - y) ** 2
    print(np.max(err))
    print(np.mean(err))
    print(np.median(err))
    if not self.show:
        assert np.max(err) < .1
        assert np.mean(err) < .01
        assert np.median(err) < .01
def altitude():
    global alt, i
    # we want a temporary file to parse so that we don't disturb nmea.txt
    f1 = open('temp.txt', 'w')               # create and open a writable txt file
    f1.truncate()                            # erase contents of the file
    shutil.copyfile('nmea.txt', 'temp.txt')  # copy nmea.txt to temp.txt
    f1.close()                               # close writable file

    f1 = open('temp.txt', 'r')               # open read-only
    try:  # try/finally guarantees the file is closed correctly
        for line in f1:                      # read each line in temp.txt
            if line[4] == 'G':               # fifth character in $GPGGA
                if len(line) > 50:           # with a lock, the sentence is filled with data
                    # print(line)
                    gpgga = nmea.GPGGA()
                    gpgga.parse(line)
                    alt = gpgga.antenna_altitude
                    i += 1                   # increment the counter
                    print(i)
                    print(alt)
                    plt.scatter(x=[i], y=[float(alt)], s=1, c='r')  # plot each point
    finally:
        f1.close()
    i = 0
    # axes are autoscaled
    plt.ylabel('meters')
    plt.xlabel('counts')
    plt.title('ALTITUDE')
    plt.show()
def plot_2d_simple(data, y=None):
    if y is None:
        plt.scatter(data[:, 0], data[:, 1], s=50)
    else:
        nY = len(y)
        # note the parentheses: the original `y[i] - 1 % len(collist)` lost the
        # modulo because % binds tighter than -
        Ycol = [collist[(y.astype(int)[i] - 1) % len(collist)] for i in range(nY)]
        plt.scatter(data[:, 0], data[:, 1], c=Ycol, s=40)
def scatter_time_vs_s(time, norm, point_labels, title):
    plt.figure()
    size = 100
    for i, l in enumerate(sorted(norm.keys())):
        if l != "fbpca":  # `is not` compared identity, not string equality
            plt.scatter(time[l], norm[l], label=l, marker='o', c='b', s=size)
            for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
                plt.annotate(label, xy=(x, y), xytext=(0, -80),
                             textcoords='offset points', ha='right',
                             arrowprops=dict(arrowstyle="->",
                                             connectionstyle="arc3"),
                             va='bottom', size=11, rotation=90)
        else:
            plt.scatter(time[l], norm[l], label=l, marker='^', c='red', s=size)
            for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
                plt.annotate(label, xy=(x, y), xytext=(0, 30),
                             textcoords='offset points', ha='right',
                             arrowprops=dict(arrowstyle="->",
                                             connectionstyle="arc3"),
                             va='bottom', size=11, rotation=90)

    plt.legend(loc="best")
    plt.suptitle(title)
    plt.ylabel("norm discrepancy")
    plt.xlabel("running time [s]")
def work(self):
    self.worked = True
    kwargs = dict(
        weights=self.weights,
        mus=self.mus,
        sigmas=self.sigmas,
        low=self.low,
        high=self.high,
        q=self.q,
    )
    samples = GMM1(rng=self.rng, size=(self.n_samples,), **kwargs)
    samples = np.sort(samples)
    edges = samples[::self.samples_per_bin]
    # print(samples)

    pdf = np.exp(GMM1_lpdf(edges[:-1], **kwargs))
    dx = edges[1:] - edges[:-1]
    y = 1 / dx / len(dx)

    if self.show:
        plt.scatter(edges[:-1], y)
        plt.plot(edges[:-1], pdf)
        plt.show()

    err = (pdf - y) ** 2
    print(np.max(err))
    print(np.mean(err))
    print(np.median(err))
    if not self.show:
        assert np.max(err) < .1
        assert np.mean(err) < .01
        assert np.median(err) < .01
def scatter(x, y, equal=False, xlabel=None, ylabel=None,
            xinvert=False, yinvert=False):
    """
    Plot a scatter with simple formatting options
    """
    plt.scatter(x, y, 200, color=[0.3, 0.3, 0.3],
                edgecolors="white", linewidth=1, zorder=2)
    sns.despine()
    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)
    if equal:
        # plt.axes() created a new axes here and is deprecated for this use;
        # operate on the current axes instead
        plt.gca().set_aspect("equal")
        plt.plot([0, max([x.max(), y.max()])],
                 [0, max([x.max(), y.max()])],
                 color=[0.6, 0.6, 0.6], zorder=1)
        bmin = min([x.min(), y.min()])
        bmax = max([x.max(), y.max()])
        rng = abs(bmax - bmin)
        plt.xlim([bmin - rng * 0.05, bmax + rng * 0.05])
        plt.ylim([bmin - rng * 0.05, bmax + rng * 0.05])
    else:
        xrng = abs(x.max() - x.min())
        yrng = abs(y.max() - y.min())
        plt.xlim([x.min() - xrng * 0.05, x.max() + xrng * 0.05])
        plt.ylim([y.min() - yrng * 0.05, y.max() + yrng * 0.05])
    if xinvert:
        plt.gca().invert_xaxis()
    if yinvert:
        plt.gca().invert_yaxis()
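# Usage sketch for the scatter helper above (illustrative data; assumes
# numpy as np alongside the matplotlib/seaborn imports the helper relies on).
import numpy as np

obs = np.random.default_rng(0).normal(size=100)
pred_vals = obs + np.random.default_rng(1).normal(scale=0.3, size=100)
scatter(obs, pred_vals, equal=True, xlabel="observed", ylabel="predicted")
plt.show()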
def draw(data, classes, model, resolution=100):
    mycm = mpl.cm.get_cmap('Paired')

    one_min, one_max = data[:, 0].min() - 0.1, data[:, 0].max() + 0.1
    two_min, two_max = data[:, 1].min() - 0.1, data[:, 1].max() + 0.1
    xx1, xx2 = np.meshgrid(np.arange(one_min, one_max, (one_max - one_min) / resolution),
                           np.arange(two_min, two_max, (two_max - two_min) / resolution))

    inputs = np.c_[xx1.ravel(), xx2.ravel()]
    z = []
    for i in range(len(inputs)):
        z.append(predict(model, inputs[i])[0])
    result = np.array(z).reshape(xx1.shape)

    plt.contourf(xx1, xx2, result, cmap=mycm)
    plt.scatter(data[:, 0], data[:, 1], s=50, c=classes, cmap=mycm)

    # 15 prototypes, 5 per class
    t = np.zeros(15)
    for i in range(15):
        if i < 5:
            t[i] = 0
        elif i < 10:
            t[i] = 1
        else:
            t[i] = 2
    plt.scatter(model[:, 0], model[:, 1], s=150, c=t, cmap=mycm)

    plt.xlim([0, 10])
    plt.ylim([0, 10])
    plt.show()
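# `predict` is not defined in this snippet. A minimal stand-in consistent
# with draw()'s 15-prototype / 3-class layout is a nearest-prototype rule
# (an assumption -- the original may use a different classifier):
def predict(model, point):
    # model: (15, 2) array of prototypes, 5 per class, as in `t` above
    d = np.sqrt(((model - point) ** 2).sum(axis=1))
    i = int(np.argmin(d))
    return i // 5, d[i]  # (class label, distance to the nearest prototype)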
def influence_plot(X, y_true, y_pred, **kwargs):
    """Produces an influence plot.

    Parameters
    ----------
    X : array
        Design matrix.
    y_true : array_like
        Observed labels, either 0 or 1.
    y_pred : array_like
        Predicted probabilities, floats on [0, 1].

    Notes
    -----
    .. plot:: pyplots/influence_plot.py
    """
    r = pearson_residuals(y_true, y_pred)
    leverages = pregibon_leverages(X, y_pred)

    delta_X2 = case_deltas(r, leverages)
    dbetas = pregibon_dbetas(r, leverages)

    plt.scatter(y_pred, delta_X2, s=dbetas * 800, **kwargs)
    __, __, y1, y2 = plt.axis()
    plt.axis((0, 1, y1, y2))
    plt.xlabel('Predicted Probability')
    plt.ylabel(r'$\Delta \chi^2$')
    plt.tight_layout()
def tuning(x, y, err=None, smooth=None, ylabel=None, pal=None):
    """
    Plot a tuning curve
    """
    if smooth is not None:
        xs, ys = smoothfit(x, y, smooth)
        plt.plot(xs, ys, linewidth=4, color="black", zorder=1)
    else:
        ys = asarray([0])
    if pal is None:
        pal = sns.color_palette("husl", n_colors=len(x) + 6)
        pal = pal[2:2 + len(x)][::-1]
    plt.scatter(x, y, s=300, linewidth=0, color=pal, zorder=2)
    if err is not None:
        plt.errorbar(x, y, yerr=err, linestyle="None", ecolor="black", zorder=1)
    plt.xlabel("Wall distance (mm)")
    plt.ylabel(ylabel)
    plt.xlim([-2.5, 32.5])
    if err is not None:
        errTmp = err.copy()  # the original aliased err and mutated the caller's array
        errTmp[isnan(err)] = 0
        rng = max([nanmax(ys), nanmax(y + errTmp)])
    else:
        rng = nanmax(ys)     # the original crashed when err was omitted
    plt.ylim([0 - rng * 0.1, rng + rng * 0.1])
    plt.yticks(linspace(0, rng, 3))
    plt.xticks(range(0, 40, 10))
    sns.despine()
    return rng
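# `smoothfit` is not defined here; a plausible stand-in (an assumption, not
# the original helper) fits a smoothing spline and samples it densely:
import numpy as np
from scipy.interpolate import UnivariateSpline

def smoothfit(x, y, smooth):
    xs = np.linspace(np.min(x), np.max(x), 200)
    return xs, UnivariateSpline(x, y, s=smooth)(xs)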
def regress_show4(yEv, yEv_calc, disp=True, graph=True, plt_title=None, ms_sz=None):
    # if the output is a vector and the original is a matrix,
    # the output is translated to a matrix.

    r_sqr, RMSE, MAE, DAE = estimate_accuracy4(yEv, yEv_calc, disp=disp)

    if graph:
        # plt.scatter(yEv.tolist(), yEv_calc.tolist())
        plt.figure()
        if ms_sz is None:
            ms_sz = max(min(6000 / yEv.shape[0], 8), 3)
        # plt.plot(yEv.tolist(), yEv_calc.tolist(), '.', ms=ms_sz)  # change ms
        plt.scatter(yEv.tolist(), yEv_calc.tolist(), s=ms_sz)
        ax = plt.gca()
        lims = [
            np.min([ax.get_xlim(), ax.get_ylim()]),  # min of both axes
            np.max([ax.get_xlim(), ax.get_ylim()]),  # max of both axes
        ]
        # now plot both limits against each other
        # ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
        ax.plot(lims, lims, '-', color='pink')
        plt.xlabel('Experiment')
        plt.ylabel('Prediction')
        if plt_title is None:
            plt.title('$r^2$={0:.1e}, RMSE={1:.1e}, MAE={2:.1e}, MedAE={3:.1e}'.format(
                r_sqr, RMSE, MAE, DAE))
        elif plt_title != "":
            plt.title(plt_title)
        # plt.show()

    return r_sqr, RMSE, MAE, DAE
def plot(i, pcanc, lr, pp, labelFlag, Y):
    if len(str(i)) == 1:
        fig = plt.figure(i)
    else:
        fig = plt.subplot(i)
    if pcanc == 0:
        plt.title('learning_rate: ' + str(lr) + ' perplexity: ' + str(pp))
        print("Plotting tSNE")
    else:
        plt.title('PCA-n_components: ' + str(pcanc) +
                  ' learning_rate: ' + str(lr) +
                  ' perplexity: ' + str(pp))
        print("Plotting PCA-tSNE")
    plt.scatter(Y[:, 0], Y[:, 1], c=colors)
    if labelFlag == 1:
        for label, cx, cy in zip(y, Y[:, 0], Y[:, 1]):
            plt.annotate(label.decode('utf-8'),
                         xy=(cx, cy),
                         xytext=(-10, 10),
                         fontproperties=font,
                         textcoords='offset points',
                         ha='right',
                         va='bottom',
                         bbox=dict(boxstyle='round,pad=0.5',
                                   fc='yellow', alpha=0.9))
            # arrowprops=dict(arrowstyle='->', connectionstyle='arc3,rad=0'))
    ax = plt.gca()  # `ax` was never defined in the original
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')
    print("Done.")
def create_plots(iterations, data, M, step_size):
    NN_b, NN_a, E = steepest_decent_training(iterations, data, M, step_size)
    X = data[:, 0]
    X_cont = np.arange(-10, 10, 0.1)
    t = data[:, 1]
    Y = [NN_a(np.array([[x], [1]]))[0] for x in X_cont]
    f = np.vectorize(lambda x: sin(x) / x)  # Py2 tuple-parameter lambda fixed

    plt.figure(1)
    plt.plot(X_cont, Y, label='Neural network \n (M = %d)' % M)
    plt.plot(X_cont, f(X_cont), label='sinc(x)')
    plt.legend()
    plt.savefig('images/nn_vs_real_%d_%d.%s' % (iterations, M, img_format),
                format=img_format)

    plt.figure(2)
    plt.plot(X_cont, Y, label='Neural network \n (M = %d)' % M)
    plt.scatter(X, t, color='red', label='Training data')
    plt.legend()
    plt.savefig('images/nn_vs_training_%d_%d.%s' % (iterations, M, img_format),
                format=img_format)

    plt.figure(3)
    plt.plot(E, label='Error (M = %d)' % M)
    plt.yscale('log')
    plt.legend()
    plt.savefig('images/error_%d_%d.%s' % (iterations, M, img_format),
                format=img_format)
    plt.show()
def plot_dpi_dpr_distribution(args, dpis, dprs, diagnoses):
    print(log.INFO, 'Plotting estimate distributions...')
    diagnoses = np.array(diagnoses)
    diagnoses[(0.25 <= diagnoses) & (diagnoses <= 0.75)] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)

    biomarkers_str = args.method if args.biomarkers is None else ', '.join(args.biomarkers)
    ax.set_title('DP estimation using {0} at {1}'.format(biomarkers_str,
                                                         ', '.join(args.visits)))
    ax.set_xlabel('DP')
    ax.set_ylabel('DPR')

    plt.scatter(dpis, dprs, c=diagnoses, edgecolor='none', s=25.0,
                vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=0.5)

    # Plot legend
    # noinspection PyUnresolvedReferences
    rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_cn + (0.5,), linewidth=0),
             mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_mci + (0.5,), linewidth=0),
             mpl.patches.Rectangle((0, 0), 1, 1, fc=pt.color_ad + (0.5,), linewidth=0)]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels, fontsize=10, ncol=len(rects),
                       loc='upper center', framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is not None:
        plt.savefig(args.plot_file, transparent=True)
    else:
        plt.show()
    plt.close(fig)
def fig(data, target):  # FIXME
    plt.scatter(data, target, color='black')
    plt.xticks(())
    plt.yticks(())
    plt.show()
def plot_words(V, labels=None, color='b', mark='o', fa='bottom'):
    W = tsne(V, 2)
    plt.scatter(W[:, 0], W[:, 1], c=color, marker=mark, s=50.0)
    if labels is not None:  # zip() over the default None would fail
        for label, x, y in zip(labels, W[:, 0], W[:, 1]):
            plt.annotate(label.decode('utf8'),
                         xy=(x, y),
                         xytext=(-1, 1),
                         textcoords='offset points',
                         ha='center',
                         va=fa,
                         bbox=dict(boxstyle='round,pad=0.1',
                                   fc='white', alpha=0))
def plot_obs_expc_new(obs, expc, expc_upper, expc_lower, analysis, log, ax=None):
    """Modified version of the obs-expc plot suggested by R2. The points are
    separated by whether their CIs are above, below, or overlapping the
    empirical value.

    Input:
    obs - list of observed values
    expc - list of mean simulated values for the corresponding observed values
    expc_upper - list of the 97.5% quantile of the simulated values
    expc_lower - list of the 2.5% quantile of the simulated values
    analysis - whether it is partitions or compositions
    log - whether the y axis is to be transformed. If True, expc / obs is
        plotted. If False, expc - obs is plotted.
    ax - whether the plot is generated on a given figure, or a new plot
        object is to be created
    """
    obs, expc, expc_upper, expc_lower = list(obs), list(expc), list(expc_upper), list(expc_lower)
    if not ax:
        fig = plt.figure(figsize=(3.5, 3.5))
        ax = plt.subplot(111)

    ind_above = [i for i in range(len(obs)) if expc_lower[i] > obs[i]]
    ind_below = [i for i in range(len(obs)) if expc_upper[i] < obs[i]]
    ind_overlap = [i for i in range(len(obs)) if expc_lower[i] <= obs[i] <= expc_upper[i]]

    if log:
        expc_standardize = [expc[i] / obs[i] for i in range(len(obs))]
        expc_upper_standardize = [expc_upper[i] / obs[i] for i in range(len(obs))]
        expc_lower_standardize = [expc_lower[i] / obs[i] for i in range(len(obs))]
        axis_min = 0.9 * min([expc_lower_standardize[i]
                              for i in range(len(expc_lower_standardize))
                              if expc_lower_standardize[i] != 0])
        axis_max = 1.5 * max(expc_upper_standardize)
    else:
        expc_standardize = [expc[i] - obs[i] for i in range(len(obs))]
        expc_upper_standardize = [expc_upper[i] - obs[i] for i in range(len(obs))]
        expc_lower_standardize = [expc_lower[i] - obs[i] for i in range(len(obs))]
        axis_min = 1.1 * min(expc_lower_standardize)
        axis_max = 1.1 * max(expc_upper_standardize)

    if analysis == 'partition':
        col = '#228B22'
    else:
        col = '#CD69C9'

    ind_full = []
    for index in [ind_below, ind_overlap, ind_above]:
        expc_standardize_ind = [expc_standardize[i] for i in index]
        sort_ind_ind = sorted(range(len(expc_standardize_ind)),
                              key=lambda i: expc_standardize_ind[i])
        sorted_index = [index[i] for i in sort_ind_ind]
        ind_full.extend(sorted_index)

    xaxis_max = len(ind_full)
    for i, ind in enumerate(ind_full):
        plt.plot([i, i], [expc_lower_standardize[ind], expc_upper_standardize[ind]],
                 '-', c=col, linewidth=0.4)
    plt.scatter(range(len(ind_full)), [expc_standardize[i] for i in ind_full],
                c=col, edgecolors='none', s=8)

    if log:
        plt.plot([0, xaxis_max + 1], [1, 1], 'k-', linewidth=1.5)
        ax.set_yscale('log')
    else:
        plt.plot([0, xaxis_max + 1], [0, 0], 'k-', linewidth=1.5)
    plt.plot([len(ind_below) - 0.5, len(ind_below) - 0.5],
             [axis_min, axis_max], 'k--')
    plt.plot([len(ind_below) + len(ind_overlap) - 0.5,
              len(ind_below) + len(ind_overlap) - 0.5],
             [axis_min, axis_max], 'k--')
    plt.xlim(0, xaxis_max)
    plt.ylim(axis_min, axis_max)
    plt.tick_params(axis='y', which='major', labelsize=8, labelleft='on')
    plt.tick_params(axis='x', which='major', top='off', bottom='off',
                    labelbottom='off')
    return ax
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    cmap = ListedColormap(colors[:len(np.unique(y))])

    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(np.unique(y)):
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1],
                    alpha=0.8, c=[cmap(idx)],
                    marker=markers[idx], label=cl)

    # highlight test samples
    if test_idx:
        X_test, y_test = X[test_idx, :], y[test_idx]
        # c='' is rejected by recent matplotlib; draw hollow circles instead
        plt.scatter(X_test[:, 0], X_test[:, 1], facecolors='none',
                    edgecolors='black', alpha=1.0, linewidths=1,
                    marker='o', s=55, label='test set')
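# Usage sketch for plot_decision_regions on two iris features (illustrative;
# the surrounding chapter trains its own classifier on standardized data).
from sklearn import datasets
from sklearn.linear_model import LogisticRegression

iris_dr = datasets.load_iris()
X_dr, y_dr = iris_dr.data[:, [2, 3]], iris_dr.target
plot_decision_regions(X_dr, y_dr,
                      classifier=LogisticRegression(max_iter=200).fit(X_dr, y_dr))
plt.legend(loc='upper left')
plt.show()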
def base(Point):  # draw the foundation symbol
    for x in np.linspace(Point[0] - 0.3, Point[0] + 0.3, 100):
        plt.scatter(x, Point[1], color='k', s=0.1, marker='o')  # `label=str` passed the builtin and was dropped
    for t in np.linspace(Point[0] - 0.2, Point[0] + 0.2, 3):
        P1 = [t, Point[1]]
        P2 = [t - 0.15, Point[1] - 0.25]
        line(P1, P2, width=0.1)
def disp_external(init_r=None, extfile=None, rad_scale=1000., col_scale=1.,
                  cut_zero=True, control=None):
    '''display as a scatter plot'''
    if extfile:
        from numpy import loadtxt
        anal_data['ext_disp'] = loadtxt(extfile)
        nrow = len(anal_data['ext_disp']) // 2
        anal_data['ext_peri'] = anal_data['ext_disp'][:nrow]
        anal_data['ext_disp'] = anal_data['ext_disp'][nrow:]
        # gpos not correct
    rad = anal_data['ext_disp'].ravel() * rad_scale
    col = anal_data['ext_peri'].ravel() * col_scale
    if control and 'xstep' in control:
        from numpy import arange
        # note: ncol/nrow must match the grid shape of the loaded data
        ir = arange(0., ncol) * control['xstep']
        itheta = arange(0., nrow) * control['ystep']  # was misspelled `ithet`
    elif 'gpos' in anal_data:
        ir, itheta = array(anal_data['gpos']).astype('float').transpose()
    else:
        return
    itheta *= pi / 180.
    if init_r:
        ir = abs(ir) + init_r  # the original took abs('ir') of a string literal
    ix, iy = ir * cos(itheta), ir * sin(itheta)
    from matplotlib.pyplot import scatter
    if cut_zero:
        sel = col > 0
        rad = rad[sel]
        col = col[sel]
        ix = ix[sel]
        iy = iy[sel]
    scatter(ix, iy, rad, col)  # the `hold` kwarg was removed from matplotlib
def plt_data():
    t = [[0, 1], [1, 0], [1, 1], [0, 0]]
    t2 = [1, 1, 1, 0]
    X = np.array(t)
    Y = np.array(t2)

    h = .02  # step size in the mesh
    logreg = linear_model.LogisticRegression(C=1e5)

    # we create an instance of the classifier and fit the data.
    logreg.fit(X, Y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1, figsize=(4, 3))
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())

    plt.show()
def plot_data(models, dataframe, flag=0):
    """need good and bad models; plots all the data"""
    if flag == 0:
        for key in models[0]:
            g = dataframe[(dataframe['module_category'] == key[0]) &
                          (dataframe['component_category'] == key[1])]
            # a single RGB color must be a 2-D row, not shape (3, 1)
            plt.scatter(g['time'], g['number_repair'], c=[np.random.rand(3)])
            plt.xlabel("Time")
            plt.ylabel("number of repairs")
            plt.title("%s, and %s" % (key[0], key[1]))
            plt.show()
    if flag == 1:
        for key in models[1]:
            g = dataframe[(dataframe['module_category'] == key[0]) &
                          (dataframe['component_category'] == key[1])]
            plt.scatter(g['time'], g['number_repair'], c=[np.random.rand(3)])
            plt.xlabel("Time")
            plt.ylabel("number of repairs")
            if models[1][key] == [1, 1, 1]:
                plt.title("too little data: %s, and %s" % (key[0], key[1]))
            else:
                plt.title("no curve fit: %s, and %s" % (key[0], key[1]))
            plt.show()
def export(self, query, n_topics, n_words, title="PCA Export", fname="PCAExport"):
    vec = DictVectorizer()

    rows = topics_to_vectorspace(self.model, n_topics, n_words)
    X = vec.fit_transform(rows)
    pca = skPCA(n_components=2)
    X_pca = pca.fit(X.toarray()).transform(X.toarray())

    match = []
    for i in range(n_topics):
        topic = [t[1] for t in self.model.show_topic(i, len(self.dictionary.keys()))]
        for word in topic:
            if word in query:
                match.append(word)
                break
        else:
            match.append("")  # keep match aligned with topics when nothing hits

    pyplot.figure()
    for i in range(X_pca.shape[0]):
        pyplot.scatter(X_pca[i, 0], X_pca[i, 1], alpha=.5)
        pyplot.text(X_pca[i, 0], X_pca[i, 1], s=' '.join([str(i), match[i]]))

    pyplot.title(title)
    pyplot.savefig(fname)
    pyplot.close()
def scatter(frame, var1, var2, var3=None, reg=False, **args):
    import matplotlib.cm as cm
    if isinstance(frame, copper.Dataset):
        frame = frame.frame
    x = frame[var1]
    y = frame[var2]
    if var3 is None:
        plt.scatter(x.values, y.values, **args)
    else:
        options = list(set(frame[var3]))
        for i, option in enumerate(options):
            f = frame[frame[var3] == option]
            x = f[var1]
            y = f[var2]
            c = cm.jet(i / len(options), 1)
            plt.scatter(x, y, c=c, label=option, **args)
            plt.legend()
    if reg:
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        line = slope * x + intercept  # regression line
        plt.plot(x, line, c='r')
    plt.xlabel(var1)
    plt.ylabel(var2)
def kmeans(points, k):
    centroids = random.sample(points, k)
    allColors = list(colors.cnames.keys())

    iterations = 0
    oldCentroids = None

    while not shouldStop(oldCentroids, centroids, iterations):
        oldCentroids = centroids
        iterations += 1

        # we need numpy arrays to do some cool linalg stuff
        points = np.array(points)
        centroids = np.array(centroids)

        labels = getLabels(points, centroids)
        centroids = getCentroids(points, labels, k)

    # plotting centroids as a red star
    x, y = zip(*centroids)
    plt.scatter(x, y, marker='*', color='r', s=80)

    # life is a coloring book, so let's put colors on stuff
    counter = 0
    for centroid in labels.keys():
        for point in labels[centroid]:
            plt.scatter(point[0], point[1], color=allColors[counter])
        # 6 was chosen to avoid white; white is apparently some multiple of 5
        counter += 6

    print(iterations)
    return centroids
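# The helpers used by kmeans() are not shown; minimal sketches consistent
# with how they are called (assumptions, not the original implementations):
MAX_ITERATIONS = 100

def shouldStop(oldCentroids, centroids, iterations):
    # stop on convergence or after a fixed iteration budget
    if iterations > MAX_ITERATIONS:
        return True
    return oldCentroids is not None and np.array_equal(oldCentroids, centroids)

def getLabels(points, centroids):
    # map centroid index -> list of points assigned to that centroid
    labels = {i: [] for i in range(len(centroids))}
    for p in points:
        i = int(np.argmin(((centroids - p) ** 2).sum(axis=1)))
        labels[i].append(p)
    return labels

def getCentroids(points, labels, k):
    # new centroid = mean of the cluster; re-seed empty clusters randomly
    return np.array([np.mean(labels[i], axis=0) if labels[i]
                     else points[np.random.randint(len(points))]
                     for i in range(k)])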
def plot_contour_with_labels(contour, frame_index=0):
    """
    Makes a beautiful plot with all the points labeled.

    Parameters: One frame's worth of a contour
    """
    contour_x = contour[:, 0, frame_index]
    contour_y = contour[:, 1, frame_index]
    plt.plot(contour_x, contour_y, 'r', lw=3)
    plt.scatter(contour_x, contour_y, s=35)
    labels = list(str(l) for l in range(0, len(contour_x)))
    for label_index, (label, x, y), in enumerate(zip(labels, contour_x, contour_y)):
        # Orient the label for the first half of the points in one direction
        # and the other half in the other
        if label_index <= len(contour_x) // 2 - 1:  # Minus one since indexing
            xytext = (20, -20)                      # is 0-based
        else:
            xytext = (-20, 20)
        plt.annotate(label,
                     xy=(x, y),
                     xytext=xytext,
                     textcoords='offset points',
                     ha='right',
                     va='bottom',
                     bbox=dict(boxstyle='round,pad=0.5',
                               fc='yellow', alpha=0.5),
                     arrowprops=dict(arrowstyle='->',
                                     connectionstyle='arc3,rad=0'))
def visualizeEigenvalues(eVal, verboseLevel):
    real = []
    imag = []
    for z in eVal:
        rp = z.real
        im = z.imag
        if not (rp == np.inf or rp == -np.inf) \
                and not (im == np.inf or im == -np.inf):
            real.append(rp)
            imag.append(im)

    if verboseLevel >= 1:
        print("length of regular real values=" + str(len(real)))
        print("length of regular imag values=" + str(len(imag)))
        print("minimal real part=" + str(min(real)),
              "& maximal real part=" + str(max(real)))
        print("minimal imag part=" + str(min(imag)),
              "& maximal imag part=" + str(max(imag)))
    if verboseLevel == 2:
        print("all real values:", str(real))
        print("all imag values:", str(imag))

    # plt.scatter(real[4:], imag[4:])
    plt.scatter(real, imag)
    plt.grid(True)
    plt.xlabel("real part")
    plt.ylabel("imag part")
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
    plt.show()
def plotscatterdate(x, y):
    plt.scatter(x, y)
    plt.xlim(left=0)
    plt.xlabel('Number of Railways')
    plt.ylabel('Price in Pounds')
    plt.title('Scatter of Price against Number of Railways')
    plt.show()
def plot_convergence():
    data = np.loadtxt("smooth-error.out")

    nx = data[:, 0]
    aerr = data[:, 1]

    ax = plt.subplot(111)
    ax.set_xscale('log')
    ax.set_yscale('log')

    plt.scatter(nx, aerr, marker="x", color="r")
    plt.plot(nx, aerr[0] * (nx[0] / nx) ** 2, "--", color="k")

    plt.xlabel("number of zones")
    plt.ylabel("L2 norm of abs error")
    plt.title(r"convergence for smooth advection problem", fontsize=11)

    f = plt.gcf()
    f.set_size_inches(5.0, 5.0)

    plt.xlim(8, 256)
    plt.savefig("smooth_converge.eps", bbox_inches="tight")
# scar_loc = np.append(scar_loc, find_nearest_state(4.2, -4.0, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(5.2, -5.0, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-1, -0.7, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-1, -1.2, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-1.5, -1.6, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-1.5, -1.9, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-2.4, -2.0, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-2.4, -2.3, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-3.2, -2.5, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-4.2, -3.5, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-4.2, -4.0, e, overlap))
# scar_loc = np.append(scar_loc, find_nearest_state(-5.2, -5.0, e, overlap))
# scar_loc = np.sort(scar_loc.astype(int))

plt.scatter(e, overlap)
for n in range(0, np.size(scar_loc, axis=0)):
    plt.scatter(e[scar_loc[n]], overlap[scar_loc[n]], s=200, color="red", alpha=0.6)
for n in range(0, np.size(subband_loc, axis=0)):
    plt.scatter(e[subband_loc[n]], overlap[subband_loc[n]], s=200, color="cyan", alpha=0.6)
plt.show()
k1 = pd.DataFrame(df.iloc[:, 0:1])
k2 = pd.DataFrame(df.iloc[:, 1:2])
k3 = pd.DataFrame(df.iloc[:, 2:3])
diff = pd.DataFrame(df.iloc[:, 4:5])

xinv = pd.DataFrame(np.linalg.pinv(x.values), x.columns, x.index)
theta = pd.DataFrame(np.dot(xinv, y))
print(theta)

output = pd.DataFrame(np.dot(x, theta))
print(np.sqrt(metrics.mean_squared_error(y, output)))

plt.scatter(diff, y)
plt.xlabel('difficulty')
plt.ylabel('average marks')
# plt.plot(diff, output, 'r')
plt.scatter(diff, output)
plt.show()

# getting the python output to an html page
sum = 0  # note: shadows the builtin sum()
while 1:
    print('enter the mark distribution')
    ip = []
    for i in range(3):
test = data[~mask]

# import the linear regression model
from sklearn import linear_model
regr = linear_model.LinearRegression()

# convert the columns (lists) into array data structures
train_x = np.asanyarray(train[["ENGINE_SIZE"]])  # almost the same as np.array
train_y = np.asanyarray(train[["CO2"]])

# fit the model => output: regr.coef_, regr.intercept_
regr.fit(train_x, train_y)
# remember the basic linear formula: y = (coefficient) * x + (intercept)
print('Coefficients: ', regr.coef_)
print('Intercept: ', regr.intercept_)

# Plot outputs
plt.scatter(train.ENGINE_SIZE, train.CO2, color='blue')
plt.plot(train_x, regr.coef_[0][0] * train_x + regr.intercept_[0], '-r')
plt.xlabel("ENGINE_SIZE")
plt.ylabel("Emission")
plt.show()

# ---------- Evaluation ----------
from sklearn.metrics import r2_score

test_x = np.asanyarray(test[['ENGINE_SIZE']])
test_y = np.asanyarray(test[['CO2']])
test_y_predict = regr.predict(test_x)

print("Mean absolute error: {:.2f}".format(
    np.mean(np.absolute(test_y_predict - test_y))))
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y.reshape(-1, 1)).ravel()  # StandardScaler expects a 2-D array

# Fitting SVR to the dataset
from sklearn.svm import SVR
regressor = SVR(kernel='rbf')
regressor.fit(X, y)

# Predicting a new result
y_pred = sc_y.inverse_transform(
    regressor.predict(sc_X.transform(np.array([[6.5]]))).reshape(-1, 1))

# Visualising the Regression results
plt.scatter(X, y, color='red')
plt.plot(X, regressor.predict(X), color='blue')
plt.title('Truth or Bluff (SVR Model)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()

# Visualising the Regression results (for higher resolution and smoother curve)
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color='red')
plt.plot(X_grid, regressor.predict(X_grid), color='blue')
plt.title('Truth or Bluff (Regression Model)')
plt.xlabel('Position level')
plt.ylabel('Salary')
plt.show()
for i in range(1, 11):
    kmeans = KMeans(n_clusters=i, init='k-means++', random_state=42)
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')
plt.show()

# Fitting K-Means to the dataset
kmeans = KMeans(n_clusters=5, init='k-means++', random_state=42)
y_kmeans = kmeans.fit_predict(X)

# Visualising the clusters
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s=100, c='red', label='Cluster 1')
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s=100, c='blue', label='Cluster 2')
plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s=100, c='green', label='Cluster 3')
plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s=100, c='cyan', label='Cluster 4')
plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s=100, c='magenta', label='Cluster 5')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            s=300, c='yellow', label='Centroids')
plt.title('data_scientist_v2')
plt.xlabel('Applications')
plt.ylabel('Selected applicants')
plt.legend()
plt.show()
x_show = np.stack((x1.flat, x2.flat), axis=1)  # grid of test points
print(x_show.shape)

cm_light = mpl.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])
y_show_hat = model.predict(x_show)  # predicted values
# print(y_show_hat.shape)
# print(y_show_hat)
y_show_hat = y_show_hat.reshape(x1.shape)  # reshape to match the input grid
print(y_show_hat)

plt.figure(1, figsize=(10, 4), facecolor='w')
plt.subplot(1, 2, 1)  # first subplot in a 1x2 grid
plt.pcolormesh(x1, x2, y_show_hat, cmap=cm_light)  # display the predictions
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test.ravel(), edgecolors='k',
            s=150, zorder=10, cmap=cm_dark, marker='*')  # test data
plt.scatter(x[:, 0], x[:, 1], c=y.ravel(), edgecolors='k', s=40, cmap=cm_dark)  # all data
plt.xlabel('sepal length', fontsize=15)
plt.ylabel('sepal width', fontsize=15)
plt.xlim(x1_min, x1_max)
plt.ylim(x2_min, x2_max)
plt.grid(True)
plt.plot(errors[0], label=kernel_name)

plt.legend(loc='best')
plt.xlabel('Days to predict')
plt.ylabel('Relative absolute error')
plt.title('Errors as a function of time')

# %%
# Our conclusion from the above is that the shape of the error does not
# depend much on the kernel

# %%
# We now plot the error after 4 days as a function of kernel params
plt.figure()
error, start, middle = zip(*errors_by_kernel.values())
# plt.scatter(start, middle, np.array(error)[:, 1])  # superseded by the call below
plt.scatter(start, middle, s=300 * np.array(error)[:, 1],
            c=np.array(error)[:, 3], marker='o')
plt.colorbar()
plt.xlabel('start parameter')
plt.ylabel('middle parameter')
plt.title('Errors as a function of ramp kernel parameter')

# %%
# These results tell us that we want a ramp with a length of 10 and
# ramping all the way

# %%
sin_default.append(float(splitted[1]))
sin_quirez.append(float(splitted[2]))
sin_ff.append(float(splitted[3]))
sin_quirez_ff.append(float(splitted[4]))
sin_float.append(float(splitted[5]))
sin_mpfr.append(float(splitted[6]))

sin_default_abs.append(abs(float(splitted[1]) - float(splitted[6])))
sin_quirez_abs.append(abs(float(splitted[2]) - float(splitted[6])))
sin_ff_abs.append(abs(float(splitted[3]) - float(splitted[6])))
sin_quirez_ff_abs.append(abs(float(splitted[4]) - float(splitted[6])))
sin_float_abs.append(abs(float(splitted[5]) - float(splitted[6])))

plt.rcParams.update({'font.size': 9})
plt.scatter(theta[beginIndex:endIndex], sin_mpfr[beginIndex:endIndex],
            color='xkcd:grey', marker="o", label="Real", s=70)
plt.scatter(theta[beginIndex:endIndex], sin_quirez_ff[beginIndex:endIndex],
            color='k', marker="*", label="Our CORDIC (posit)")
plt.scatter(theta[beginIndex:endIndex], sin_float[beginIndex:endIndex],
            color='b', marker="x", label="Naive CORDIC (float)")
plt.ylim(2.55 * pow(10, -6), 4 * pow(10, -6))
plt.xlim(1.57079245, 1.57079365)
plt.xticks([1.5707925, 1.5707930, 1.5707935])

f = mtick.ScalarFormatter(useOffset=False, useMathText=True)
g = lambda x, pos: "${}$".format(f._formatSciNotation('%1.10e' % x))
# h = lambda x, pos: "$2^{{-{}}}$".format('%.f' % x)
h = lambda x, pos: "${}$".format(f._formatSciNotation('%1.10e' % x))
plt.gca().yaxis.set_major_formatter(mtick.FuncFormatter(g))
plt.gca().xaxis.set_major_formatter(mtick.FuncFormatter(h))

plt.legend()
plt.xlabel(r"$\theta$")
                     stop=X_set[:, 0].max() + 1, step=0.01),
           np.arange(start=X_set[:, 1].min() - 1,
                     stop=X_set[:, 1].max() + 1, step=0.01))
plt.contourf(X1, X2,
             classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape),
             alpha=0.75, cmap=ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
for i, j in enumerate(np.unique(y_set)):
    plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1],
                c=ListedColormap(('red', 'green'))(i), label=j)
plt.title('Logistic Regression (Training set)')
plt.xlabel('Age')
plt.ylabel('Estimated Salary')
plt.legend()
plt.show()

# Visualising the Test set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_test, y_test
X1, X2 = np.meshgrid(
    np.arange(start=X_set[:, 0].min() - 1,
              stop=X_set[:, 0].max() + 1, step=0.01),
    np.arange(start=X_set[:, 1].min() - 1,
def plotting(points):
    print(points)
    plt.scatter(points[:, 0], points[:, 1])
    plt.show()
x_test = x[split:]
y = dataset[labels[1]].reshape(len(dataset[labels[1]]), 1)
y_train = y[:split]
y_test = y[split:]

# Creating the linear regression model object
reg = linear_model.LinearRegression()
# Fitting the model on the training data
reg.fit(x_train, y_train)
# Getting the coefficients that were produced for y = mx + b
print("Coefficients:", reg.coef_)
# Calculating the MSE (Mean Squared Error)
print("MSE:", np.mean((reg.predict(x_test) - y_test) ** 2))
# Calculating the variance score of the model
print("Variance:", reg.score(x_test, y_test))

# Plotting the output
plt.scatter(x_test, y_test, color='black')
plt.plot(x_test, reg.predict(x_test), color='blue', linewidth=3)
plt.xticks(())
plt.yticks(())
plt.show()
from matplotlib import pyplot as plt  # the canonical first import
import pandas as pd                   # the canonical second import, for handling the dataset

plt.style.use('fivethirtyeight')      # set the visualization style

data_set = pd.read_csv('Rap.csv')     # read the CSV file with the dataset
bpm = data_set['bpm']                 # the BPM value of each row
year = data_set['year']               # the release year of each row

plt.scatter(                          # build and style the scatter plot
    bpm, year, c=bpm, s=bpm * 1.5,
    cmap='gist_heat', edgecolor='black', linewidth=.7
)

bar = plt.colorbar(                   # build the BPM color bar
    orientation='horizontal',
    shrink=0.8, extend='both',
    extendfrac=.1
)
bar.set_label('Beats-per-minute scale', fontsize=18)  # color-bar caption

plt.title('Popularity of performance '                # plot title
          'tempo in rap', fontsize=25)
plt.xlabel('BPM', fontsize=18)        # x-axis label
    def optimize(self, x, y):
        self.optimizer.zero_grad()
        _, loss = self.forward_with_loss(x, y)
        loss.backward()
        self.optimizer.step()
        return loss


# train
model = Model()
for epoch in range(100):
    for x, y in data_loader:
        loss = model.optimize(x, y)
    print("Epoch {:4d} | loss : {:f}".format(epoch, loss))

# feed
mu, sigma = model(torch.tensor(x_space))
mu = mu.detach().numpy()
sigma = sigma.detach().numpy()

# visualize
plt.scatter(train_x, train_y, marker='x', c='black', alpha=0.5, label='train data')
plt.plot(x_space, mu, label='prediction mean')
plt.fill_between(x_space, mu + sigma, mu - sigma, alpha=0.5, label='+-')
plt.plot(x_space, true_y, label='true function')
plt.legend()
plt.ylim(-0.5, 1.3)
plt.show()
ma_X = rules.antecedents.apply(to_list) + rules.consequents.apply(to_list)
ma_X = ma_X.apply(sorted)

rules_sets = list(ma_X)
unique = [list(m) for m in set(tuple(i) for i in rules_sets)]

index_rules = []
for i in unique:
    index_rules.append(rules_sets.index(i))

rules_r = rules.iloc[index_rules, :]  # rules without any redundancy; 18 rules
rules_r.sort_values('lift', ascending=False).head(10)

# visualizing results
plt.scatter(rules_r['support'], rules_r['confidence'], alpha=0.5)
plt.xlabel('support')
plt.ylabel('confidence')
plt.title('Support vs Confidence')

plt.scatter(rules_r['support'], rules_r['lift'], alpha=0.5)
plt.xlabel('support')
plt.ylabel('lift')
plt.title('Support vs lift')

fit = np.polyfit(rules_r['lift'], rules_r['confidence'], 1)  # 1 denotes the degree of the polynomial
fit_fn = np.poly1d(fit)
plt.plot(rules_r['lift'], rules_r['confidence'], 'rs',
         rules_r['lift'], fit_fn(rules_r['lift']))
plt.xlabel('lift')
stride = 1
XX = cases[0:cases.size - q - lag * dd:stride]
for i in range(1, lag):
    X = cases[i * dd:cases.size - q - (lag - i) * dd:stride]
    M = mobility[i * dd:mobility.size - q - (lag - i) * dd:stride]
    XX = np.column_stack((XX, X, M))
yy = cases[lag * dd + q::stride]
tt = t[lag * dd + q::stride]

model = Ridge(alpha=a, fit_intercept=False).fit(XX, yy)
print(XX)
print(model.intercept_, model.coef_)

fakeMobility = np.array([-.4] * 288).reshape(-1, 1)
ZZ = cases[0:cases.size - q - lag * dd:stride]
for i in range(1, lag):
    X = cases[i * dd:cases.size - q - (lag - i) * dd:stride]
    M = fakeMobility[i * dd:fakeMobility.size - q - (lag - i) * dd:stride]
    ZZ = np.column_stack((ZZ, X, M))
yy = cases[lag * dd + q::stride]
tt = t[lag * dd + q::stride]

model = Ridge(alpha=a, fit_intercept=False).fit(ZZ, yy)
fakePred = model.predict(ZZ)

plt.scatter(t, cases, color='black')
plt.scatter(tt, fakePred, color='blue')
plt.plot(t, fakeMobility, color='r')
plt.title('Mobility Set To -40%')
plt.xlabel("time (days)")
plt.ylabel("#cases")
plt.legend(["mobility", "training data", "predictions"], loc='upper right')
plt.show()
def draw_Battery_Use(consumption, total_power, solar_power, wind_power, dic,
                     configuration, max_power):
    # Create multiple text variables to display in the graph
    power_generated = total_power.sum()
    power = total_power
    total_power = np.mean(np.reshape(total_power[:8760], (365, 24)), axis=1)
    t1 = ("Storage capacity: \nAmount of windturbines: \nCable area: \n"
          "Maximum Power Output: \nTotal Power Generated: \nTotal costs: ")
    t2 = str(int(dic['total_storage'])) + " kWh\n" + \
        str(int(configuration[-2])) + "\n" + \
        str(int(dic['cable_area'])) + " mm²\n" + \
        str(int(max_power)) + " kW\n" + \
        str(int(power_generated)) + " kWh\n" + \
        '€' + str(int(dic['cost']))

    # Create the solar stats text variables to display in the graph
    t3 = ""
    for I in range(4):
        if configuration[0 + I * 3] > 0:
            t3 = t3 + "SP" + str(I + 1) + " - Area: " + str(int(configuration[0 + I * 3])) + \
                "m² - Angle: " + str(int(configuration[1 + I * 3])) + \
                "° - Orientation: " + str(int(configuration[2 + I * 3])) + "°\n"

    plt.subplot(2, 1, 1)
    plt.plot(total_power, color='green', alpha=0.5, label='Total energy production')
    plt.plot(solar_power, color='yellow', alpha=0.5, label='Solar energy')
    plt.plot(wind_power, color='blue', alpha=0.5, label='Wind energy')
    plt.plot(consumption, color='red', label='Energy demand')
    plt.text(330, total_power.max() * 1.04, t2, ha='left', va='top',
             style='italic', wrap=False)
    plt.text(330, total_power.max() * 1.04, t1, ha='right', va='top', wrap=False)
    plt.text(362, total_power.max() * 0.725, t3, ha='right', va='top', wrap=False)
    plt.legend(loc='upper center')
    plt.title("Power Average per Day")
    plt.xlabel('Days')
    plt.ylabel('kW')
    plt.xlim(0, 365)

    plt.subplot(2, 1, 2)
    power = power - 6000
    for x in range(2):
        if x == 0:
            batterycharge = [int(dic['total_storage'])]
        else:
            batterycharge = [batterycharge[-1]]
        Powershortage = []
        for I in power:
            batterycharge.append(batterycharge[-1] + I)
            if int(dic['total_storage']) < batterycharge[-1]:
                batterycharge[-1] = int(dic['total_storage'])
            elif batterycharge[-1] < 0:
                batterycharge[-1] = 0
                Powershortage.append(len(batterycharge) - 1)

    plt.plot(batterycharge, color='green', alpha=0.5)
    # the original tested `len(Powershortage) == 0`, which never marked a
    # shortage; mark the shortage hours (x-axis) at charge level zero instead
    if len(Powershortage) != 0:
        plt.scatter(Powershortage, np.zeros(len(Powershortage)), color='red')
    plt.title("Power supply level over a Year")
    plt.xlabel('Hour')
    plt.ylabel('kWh')
    plt.xlim(0, 8760)
    plt.show()
    def DataPlot(self, data, name_of_bed, fig=False, save_data=False):
        '''
        Method to plot data

        args::
            data: Data to be plotted
            name_of_bed: Bed to be plotted
            fig: False to show the chart without saving it; True to also save the plot
            save_data: False to keep the data in memory only; True to also save it as a CSV file
        '''
        self.data = data
        self.name_of_bed = name_of_bed
        self.fig = fig
        self.save_data = save_data
        assert isinstance(
            self.name_of_bed, str
        ), f"Name of Bed must be a String, but {type(self.name_of_bed)} was passed."

        # The original repeated the same plotting block in four if/elif
        # branches whose `or` conditions let the first branch swallow almost
        # every case; the block is factored out here and the two save flags
        # are applied independently.
        """Plots the Cumulative Mass Retained vs the Phi Scale for easy
        picking of the percentiles"""
        fig_handle, ax = plt.subplots(figsize=(10, 8))
        plt.plot(self.data['Phi_scale'],
                 self.data['Cummulative_Mass_Retained'], color='b')
        plt.scatter(self.data['Phi_scale'],
                    self.data['Cummulative_Mass_Retained'], color='r')
        ax.xaxis.set_minor_locator(AutoMinorLocator(n=5))
        ax.yaxis.set_minor_locator(AutoMinorLocator(n=5))
        ax.xaxis.set_minor_formatter(FormatStrFormatter("%.1f"))
        ax.yaxis.set_minor_formatter(FormatStrFormatter("%.0f"))
        plt.rc('grid', linestyle="-", color='black')
        for side in ('bottom', 'top', 'right', 'left'):
            # the original passed '1.5', which is outside the valid 0-1
            # grayscale range
            ax.spines[side].set_color('0.5')
        plt.grid(True, which='minor', color='k', linestyle='-')
        plt.grid(True, which='major', color='r', linestyle='-')
        plt.xlabel('Grain Size (Phi)', fontsize=16)
        plt.ylabel('Cumulative Mass Retained (%)', fontsize=16)
        plt.title(f'Cumulative Frequency Curve {self.name_of_bed}',
                  fontsize=20, fontweight='bold')

        if self.fig:
            fig_handle.savefig(f'{self.name_of_bed}.png', dpi=fig_handle.dpi)  # `pdi=` was a typo
        if self.save_data:
            data.to_csv(f'{self.name_of_bed}.csv', index=False)
y_train = np.array(y[:num_training])

# Test data
X_test = np.array(X[num_training:]).reshape((num_test, 1))
y_test = np.array(y[num_training:])

# Create linear regression object
linear_regressor = linear_model.LinearRegression()

# Train the model using the training sets
linear_regressor.fit(X_train, y_train)

# Predict the train data output
y_train_pred = linear_regressor.predict(X_train)
plt.figure()
plt.scatter(X_train, y_train, color='green')
plt.plot(X_train, y_train_pred, color='black', linewidth=4)
plt.title('Training data')
plt.show()

# Predict the test data output
y_test_pred = linear_regressor.predict(X_test)
plt.figure()
plt.scatter(X_test, y_test, color='green')
plt.plot(X_test, y_test_pred, color='black', linewidth=4)
plt.title('Test data')
plt.xticks(())
plt.yticks(())
plt.show()

# Measure performance
data.shape
data.dtypes
data.describe()
data.info()

# find rows where the number of returns is 0 and drop these rows
data[data['N1'] == 0].count()
data.drop(data[data.N1 == 0].index, inplace=True)
data.shape
data[data.N1 == 0]

# create refund column
data['refund'] = data['A11902'] / data['N11902']

# graphs
plt.scatter(data['A85300'], data['refund'])
plt.show()

plt.scatter(data['N11901'], data['refund'])
plt.show()

'''# categorical variables without zipcode
cat_var = df[['STATE', 'agi_stub']]
cat_var = cat_var.astype(str)
dummies = pd.get_dummies(cat_var)
dummies.columns
dummies.shape'''

# create percentage columns
df = data.copy()
df['single'] = df['mars1'] / df['N1']
df['joint'] = df['MARS2'] / df['N1']
# Combine
lat = np.hstack((np.array(y), np.array(y1) + lat_eur, np.array(y2) + lat_us))
lon = np.hstack((np.array(x), np.array(x1) + lon_eur, np.array(x2) + lon_us))

plt.figure(figsize=(10, 3.5))
labels = ["a", "b", "c"]
nbins = 12
for _i in range(3):
    ax = plt.subplot(131 + _i, projection=AzimuthalEquidistant(0, 0))
    ax.set_global()
    lwsspy.maps.plot_map()
    weights = lwsspy.geo.azi_weights(0, 0, lat, lon, nbins=nbins * (_i + 1))
    plt.scatter(lon, lat, c=weights, cmap='rainbow',
                norm=LogNorm(vmin=min(weights), vmax=max(weights)),
                transform=PlateCarree(), edgecolors='k', linewidth=0.25)
    formatter = ticker.FuncFormatter(lambda y, _: '{:g}'.format(y))
    cb = lwsspy.plot.nice_colorbar(orientation='horizontal',
                                   ticks=[0.3, 0.4, 0.6, 1.0, 1.5, 2.0, 3.0],
                                   # np.arange(0.3, 3.0, 0.3),
                                   format=formatter, aspect=40, pad=0.05)
    lwsspy.plot.plot_label(ax, f"{labels[_i]})", location=6, box=False, dist=0.0)
    cb.set_label("Weights")
    plt.title(f"$N_b = {nbins * (_i + 1)}$", fontdict=dict(fontsize='small'))
    lwsspy.plot.plot_label(
        ax,
        f"min: {np.min(weights):3.2f}\n"
        f"max: {np.max(weights):3.2f}\n"
        f"median: {np.median(weights):3.2f}\n",
        location=3, box=False, dist=-0.1, fontdict=dict(fontsize='small'))
for cluster in clusters:
    x = [point[0] for point in cluster]
    y = [point[1] for point in cluster]
    z = [point[2] for point in cluster]
    if col != len(clusters):
        plt.plot(x, y, z, marker="o", c=colors[col], ms=3)
    else:
        plt.plot(x, y, z, marker="o", c="w", alpha=0.3, ms=2)
    col += 1
print(time.perf_counter() - start)  # time.clock() was removed in Python 3.8

start = time.perf_counter()
fig = plt.figure()
ax = plt.gca()
ax.set_facecolor((0, 0, 0))  # set_axis_bgcolor() was removed from matplotlib
col = 1
for cluster in clusters:
    x = [point[0] for point in cluster]
    y = [point[1] for point in cluster]
    if col != len(clusters):
        plt.scatter(x, y, s=10, c=colors[col])
    else:
        plt.scatter(x, y, s=5, c="w", alpha=0.15)
    col += 1
print(time.perf_counter() - start)
# --------------
import matplotlib.pyplot as plt

# code starts here
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,
                                                    random_state=6)
cols = X_train.columns

fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 10))
for i in range(3):
    for j in range(3):
        col = cols[i * 3 + j]
        axes[i, j].scatter(X_train[col], y_train)  # draw into each subplot, not the last active axes
# code ends here


# --------------
# Code starts here
corr = X_train.corr()
corr

del X_train['play_star_rating']
del X_train['val_star_rating']
del X_test['play_star_rating']
del X_test['val_star_rating']
# Code ends here
import matplotlib.pyplot as plt
from sklearn import datasets


def pca(data, base_num=1):
    n, d = data.shape  # n: number of samples, d: dimensionality (requires n > d)
    data_mean = data.mean(0)
    data_norm = data - data_mean
    cov = np.dot(data_norm.T, data_norm) / float(n)
    w, vl = spla.eig(cov)
    index = w.argsort()[-min(base_num, d):]
    t = vl[:, index[::-1]].T
    return t


if __name__ == "__main__":
    data = np.random.multivariate_normal([0, 0], [[1, 2], [3, 4]], 100)
    iris = datasets.load_iris()
    print(iris.data[:, :4])
    data = iris.data[:, :2]
    base = pca(data)
    # data = np.dot(data, base)
    # visualization starts here
    plt.scatter(data[:, 0], data[:, 1])
    leng = (data.max() - data.min()) / 2
    pc_line = np.array([-leng, leng]) * (base[0][1] / base[0][0])
    plt.plot([-leng, leng], pc_line, "r")
    # plt.show()
def visualize_points(x, y, alpha=1., colors=('red', 'green')):
    c = [colors[i] for i in y]
    plt.scatter(x[:, 0], x[:, 1], color=c, alpha=alpha)
    plt.grid(True)
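# Usage sketch for visualize_points with two Gaussian blobs (illustrative;
# assumes numpy as np and matplotlib.pyplot as plt).
import numpy as np

pts = np.vstack([np.random.randn(50, 2), np.random.randn(50, 2) + 3])
lbls = np.array([0] * 50 + [1] * 50)
visualize_points(pts, lbls, alpha=0.7)
plt.show()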
    def finalize(self, population, engine):
        best_indv = population.best_indv(engine.fitness)
        x = best_indv.solution
        y = engine.ori_fmax
        msg = 'Optimal solution: ({}, {})'.format(x, y)
        self.logger.info(msg)


if '__main__' == __name__:
    # Run the GA engine and print every generation
    engine.run(ng=500)
    best_indv = engine.population.best_indv(engine.fitness)
    print('Max({0},{1})'.format(best_indv.solution[0], engine.fitness(best_indv)))

    x = np.linspace(0, 15, 10000)
    y = [-3 * (i - 30) ** 2 * math.sin(i) for i in x]
    plt.plot(x, y)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('function')
    plt.axis([-1, 16, -3000, 3000])
    plt.scatter(best_indv.solution[0], engine.fitness(best_indv), color='r')
    a = round(best_indv.solution[0], 4)
    b = round(engine.fitness(best_indv), 4)
    plt.annotate('Max(' + str(a) + ',' + str(b) + ')',
                 xy=(best_indv.solution[0], engine.fitness(best_indv)),
                 xytext=(7, 2500),
                 arrowprops=dict(facecolor='black', shrink=0.1, width=2))
    plt.show()
def show():
    d = pd.read_excel("./one_hot.xlsx", sheet_name='123')  # `sheetname` is the old pandas spelling
    d = np.array(d)
    scaler = preprocessing.StandardScaler().fit(d)
    d = scaler.transform(d)

    # Visualize the clustering result below.
    # PCA(n_components=2) projects the feature vectors down to two
    # dimensions so they can be drawn in the plane.
    pca_model = PCA(n_components=2)
    # Transform d into its reduced form and store it in reduced_data
    reduced_data = pca_model.fit_transform(d)
    iters, centers, assignments = k_means_cluster(reduced_data, 8)
    print(centers, assignments)
    assignments1 = pd.DataFrame(assignments)
    print(assignments1)
    assignments1.to_excel("./one_hot1.xlsx", sheet_name="234", index=False, header=True)

    # h is the mesh spacing
    h = .2
    # compute x_min/x_max and y_min/y_max, mainly to fix the axis ranges
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    xx_pt = list(xx.ravel())
    yy_pt = list(yy.ravel())
    xy_pts = np.array([[x, y] for x, y in zip(xx_pt, yy_pt)])

    mytree = cKDTree(centers)
    dist, indexes = mytree.query(xy_pts)
    indexes = indexes.reshape(xx.shape)

    # Draw the picture with matplotlib
    plt.clf()
    plt.imshow(indexes, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired, aspect='auto', origin='lower')

    symbols = ['o', '^', 'D', 's', '.', ',', '<', '*']
    # sym = [symbols[i] for i in assignments]
    for i in range(8):
        x = []
        y = []
        for j in range(assignments.shape[0]):
            if assignments[j] == i:
                x.append(reduced_data[j][0])
                y.append(reduced_data[j][1])  # the original indexed reduced_data[i][1] by mistake
        plt.plot(x, y, symbols[i], markersize=10)
    """
    temp_group = reduced_data[(i * 50):(50) * (i + 1)]
    plt.plot(temp_group[:, 0], temp_group[:, 1], symbols[i], markersize=10)
    """
    plt.scatter(centers[:, 0], centers[:, 1], marker='x',
                color='black', s=169, linewidths=3, zorder=10)
    plt.title('K-means clustering')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.savefig('./whfypca.png')
    plt.show()
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained for each component
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))

plt.figure()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2

for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=.8, lw=lw,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')

plt.figure()
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r2[y == i, 0], X_r2[y == i, 1], alpha=.8, color=color,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('LDA of IRIS dataset')

# plt.show()

import numpy as np
import matplotlib.pyplot as plt

from sklearn.decomposition import PCA, KernelPCA
#!/usr/bin/env python
# -*- noplot -*-
"""
This example demonstrates how to set hyperlinks on various kinds of elements.

This currently only works with the SVG backend.
"""
import numpy as np
import matplotlib.cm as cm
import matplotlib.mlab as mlab  # note: mlab.bivariate_normal was removed in matplotlib 3.1
import matplotlib.pyplot as plt

f = plt.figure()
s = plt.scatter([1, 2, 3], [4, 5, 6])
s.set_urls(['http://www.bbc.co.uk/news', 'http://www.google.com', None])
f.canvas.print_figure('scatter.svg')

f = plt.figure()
delta = 0.025
x = y = np.arange(-3.0, 3.0, delta)
X, Y = np.meshgrid(x, y)
Z1 = mlab.bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = mlab.bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
Z = Z2 - Z1  # difference of Gaussians

im = plt.imshow(Z, interpolation='bilinear', cmap=cm.gray,
                origin='lower', extent=[-3, 3, -3, 3])
#!/usr/bin/python3
import numpy as np
from matplotlib.pyplot import scatter
from matplotlib.pyplot import show

a = np.loadtxt('magic04.txt', delimiter=',',
               usecols=(0, 1, 2, 3, 4, 5, 6, 7, 8, 9))
b = a[:, 0]  # extract attribute one
c = a[:, 1]  # extract attribute two
covariance = np.cov(b, c)
corrc = np.corrcoef(b, c)  # compute the correlation-coefficient matrix
print(corrc)
scatter(b, c, 20, b, ".")  # draw the scatter plot
show()
from sklearn import linear_model
reg = linear_model.LinearRegression()
reg.fit(feature_train, target_train)
# print("Slope:", reg.coef_)
# print("Intercept:", reg.intercept_)
pred = reg.predict(feature_test)

# Explained variance score: 1 is perfect prediction
from sklearn.metrics import r2_score
print('Variance score: %.3f' % r2_score(target_test, pred))

### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
    plt.scatter(feature, target, color=test_color)
for feature, target in zip(feature_train, target_train):
    plt.scatter(feature, target, color=train_color)

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
plt.scatter(feature_test[0], target_test[0], color=train_color, label="train")

### draw the regression line, once it's coded
try:
    plt.plot(feature_test, reg.predict(feature_test))
except NameError:
    pass
reg.fit(feature_test, target_test)
plt.plot(feature_train, reg.predict(feature_train), color="g")