def plot_models(x, y, models, fname, mx=None, ymax=None, xmin=None):
    """Scatter the traffic samples and overlay each fitted model.

    Parameters
    ----------
    x, y :
        Sample positions and values for the scatter plot. ``x[-1]`` is the
        upper end of the default evaluation grid.
    models :
        Sequence of callables (or a falsy value to draw no curves). Each is
        evaluated as ``model(grid)`` and labelled in the legend through its
        ``order`` attribute.
    fname :
        Not read anywhere in this function.
        NOTE(review): presumably the target file name for saving the figure;
        confirm whether a save call is expected here.
    mx :
        Grid on which the models are evaluated; defaults to 1000 points
        between 0 and ``x[-1]``.
    ymax, xmin :
        Optional axis limits, applied only when truthy.
    """
    plt.clf()
    plt.scatter(x, y, s=10)
    plt.title("Web traffic over the last month")
    plt.xlabel("Time")
    plt.ylabel("Hits/hour")

    # One tick per week, positioned in hours.
    week_numbers = range(10)
    tick_positions = [w * 7 * 24 for w in week_numbers]
    tick_labels = ["week %i" % w for w in week_numbers]
    plt.xticks(tick_positions, tick_labels)

    if models:
        grid = sp.linspace(0, x[-1], 1000) if mx is None else mx
        # linestyles / colors come from the enclosing module.
        for curve, line_style, line_color in zip(models, linestyles, colors):
            plt.plot(grid, curve(grid), linestyle=line_style, linewidth=2,
                     c=line_color)
        legend_entries = ["d=%i" % m.order for m in models]
        plt.legend(legend_entries, loc="upper left")

    plt.autoscale(tight=True)
    plt.ylim(ymin=0)
    if ymax:
        plt.ylim(ymax=ymax)
    if xmin:
        plt.xlim(xmin=xmin)
    plt.grid(True, linestyle="-", color="0.75")
def show_plot(X, y, n_neighbors=10, h=0.2):
    """Plot k-nearest-neighbour decision regions for both weighting schemes.

    One figure is produced for ``weights='uniform'`` and one for
    ``weights='distance'``. Each shows the predicted class over a mesh
    covering the first two columns of ``X``, with the training points on top.

    Parameters
    ----------
    X :
        Feature array; only columns 0 and 1 are used for the mesh and scatter.
    y :
        Class labels, used for fitting and for colouring the points.
    n_neighbors :
        Number of neighbours passed to the classifier.
    h :
        Step size of the mesh.
    """
    # Create color maps
    cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF', '#FFAAAA',
                                 '#AAFFAA', '#AAAAFF', '#FFAAAA', '#AAFFAA',
                                 '#AAAAFF', '#AAAAFF'])
    cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF', '#FF0000',
                                '#FF0000', '#FF0000', '#FF0000', '#FF0000',
                                '#FF0000', '#FF0000'])

    # The mesh [x_min, x_max] x [y_min, y_max] depends only on X and h, so it
    # is built once instead of once per weighting scheme.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    mesh_points = np.c_[xx.ravel(), yy.ravel()]

    for weights in ['uniform', 'distance']:
        # Create the classifier and fit the data. The fit call was garbled in
        # the original text and is restored here.
        clf = neighbors.KNeighborsClassifier(n_neighbors, weights=weights)
        clf.fit(X, y)
        clf.n_neighbors = n_neighbors

        # Assign a class to every mesh point and shape it like the mesh.
        Z = clf.predict(mesh_points).reshape(xx.shape)

        plt.figure()
        plt.pcolormesh(xx, yy, Z, cmap=cmap_light)

        # Plot also the training points
        plt.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold)
        plt.xlim(xx.min(), xx.max())
        plt.ylim(yy.min(), yy.max())
        plt.title("3-Class classification (k = %i, weights = '%s')"
                  % (n_neighbors, weights))
def work(self, **kwargs):
    """Compare an equal-count histogram of LGMM1 samples with LGMM1_lpdf.

    ``kwargs`` are written straight into the instance ``__dict__`` before
    anything else runs, so they can override any attribute read below
    (``rng``, ``n_samples``, ``samples_per_bin``, ``LGMM1_kwargs``, ``show``).

    Samples are sorted and every ``samples_per_bin``-th one is taken as a bin
    edge. The empirical density per bin is ``1 / width / n_bins`` and is
    compared with ``exp(LGMM1_lpdf(bin centre))`` by squared error.
    """
    self.__dict__.update(kwargs)
    self.worked = True
    samples = LGMM1(rng=self.rng, size=(self.n_samples,),
                    **self.LGMM1_kwargs)
    samples = np.sort(samples)
    edges = samples[::self.samples_per_bin]
    centers = .5 * edges[:-1] + .5 * edges[1:]
    print(edges)
    pdf = np.exp(LGMM1_lpdf(centers, **self.LGMM1_kwargs))
    dx = edges[1:] - edges[:-1]
    y = 1 / dx / len(dx)

    # NOTE(review): the guard expression was lost from the original text
    # (only a bare "if" / "if not" survived). ``self.show`` and the
    # ``plt.show()`` call are a reconstruction - confirm the attribute name.
    if self.show:
        plt.scatter(centers, y)
        plt.plot(centers, pdf)
        plt.show()

    err = (pdf - y) ** 2
    print(np.max(err))
    print(np.mean(err))
    print(np.median(err))
    if not self.show:
        assert np.max(err) < .1
        assert np.mean(err) < .01
        assert np.median(err) < .01
def altitude():
    """Plot the antenna altitude of every GGA sentence found in nmea.txt.

    ``nmea.txt`` is first copied to ``temp.txt`` and only the copy is parsed,
    so the source log is never touched. For every line longer than 50
    characters whose fifth character is ``'G'``, the sentence is parsed, the
    module-level counter ``i`` is incremented, and one red point
    ``(i, altitude)`` is added to the current plot.

    Side effects: overwrites ``temp.txt``; updates the globals ``alt`` (last
    parsed altitude) and ``i`` (reset to 0 when parsing ends); prints the
    counter and altitude for every plotted sentence.
    """
    global alt, i

    # copyfile creates or overwrites temp.txt by itself, so the original
    # open-for-write / truncate / close dance around it is not needed.
    shutil.copyfile('nmea.txt', 'temp.txt')

    try:
        with open('temp.txt', 'r') as sentences:
            for line in sentences:
                # Length is checked first so blank or short lines cannot
                # raise IndexError on line[4]. A sentence only gets this
                # long once there is a lock and it is filled with data.
                if len(line) > 50 and line[4] == 'G':  # fifth char of $GPGGA
                    gpgga = nmea.GPGGA()
                    gpgga.parse(line)
                    alt = gpgga.antenna_altitude
                    i += 1  # increment the counter
                    print(i)
                    print(alt)
                    plt.scatter(x=[i], y=[float(alt)], s=1, c='r')
    finally:
        i = 0

    # axis is autoscaled
    plt.ylabel('meters')
    plt.xlabel('counts')
    plt.title('ALTITUDE')
def plot_2d_simple(data, y=None):
    """Scatter the first two columns of ``data``, optionally coloured by label.

    Parameters
    ----------
    data :
        2-D array; column 0 is plotted on x and column 1 on y.
    y :
        Optional array of labels (must support ``astype(int)``). Label ``k``
        is drawn with ``collist[(k - 1) % len(collist)]``, so labels beyond
        the palette length wrap around instead of raising IndexError.
    """
    # "is None" rather than "== None": comparing an array with == is
    # element-wise and cannot be used as a condition.
    if y is None:
        plt.scatter(data[:, 0], data[:, 1], s=50)
        return

    palette_size = len(collist)
    # The original wrote "label - 1 % len(collist)", which parses as
    # "label - (1 % len(collist))" and therefore never wrapped.
    Ycol = [collist[(label - 1) % palette_size] for label in y.astype(int)]
    plt.scatter(data[:, 0], data[:, 1], c=Ycol, s=40)
def scatter_time_vs_s(time, norm, point_labels, title):
    """Scatter running time against norm discrepancy, one series per key.

    Parameters
    ----------
    time, norm :
        Mappings with the same keys; ``time[k]`` and ``norm[k]`` hold the x
        and y values of series ``k``.
    point_labels :
        Labels annotated next to the points of every series, in order.
    title :
        Figure super-title.

    The ``"fbpca"`` series is drawn with red triangles and its annotations
    placed above the points; every other series uses blue circles with
    annotations below.
    """
    plt.figure()
    size = 100
    for l in sorted(norm.keys()):
        # String equality, not identity: "is not" against a literal depends
        # on interning and is flagged as a SyntaxWarning by newer Pythons.
        if l != "fbpca":
            marker, color, text_offset = 'o', 'b', (0, -80)
        else:
            marker, color, text_offset = '^', 'red', (0, 30)

        plt.scatter(time[l], norm[l], label=l, marker=marker, c=color,
                    s=size)
        for label, x, y in zip(point_labels, list(time[l]), list(norm[l])):
            plt.annotate(label, xy=(x, y), xytext=text_offset,
                         textcoords='offset points', ha='right',
                         arrowprops=dict(arrowstyle="->",
                                         connectionstyle="arc3"),
                         va='bottom', size=11, rotation=90)

    plt.legend(loc="best")
    plt.suptitle(title)
    plt.ylabel("norm discrepancy")
    plt.xlabel("running time [s]")
def work(self):
    """Compare an equal-count histogram of GMM1 samples with GMM1_lpdf.

    The mixture parameters are read from the instance (``weights``, ``mus``,
    ``sigmas``, ``low``, ``high``, ``q``). Samples are sorted and every
    ``samples_per_bin``-th one is taken as a bin edge. The empirical density
    per bin is ``1 / width / n_bins`` and is compared with
    ``exp(GMM1_lpdf(left bin edge))`` by squared error.
    """
    self.worked = True
    kwargs = dict(
        weights=self.weights,
        mus=self.mus,
        sigmas=self.sigmas,
        low=self.low,
        high=self.high,
        q=self.q,
    )
    samples = GMM1(rng=self.rng, size=(self.n_samples,), **kwargs)
    samples = np.sort(samples)
    edges = samples[::self.samples_per_bin]
    pdf = np.exp(GMM1_lpdf(edges[:-1], **kwargs))
    dx = edges[1:] - edges[:-1]
    y = 1 / dx / len(dx)

    # NOTE(review): the guard expression was lost from the original text
    # (only a bare "if" / "if not" survived). ``self.show`` and the
    # ``plt.show()`` call are a reconstruction - confirm the attribute name.
    if self.show:
        plt.scatter(edges[:-1], y)
        plt.plot(edges[:-1], pdf)
        plt.show()

    err = (pdf - y) ** 2
    print(np.max(err))
    print(np.mean(err))
    print(np.median(err))
    if not self.show:
        assert np.max(err) < .1
        assert np.mean(err) < .01
        assert np.median(err) < .01
def scatter(x, y, equal=False, xlabel=None, ylabel=None, xinvert=False,
            yinvert=False):
    """
    Plot a scatter with simple formatting options

    Parameters
    ----------
    x, y :
        Arrays of point coordinates (``.min()`` / ``.max()`` are called on
        both).
    equal :
        When true, use an equal aspect ratio, identical limits on both axes
        and a grey reference line from the origin along y = x.
    xlabel, ylabel :
        Optional axis labels.
    xinvert, yinvert :
        Flip the corresponding axis.
    """
    plt.scatter(x, y, 200, color=[0.3, 0.3, 0.3], edgecolors="white",
                linewidth=1, zorder=2)
    sns.despine()
    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)

    if equal:
        # gca() returns the axes just drawn on; plt.axes() would add a new,
        # empty Axes in current Matplotlib and set the aspect on that one.
        plt.gca().set_aspect("equal")
        bmin = min([x.min(), y.min()])
        bmax = max([x.max(), y.max()])
        plt.plot([0, bmax], [0, bmax], color=[0.6, 0.6, 0.6], zorder=1)
        # 5 % padding on both sides, shared by both axes.
        rng = abs(bmax - bmin)
        plt.xlim([bmin - rng * 0.05, bmax + rng * 0.05])
        plt.ylim([bmin - rng * 0.05, bmax + rng * 0.05])
    else:
        # 5 % padding per axis, computed from that axis' own range.
        xrng = abs(x.max() - x.min())
        yrng = abs(y.max() - y.min())
        plt.xlim([x.min() - xrng * 0.05, x.max() + xrng * 0.05])
        plt.ylim([y.min() - yrng * 0.05, y.max() + yrng * 0.05])

    if xinvert:
        plt.gca().invert_xaxis()
    if yinvert:
        plt.gca().invert_yaxis()
def draw(data, classes, model, resolution=100):
    """Draw the decision regions of ``model`` with the data and model points.

    Parameters
    ----------
    data :
        2-D array; columns 0 and 1 are the plotted coordinates.
    classes :
        Per-sample values used to colour the data points.
    model :
        Passed to ``predict`` for every mesh point; its columns 0 and 1 are
        also scattered as 15 large markers coloured in three groups of five.
    resolution :
        Number of mesh steps along each axis.
    """
    # The colormap lookup was garbled in the original text (only "'Paired')"
    # survived); plt.get_cmap resolves the same named colormap.
    mycm = plt.get_cmap('Paired')

    one_min, one_max = data[:, 0].min() - 0.1, data[:, 0].max() + 0.1
    two_min, two_max = data[:, 1].min() - 0.1, data[:, 1].max() + 0.1
    xx1, xx2 = np.meshgrid(
        np.arange(one_min, one_max, (one_max - one_min) / resolution),
        np.arange(two_min, two_max, (two_max - two_min) / resolution))
    inputs = np.c_[xx1.ravel(), xx2.ravel()]

    # First element of each prediction, reshaped back onto the mesh.
    z = [predict(model, point)[0] for point in inputs]
    result = np.array(z).reshape(xx1.shape)

    plt.contourf(xx1, xx2, result, cmap=mycm)
    plt.scatter(data[:, 0], data[:, 1], s=50, c=classes, cmap=mycm)

    # Colour values for the 15 model points: five 0s, five 1s, five 2s.
    t = np.repeat([0.0, 1.0, 2.0], 5)
    plt.scatter(model[:, 0], model[:, 1], s=150, c=t, cmap=mycm)
    plt.xlim([0, 10])
    plt.ylim([0, 10])
def influence_plot(X, y_true, y_pred, **kwargs):
    """Draw an influence plot for predicted probabilities.

    Every observation is placed at its predicted probability (x axis)
    against its ``case_deltas`` value (y axis), with the marker area set to
    800 times its ``pregibon_dbetas`` value. The x axis is fixed to [0, 1];
    the y limits are left as autoscaled.

    Parameters
    ----------
    X : array
        Design matrix.
    y_true : array_like
        Observed labels, either 0 or 1.
    y_pred : array_like
        Predicted probabilities, floats on [0, 1].
    **kwargs :
        Forwarded to ``plt.scatter``.
    """
    residuals = pearson_residuals(y_true, y_pred)
    hat_values = pregibon_leverages(X, y_pred)
    chi2_change = case_deltas(residuals, hat_values)
    marker_area = pregibon_dbetas(residuals, hat_values) * 800

    plt.scatter(y_pred, chi2_change, s=marker_area, **kwargs)

    # Keep the autoscaled y range, pin x to the probability interval.
    y_low, y_high = plt.axis()[2:]
    plt.axis((0, 1, y_low, y_high))

    plt.xlabel('Predicted Probability')
    plt.ylabel(r'$\Delta \chi^2$')
    plt.tight_layout()
def tuning(x, y, err=None, smooth=None, ylabel=None, pal=None):
    """
    Plot a tuning curve

    Parameters
    ----------
    x, y :
        Positions and responses of the scattered points.
    err :
        Optional per-point error; drawn as error bars. NaN entries count as
        zero when the y range is computed. The caller's array is not
        modified.
    smooth :
        When given, ``smoothfit(x, y, smooth)`` is drawn as a black curve
        behind the points.
    ylabel :
        Label of the y axis.
    pal :
        Colours of the points; defaults to a reversed slice of the seaborn
        "husl" palette with one colour per point.

    Returns
    -------
    The upper value used to scale the y axis: the largest of the smoothed
    curve and ``y`` (plus ``err`` when given), ignoring NaNs.
    """
    if smooth is not None:
        xs, ys = smoothfit(x, y, smooth)
        plt.plot(xs, ys, linewidth=4, color="black", zorder=1)
    else:
        ys = asarray([0])

    if pal is None:
        pal = sns.color_palette("husl", n_colors=len(x) + 6)
        pal = pal[2 : 2 + len(x)][::-1]

    plt.scatter(x, y, s=300, linewidth=0, color=pal, zorder=2)
    if err is not None:
        plt.errorbar(x, y, yerr=err, linestyle="None", ecolor="black",
                     zorder=1)

    plt.xlabel("Wall distance (mm)")
    plt.ylabel(ylabel)
    plt.xlim([-2.5, 32.5])

    # The original aliased ``err`` and zeroed its NaNs in place, which
    # crashed for err=None and silently modified the caller's array.
    if err is None:
        upper = asarray(y)
    else:
        errTmp = asarray(err, dtype=float).copy()
        errTmp[isnan(errTmp)] = 0
        upper = y + errTmp

    rng = max([nanmax(ys), nanmax(upper)])
    plt.ylim([0 - rng * 0.1, rng + rng * 0.1])
    plt.yticks(linspace(0, rng, 3))
    plt.xticks(range(0, 40, 10))
    sns.despine()
    return rng
def regress_show4(yEv, yEv_calc, disp=True, graph=True, plt_title=None,
                  ms_sz=None):
    """Report regression accuracy and optionally draw a parity plot.

    ``estimate_accuracy4`` is always called with ``disp``. When ``graph`` is
    true, a new figure shows predictions against experimental values
    together with a pink diagonal spanning the combined axis limits.

    Parameters
    ----------
    yEv, yEv_calc :
        Experimental and predicted values; both must provide ``tolist()``
        and ``yEv`` must provide ``shape``.
    disp :
        Forwarded to ``estimate_accuracy4``.
    graph :
        Whether to draw the figure.
    plt_title :
        ``None`` puts the four accuracy figures in the title, an empty
        string leaves the plot untitled, anything else is used verbatim.
    ms_sz :
        Marker size; defaults to ``6000 / n_samples`` clamped to [3, 8].

    Nothing is returned.
    """
    r_sqr, RMSE, MAE, DAE = estimate_accuracy4(yEv, yEv_calc, disp=disp)

    if not graph:
        return

    plt.figure()
    if ms_sz is None:
        ms_sz = max(min(6000 / yEv.shape[0], 8), 3)
    plt.scatter(yEv.tolist(), yEv_calc.tolist(), s=ms_sz)

    # Diagonal from the smallest to the largest limit of either axis.
    ax = plt.gca()
    lims = [
        np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()]),
    ]
    ax.plot(lims, lims, '-', color='pink')

    plt.xlabel('Experiment')
    plt.ylabel('Prediction')

    if plt_title is None:
        summary = '$r^2$={0:.1e}, RMSE={1:.1e}, MAE={2:.1e}, MedAE={3:.1e}'
        plt.title(summary.format(r_sqr, RMSE, MAE, DAE))
    elif plt_title != "":
        plt.title(plt_title)
def plot(i, pcanc, lr, pp, labelFlag, Y):
    """Scatter a 2-D embedding, optionally annotating every point.

    Parameters
    ----------
    i :
        A single-digit value selects ``plt.figure(i)``; anything longer is
        passed to ``plt.subplot(i)``.
    pcanc :
        Number of PCA components; 0 selects the plain t-SNE title.
    lr, pp :
        Learning rate and perplexity, shown in the title.
    labelFlag :
        When equal to 1, every point is annotated with its entry of the
        module-level ``y``.
    Y :
        2-D array; columns 0 and 1 are the plotted coordinates.

    Point colours (``colors``), labels (``y``) and the annotation font
    (``font``) are read from the enclosing module.
    """
    if len(str(i)) == 1:
        plt.figure(i)
    else:
        plt.subplot(i)

    if pcanc == 0:
        plt.title(' learning_rate: ' + str(lr) + ' perplexity: ' + str(pp))
        print("Plotting tSNE")
    else:
        plt.title('PCA-n_components: ' + str(pcanc) + ' learning_rate: '
                  + str(lr) + ' perplexity: ' + str(pp))
        print("Plotting PCA-tSNE")

    plt.scatter(Y[:, 0], Y[:, 1], c=colors)

    if labelFlag == 1:
        for label, cx, cy in zip(y, Y[:, 0], Y[:, 1]):
            # Only byte strings need decoding; text labels pass through.
            text = label.decode('utf-8') if isinstance(label, bytes) else label
            plt.annotate(
                text,
                xy=(cx, cy),
                xytext=(-10, 10),
                fontproperties=font,
                textcoords='offset points',
                ha='right',
                va='bottom',
                bbox=dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.9))

    # ``ax`` was never bound inside this function; the tick formatters are
    # applied to the axes that were just drawn on.
    ax = plt.gca()
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    plt.axis('tight')
    print("Done.")
def create_plots(iterations, data, M, step_size):
    """Train the network and save three diagnostic figures under images/.

    Figure 1 compares the network output with sin(x)/x on [-10, 10),
    figure 2 compares it with the training data, and figure 3 shows the
    training error on a logarithmic axis. File names contain
    ``iterations``, ``M`` and the module-level ``img_format``.

    Parameters
    ----------
    iterations, M, step_size :
        Forwarded to ``steepest_decent_training``; ``iterations`` and ``M``
        also appear in labels and file names.
    data :
        2-D array; column 0 holds the inputs and column 1 the targets.
    """
    NN_b, NN_a, E = steepest_decent_training(iterations, data, M, step_size)
    X = data[:, 0]
    t = data[:, 1]
    X_cont = np.arange(-10, 10, 0.1)
    # The network takes a column vector [x, 1]; element 0 of the result is
    # plotted.
    Y = [NN_a(np.array([[x], [1]]))[0] for x in X_cont]

    # np.sinc(z) is sin(pi*z)/(pi*z) with the value 1 at z == 0, so
    # np.sinc(x/pi) equals sin(x)/x without dividing by zero. The original
    # ``lambda(x): sin(x) / x`` was Python-2-only syntax and undefined at 0.
    target = np.sinc(X_cont / np.pi)

    plt.figure(1)
    plt.plot(X_cont, Y, label='Neural network \n (M = %d)' % M)
    plt.plot(X_cont, target, label='sinc(x)')
    plt.legend()
    plt.savefig('images/nn_vs_real_%d_%d.%s' % (iterations, M, img_format),
                format=img_format)

    plt.figure(2)
    plt.plot(X_cont, Y, label='Neural network \n (M = %d)' % M)
    plt.scatter(X, t, color='red', label='Training data')
    plt.legend()
    plt.savefig('images/nn_vs_training_%d_%d.%s'
                % (iterations, M, img_format), format=img_format)

    plt.figure(3)
    plt.plot(E, label='Error (M = %d)' % M)
    plt.yscale('log')
    plt.legend()
    plt.savefig('images/error_%d_%d.%s' % (iterations, M, img_format),
                format=img_format)
def plot_dpi_dpr_distribution(args, dpis, dprs, diagnoses):
    """Scatter DPR against DP estimates, coloured by diagnosis.

    Parameters
    ----------
    args :
        Object providing ``method``, ``biomarkers``, ``visits`` and
        ``plot_file``. With ``plot_file`` set the figure is saved there,
        otherwise it is shown.
    dpis, dprs :
        x and y values of the points.
    diagnoses :
        Numeric diagnosis per point; values in [0.25, 0.75] are collapsed
        to 0.5 before colouring.
    """
    print('{0} {1}'.format(log.INFO, 'Plotting estimate distributions...'))

    # Float dtype so that assigning 0.5 cannot be truncated for int input.
    diagnoses = np.array(diagnoses, dtype=float)
    diagnoses[(0.25 <= diagnoses) & (diagnoses <= 0.75)] = 0.5

    # Setup plot
    fig, ax = plt.subplots()
    pt.setup_axes(plt, ax)

    if args.biomarkers is None:
        biomarkers_str = args.method
    else:
        biomarkers_str = ', '.join(args.biomarkers)
    ax.set_title('DP estimation using {0} at {1}'.format(
        biomarkers_str, ', '.join(args.visits)))
    ax.set_xlabel('DP')
    ax.set_ylabel('DPR')

    plt.scatter(dpis, dprs, c=diagnoses, edgecolor='none', s=25.0,
                vmin=0.0, vmax=1.0, cmap=pt.progression_cmap, alpha=0.5)

    # Plot legend: one half-transparent swatch per diagnosis colour.
    # noinspection PyUnresolvedReferences
    rects = [mpl.patches.Rectangle((0, 0), 1, 1, fc=color + (0.5,),
                                   linewidth=0)
             for color in (pt.color_cn, pt.color_mci, pt.color_ad)]
    labels = ['CN', 'MCI', 'AD']
    legend = ax.legend(rects, labels, fontsize=10, ncol=len(rects),
                       loc='upper center', framealpha=0.9)
    legend.get_frame().set_edgecolor((0.6, 0.6, 0.6))

    # Draw or save the plot
    plt.tight_layout()
    if args.plot_file is not None:
        plt.savefig(args.plot_file, transparent=True)
    else:
        # NOTE(review): the draw call was lost from the original text (the
        # else branch only closed the figure); plt.show() is a
        # reconstruction based on the "Draw or save" comment - confirm.
        plt.show()
    # Close in both cases so saved figures are released as well.
    plt.close(fig)
def fig(data, target):
    """Scatter ``target`` against ``data`` in black with all ticks removed."""
    # FIXME (marker carried over from the original, which does not say what
    # needs fixing)
    plt.scatter(data, target, color='black')
    for clear_ticks in (plt.xticks, plt.yticks):
        clear_ticks(())
def plot_words(V, labels=None, color='b', mark='o', fa='bottom'):
    """Project ``V`` to two dimensions with ``tsne`` and scatter the result.

    Parameters
    ----------
    V :
        Input passed to ``tsne(V, 2)``; the result is indexed as a 2-D
        array whose columns 0 and 1 are the plotted coordinates.
    labels :
        Optional labels, one per point, drawn next to the points. Byte
        strings are decoded as UTF-8; text is used as is. With ``None``
        only the points are drawn (the original crashed in this case).
    color, mark :
        Colour and marker of the points.
    fa :
        Vertical alignment of the annotations.
    """
    W = tsne(V, 2)
    plt.scatter(W[:, 0], W[:, 1], c=color, marker=mark, s=50.0)

    if labels is None:
        return

    for label, x, y in zip(labels, W[:, 0], W[:, 1]):
        text = label.decode('utf8') if isinstance(label, bytes) else label
        plt.annotate(text, xy=(x, y), xytext=(-1, 1),
                     textcoords='offset points', ha='center', va=fa,
                     bbox=dict(boxstyle='round,pad=0.1', fc='white',
                               alpha=0))
def plot_obs_expc_new(obs, expc, expc_upper, expc_lower, analysis, log,
                      ax=None):
    """Modified version of the obs-expc plot suggested by R2.

    The points are separated by whether their confidence intervals are
    below, overlapping, or above the empirical value, and sorted by the
    standardized expected value within each group.

    Input:
    obs - list of observed values
    expc - list of mean simulated values for the corresponding observed
           values
    expc_upper - list of the 97.5% quantiles of the simulated values
    expc_lower - list of the 2.5% quantiles of the simulated values
    analysis - 'partition' selects the green colour; any other value
               (compositions) selects the purple one
    log - whether the y axis is to be transformed. If True, expc / obs is
          plotted on a log axis. If False, expc - obs is plotted.
    ax - axes to draw on; when None a new 3.5 x 3.5 inch figure is created

    Returns the axes that were drawn on.
    """
    obs, expc = list(obs), list(expc)
    expc_upper, expc_lower = list(expc_upper), list(expc_lower)

    # "is None" instead of truthiness: only a missing ax should create one.
    if ax is None:
        plt.figure(figsize=(3.5, 3.5))
        ax = plt.subplot(111)

    positions = range(len(obs))
    ind_above = [i for i in positions if expc_lower[i] > obs[i]]
    ind_below = [i for i in positions if expc_upper[i] < obs[i]]
    ind_overlap = [i for i in positions
                   if expc_lower[i] <= obs[i] <= expc_upper[i]]

    if log:
        expc_standardize = [expc[i] / obs[i] for i in positions]
        expc_upper_standardize = [expc_upper[i] / obs[i] for i in positions]
        expc_lower_standardize = [expc_lower[i] / obs[i] for i in positions]
        # Zeros cannot be shown on a log axis, so they are ignored here.
        axis_min = 0.9 * min(v for v in expc_lower_standardize if v != 0)
        axis_max = 1.5 * max(expc_upper_standardize)
    else:
        expc_standardize = [expc[i] - obs[i] for i in positions]
        expc_upper_standardize = [expc_upper[i] - obs[i] for i in positions]
        expc_lower_standardize = [expc_lower[i] - obs[i] for i in positions]
        axis_min = 1.1 * min(expc_lower_standardize)
        axis_max = 1.1 * max(expc_upper_standardize)

    col = '#228B22' if analysis == 'partition' else '#CD69C9'

    # Plot order: below, overlapping, above; each group sorted by value.
    ind_full = []
    for index in [ind_below, ind_overlap, ind_above]:
        ind_full.extend(sorted(index, key=lambda i: expc_standardize[i]))
    xaxis_max = len(ind_full)

    # Everything is drawn through ``ax`` so that a caller-supplied axes is
    # honoured even when it is not the current pyplot axes (the original
    # mixed plt.* drawing with ax.set_yscale).
    for i, ind in enumerate(ind_full):
        ax.plot([i, i],
                [expc_lower_standardize[ind], expc_upper_standardize[ind]],
                '-', c=col, linewidth=0.4)
    ax.scatter(list(range(len(ind_full))),
               [expc_standardize[i] for i in ind_full],
               c=col, edgecolors='none', s=8)

    # Reference line: ratio 1 on the log scale, difference 0 otherwise.
    if log:
        ax.plot([0, xaxis_max + 1], [1, 1], 'k-', linewidth=1.5)
        ax.set_yscale('log')
    else:
        ax.plot([0, xaxis_max + 1], [0, 0], 'k-', linewidth=1.5)

    # Dashed separators between the three groups.
    first_split = len(ind_below) - 0.5
    second_split = len(ind_below) + len(ind_overlap) - 0.5
    ax.plot([first_split, first_split], [axis_min, axis_max], 'k--')
    ax.plot([second_split, second_split], [axis_min, axis_max], 'k--')

    ax.set_xlim(0, xaxis_max)
    ax.set_ylim(axis_min, axis_max)
    # Booleans instead of 'on'/'off': current Matplotlib treats any
    # non-empty string as true, which would switch the x ticks back on.
    ax.tick_params(axis='y', which='major', labelsize=8, labelleft=True)
    ax.tick_params(axis='x', which='major', top=False, bottom=False,
                   labelbottom=False)
    return ax
def plot_decision_regions(X, y, classifier, test_idx=None, resolution=0.02):
    """Plot the decision surface of ``classifier`` with the samples on top.

    Parameters
    ----------
    X :
        2-D feature array; columns 0 and 1 span the plotted plane.
    y :
        Class label per sample. Up to five distinct classes are supported
        (one marker/colour pair each).
    classifier :
        Object whose ``predict`` accepts an (n, 2) array of mesh points.
    test_idx :
        Optional indices into ``X`` of samples to circle as the test set.
    resolution :
        Step size of the mesh.
    """
    # setup marker generator and color map
    markers = ('s', 'x', 'o', '^', 'v')
    colors = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(colors[:len(classes)])

    # plot the decision surface
    x1_min, x1_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    x2_min, x2_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(x1_min, x1_max, resolution),
                           np.arange(x2_min, x2_max, resolution))
    Z = classifier.predict(np.array([xx1.ravel(), xx2.ravel()]).T)
    Z = Z.reshape(xx1.shape)
    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # plot class samples
    for idx, cl in enumerate(classes):
        # ``color=`` with the named colour: passing a single RGBA tuple as
        # ``c`` is ambiguous with a sequence of values to be colour-mapped.
        plt.scatter(x=X[y == cl, 0], y=X[y == cl, 1], alpha=0.8,
                    color=colors[idx], marker=markers[idx], label=cl)

    # Highlight test samples. "is not None" because the truth value of an
    # index array is ambiguous.
    if test_idx is not None:
        X_test = X[test_idx, :]
        # Hollow black circles; the original c='' is not a valid colour in
        # current Matplotlib.
        plt.scatter(X_test[:, 0], X_test[:, 1], facecolors='none',
                    edgecolors='black', alpha=1.0, linewidths=1,
                    marker='o', s=55, label='test set')
def base(Point):
    """Draw the base symbol at ``Point``.

    A dotted horizontal segment of 100 points spanning 0.3 to either side
    of ``Point`` is drawn at the height of ``Point``. Three slanted strokes
    are then drawn with ``line``, from x offsets -0.2, 0 and +0.2 down to
    the lower left.

    Parameters
    ----------
    Point :
        Sequence whose first two entries are the x and y position.
    """
    # One scatter call for the whole segment instead of one call (and one
    # artist) per dot.
    xs = np.linspace(Point[0] - 0.3, Point[0] + 0.3, 100)
    # NOTE(review): ``label=str`` is kept from the original; unless ``str``
    # is rebound at module level this passes the builtin type as the label -
    # confirm what was intended.
    plt.scatter(xs, np.full_like(xs, Point[1]), color='k', s=0.1,
                marker='o', label=str)

    for t in np.linspace(Point[0] - 0.2, Point[0] + 0.2, 3):
        P1 = [t, Point[1]]
        P2 = [t - 0.15, Point[1] - 0.25]
        line(P1, P2, width=0.1)
def disp_external(init_r=None, extfile=None, rad_scale=1000., col_scale=1.,
                  cut_zero=True, control=None):
    '''displaying as scatter plot

    The module-level ``anal_data`` supplies the values: ``ext_disp`` scaled
    by ``rad_scale`` gives the marker sizes and ``ext_peri`` scaled by
    ``col_scale`` gives the colours. Positions are polar (radius, angle in
    degrees) and are converted to x/y before plotting.

    Parameters
    ----------
    init_r :
        When truthy, the radius becomes ``abs(radius) + init_r``.
    extfile :
        Optional text file loaded with ``numpy.loadtxt``; its first half of
        rows is stored as ``anal_data['ext_peri']`` and the remainder as
        ``anal_data['ext_disp']``.
    rad_scale, col_scale :
        Scale factors for marker size and colour value.
    cut_zero :
        When true, only points with a positive colour value are drawn.
    control :
        Optional mapping; if it contains ``'xstep'`` the positions are built
        from ``xstep`` / ``ystep`` instead of ``anal_data['gpos']``.

    Returns None. Nothing is drawn when neither ``control`` nor
    ``anal_data['gpos']`` provides positions.
    '''
    if extfile:
        from numpy import loadtxt
        anal_data['ext_disp'] = loadtxt(extfile)
        nrow = len(anal_data['ext_disp']) // 2
        anal_data['ext_peri'] = anal_data['ext_disp'][:nrow]
        anal_data['ext_disp'] = anal_data['ext_disp'][nrow:]
    # gpos not correct
    rad = anal_data['ext_disp'].ravel() * rad_scale
    col = anal_data['ext_peri'].ravel() * col_scale

    if control and 'xstep' in control:
        from numpy import arange
        # NOTE(review): ``ncol`` is not bound in this function and ``nrow``
        # only when ``extfile`` is given, so this branch relies on
        # module-level values. Two 1-D ranges of different length will also
        # not broadcast against each other below - confirm the intended
        # grid layout.
        ir = arange(0., ncol) * control['xstep']
        # The original assigned to ``ithet`` here, leaving ``itheta``
        # undefined.
        itheta = arange(0., nrow) * control['ystep']
    elif 'gpos' in anal_data:
        ir, itheta = array(anal_data['gpos']).astype('float').transpose()
    else:
        return

    itheta *= pi / 180.  # degrees to radians
    if init_r:
        # The original called abs() on the string 'ir'.
        ir = abs(ir) + init_r
    ix, iy = ir * cos(itheta), ir * sin(itheta)

    from matplotlib.pyplot import cla, scatter
    if cut_zero:
        sel = col > 0
        rad = rad[sel]
        col = col[sel]
        ix = ix[sel]
        iy = iy[sel]
    # ``hold=0`` no longer exists as a keyword; clearing the current axes
    # first reproduces its effect.
    cla()
    scatter(ix, iy, rad, col)
def plt_data():
    """Fit logistic regression to four 2-D points and plot its regions.

    The four corners of the unit square are labelled 1 except (0, 0), which
    is labelled 0. The predicted class is drawn over a fine mesh around the
    points with the training points on top.
    """
    t = [[0, 1], [1, 0], [1, 1], [0, 0]]
    t2 = [1, 1, 1, 0]
    X = np.array(t)
    Y = np.array(t2)
    h = .02  # step size in the mesh

    # Create the logistic-regression classifier and fit the data. The fit
    # call was garbled in the original text and is restored here.
    logreg = linear_model.LogisticRegression(C=1e5)
    logreg.fit(X, Y)

    # Plot the decision boundary. For that, we will assign a color to each
    # point in the mesh [x_min, x_max]x[y_min, y_max].
    x_min, x_max = X[:, 0].min() - .5, X[:, 0].max() + .5
    y_min, y_max = X[:, 1].min() - .5, X[:, 1].max() + .5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Z = logreg.predict(np.c_[xx.ravel(), yy.ravel()])

    # Put the result into a color plot
    Z = Z.reshape(xx.shape)
    plt.figure(1, figsize=(4, 3))
    # NOTE(review): the trailing colormap argument of the next two calls
    # was lost from the original text; plt.cm.Paired is a reconstruction.
    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.Paired)

    # Plot also the training points
    plt.scatter(X[:, 0], X[:, 1], c=Y, edgecolors='k', cmap=plt.cm.Paired)
    plt.xlabel('Sepal length')
    plt.ylabel('Sepal width')

    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
    plt.xticks(())
    plt.yticks(())
def plot_data(models, dataframe, flag=0):
    """need good and bad models, plots all the data

    Parameters
    ----------
    models :
        Indexable with 0 and 1; each entry is a mapping keyed by
        ``(module_category, component_category)`` pairs. Entry 0 is used
        when ``flag == 0`` and entry 1 when ``flag == 1``.
    dataframe :
        Table with the columns ``module_category``, ``component_category``,
        ``time`` and ``number_repair``.
    flag :
        0 or 1 (see ``models``); any other value plots nothing.

    Each key's rows are scattered in a random colour. For ``flag == 1`` the
    title says "too little data" when the model entry equals ``[1, 1, 1]``
    and "no curve fit" otherwise.
    """
    if flag == 0:
        selected = models[0]
    elif flag == 1:
        selected = models[1]
    else:
        return

    for key in selected:
        g = dataframe[(dataframe['module_category'] == key[0])
                      & (dataframe['component_category'] == key[1])]
        # One random RGB triple. The original passed a (3, 1) array as
        # ``c``, which is not a valid colour specification.
        plt.scatter(g['time'], g['number_repair'],
                    color=tuple(np.random.rand(3)))
        plt.xlabel("Time")
        plt.ylabel("number of repairs")

        if flag == 0:
            plt.title("%s, and %s" % (key[0], key[1]))
        elif models[1][key] == [1, 1, 1]:
            plt.title("too little data: %s, and %s" % (key[0], key[1]))
        else:
            plt.title("no curve fit: %s, and %s" % (key[0], key[1]))
def export(self, query, n_topics, n_words, title="PCA Export",
           fname="PCAExport"):
    """Project the topics onto two PCA components and save a labelled plot.

    Parameters
    ----------
    query :
        Container of words; the first word of each topic that is found in
        it is printed next to that topic's point.
    n_topics, n_words :
        Forwarded to ``topics_to_vectorspace``; ``n_topics`` also bounds the
        topics searched for a matching word.
    title :
        Plot title.
    fname :
        Target passed to ``pyplot.savefig``.
    """
    vec = DictVectorizer()
    rows = topics_to_vectorspace(self.model, n_topics, n_words)
    X = vec.fit_transform(rows)
    pca = skPCA(n_components=2)
    # NOTE(review): the projection expression was lost from the original
    # text ("X_pca =" was followed directly by "match = []"). A dense
    # fit_transform is a reconstruction - confirm.
    dense = X.toarray() if hasattr(X, "toarray") else X
    X_pca = pca.fit_transform(dense)

    vocabulary_size = len(self.dictionary.keys())
    match = []
    for i in range(n_topics):
        topic = [t[1] for t in self.model.show_topic(i, vocabulary_size)]
        # Always one entry per topic. The original appended only on a hit,
        # so a topic without a query word shifted the later labels and
        # could raise IndexError below.
        match.append(next((word for word in topic if word in query), ""))

    pyplot.figure()
    for i in range(X_pca.shape[0]):
        pyplot.scatter(X_pca[i, 0], X_pca[i, 1], alpha=.5)
        pyplot.text(X_pca[i, 0], X_pca[i, 1],
                    s=' '.join([str(i), match[i]]))
    pyplot.title(title)
    pyplot.savefig(fname)
    pyplot.close()
def scatter(frame, var1, var2, var3=None, reg=False, **args):
    """Scatter column ``var2`` against ``var1``, optionally grouped by ``var3``.

    Parameters
    ----------
    frame :
        A ``copper.Dataset`` (its ``frame`` attribute is used) or a table
        indexable by column name.
    var1, var2 :
        Column names for the x and y axis; also used as axis labels.
    var3 :
        Optional column name. One series per distinct value is drawn, each
        in its own colour from the jet colormap, together with a legend.
    reg :
        When true, a red least-squares line is added. With ``var3`` set the
        line is fitted to the last plotted group only, as before.
    **args :
        Forwarded to every ``plt.scatter`` call.
    """
    # The module name was lost from the original import statement.
    import matplotlib.cm as cm

    if isinstance(frame, copper.Dataset):
        frame = frame.frame

    x = frame[var1]
    y = frame[var2]
    if var3 is None:
        plt.scatter(x.values, y.values, **args)
    else:
        options = list(set(frame[var3]))
        for i, option in enumerate(options):
            f = frame[frame[var3] == option]
            x = f[var1]
            y = f[var2]
            # Float division: under Python 2 the integer quotient is always
            # 0 and every group would get the same colour.
            c = cm.jet(i / float(len(options)), 1)
            # Wrapped in a list so a single RGBA colour cannot be mistaken
            # for four values to be colour-mapped.
            plt.scatter(x, y, c=[c], label=option, **args)
        plt.legend()

    if reg:
        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        line = slope * x + intercept  # regression line
        plt.plot(x, line, c='r')

    plt.xlabel(var1)
    plt.ylabel(var2)
def kmeans(points, k):
    """Cluster ``points`` into ``k`` groups and plot the result.

    Starting from ``k`` randomly sampled points, labels and centroids are
    recomputed with ``getLabels`` / ``getCentroids`` until ``shouldStop``
    says so. The final centroids are drawn as red stars and the members of
    each cluster in their own colour. The iteration count is printed.

    Parameters
    ----------
    points :
        Sequence of 2-D points (it must be a sequence for
        ``random.sample``).
    k :
        Number of clusters.

    Returns
    -------
    The final centroids as returned by ``getCentroids``, or the initial
    sample if no iteration ran.
    """
    centroids = random.sample(points, k)
    allColors = list(colors.cnames.keys())

    # we need numpy arrays to do some cool linalg stuff; the conversion
    # only has to happen once, not on every iteration
    points = np.array(points)

    labels = {}  # stays empty if the loop never runs
    iterations = 0
    oldCentroids = None
    while not shouldStop(oldCentroids, centroids, iterations):
        oldCentroids = centroids
        iterations += 1
        centroids = np.array(centroids)
        labels = getLabels(points, centroids)
        centroids = getCentroids(points, labels, k)

    # plotting centroids as a red star
    x, y = zip(*centroids)
    plt.scatter(x, y, marker='*', color='r', s=80)

    # life is a coloring book so lets put colors on stuff
    for cluster_number, centroid in enumerate(labels):
        members = labels[centroid]
        if len(members) == 0:
            continue
        # 6 was chosen to avoid white, white is apparently some multiple
        # of 5. The modulo keeps the index inside the palette when there
        # are many clusters.
        color = allColors[(6 * cluster_number) % len(allColors)]
        # One scatter call per cluster instead of one per point.
        plt.scatter([p[0] for p in members], [p[1] for p in members],
                    color=color)

    print(iterations)
    return centroids
def plot_contour_with_labels(contour, frame_index=0):
    """
    Draw one frame of a contour with every point numbered.

    Parameters
    ----------
    contour :
        Array indexed as ``[point, coordinate, frame]``; coordinate 0 is
        plotted on x and coordinate 1 on y.
    frame_index :
        Frame to draw.

    The outline is a thick red line with a marker on each point. Each point
    is annotated with its index; the first half of the points get their
    label offset towards the lower right, the rest towards the upper left.
    """
    xs = contour[:, 0, frame_index]
    ys = contour[:, 1, frame_index]
    plt.plot(xs, ys, 'r', lw=3)
    plt.scatter(xs, ys, s=35)

    # Index of the last point that still belongs to the first half
    # (minus one because indexing is 0-based).
    last_of_first_half = len(xs) // 2 - 1

    for position, (px, py) in enumerate(zip(xs, ys)):
        if position <= last_of_first_half:
            offset = (20, -20)
        else:
            offset = (-20, 20)
        box_style = dict(boxstyle='round,pad=0.5', fc='yellow', alpha=0.5)
        arrow_style = dict(arrowstyle='->', connectionstyle='arc3,rad=0')
        plt.annotate(
            str(position),
            xy=(px, py),
            xytext=offset,
            textcoords='offset points',
            ha='right',
            va='bottom',
            bbox=box_style,
            arrowprops=arrow_style)
def visualizeEigenvalues(eVal, verboseLevel):
    """Scatter the eigenvalues in the complex plane.

    Eigenvalues whose real or imaginary part is +/- infinity are left out.
    The view is fixed to [-10, 10] on both axes.

    Parameters
    ----------
    eVal :
        Iterable of numbers exposing ``real`` and ``imag``.
    verboseLevel :
        From 1 upwards, the counts and extrema of the kept parts are
        printed; at exactly 2, all kept values are printed as well.
    """
    regular = [(z.real, z.imag) for z in eVal
               if not np.isinf(z.real) and not np.isinf(z.imag)]
    real = [re_part for re_part, _ in regular]
    imag = [im_part for _, im_part in regular]

    if verboseLevel >= 1:
        print("length of regular real values=" + str(len(real)))
        print("length of regular imag values=" + str(len(imag)))
        print("minimal real part=" + str(min(real)),
              "& maximal real part=" + str(max(real)))
        print("minimal imag part=" + str(min(imag)),
              "& maximal imag part=" + str(max(imag)))
    if verboseLevel == 2:
        print("all real values:", str(real))
        print("all imag values:", str(imag))

    plt.scatter(real, imag)
    plt.grid(True)
    plt.xlabel("realpart")
    plt.ylabel("imagpart")
    plt.xlim(-10, 10)
    plt.ylim(-10, 10)
def plotscatterdate(x, y):
    """Scatter ``y`` against ``x`` with the x axis starting at 0.

    The axis labels and the title are fixed: number of railways on x and
    price in pounds on y.
    """
    plt.scatter(x, y)
    plt.xlim(0)  # only the lower bound is set
    plt.title('Scatter of Price against Number of Railways')
    plt.xlabel('Number of Railways')
    plt.ylabel('Price in Pounds')
def plot_convergence():
    """Plot the error from smooth-error.out against resolution.

    Column 0 of ``smooth-error.out`` is read as the number of zones and
    column 1 as the absolute error. The points are drawn on log-log axes
    together with a dashed second-order reference line anchored at the
    first data point. The 5 x 5 inch figure is written to
    ``smooth_converge.eps``.
    """
    table = np.loadtxt("smooth-error.out")
    nx, aerr = table[:, 0], table[:, 1]

    ax = plt.subplot(111)
    ax.set_xscale('log')
    ax.set_yscale('log')

    plt.scatter(nx, aerr, marker="x", color="r")

    # Reference: error falling with the square of the zone count.
    second_order = aerr[0] * (nx[0] / nx) ** 2
    plt.plot(nx, second_order, "--", color="k")

    plt.xlabel("number of zones")
    plt.ylabel("L2 norm of abs error")
    plt.title(r"convergence for smooth advection problem", fontsize=11)

    plt.gcf().set_size_inches(5.0, 5.0)
    plt.xlim(8, 256)
    plt.savefig("smooth_converge.eps", bbox_inches="tight")
# Disabled: manual selection of scar states by (energy, overlap) position.
# scar_loc = np.append(scar_loc,find_nearest_state(4.2,-4.0,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(5.2,-5.0,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-1,-0.7,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-1,-1.2,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-1.5,-1.6,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-1.5,-1.9,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-2.4,-2.0,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-2.4,-2.3,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-3.2,-2.5,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-4.2,-3.5,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-4.2,-4.0,e,overlap))
# scar_loc = np.append(scar_loc,find_nearest_state(-5.2,-5.0,e,overlap))
# scar_loc = np.sort(scar_loc.astype(int))

# All states first, then the selected ones as large translucent markers:
# scar states in red, sub-band states in cyan.
plt.scatter(e, overlap)
n = 0
while n < np.size(scar_loc, axis=0):
    plt.scatter(e[scar_loc[n]], overlap[scar_loc[n]], s=200, color="red",
                alpha=0.6)
    n += 1
n = 0
while n < np.size(subband_loc, axis=0):
    plt.scatter(e[subband_loc[n]], overlap[subband_loc[n]], s=200,
                color="cyan", alpha=0.6)
    n += 1
k1 = pd.DataFrame(df.iloc[:, 0:1]) k2 = pd.DataFrame(df.iloc[:, 1:2]) k3 = pd.DataFrame(df.iloc[:, 2:3]) diff = pd.DataFrame(df.iloc[:, 4:5]) xinv = pd.DataFrame(np.linalg.pinv(x.values), x.columns, x.index) theta = pd.DataFrame(, y)) print(theta) output = pd.DataFrame(, theta)) print(np.sqrt(metrics.mean_squared_error(y, output))) plt.scatter(diff, y) plt.xlabel('difficulty') plt.ylabel('average marks') #plt.plot(diff,output,'r') plt.scatter(diff, output) #getting the python output to an html page sum = 0 while (1): print('enter the mark distribution') ip = [] for i in range(3):
test = data[~mask]

# import Linear Regression model
from sklearn import linear_model
regr = linear_model.LinearRegression()

# convert the column (list) into array data structure
train_x = np.asanyarray(train[["ENGINE_SIZE"]])  # almost same as np.array
train_y = np.asanyarray(train[["CO2"]])

# Fit the model; this fills in regr.coef_ and regr.intercept_. The fit call
# was garbled into the preceding comment in the original text and is
# restored here.
regr.fit(train_x, train_y)

# remember the basic linear formula: y = (coefficient)*x + (intercept)
print('Coefficients: ', regr.coef_)
print('Intercept: ', regr.intercept_)

# Plot outputs: training points and the fitted line
plt.scatter(train.ENGINE_SIZE, train.CO2, color='blue')
plt.plot(train_x, regr.coef_[0][0] * train_x + regr.intercept_[0], '-r')
plt.xlabel("ENGINE_SIZE")
plt.ylabel("Emission")

############ Evaluation ############
from sklearn.metrics import r2_score

test_x = np.asanyarray(test[['ENGINE_SIZE']])
test_y = np.asanyarray(test[['CO2']])
test_y_predict = regr.predict(test_x)

print("Mean absolute error: {:.2f}".format(
    np.mean(np.absolute(test_y_predict - test_y))))
# Feature scaling: X and y are each standardised with their own scaler
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
sc_y = StandardScaler()
X = sc_X.fit_transform(X)
y = sc_y.fit_transform(y)

# Fitting SVR to the dataset. The fit call was garbled in the original
# text and is restored here.
from sklearn.svm import SVR
regressor = SVR(kernel='rbf')
regressor.fit(X, y)

# Predicting a new result: scale the query, predict, map back to the
# original units of y
y_pred = sc_y.inverse_transform(
    regressor.predict(sc_X.transform(np.array([[6.5]]))))

# Visualising the Regression results
plt.scatter(X, y, color='red')
plt.plot(X, regressor.predict(X), color='blue')
plt.title('Truth or Bluff SVR Model')
plt.xlabel('Position level')
plt.ylabel('Salary')

# Visualising the Regression results (for higher resolution and smoother
# curve)
X_grid = np.arange(min(X), max(X), 0.1)
X_grid = X_grid.reshape((len(X_grid), 1))
plt.scatter(X, y, color='red')
plt.plot(X_grid, regressor.predict(X_grid), color='blue')
plt.title('Truth or Bluff (Regression Model)')
plt.xlabel('Position level')
plt.ylabel('Salary')
# Elbow method: within-cluster sum of squares for 1 to 10 clusters
for i in range(1, 11):
    kmeans = KMeans(n_clusters = i, init = 'k-means++', random_state = 42)
    # inertia_ only exists on a fitted model; the fit call was missing
    # from the original text.
    kmeans.fit(X)
    wcss.append(kmeans.inertia_)
plt.plot(range(1, 11), wcss)
plt.title('The Elbow Method')
plt.xlabel('Number of clusters')
plt.ylabel('WCSS')

# Fitting K-Means to the dataset
kmeans = KMeans(n_clusters = 5, init = 'k-means++', random_state = 42)
y_kmeans = kmeans.fit_predict(X)

# Visualising the clusters: one colour per cluster, centroids in yellow
plt.scatter(X[y_kmeans == 0, 0], X[y_kmeans == 0, 1], s = 100, c = 'red', label = 'Cluster 1')
plt.scatter(X[y_kmeans == 1, 0], X[y_kmeans == 1, 1], s = 100, c = 'blue', label = 'Cluster 2')
plt.scatter(X[y_kmeans == 2, 0], X[y_kmeans == 2, 1], s = 100, c = 'green', label = 'Cluster 3')
plt.scatter(X[y_kmeans == 3, 0], X[y_kmeans == 3, 1], s = 100, c = 'cyan', label = 'Cluster 4')
plt.scatter(X[y_kmeans == 4, 0], X[y_kmeans == 4, 1], s = 100, c = 'magenta', label = 'Cluster 5')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s = 300, c = 'yellow', label = 'Centroids')
plt.title('data_scientist_v2')
plt.xlabel('Applications')
plt.ylabel('Selected applicants')
plt.legend()
# Grid points to classify: one row per (x1, x2) pair of the mesh
x_show = np.stack((x1.flat, x2.flat), axis=1)
print(x_show.shape)

cm_light = mpl.colors.ListedColormap(['#A0FFA0', '#FFA0A0', '#A0A0FF'])
cm_dark = mpl.colors.ListedColormap(['g', 'r', 'b'])

# Predicted class per grid point, reshaped to match the input mesh
y_show_hat = model.predict(x_show).reshape(x1.shape)
print(y_show_hat)

plt.figure(1, figsize=(10, 4), facecolor='w')
plt.subplot(1, 2, 1)  # first panel of a 1 x 2 layout

# Predictions as background
plt.pcolormesh(x1, x2, y_show_hat, cmap=cm_light)
# Test samples as large stars drawn on top
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test.ravel(), edgecolors='k',
            s=150, zorder=10, cmap=cm_dark, marker='*')
# All samples
plt.scatter(x[:, 0], x[:, 1], c=y.ravel(), edgecolors='k', s=40,
            cmap=cm_dark)

plt.xlabel('sepal length', fontsize=15)
plt.ylabel('sepal width', fontsize=15)
plt.xlim(x1_min, x1_max)
plt.ylim(x2_min, x2_max)
plt.grid(True)
plt.plot(errors[0], label=kernel_name) plt.legend(loc='best') plt.xlabel('Days to predict') plt.ylabel('Relative absolute error') plt.title('Errors as a function of time') # %% # Our conclusion from the above is that the shape of the error does not # depend much on the kernel # %% # We now plot the error after 4 days as a function of kernel params plt.figure() error, start, middle = zip(*errors_by_kernel.values()) plt.scatter(start, middle, np.array(error)[:, 1]) plt.scatter(start, middle, s=300 * np.array(error)[:, 1], c=np.array(error)[:, 3], marker='o') plt.colorbar() plt.xlabel('start parameter') plt.ylabel('middle parameter') plt.title('Errors as a function of ramp kernel parameter') # %% # These results tell us that we want a ramp with a length of 10 and # ramping all the way # %%
sin_default.append(float(splitted[1])) sin_quirez.append(float(splitted[2])) sin_ff.append(float(splitted[3])) sin_quirez_ff.append(float(splitted[4])) sin_float.append(float(splitted[5])) sin_mpfr.append(float(splitted[6])) sin_default_abs.append(abs(float(splitted[1]) - float(splitted[6]))) sin_quirez_abs.append(abs(float(splitted[2]) - float(splitted[6]))) sin_ff_abs.append(abs(float(splitted[3]) - float(splitted[6]))) sin_quirez_ff_abs.append(abs(float(splitted[4]) - float(splitted[6]))) sin_float_abs.append(abs(float(splitted[5]) - float(splitted[6]))) plt.rcParams.update({'font.size': 9}) plt.scatter(theta[beginIndex:endIndex], sin_mpfr[beginIndex:endIndex], color='xkcd:grey', marker = "o", label="Real", s = 70) plt.scatter(theta[beginIndex:endIndex], sin_quirez_ff[beginIndex:endIndex], color='k', marker = "*", label="Our CORDIC (posit)") plt.scatter(theta[beginIndex:endIndex], sin_float[beginIndex:endIndex], color='b', marker = "x", label="Naive CORDIC (float)") plt.ylim(2.55 * pow(10, -6), 4 * pow(10, -6)) plt.xlim(1.57079245, 1.57079365) plt.xticks([1.5707925, 1.5707930, 1.5707935]) f = mtick.ScalarFormatter(useOffset=False, useMathText=True) g = lambda x,pos : "${}$".format(f._formatSciNotation('%1.10e' % x)) #h = lambda x,pos : "$2^{{-{}}}$".format('%.f' % x) h = lambda x,pos : "${}$".format(f._formatSciNotation('%1.10e' % x)) plt.gca().yaxis.set_major_formatter(mtick.FuncFormatter(g)) plt.gca().xaxis.set_major_formatter(mtick.FuncFormatter(h)) plt.legend() plt.xlabel(r"$\theta$")
stop=X_set[:, 0].max() + 1, step=0.01), np.arange(start=X_set[:, 1].min() - 1, stop=X_set[:, 1].max() + 1, step=0.01)) plt.contourf(X1, X2, classifier.predict(np.array([X1.ravel(), X2.ravel()]).T).reshape(X1.shape), alpha=0.75, cmap=ListedColormap(('red', 'green'))) plt.xlim(X1.min(), X1.max()) plt.ylim(X2.min(), X2.max()) for i, j in enumerate(np.unique(y_set)): plt.scatter(X_set[y_set == j, 0], X_set[y_set == j, 1], c=ListedColormap(('red', 'green'))(i), label=j) plt.title('Logistic Regression (Training set)') plt.xlabel('Age') plt.ylabel('Estimated Salary') plt.legend() # Visualising the Test set results from matplotlib.colors import ListedColormap X_set, y_set = X_test, y_test X1, X2 = np.meshgrid( np.arange(start=X_set[:, 0].min() - 1, stop=X_set[:, 0].max() + 1, step=0.01), np.arange(start=X_set[:, 1].min() - 1,
def plotting(points):
    """Print ``points`` and scatter-plot its first column against its second.

    ``points`` is indexed as ``points[:, 0]`` and ``points[:, 1]``, so it must
    support two-dimensional slicing.
    """
    print(points)
    xs = points[:, 0]
    ys = points[:, 1]
    plt.scatter(xs, ys)
x_test = x[split:] y = dataset[labels[1]].reshape(len(dataset[labels[1]]), 1) y_train = y[:split] y_test = y[split:] #Creating the linear regression model object reg = linear_model.LinearRegression() #Fitting the model on the training data, y_train) #Getting the coefficients that were produced for y = mx+b print "Coefficients:", reg.coef_ #Calculating the mse (Mean Squared Error) print "MSE:", np.mean((reg.predict(x_test) - y_test)**2) #Calculatig the variance score of the model print "Variance:", reg.score(x_test, y_test) #Plotting the output # Plot outputs plt.scatter(x_test, y_test, color='black') plt.plot(x_test, reg.predict(x_test), color='blue', linewidth=3) plt.xticks(()) plt.yticks(())
from matplotlib import pyplot as plt  # first conventional import
import pandas as pd  # second conventional import, used to process the data set

# Select the plot style.  The call had lost its "plt.style.use(" prefix and
# was left as a dangling "'fivethirtyeight')", which is a syntax error.
plt.style.use('fivethirtyeight')

data_set = pd.read_csv('Rap.csv')  # read the CSV file holding the data set
bpm = data_set['bpm']  # the "bpm" column of every row
year = data_set['year']  # the "year" (release year) column of every row

# Scatter plot: BPM on x, year on y; both colour and marker size follow BPM.
plt.scatter(
    bpm,
    year,
    c=bpm,
    s=bpm*1.5,
    cmap='gist_heat',
    edgecolor='black',
    linewidth=.7
)

# Horizontal colour bar for the BPM scale.
bar = plt.colorbar(
    orientation='horizontal',
    shrink=0.8,
    extend='both',
    extendfrac=.1
)
bar.set_label('Шкала ударов в минуту', fontsize=18)  # colour-bar caption

plt.title('Популярность скорости '  # plot title
          'исполнения в Rap\'е ', fontsize=25)
plt.xlabel('BPM', fontsize=18)  # x-axis label
def optimize(self, x, y):
    """Run one optimisation step on the batch ``(x, y)`` and return its loss.

    Gradients are cleared, the loss from ``forward_with_loss`` is
    back-propagated, and the optimizer takes one step.
    """
    self.optimizer.zero_grad()
    _output, loss = self.forward_with_loss(x, y)
    loss.backward()
    self.optimizer.step()
    return loss


# train
model = Model()
for epoch in range(100):
    for x, y in data_loader:
        loss = model.optimize(x, y)
    # Reports the loss of the last batch seen in this epoch.
    print("Epoch {:4d} | loss : {:f}".format(epoch, loss))

# feed: evaluate the model on x_space and convert both outputs to NumPy
mu, sigma = (
    out.detach().numpy() for out in model(torch.tensor(x_space))
)

# visualize: training points, predicted mean with a +/- sigma band, and the
# true function
plt.scatter(train_x, train_y,
            marker='x', c='black', alpha=0.5, label='train data')
plt.plot(x_space, mu, label='prediction mean')
plt.fill_between(x_space, mu + sigma, mu - sigma, alpha=0.5, label='+-')
plt.plot(x_space, true_y, label='true function')
plt.legend()
plt.ylim(-0.5, 1.3)
# Merge antecedents and consequents of every rule into one sorted item list,
# so that rules made of the same items compare equal.
ma_X = rules.antecedents.apply(to_list) + rules.consequents.apply(to_list)
ma_X = ma_X.apply(sorted)
rules_sets = list(ma_X)

# Distinct item lists, then the first row position at which each one occurs.
unique = [list(items) for items in {tuple(entry) for entry in rules_sets}]
index_rules = [rules_sets.index(items) for items in unique]

# Rules without any redundancy.
rules_r = rules.iloc[index_rules, :]
rules_r.sort_values('lift', ascending=False).head(10)

# Visualising results: support vs confidence.
support = rules_r['support']
plt.scatter(support, rules_r['confidence'], alpha=0.5)
plt.xlabel('support')
plt.ylabel('confidence')
plt.title('Support vs Confidence')

# Support vs lift.
plt.scatter(support, rules_r['lift'], alpha=0.5)
plt.xlabel('support')
plt.ylabel('lift')
plt.title('Support vs lift')

# Degree-1 polynomial fit of confidence as a function of lift, drawn over the
# raw points (red squares).
fit = np.polyfit(rules_r['lift'], rules_r['confidence'], 1)
fit_fn = np.poly1d(fit)
plt.plot(
    rules_r['lift'], rules_r['confidence'], 'rs',
    rules_r['lift'], fit_fn(rules_r['lift']),
)
plt.xlabel('lift')
stride = 1

# Design matrix for the real data: the first column is the case series cut
# short by q + lag*dd samples; every further lag i adds one shifted column of
# cases and one shifted column of mobility.
XX = cases[0:cases.size - q - lag * dd:stride]
for i in range(1, lag):
    first = i * dd
    trim = q + (lag - i) * dd
    X = cases[first:cases.size - trim:stride]
    M = mobility[first:mobility.size - trim:stride]
    XX = np.column_stack((XX, X, M))

# Targets and their time stamps start lag*dd + q samples in.
yy = cases[lag * dd + q::stride]
tt = t[lag * dd + q::stride]

model = Ridge(alpha=a, fit_intercept=False).fit(XX, yy)
print(XX)
print(model.intercept_, model.coef_)

# Same construction with mobility replaced by a constant -0.4 column vector
# of 288 entries.
fakeMobility = np.array([-.4] * 288).reshape(-1, 1)
ZZ = cases[0:cases.size - q - lag * dd:stride]
for i in range(1, lag):
    first = i * dd
    trim = q + (lag - i) * dd
    X = cases[first:cases.size - trim:stride]
    M = fakeMobility[first:fakeMobility.size - trim:stride]
    ZZ = np.column_stack((ZZ, X, M))

yy = cases[lag * dd + q::stride]
tt = t[lag * dd + q::stride]

# Refit on the constant-mobility design and predict on that same design.
model = Ridge(alpha=a, fit_intercept=False).fit(ZZ, yy)
fakePred = model.predict(ZZ)

plt.scatter(t, cases, color='black')
plt.scatter(tt, fakePred, color='blue')
plt.plot(t, fakeMobility, color='r')
plt.title('Mobility Set To -40%')
plt.xlabel("time (days)")
plt.ylabel("#cases")
plt.legend(["mobility", "training data", "predictions"], loc='upper right')
def draw_Battery_Use(consumption, total_power, solar_power, wind_power, dic, configuration, max_power):
    """Draw the daily power averages and the simulated battery charge level.

    Top panel: daily mean of ``total_power`` together with ``solar_power``,
    ``wind_power`` and ``consumption`` (plotted as given), annotated with the
    storage, cable, cost and solar-panel figures.
    Bottom panel: battery charge obtained by accumulating ``total_power - 6000``
    step by step, clamped to ``[0, capacity]``, with a red marker at every step
    where the charge had to be clamped at zero.

    Args:
        consumption: demand series for the top panel.
        total_power: array of produced power; its first 8760 entries are
            reshaped to 365 x 24 for the daily means.
        solar_power: solar series for the top panel.
        wind_power: wind series for the top panel.
        dic: mapping providing ``'total_storage'``, ``'cable_area'`` and
            ``'cost'``.
        configuration: sequence whose entries ``3*k``, ``3*k + 1``, ``3*k + 2``
            (k = 0..3) are shown as area, angle and orientation of solar panel
            ``k + 1``, and whose second-to-last entry is shown as the number of
            wind turbines.
        max_power: value shown as the maximum power output.
    """
    power_generated = total_power.sum()
    power = total_power
    # Daily means: 8760 values -> 365 rows of 24.
    total_power = np.mean(np.reshape(total_power[:8760], (365, 24)), axis=1)
    capacity = int(dic['total_storage'])

    # Caption column (t1) and value column (t2) of the summary text.
    t1 = "Storage capacity: \nAmount of windturbines: \nCable area: \nMaximum Power Output: \nTotal Power Generated: \nTotal costs: "
    t2 = str(capacity) + " kWh\n" + \
        str(int(configuration[-2])) + "\n" + \
        str(int(dic['cable_area'])) + " mm²\n" + \
        str(int(max_power)) + " kW\n" + \
        str(int(power_generated)) + " kWh\n" +\
        '€' + str(int(dic['cost']))

    # One text line per solar panel whose area entry is positive.
    t3 = ""
    for I in range(4):
        if configuration[0 + I * 3] > 0:
            t3 = t3 + "SP" + str(I + 1) + " - Area: " + str(int(configuration[0 + I*3])) +\
                "m² - Angle: " + str(int(configuration[1 + I*3])) +\
                "° - Orientation: " + str(int(configuration[2 + I*3])) + "°\n"

    plt.subplot(2, 1, 1)
    plt.plot(total_power, color='green', alpha=0.5, label='Total energy production')
    plt.plot(solar_power, color='yellow', alpha=0.5, label='Solar energy')
    plt.plot(wind_power, color='blue', alpha=0.5, label='Wind energy')
    plt.plot(consumption, color='red', label='Energy demand')
    text_top = total_power.max() * 1.04
    plt.text(330, text_top, t2, ha='left', va='top', style='italic', wrap=False)
    plt.text(330, text_top, t1, ha='right', va='top', wrap=False)
    plt.text(362, total_power.max() * 0.725, t3, ha='right', va='top', wrap=False)
    plt.legend(loc='upper center')
    plt.title("Power Average per Day")
    plt.xlabel('Days')
    plt.ylabel('kW')
    plt.xlim(0, 365)

    plt.subplot(2, 1, 2)
    # NOTE(review): 6000 is a hard-coded offset, presumably the constant
    # demand -- confirm.
    power = power - 6000

    # Two passes over the series: the first starts from a full battery, the
    # second starts from the charge the first pass ended with.  Only the
    # second pass is plotted.
    start_charge = capacity
    for _ in range(2):
        batterycharge = [start_charge]
        Powershortage = []
        for delta in power:
            level = batterycharge[-1] + delta
            if level > capacity:
                level = capacity
            elif level < 0:
                level = 0
                # Index this step will occupy in batterycharge.
                Powershortage.append(len(batterycharge))
            batterycharge.append(level)
        start_charge = batterycharge[-1]

    plt.plot(batterycharge, color='green', alpha=0.5)
    # Fixed: the guard used to be `len(Powershortage) == 0`, so markers were
    # drawn only when there was nothing to draw.  The coordinates were also
    # (0, step); they are now (step, 0) so each marker sits on the step where
    # the charge hit zero.
    if Powershortage:
        plt.scatter(Powershortage, np.zeros(len(Powershortage)), color='red')
    plt.title("Power supply level over a Year")
    plt.xlabel('Hour')
    plt.ylabel('kWh')
    plt.xlim(0, 8760)
def DataPlot(self, data, name_of_bed, fig=False, save_data=False):
    '''
    Plot cumulative mass retained against the phi scale for one bed.

    args::
        data: table to plot; its 'Phi_scale' and 'Cummulative_Mass_Retained'
              columns are drawn, and it is what ``to_csv`` is called on
        name_of_bed: name of the bed (str); used in the title and as the
              stem of the output file names
        fig = False plots the chart without saving it; a truthy value also
              saves it as '<name_of_bed>.png'
        save_data = False does not write the data; a truthy value also writes
              it to '<name_of_bed>.csv'

    raises::
        AssertionError: if name_of_bed is not a str

    Fixes relative to the previous version:
      * the attribute assignment and the column look-ups had lost their
        ``self.data`` prefix (`` = data``, ``plt.plot(['Phi_scale'], ...)``);
      * the four branches were chained with ``or``, so the first branch caught
        nearly every case: the defaults saved a PNG, and fig=True with
        save_data=True never wrote the CSV;
      * ``savefig`` was called with the misspelt keyword ``pdi``.
    The four copies of the plotting code are now a single copy followed by
    two independent output steps.
    '''
    self.data = data
    self.name_of_bed = name_of_bed
    self.fig = fig
    self.save_data = save_data

    assert isinstance(
        self.name_of_bed, str
    ), f"Name of Bed must be a String, but {type(self.name_of_bed)} was passed."

    # Cumulative mass retained vs phi scale, for easy picking of percentiles.
    figure, ax = plt.subplots(figsize=(10, 8))
    phi = self.data['Phi_scale']
    retained = self.data['Cummulative_Mass_Retained']
    plt.plot(phi, retained, color='b')
    plt.scatter(phi, retained, color='r')

    # Five minor intervals per major tick on both axes, with labelled minors.
    ax.xaxis.set_minor_locator(AutoMinorLocator(n=5))
    ax.yaxis.set_minor_locator(AutoMinorLocator(n=5))
    ax.xaxis.set_minor_formatter(FormatStrFormatter("%.1f"))
    ax.yaxis.set_minor_formatter(FormatStrFormatter("%.0f"))
    plt.rc('grid', linestyle="-", color='black')

    # NOTE(review): '1.5' is passed as a colour string; grayscale strings are
    # normally within 0-1 -- confirm this value is what is intended.
    for side in ('bottom', 'top', 'right', 'left'):
        ax.spines[side].set_color('1.5')

    plt.grid(True, which='minor', color='k', linestyle='-')
    plt.grid(True, which='major', color='r', linestyle='-')
    plt.xlabel('Grain Size (Phi)', fontsize=16)
    plt.ylabel('Cumulative Mass Retained (%)', fontsize=16)
    plt.title(f'Cummulative Frequncy Curve {self.name_of_bed}', fontsize=20, fontweight='bold')

    # The two outputs are independent of each other.
    if self.fig:
        figure.savefig(f'{self.name_of_bed}.png', dpi=figure.dpi)
    if self.save_data:
        data.to_csv(f'{self.name_of_bed}.csv', index=False)
y_train = np.array(y[:num_training])

# Test data: everything from num_training onwards, X as a (num_test, 1) column.
X_test = np.array(X[num_training:]).reshape((num_test, 1))
y_test = np.array(y[num_training:])

# Create linear regression object
linear_regressor = linear_model.LinearRegression()

# Train the model using the training sets.  The fit call had been lost (only
# a dangling ", y_train)" remained), so predict() below ran on an unfitted
# estimator.
# NOTE(review): X_train is assumed to be defined just above this fragment,
# next to y_train -- confirm.
linear_regressor.fit(X_train, y_train)

# Predict the train data output
y_train_pred = linear_regressor.predict(X_train)
plt.figure()
plt.scatter(X_train, y_train, color='green')
plt.plot(X_train, y_train_pred, color='black', linewidth=4)
plt.title('Training data')

# Predict the test data output
y_test_pred = linear_regressor.predict(X_test)
plt.figure()
plt.scatter(X_test, y_test, color='green')
plt.plot(X_test, y_test_pred, color='black', linewidth=4)
plt.title('Test data')
plt.xticks(())
plt.yticks(())

# Measure performance
# Bare expressions below have no effect when run as a script; they only
# display a value when evaluated interactively.
data.shape
data.dtypes
data.describe()

# Find rows where the number of returns (column N1) is 0 and drop these rows
# in place; the two expressions after the drop re-inspect the result.
data[data['N1']==0].count()
data.drop(data[data.N1 == 0].index, inplace=True)
data.shape
data[data.N1 == 0]

# Create the refund column as the ratio of columns A11902 and N11902.
data['refund'] = data['A11902']/data['N11902']

# Graphs: refund against columns A85300 and N11901.
plt.scatter(data['A85300'],data['refund'])
plt.scatter(data['N11901'],data['refund'])

# The string literal below is disabled code kept as a no-op expression.
'''#categorical variables without zipcode cat_var = df[['STATE','agi_stub']] cat_var = cat_var.astype(str) dummies = pd.get_dummies(cat_var) dummies.columns dummies.shape'''

# Create percentage columns on a copy: columns mars1 and MARS2 divided by N1.
df = data.copy()
df['single'] = df['mars1']/df['N1']
df['joint'] = df['MARS2']/df['N1']
# Combine the three coordinate sets; the second and third are shifted by the
# *_eur and *_us offsets before being appended.
lat = np.hstack([np.array(y), np.array(y1) + lat_eur, np.array(y2) + lat_us])
lon = np.hstack([np.array(x), np.array(x1) + lon_eur, np.array(x2) + lon_us])

plt.figure(figsize=(10, 3.5))
labels = ["a", "b", "c"]
nbins = 12

# Tick labels of the colour bars use the shortest general number format.
formatter = ticker.FuncFormatter(lambda y, _: '{:g}'.format(y))

# One panel per multiple of nbins (12, 24, 36 bins).
for _i in range(3):
    ax = plt.subplot(131 + _i, projection=AzimuthalEquidistant(0, 0))
    ax.set_global()
    lwsspy.maps.plot_map()

    weights = lwsspy.geo.azi_weights(0, 0, lat, lon, nbins=nbins*(_i+1))

    # Points coloured by weight on a log scale spanning the weight range.
    log_norm = LogNorm(vmin=min(weights), vmax=max(weights))
    plt.scatter(
        lon, lat,
        c=weights,
        cmap='rainbow',
        norm=log_norm,
        transform=PlateCarree(),
        edgecolors='k',
        linewidth=0.25,
    )

    cb = lwsspy.plot.nice_colorbar(
        orientation='horizontal',
        ticks=[0.3, 0.4, 0.6, 1.0, 1.5, 2.0, 3.0],
        format=formatter,
        aspect=40,
        pad=0.05,
    )

    # Panel letter.
    lwsspy.plot.plot_label(
        ax, f"{labels[_i]})", location=6, box=False, dist=0.0)
    cb.set_label("Weights")
    plt.title(f"$N_b = {nbins*(_i+1)}$", fontdict=dict(fontsize='small'))

    # Summary statistics of the weights.
    lwsspy.plot.plot_label(
        ax,
        f"min: {np.min(weights):3.2f}\n"
        f"max: {np.max(weights):3.2f}\n"
        f"median: {np.median(weights):3.2f}\n",
        location=3, box=False, dist=-0.1,
        fontdict=dict(fontsize='small'))
# First view: one plt.plot call per cluster using the x, y and z components
# of its points.  `col` is a running counter; when it equals len(clusters)
# the cluster is drawn in faint white instead of a palette colour.
for cluster in clusters:
    x = [point[0] for point in cluster]
    y = [point[1] for point in cluster]
    z = [point[2] for point in cluster]
    if col != len(clusters):
        plt.plot(x, y, z, marker="o", c="{}".format(colors[col]), ms=3)
    else:
        plt.plot(x, y, z, marker="o", c="w", alpha=0.3, ms=2)
    col += 1

# NOTE(review): time.clock() was removed in Python 3.8.  It is left untouched
# here because `start` is initialised outside this fragment and both ends of
# the measurement must use the same clock; switch every call in the file to
# time.perf_counter() together.
print(time.clock() - start)

start = time.clock()
fig = plt.figure()
ax = plt.gca()
# Fixed: Axes.set_axis_bgcolor no longer exists in current matplotlib;
# set_facecolor is its replacement.
ax.set_facecolor((0, 0, 0))

# Second view: x/y scatter on a black background with the same colour rule.
col = 1
for cluster in clusters:
    x = [point[0] for point in cluster]
    y = [point[1] for point in cluster]
    if col != len(clusters):
        plt.scatter(x, y, s=10, c="{}".format(colors[col]))
    else:
        plt.scatter(x, y, s=5, c="w", alpha=0.15)
    col += 1

print(time.clock() - start)
# --------------
import matplotlib.pyplot as plt

# code starts here
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=6)

cols = X_train.columns

# 3 x 3 grid: one panel for each of the first nine feature columns, plotted
# against the target.
fig, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 10))
for i in range(3):
    for j in range(3):
        col = cols[i * 3 + j]
        # Fixed: plt.scatter drew every feature onto the current axes (the
        # last panel), leaving the other eight panels empty.
        axes[i, j].scatter(X_train[col], y_train)
# code ends here


# --------------
# Code starts here
corr = X_train.corr()
corr

# Drop the two star-rating columns from both splits.
del X_train['play_star_rating']
del X_train['val_star_rating']
del X_test['play_star_rating']
del X_test['val_star_rating']
# Code ends here
import matplotlib.pyplot as plt
from sklearn import datasets


def pca(data, base_num=1):
    """Return the leading principal axes of ``data``.

    Args:
        data: 2-D array with one sample per row (``n`` rows, ``d`` columns).
        base_num: number of axes wanted; at most ``d`` are returned.

    Returns:
        Array with ``min(base_num, d)`` rows, each an eigenvector of the
        covariance matrix, ordered from largest to smallest eigenvalue.
    """
    n, d = data.shape  # n: number of samples, d: number of dimensions (n > d is required)
    data_mean = data.mean(0)
    data_norm = data - data_mean
    # Fixed: the "np.dot(data_norm.T" part of this expression was missing,
    # leaving "cov =, data_norm) / float(n)".
    cov = np.dot(data_norm.T, data_norm) / float(n)
    w, vl = spla.eig(cov)
    # argsort is ascending: keep the last min(base_num, d) indices, then
    # reverse them so the largest eigenvalue comes first.
    index = w.argsort()[-min(base_num, d):]
    t = vl[:, index[::-1]].T
    return t


if __name__ == "__main__":
    # NOTE(review): this random sample is overwritten by the iris data below.
    data = np.random.multivariate_normal([0, 0], [[1, 2], [3, 4]], 100)
    iris = datasets.load_iris()
    # Fixed: "iris.data" was missing from the next two statements
    # ("print[:, :4]", "data =[:, :2]").
    print(iris.data[:, :4])
    data = iris.data[:, :2]
    base = pca(data)
    # data = np.dot(data, base)

    # Visualisation starts here: the data plus a red line with the slope of
    # the first principal axis.
    plt.scatter(data[:, 0], data[:, 1])
    leng = (data.max()-data.min())/2
    pc_line = np.array([-leng, leng]) * (base[0][1] / base[0][0])
    plt.plot([-leng, leng], pc_line, "r")
def visualize_points(x, y, alpha=1., colors=('red', 'green')):
    """Scatter the first two columns of ``x``, coloured by the labels in ``y``.

    Each entry of ``y`` is used as an index into ``colors``; ``alpha`` is
    passed through to the scatter call.  A grid is switched on afterwards.
    """
    point_colors = [colors[label] for label in y]
    plt.scatter(x[:, 0], x[:, 1], color=point_colors, alpha=alpha)
    plt.grid(True)
def finalize(self, population, engine):
    """Build the 'Optimal solution' message for the final population.

    The best individual is selected with ``engine.fitness``; the message
    contains its solution and ``engine.ori_fmax``.
    """
    # Fixed: the argument of best_indv() was missing, leaving the call
    # unclosed.
    best_indv = population.best_indv(engine.fitness)
    x = best_indv.solution
    y = engine.ori_fmax
    msg = 'Optimal solution: ({}, {})'.format(x, y)
    # NOTE(review): msg is built but never emitted in this copy; a logging
    # call appears to be missing -- restore it.


if '__main__' == __name__:
    # Run the GA engine and print every generation
    # NOTE(review): the comment above refers to running the engine, but no
    # engine.run(...) call is present in this copy -- restore it with the
    # intended number of generations.
    best_indv = engine.population.best_indv(engine.fitness)

    # Fixed: every use of the best individual's fitness had been dropped,
    # leaving ",," and unclosed calls.  It is now evaluated once and reused.
    best_x = best_indv.solution[0]
    best_y = engine.fitness(best_indv)
    print('Max({0},{1})'.format(best_x, best_y))

    # Objective function sampled on [0, 15].
    x = np.linspace(0, 15, 10000)
    y = [-3 * (i - 30)**2 * math.sin(i) for i in x]
    plt.plot(x, y)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('function')
    plt.axis([-1, 16, -3000, 3000])

    # Mark and annotate the best point found.
    plt.scatter(best_x, best_y, color='r')
    a = round(best_x, 4)
    b = round(best_y, 4)
    plt.annotate('Max(' + str(a) + ',' + str(b) + ')',
                 xy=(best_x, best_y),
                 xytext=(7, 2500),
                 arrowprops=dict(facecolor='black', shrink=0.1, width=2))
def show():
    """Cluster the one-hot table and draw the clusters in two dimensions.

    Reads sheet '123' of ./one_hot.xlsx, standardises it, projects it onto two
    PCA components, runs ``k_means_cluster`` with 8 clusters, writes the
    assignments to sheet '234' of ./one_hot1.xlsx, and saves to ./whfypca.png
    a figure with the nearest-centre regions, the samples of each cluster and
    the centres.

    Fixes relative to the previous version:
      * the y coordinate of each sample was read from ``reduced_data[i]``
        (the cluster index) instead of ``reduced_data[j]`` (the sample index);
      * the ``cmap`` argument of ``imshow`` had been lost, leaving ",,";
      * ``read_excel`` was called with ``sheetname``; the keyword is
        ``sheet_name``, as already used for ``to_excel`` below.
    """
    n_clusters = 8

    d = pd.read_excel("./one_hot.xlsx", sheet_name='123')
    d = np.array(d)
    scaler = preprocessing.StandardScaler().fit(d)
    d = scaler.transform(d)

    # Reduce to two components so the result can be drawn on a plane.
    pca_model = PCA(n_components=2)
    reduced_data = pca_model.fit_transform(d)

    iters, centers, assignments = k_means_cluster(reduced_data, n_clusters)
    print(centers, assignments)
    assignments1 = pd.DataFrame(assignments)
    print(assignments1)
    assignments1.to_excel("./one_hot1.xlsx", sheet_name="234", index=False, header=True)

    # h is the mesh step; the min/max values fix the axis ranges.
    h = .2
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

    # Index of the nearest centre for every mesh point.
    xy_pts = np.c_[xx.ravel(), yy.ravel()]
    mytree = cKDTree(centers)
    dist, indexes = mytree.query(xy_pts)
    indexes = indexes.reshape(xx.shape)

    # Draw everything with matplotlib.
    plt.clf()
    plt.imshow(indexes, interpolation='nearest',
               extent=(xx.min(), xx.max(), yy.min(), yy.max()),
               cmap=plt.cm.Paired,
               aspect='auto', origin='lower')

    # One marker style per cluster.
    symbols = ['o', '^', 'D', 's', '.', ',', '<', '*']
    for i in range(n_clusters):
        x = []
        y = []
        for j in range(assignments.shape[0]):
            if assignments[j] == i:
                sample = reduced_data[j]
                x.append(sample[0])
                y.append(sample[1])
        plt.plot(x, y, symbols[i], markersize=10)

    plt.scatter(centers[:, 0], centers[:, 1],
                marker='x', color='black', s=169, linewidths=3, zorder=10)
    plt.title('K-means clustering')
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    plt.savefig('./whfypca.png')
# Fixed: both projections had lost their fit call ("X_r = lda = ..." and
# "X_r2 =, y).transform(X)").
pca = PCA(n_components=2)
X_r = pca.fit(X).transform(X)

lda = LinearDiscriminantAnalysis(n_components=2)
X_r2 = lda.fit(X, y).transform(X)

# Percentage of variance explained for each components
print('explained variance ratio (first two components): %s'
      % str(pca.explained_variance_ratio_))

# One scatter per class in the PCA projection.
plt.figure()
colors = ['navy', 'turquoise', 'darkorange']
lw = 2

for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r[y == i, 0], X_r[y == i, 1], color=color, alpha=.8, lw=lw,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('PCA of IRIS dataset')

# Same layout for the LDA projection.
plt.figure()
for color, i, target_name in zip(colors, [0, 1, 2], target_names):
    plt.scatter(X_r2[y == i, 0], X_r2[y == i, 1], alpha=.8, color=color,
                label=target_name)
plt.legend(loc='best', shadow=False, scatterpoints=1)
plt.title('LDA of IRIS dataset')

#
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, KernelPCA
#!/usr/bin/env python
# -*- noplot -*-

"""
This example demonstrates how to set hyperlinks on various kinds of elements.

This currently only works with the SVG backend.
"""

import numpy as np
# Fixed: the module name was missing ("import as cm"), which is a syntax
# error; cm.gray below comes from matplotlib.cm.
import matplotlib.cm as cm
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt

# Scatter plot with one URL per point (None means no link), written as SVG.
# NOTE(review): the first two URLs are empty strings in this copy -- confirm
# the intended targets.
f = plt.figure()
s = plt.scatter([1, 2, 3], [4, 5, 6])
s.set_urls(['', '', None])
f.canvas.print_figure('scatter.svg')

# Image of a difference of two bivariate Gaussians on [-3, 3) x [-3, 3).
f = plt.figure()
delta = 0.025
x = y = np.arange(-3.0, 3.0, delta)
X, Y = np.meshgrid(x, y)
# NOTE(review): mlab.bivariate_normal is not available in recent matplotlib
# releases -- confirm the targeted version.
Z1 = mlab.bivariate_normal(X, Y, 1.0, 1.0, 0.0, 0.0)
Z2 = mlab.bivariate_normal(X, Y, 1.5, 0.5, 1, 1)
Z = Z2 - Z1  # difference of Gaussians

im = plt.imshow(Z, interpolation='bilinear', cmap=cm.gray,
                origin='lower', extent=[-3, 3, -3, 3])
# !/usr/bin/python3'
import numpy as np
from matplotlib.pyplot import scatter, show

# Load the first ten comma-separated columns of the data file.
a = np.loadtxt('magic04.txt', delimiter=',', usecols=tuple(range(10)))

# First and second attribute (columns 0 and 1).
b, c = a[:, 0], a[:, 1]

covariance = np.cov(b, c)
corrc = np.corrcoef(b, c)  # correlation-coefficient matrix
print(corrc)

# Scatter plot of the two attributes: size 20, coloured by the first
# attribute, drawn with point markers.
scatter(b, c, s=20, c=b, marker=".")
show()
from sklearn import linear_model

# Fixed: the fit call had been lost, leaving
# "reg = linear_model.LinearRegression(), target_train)".
reg = linear_model.LinearRegression()
reg.fit(feature_train, target_train)
# print("Slope:", reg.coef_)
# print("Intercept:", reg.intercept_)
pred = reg.predict(feature_test)

# Explained variance score: 1 is perfect prediction
from sklearn.metrics import r2_score
print('Variance score: %.3f' % r2_score(target_test, pred))

### draw the scatterplot, with color-coded training and testing points
import matplotlib.pyplot as plt
for feature, target in zip(feature_test, target_test):
    plt.scatter(feature, target, color=test_color)
for feature, target in zip(feature_train, target_train):
    plt.scatter(feature, target, color=train_color)

### labels for the legend
plt.scatter(feature_test[0], target_test[0], color=test_color, label="test")
# Fixed: the "train" legend marker was drawn at the first *test* point, which
# painted that test point in the training colour.
plt.scatter(feature_train[0], target_train[0], color=train_color, label="train")

### draw the regression line, once it's coded
try:
    plt.plot(feature_test, reg.predict(feature_test))
except NameError:
    pass

# Refit on the test split and draw that line in green over the training
# features.  Fixed: this fit call had also been lost ("pass, target_test)").
reg.fit(feature_test, target_test)
plt.plot(feature_train, reg.predict(feature_train), color="g")