def plotPC(PC1, PC2, labelList):
    """Scatter two PCA dimensions, coloured by class, and save the figure.

    Sample ``i`` is drawn at ``(PC1[i], PC2[i])`` in the colour of class
    ``labelList[i][0]``, which is used directly as an index into ten
    per-class buckets.  The plot is written to ``2D_10MNistGraph.png`` and
    the current figure is closed afterwards.
    """
    palette = [
        "red", "green", "blue", "black", "gray", "yellow", "cyan", "magenta",
        "burlywood", "purple"
    ]
    xs_by_class = [[] for _ in range(10)]
    ys_by_class = [[] for _ in range(10)]
    for idx in range(len(labelList)):
        # each label entry wraps the class number in a one-element row
        digit = labelList[idx][0]
        xs_by_class[digit].append(PC1[idx])
        ys_by_class[digit].append(PC2[idx])

    fig = plt.figure()
    ax = fig.add_subplot(111)
    for digit, (xs, ys) in enumerate(zip(xs_by_class, ys_by_class)):
        plt.scatter(xs, ys, c=palette[digit], lw=0, label=str(digit))
    plt.legend(scatterpoints=1)
    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
    fig.savefig("2D_10MNistGraph.png")
    plt.close()
def plot_comparison(self, x_data, x_variable, y1_data, y1_variable, y2_data,
                    y2_variable):
    """Scatter the first comparison result against x, coloured per point.

    All six arguments are forwarded unchanged to ``self.compare_results``,
    which returns a 6-tuple; only its first, second and last items
    (x values, first result series, colours) are plotted here.
    """
    # scatter() lives in matplotlib.pyplot; the top-level matplotlib package
    # has no such attribute, so the previous import failed at the call.
    import matplotlib.pyplot as plt

    x, r1, _r2, _diff, _combined, colors = self.compare_results(
        x_data, x_variable, y1_data, y1_variable, y2_data, y2_variable)
    plt.scatter(x, r1, c=colors)
def plot_decision_regions(X, y, classifier, resolution=0.02):
    """Shade a classifier's decision regions and overlay the samples.

    The first two columns of ``X`` span the plane.  A grid with step
    ``resolution`` covering the data plus a margin of 1 is passed to
    ``classifier.predict``; the predictions are drawn as filled contours and
    the samples are scattered on top, one marker/colour per distinct value
    in ``y``.
    """
    # marker and colour per class, in the order returned by np.unique
    marker_cycle = ('s', 'x', 'o', '^', 'v')
    color_cycle = ('red', 'blue', 'lightgreen', 'gray', 'cyan')
    classes = np.unique(y)
    cmap = ListedColormap(color_cycle[:len(classes)])

    # bounding box of the two features, padded by 1 on each side
    f1_lo, f1_hi = X[:, 0].min() - 1, X[:, 0].max() + 1
    f2_lo, f2_hi = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx1, xx2 = np.meshgrid(np.arange(f1_lo, f1_hi, resolution),
                           np.arange(f2_lo, f2_hi, resolution))

    # one row per grid node, predicted and folded back into grid shape
    grid_points = np.array([xx1.ravel(), xx2.ravel()]).T
    Z = classifier.predict(grid_points)
    Z = Z.reshape(xx1.shape)

    plt.contourf(xx1, xx2, Z, alpha=0.4, cmap=cmap)
    plt.xlim(xx1.min(), xx1.max())
    plt.ylim(xx2.min(), xx2.max())

    # samples, class by class
    for idx, cl in enumerate(classes):
        members = y == cl
        plt.scatter(x=X[members, 0],
                    y=X[members, 1],
                    alpha=0.8,
                    c=cmap(idx),
                    marker=marker_cycle[idx],
                    label=cl)
def plot_boundary(model, x, y, **kwargs):
    """Draw a model's predictions over a 2-D grid together with the samples.

    ``x`` must have two columns (asserted).  Recognised keyword arguments:
    ``h`` (grid step, default 0.1), ``title`` (figure super-title) and
    ``accuracy`` (a fraction, shown as a percentage in the axes title).
    The figure is displayed with ``plt.show()``.
    """
    assert (x.shape[-1] == 2)
    background = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF'])
    foreground = ListedColormap(['#FF0000', '#00FF00', '#0000FF'])
    step = kwargs.get('h', 0.1)

    # grid covering the data with a margin of 1 in both directions
    col0, col1 = x[:, 0], x[:, 1]
    x_grid, y_grid = np.meshgrid(
        np.arange(col0.min() - 1, col0.max() + 1, step),
        np.arange(col1.min() - 1, col1.max() + 1, step))

    # predict every grid node, then fold the result back into grid shape
    Z = model.predict(np.c_[x_grid.ravel(), y_grid.ravel()])
    Z = Z.reshape(x_grid.shape)

    plt.figure()
    plt.pcolormesh(x_grid, y_grid, Z, cmap=background)
    # the samples themselves, coloured by y
    plt.scatter(col0, col1, c=y, cmap=foreground, edgecolor='k', s=20)
    plt.xlim(x_grid.min(), x_grid.max())
    plt.ylim(y_grid.min(), y_grid.max())

    if 'title' in kwargs:
        plt.suptitle(kwargs['title'])
    if 'accuracy' in kwargs:
        plt.title("Accuracy: %.1f%%" % (kwargs['accuracy'] * 100),
                  fontsize=10)
    plt.show()
def evaluate_prediction():
    """Sketch of a stock-model evaluation plot.

    NOTE(review): this reads like pseudo-code rather than runnable code.
    ``Stockname``, ``StockName``, ``dates`` and ``prices`` are not defined
    inside this function, and several calls below do not look like real
    library APIs -- confirm each one before relying on this function.
    """
    #Accuracy measure
    # NOTE(review): ``sklearn.metrics.stocker`` does not look like a real
    # submodule; presumably ``sklearn.metrics.confusion_matrix`` was meant.
    sklearn.metrics.stocker.confusion_matrix(
        Stockname.Startdate('2017-03-27'), Stockname.Startdate('2018-03-27'))
    # NOTE(review): ``random.StockName`` is not part of the stdlib ``random``
    # module -- verify where these dates are supposed to come from.
    Startdate = random.StockName.date()
    Enddate = random.StockName.date(after=Startdate)
    plt.scatter(dates, prices, color='black', label='Data')
    # The next two calls are identical (same data, colour and label).
    plt.plot(StockName.dates, StockName.predict(dates), color='black', label='Observations')
    plt.plot(StockName.dates, StockName.predict(dates), color='black', label='Observations')
    # NOTE(review): ``attr=`` does not look like a keyword matplotlib's
    # plot() accepts; presumably linestyle/linewidth was intended -- confirm.
    plt.plot(StockName.dates, StockName.predict(dates), color='blue', label='Predicted', attr='bold')
    plt.plot(StockName.dates, StockName.predict(dates), color='yellow', label='Confidence Interval', attr='block')
    plt.plot(StockName.midpoint.random(dates), label='Prediction Start', color='red', attr='dotted')
    plt.xlabel('Date')
    plt.ylabel('Price $')
    # NOTE(review): title() is given five separate positional arguments;
    # presumably one formatted string was intended -- confirm.
    plt.title(StockName, 'Model Evaluation from ', Startdate, 'to ', Enddate)
def plot_scatter(self, dataframe, x, y, title, xlable, ylable, kurs):
    """Scatter ``y`` against ``x``, save the figure as a PNG and show it.

    The image is written to ``./PDFcreater/Plots/<kurs>/<title>.png``.
    ``title`` is only used for the file name (no plot title is set), and
    ``dataframe`` is accepted but not used in the body.
    """
    target = './PDFcreater/Plots/{}/{}.png'.format(kurs, title)

    plt.scatter(x, y)
    plt.xlabel(xlable)
    plt.ylabel(ylable)
    plt.savefig(target)
    plt.show()
def plotinter(self, pts=1000):
    """Plot the parametric curve and its interpolation nodes; print the time taken.

    The curve ``(self.x(t), self.y(t))`` is sampled with step ``1 / pts``
    between ``self.pts[0]`` and ``self.pts[-1]``; the points
    ``(self.interx, self.intery)`` are overlaid in red.  The elapsed
    wall-clock time in seconds is printed.
    """
    t0 = time.time()
    # Float literal keeps this a true division on Python 2 as well, where
    # ``1 / pts`` with an int ``pts`` is 0 and arange() gets a zero step.
    dt = 1.0 / pts
    # "+ dt" extends the half-open arange interval so the last node is reached
    t = arange(self.pts[0], self.pts[-1] + dt, dt)
    plt.plot(self.x(t), self.y(t))
    plt.scatter(self.interx, self.intery, color="red")
    print(time.time() - t0)
def __init__(self, x, y):
    """Fit ``y = m*x + b`` by least squares and plot the line over the data.

    The sums are delegated to helper methods on the instance
    (``suma``, ``columnaXy``, ``columnaCuadrado``, ``media``); presumably
    they return sum(x), sum(x*y), (squares, sum(x**2)) and sum/n
    respectively -- verify against their definitions.
    """
    self.x = x
    self.y = y
    n = len(x)  # number of data points
    xSum = self.suma(x)
    ySum = self.suma(y)
    xySum = self.columnaXy(x, y, n)
    xCuadrado, sumXCuadrado = self.columnaCuadrado(x, n)
    xMedia = self.media(xSum, n)
    yMedia = self.media(ySum, n)

    # Least-squares slope: (n*Sum(xy) - Sum(x)*Sum(y)) / (n*Sum(x^2) - Sum(x)^2).
    # The numerator previously used Sum(x) + Sum(y), which gives a wrong slope.
    num = xySum * n - (xSum * ySum)
    den = n * sumXCuadrado - (xSum**2)
    m = float(num) / den  # float() avoids integer division on Python 2
    b = yMedia - (m * xMedia)

    # plot the fitted line one unit beyond the data on both sides
    x1 = np.linspace(min(x) - 1, max(x) + 1)
    linea = m * x1 + b
    plt.plot(x1, linea)
    plt.scatter(x, y)
    plt.xlabel('x')
    plt.ylabel('y')
    plt.grid(True)
    plt.show()
def tsne_plot(model):
    """Create a t-SNE projection of every word vector in ``model`` and plot it.

    Each vocabulary word is projected to 2-D and drawn as an annotated
    point on a 16x16 inch figure, which is then shown.
    """
    labels = list(model.wv.vocab)
    # Index through ``model.wv``: the vocabulary is already read from there,
    # and indexing the model object itself is deprecated in gensim.
    tokens = [model.wv[word] for word in labels]

    tsne_model = TSNE(perplexity=40,
                      n_components=2,
                      init='pca',
                      n_iter=2500,
                      random_state=23)
    new_values = tsne_model.fit_transform(tokens)

    plt.figure(figsize=(16, 16))
    # One scatter call per point keeps the per-point colour cycling.
    for label, value in zip(labels, new_values):
        px, py = value[0], value[1]
        plt.scatter(px, py)
        plt.annotate(label,
                     xy=(px, py),
                     xytext=(5, 2),
                     textcoords='offset points',
                     ha='right',
                     va='bottom')
    plt.show()
def graph():
    """Plot interface_delta_X against ligand RMSD for the low/high score groups.

    Reads the module-level frames ``datlo``, ``dathi`` and ``data``; axis
    limits come from the full ``data`` frame.  Calls ``printfile()`` before
    showing the figure.
    """
    rms_col = 'ligand_rms_no_super_X'
    delta_col = 'interface_delta_X'

    # the two groups to plot, low-score group first
    lo_x, lo_y = datlo[rms_col], datlo[delta_col]
    hi_x, hi_y = dathi[rms_col], dathi[delta_col]

    # axis limits span the complete data set, not just the plotted groups
    maxrmsd = data[rms_col].max()
    minrmsd = data[rms_col].min()
    maxint = data[delta_col].max()
    minint = data[delta_col].min()

    plt.figure(figsize=[16,9])
    plt.xlim(xmin = minrmsd, xmax = maxrmsd)
    plt.ylim(ymin = minint, ymax = maxint)
    for xs, ys, colour in ((lo_x, lo_y, 'Blue'), (hi_x, hi_y, 'Red')):
        plt.scatter(xs, ys, s=4, c=colour, marker='o')
    plt.tick_params(axis='both',direction='inout',width=1,length=6,labelsize=13,pad=4)
    plt.title('interface_delta_x vs ligand_rms_no_super_X', size=16)
    plt.xlabel("ligand_rms_no_super_X", fontsize=13)
    plt.ylabel("interface_delta_X", fontsize=13)
    plt.legend(['total_score <= average', 'total_score > average'], markerscale=5, fontsize=12)

    # hands over to printfile() before the plot is displayed
    printfile()
    plt.show()
def Exchange_rates(Base, Destination):
    """Fetch per-market prices for a currency pair, report the spread, plot them.

    Queries the cryptonator ``full`` ticker for ``Base``-``Destination``,
    prints the cheapest and the most expensive market plus their price
    difference, and shows a scatter of the markets annotated with prices.
    """
    data = requests.get(
        'https://api.cryptonator.com/api/full/{}-{}'.format(Base, Destination))
    Data = data.json()['ticker']['markets']

    market = [entry['market'] for entry in Data]
    # Convert once: the original compared the raw values, and its
    # ``min(price) + " \tat "`` concatenation implies they are strings,
    # for which min()/max() are lexicographic ("9.5" > "10.2").
    price = [float(entry['price']) for entry in Data]
    if not price:
        print("No markets returned for", Base, Destination)
        return

    lowest = min(price)
    highest = max(price)
    Difference = highest - lowest
    # The labels were swapped before (the maximum was printed as "Minimum").
    print("Minimum Price is \t", lowest, " \tat ", market[price.index(lowest)],
          " \nMaximum Price is \t", highest, " \tat ",
          market[price.index(highest)],
          "\ndifference \t is\t", Difference, Destination)

    # one x position per market (previously a 2-tuple, which neither matched
    # len(market) nor could be indexed for every price below)
    numbers = list(range(len(market)))
    plt.scatter(numbers, market, color='red')
    for i, value in enumerate(price):
        plt.annotate(str(value), (numbers[i], market[i]))
    plt.title('ARBITRAGE')
    plt.ylabel('#PRICE')
    plt.show()
def volumeScatter(df):
    """Show a scatter plot of the ``DB`` column against the ``Stress`` column.

    Illustrates the range of music volumes at different stress levels.
    """
    stress = df["Stress"]
    volume = df["DB"]

    plt.figure()
    plt.scatter(stress, volume)
    plt.xlabel('Stress levels')
    plt.ylabel('Music volume')
    plt.show()
def predict_prices(dates, prices, x):
    """Fit linear, polynomial and RBF SVR models to prices and plot all three.

    ``dates`` is reshaped to a single-column 2-D array before fitting.
    Returns the first prediction of each model for ``x`` as a tuple in the
    order ``(rbf, linear, polynomial)``.
    """
    dates = np.reshape(dates, (len(dates), 1))

    linear = SVR(kernel='linear', C=1e3)
    poly = SVR(kernel='poly', C=1e3, degree=2)
    rbf = SVR(kernel='rbf', C=1e3, gamma=0.1)
    for regressor in (linear, poly, rbf):
        regressor.fit(dates, prices)

    plt.scatter(dates, prices, color='black', label='Data')
    curves = (
        (rbf, 'red', 'RBF model'),
        (linear, 'green', 'Linear model'),
        (poly, 'blue', 'Polynomial model'),
    )
    for regressor, colour, caption in curves:
        plt.plot(dates, regressor.predict(dates), color=colour, label=caption)
    plt.xlabel('Date')
    plt.ylabel('Price')
    plt.title('Support Vector Regression')
    plt.legend()
    plt.show()

    return tuple(regressor.predict(x)[0] for regressor, _, _ in curves)
def scatter_plot(P, L, pcIdx1, pcIdx2, letterList, rev):
    """Scatter two principal components per letter, save the figure and show it.

    Column ``pcIdx2`` of ``P`` is drawn on the x axis and column ``pcIdx1``
    on the (inverted) y axis; rows are selected per letter with
    ``L == letter``.  The PNG is written into the module-level ``pDir``.
    """
    fig = plt.figure()
    # following the convention in lecture note ScatterPlot.html
    colors = ["r", "lime", "b", "y", "c", "m", "k", "tan", "pink", "darkred"]
    for i, letter in enumerate(letterList):
        rows = L == letter
        plt.scatter(P[rows, pcIdx2],
                    P[rows, pcIdx1],
                    s=0.1,
                    c=colors[i],
                    label=letter)
    # gca() returns the axes that already hold the scatter; plt.axes() adds
    # a brand-new Axes on current matplotlib and would cover the data.
    plt.gca().set_aspect('equal')
    plt.xlabel("Principle Component {}".format(pcIdx2))
    plt.ylabel("Principle Component {}".format(pcIdx1))
    plt.axhline(0, color='grey')
    plt.axvline(0, color='grey')
    plt.ylim([-5000, 5000])
    plt.xlim([-5000, 5000])
    plt.legend()
    plt.gca().invert_yaxis()
    fig.set_size_inches(8, 8)
    fName = os.path.join(
        pDir, 'scatter_PC{}_PC{}_{}_{}.png'.format(pcIdx1, pcIdx2,
                                                   "".join(letterList), rev))
    # save through the figure object instead of a bare pylab savefig()
    fig.savefig(fName, bbox_inches='tight')
    plt.show()
def show_graph(x_list, y_list, width, height):
    """Show a small-dot scatter plot on a figure of the given size.

    x_list, y_list = x- & y-coordinates to plot
    width, height = size of the figure, passed to ``figsize``
    """
    figure_size = [width, height]
    plt.figure(figsize=figure_size)
    plt.scatter(x_list, y_list, marker='.', s=5)
    plt.show()
def tsne_plot(embedding, expression_value, cmaps="PuRd"):
    """Scatter a 2-D embedding coloured by expression value, with a colorbar.

    The first two columns of ``embedding`` are the coordinates and
    ``expression_value`` supplies the per-point colour values.  ``cmaps``
    is the colormap to use; it was previously ignored in favour of a
    hard-coded 'PuRd', which is still the default.
    """
    plt.scatter(embedding[:, 0],
                embedding[:, 1],
                lw=0.1,
                c=expression_value,
                cmap=cmaps)
    plt.colorbar(label='expression value')
    plt.show()
def plot_line(x, y, y_hat, line_color='blue'):
    """Show the observations as black dots with the fitted line on top.

    ``y_hat`` is drawn against ``x`` in ``line_color``; axis ticks are
    removed on both axes.
    """
    no_ticks = ()

    plt.scatter(x, y, color='black')
    plt.plot(x, y_hat, color=line_color, linewidth=3)
    plt.xticks(no_ticks)
    plt.yticks(no_ticks)
    plt.show()
def plotPredictions(yActualTrain, yActualVal, yPredTrain, yPredVal):
    """Scatter predicted against actual values for train and validation data.

    The train pair goes into subplot 131 and the validation pair into
    subplot 132 of a 6x3 inch figure, which is then shown.
    """
    panels = (
        (131, yActualTrain, yPredTrain),
        (132, yActualVal, yPredVal),
    )
    plt.figure(figsize=(6, 3))
    for position, actual, predicted in panels:
        plt.subplot(position)
        plt.scatter(actual, predicted, s=1)
    plt.show()
def plot_data(X, y, theta=np.array([])):
    """
    Plot student admission data on a graph

    Column 0 of ``X`` is drawn on the x axis and the remaining column(s)
    on the y axis; rows with ``y == 1`` are marked as admitted and rows
    with ``y == 0`` as not admitted.  If ``theta`` is non-empty a decision
    boundary is added: a straight line when it has at most 3 entries,
    otherwise the zero contour of ``mapFeature(u, v) . theta`` over a
    fixed grid.
    """
    # Set y and x axis labels for scatter plot
    plt.ylabel('Exam 2 score')
    plt.xlabel('Exam 1 score')

    admitted = np.where(y == 1)[0]
    not_admitted = np.where(y == 0)[0]

    # Plot all admitted students
    plt.scatter(X[admitted, :1],
                X[admitted, 1:],
                marker='+',
                label='Admitted',
                c='black')
    # Plot all non-admitted students
    plt.scatter(X[not_admitted, :1],
                X[not_admitted, 1:],
                marker='o',
                label='Not admitted',
                c='yellow',
                edgecolors='black')

    # Show best fit line
    if theta.size != 0:
        if theta.size <= 3:
            # The x axis shows column 0 (see the scatter calls above), so the
            # line must span that column's range; column 1 was used before.
            x_coords = np.array([np.min(X[:, 0]), np.max(X[:, 0])])
            y_coords = (-1.0 / theta[2]) * (theta[0] + theta[1] * x_coords)
            plt.plot(x_coords, y_coords, 'b-', label='Decision boundary')
        else:
            # Here is the grid range
            u = np.linspace(-1, 1.5, 50)
            v = np.linspace(-1, 1.5, 50)
            z = np.zeros((u.size, v.size))
            # Evaluate z = theta*x over the grid
            for i, ui in enumerate(u):
                for j, vj in enumerate(v):
                    z[i, j] = np.dot(mapFeature(ui, vj), theta)
            z = z.T  # important to transpose z before calling contour
            # Plot z = 0 (same ``plt`` alias as the rest of the function)
            plt.contour(u, v, z, levels=[0], linewidths=2, colors='g')
            plt.contourf(u,
                         v,
                         z,
                         levels=[np.min(z), 0, np.max(z)],
                         cmap='Greens',
                         alpha=0.4)

    # Build the legend last so the 'Decision boundary' entry is included.
    plt.legend(loc='upper right', fontsize=8)
    plt.show()
def pca_plot(self):
    """Scatter the first two columns of ``self.Y``, one series per label.

    Rows are grouped by the distinct values of ``self.label`` and drawn
    inside the "seaborn-darkgrid" style context.
    """
    # The row mask must come from the instance's labels; the bare name ``y``
    # used before is not defined in this method.
    labels = np.asarray(self.label)
    with plt.style.context("seaborn-darkgrid"):
        for l in np.unique(labels):
            rows = labels == l
            plt.scatter(self.Y[rows, 0], self.Y[rows, 1], label=l)
        plt.xlabel("PC 1")
        plt.ylabel("PC 2")
        plt.legend()
        plt.show()
def scatter_chart(plt, col1, col2, Title="Scatter Plot"):
    """Scatter ``col2`` against ``col1`` and overlay the least-squares line.

    ``plt`` is the plotting module/object to draw on; ``col1`` and ``col2``
    must expose ``.name`` (used for the axis labels).  The ``linregress``
    result is printed.
    """
    results = linregress(col1, col2)
    # Function-call form prints the same text on Python 2 and 3.
    print(results)
    plt.scatter(col1, col2)
    # results[0] is the slope and results[1] the intercept
    plt.plot(col1, col1 * results[0] + results[1])
    plt.ylabel(col2.name)
    plt.xlabel(col1.name)
    plt.title(Title)
def makePlot(muDistr, AiArray, Ai, num):
    """Scatter ``AiArray`` against ``muDistr`` when ``num`` equals 17.

    ``Ai`` is accepted but not used in the body.

    NOTE(review): the source was flattened, so the extent of the ``if``
    body below is reconstructed; it is assumed that the labels and
    ``plt.show()`` belong to it as well -- confirm against the original.
    """
    #plt.scatter(muDistr,AiArray)
    # Only the call with num == 17 produces a plot.
    if (num == 17):
        plt.scatter(muDistr, AiArray)
        title = "P(alpha|x,I) "
        plt.ylabel("Normalized Probability")
        plt.xlabel('x')
        plt.title(title)
        plt.show()
def plotPredictions(clf):
    """Draw ``clf``'s predictions over a fixed grid with the samples on top.

    The grid spans 0..250000 (step 10) by 10..70 (step 0.5).  The samples
    come from the module-level ``X`` and ``y``.
    """
    xx, yy = np.meshgrid(np.arange(0, 250000, 10), np.arange(10, 70, 0.5))
    Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.figure(figsize=(8, 6))
    plt.contourf(xx, yy, Z, cmap=plt.cm.Paired, alpha=0.8)
    # ``np.float`` was only an alias of the builtin and has been removed
    # from NumPy; the builtin ``float`` is equivalent.
    plt.scatter(X[:, 0], X[:, 1], c=y.astype(float))
    plt.show()
def main():
    """Filter a sorted_models file by two maxima and plot the surviving models.

    The file name is taken from ``sys.argv[1]`` or prompted for.  The user
    is then asked for the maximum ``total_score`` and ``interface_delta_X``
    (stored in the globals ``maxtot`` / ``maxinter``).  Rows within both
    limits are split by whether their score sum is above the average sum,
    plotted in two colours, and the row with the lowest sum is annotated.
    """
    global maxtot
    global maxinter

    #Before anything happens, number of command-line arguments is checked and appropriate action taken.
    argumnumber()
    if (len(sys.argv) < 2):
        path = raw_input("Please provide a sorted_models file: ")
    else:
        path = sys.argv[1]

    #Prompts user to provide maxtot and maxinter before program continues.
    maxtot = float(raw_input("Please enter maximum total_score: "))
    maxinter = float(raw_input("Please enter maximum interface_delta_X: "))

    #Imports DataFrame and filters based on max values provided.
    models_raw = pd.read_csv(
        path, sep=' ', names=['model', 'total_score', 'interface_delta_X'])
    # .copy() makes the filtered frame independent, so adding a column below
    # is not a chained assignment on a slice of models_raw.
    models = models_raw.loc[(models_raw['total_score'] <= maxtot) &
                            (models_raw['interface_delta_X'] <= maxinter)].copy()
    sumavrg = (
        (np.sum(models['total_score']) + np.sum(models['interface_delta_X']))
        / (len(models['total_score'])))

    #Appends a column to models that contains the sum of total_score and interface_delta_X
    models['add'] = models['total_score'] + models['interface_delta_X']

    #Finds values with lowest sum
    modhi = models.loc[models['add'] > sumavrg]
    modlo = models.loc[models['add'] <= sumavrg]
    # idxmin() returns an index *label*; after filtering, labels no longer
    # match row positions, so look the row up with .loc rather than .iloc.
    minidx = models['add'].idxmin()
    xmin = models.loc[minidx, 'total_score']
    ymin = models.loc[minidx, 'interface_delta_X']

    #Creates the plot.
    x1 = modlo['total_score']
    y1 = modlo['interface_delta_X']
    x2 = modhi['total_score']
    y2 = modhi['interface_delta_X']
    plt.figure(figsize=[16, 9])
    plt.scatter(x1, y1, s=2, c='Green', marker='.')
    plt.scatter(x2, y2, s=2, c='Red', marker='.')
    plt.tick_params(axis='both',
                    direction='inout',
                    width=1,
                    length=6,
                    labelsize=13,
                    pad=4)
    plt.title('interface_delta_x vs total_score', size=16)
    plt.xlabel("total_score", fontsize=13)
    plt.ylabel("interface_delta_X", fontsize=13)
    plt.legend(['Sum <= average', 'Sum > average'],
               markerscale=7,
               fontsize=12)
    # The text goes first positionally: the ``s=`` keyword was renamed in
    # newer matplotlib, while the positional form works on old and new.
    plt.annotate("Lowest sum: total_score: " + str(xmin) +
                 "; interface_delta_X: " + str(ymin),
                 xy=(xmin, ymin),
                 textcoords='axes fraction',
                 xytext=(0.6, 0.05))
    printtofile()
    plt.show()
def mandlebrot(x, y):
    """Plot an ``x`` by ``y`` escape-time picture of the Mandelbrot set.

    Grid cell ``(xval, yval)`` is mapped onto the window
    [-2, 1] x [-1.5, 1.5] of the complex plane (a choice made here; the
    original used the function arguments themselves as the constant).
    Each cell is drawn blue with an opacity proportional to the number of
    iterations (at most 20) before ``|z|`` exceeds 2.
    """
    max_iter = 20
    xs, ys, colors = [], [], []
    for xval in range(x):
        for yval in range(y):
            # Use the loop variables: the previous code fed the arguments
            # x and y into the iteration, so every cell was identical.
            c = complex(-2.0 + 3.0 * xval / max(x - 1, 1),
                        -1.5 + 3.0 * yval / max(y - 1, 1))
            val = 0
            iteration = 0
            # Standard escape test, replacing ``val in range(20)``.
            while abs(val) <= 2 and iteration < max_iter:
                val = val * val + c
                iteration = iteration + 1
            xs.append(xval)
            ys.append(yval)
            # RGBA components must lie in [0, 1]
            colors.append((0.0, 0.0, 1.0, iteration / float(max_iter)))
    if xs:
        plt.scatter(xs, ys, color=colors)
    # show was referenced but never called before
    plt.show()
def feature_summary(x_col, y_col, show_r2=False):
    """Print and plot a summary of one feature against a target column.

    Prints the quantiles of ``x_col``, draws its histogram and, when the
    two columns have different names, a scatter of target vs. feature with
    a fitted regression line, followed by a rolling-mean timeline of the
    feature.

    NOTE(review): the source was flattened; it is assumed that the
    feature-importance print belongs to the ``show_r2`` branch -- confirm.

    :return: a single-space string
    """
    # Preparation
    x_name = x_col.name
    y_name = y_col.name
    df = pd.concat([x_col, y_col], axis=1).sort_index()
    plt.rcParams["figure.figsize"] = (10, 7)
    breaks(1)
    print("%s" % x_name)
    print('Quantile:\n', x_col.quantile([0.0, 0.1, 0.25, 0.5, 0.75, 1.0]))

    # Histogram
    plt.subplot(221)
    try:
        plt.hist(x_col, bins=30)
        plt.xlabel(x_name)
        plt.title('Histogram (CF GHP): %s' % x_name)
    except ValueError:
        print("No histogram for %s available" % x_name)

    # Correlation
    if y_name != x_name:
        df = df.sort_values(x_name)
        # ``axis`` must be passed by keyword: the positional form was removed
        # in pandas 2.0, while the keyword form works on every version.
        x = df.drop(y_name, axis=1)
        # NOTE(review): ``normalize=`` no longer exists in recent
        # scikit-learn releases; left unchanged here -- confirm the version.
        reg = linear_model.LinearRegression(normalize=True)
        reg.fit(x, df[y_name])

        # Plot
        plt.subplot(222)
        plt.scatter(df[x_name], df[y_name])
        plt.plot(df[x_name], reg.predict(x), color='g')
        plt.xlabel(x_name)
        plt.xlim([df[x_name].min(), df[x_name].max()])
        plt.title('x:%s / y:%s ' % (x_name, y_name))
        plt.ylabel("Target function: %s" % y_name)
        if show_r2:
            print("R²:", r2_score(df[y_name], reg.predict(x)))
            print(feature_importance(x, reg.coef_))

    # Show plots
    plt.show()

    # Timeline: 10-sample rolling mean over a fixed x window
    x_col.rolling(window=10, center=False).mean().plot(
        title='%s: Timeline' % x_name, figsize=(10, 2), xlim=(170000, 175000))
    plt.show()
    plt.close('all')
    return " "
def plot_various_trial_analyses(self, neuron_ind, var_level):
    """Plot trial-level views of one neuron's responses at one variation level.

    Produces four figures: the trial-averaged response across objects with
    alternating category shading, the raw responses of trials 0 and 5, a
    scatter of those two trials against each other (with their Pearson
    correlation in the title), and the matrix of between-trial correlations.
    """
    plt.figure(figsize=(16, 5))
    # the first thing we want to do is just plot the data average,
    # so first get the data for all trials
    neuron_i_data_by_trial = self.by_trial_IT_Neural_Data_objmeans_sorted_by_category[
        var_level][:, :, neuron_ind]
    # now take the mean over the second dimension -- the trial dimension
    neuron_i_data_trial_mean = neuron_i_data_by_trial.mean(1)
    # for convenience, compute the min and max values of the neural response
    minval = neuron_i_data_trial_mean.min()
    maxval = neuron_i_data_trial_mean.max()

    # plot the responses across objects
    plt.plot(neuron_i_data_trial_mean)
    # Shade every other block of 8 objects to make the categories easier to
    # see.  Floor division is required: with ``/`` Python 3 yields fractional
    # values whose remainder is almost never 0, so nearly everything is shaded.
    shaded = ((np.arange(64) // 8) % 2).astype(bool)
    plt.fill_between(np.arange(64),
                     minval,
                     maxval,
                     where=shaded,
                     color='k',
                     alpha=0.2)
    plt.xticks(np.arange(0, 64, 8) + 4, self.unique_categories, rotation=30)
    plt.ylabel('Neural Response of neuron %d' % neuron_ind)
    plt.ylim(minval, maxval)
    plt.xlabel('Responses for Variation %s images' % var_level)

    # now look at two trials -- the first and 6th ones, for example
    t1 = 0
    t2 = 5
    t1_data = neuron_i_data_by_trial[:, t1]
    t2_data = neuron_i_data_by_trial[:, t2]
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(t1_data)
    plt.xticks(np.arange(0, 64, 8), self.unique_categories, rotation=30)
    plt.title('Neuron %d, trial %d, var %s' % (neuron_ind, t1, var_level))
    plt.subplot(1, 2, 2)
    plt.plot(t2_data)
    plt.xticks(np.arange(0, 64, 8), self.unique_categories, rotation=30)
    plt.title('Neuron %d, trial %d, var %s' % (neuron_ind, t2, var_level))

    # scatter plot of the responses to one trial vs the other
    plt.figure()
    plt.scatter(t1_data, t2_data)
    plt.xlabel('responses of neuron %d, trial %d, %s' %
               (neuron_ind, t1, var_level))
    plt.ylabel('responses of neuron %d, trial %d, %s' %
               (neuron_ind, t2, var_level))
    # how correlated are they exactly between trials? use pearson correlation
    rval = stats.pearsonr(t1_data, t2_data)[0]
    plt.title('Correlation for varlevel %s images = %.3f' % (var_level, rval))

    # correlation for all pairs of trials
    fig = plt.figure(figsize=(7, 7))
    # np.corrcoef computes the pairwise pearson correlations in one call
    corrs = np.corrcoef(neuron_i_data_by_trial.T)
    # plot the matrix of correlations using matshow
    plt.colorbar(fig.gca().matshow(corrs))
    plt.xlabel('trials of neuron %d' % neuron_ind)
    plt.ylabel('trials of neuron %d' % neuron_ind)
    plt.title('Between-trial correlations for varlevel %s' % var_level)
def plot_regression_line(x, y, b):
    """Show the data points with the line ``b[0] + b[1] * x`` drawn through them.

    Points are magenta circles of size 30; the line is green.
    """
    # the observations
    plt.scatter(x, y, color="m", marker="o", s=30)

    # the fitted line: b holds (intercept, slope)
    intercept, slope = b[0], b[1]
    fitted = intercept + slope * x
    plt.plot(x, fitted, color="g")

    plt.xlabel('x')
    plt.ylabel('y')
    plt.show()
def plot(x, y, **kwargs):
    """Scatter 2-D samples coloured by label (can only do 2D plot right now).

    ``x`` must have two columns (asserted).  Colours are derived from the
    labels as ``(y + 2) / 5``.  Recognised keyword arguments: ``title``
    (figure super-title) and ``accuracy`` (a fraction, shown as a
    percentage in the axes title).
    """
    assert (x.shape[-1] == 2)
    color = (y + 2) / 5

    plt.figure()
    plt.scatter(x[:, 0], x[:, 1], c=color)
    if 'title' in kwargs:
        plt.suptitle(kwargs['title'])
    # the unused local copy of kwargs['accuracy'] was dropped; the value is
    # read directly where it is formatted
    if 'accuracy' in kwargs:
        plt.title("Accuracy: %.1f%%" % (kwargs['accuracy'] * 100),
                  fontsize=10)
    plt.show()
def plot_points(self):
    """Scatter ``self.points`` inside a fixed window and show the figure.

    For every point, item 0 is the x coordinate, item 1 the y coordinate
    and item 3 the colour value.  The window is x in [0, 15] and
    y in [-15, 15].
    """
    points = self.points
    xs = list(map(lambda pt: pt[0], points))
    ys = list(map(lambda pt: pt[1], points))
    shades = list(map(lambda pt: pt[3], points))

    plt.figure()
    plt.scatter(xs, ys, c=shades)
    plt.ylim(-15, 15)
    plt.xlim(0, 15)
    plt.show()
def scatterPlot(self):
    """Show ``self.tdata`` as a scatter with ``self.modelFunction`` drawn over it.

    The axes span the data range of the first two columns, padded by 1 on
    every side; the model is evaluated on ``np.linspace`` over the x range.
    """
    xs = self.tdata[:, 0]
    ys = self.tdata[:, 1]
    # [x_lo, x_hi, y_lo, y_hi] in the order plt.axis() expects
    bounds = [min(xs) - 1, max(xs) + 1, min(ys) - 1, max(ys) + 1]
    x_lo, x_hi = bounds[0], bounds[1]
    grid = np.linspace(x_lo, x_hi)

    plt.scatter(self.tdata[:, 0], self.tdata[:, 1])
    plt.plot(grid, self.modelFunction(grid))
    plt.axis(bounds)
    plt.show()
def print_scatter_data():
    """Dump fitness vs. self-reliance data and save two scatter plots.

    For every agent in ``Par.Agents`` the self-reliance is the absolute
    value of the sum of ``production[i] * needs[i]`` over
    ``Par.num_resources`` resources.  Agents whose ``dead`` flag is not
    True contribute (self-reliance, fitness) pairs, which are written to
    ``<Par.dirname>/Scatter.dat`` and plotted to ``FitnessVSR.png``; the
    others contribute (self-reliance, lifetime) pairs plotted to
    ``LifeTimeVSR.png``.
    """
    import matplotlib.pylab as plt

    def _self_reliance(agent):
        # |sum_i production_i * needs_i| -- previously duplicated per branch
        needs = agent.needs
        production = agent.production
        return abs(
            sum(production[i] * needs[i] for i in range(Par.num_resources)))

    filename = Par.dirname + ('/Scatter.dat')
    fitnesses = []
    self_reliences = []
    life_times = []
    self_reliences_dead = []
    for Agent in Par.Agents:
        if Agent.dead != True:
            fitnesses.append(Agent.fitness)
            self_reliences.append(_self_reliance(Agent))
        else:
            life_times.append(Agent.t_death - Agent.t_discovery)
            self_reliences_dead.append(_self_reliance(Agent))

    # ``with`` closes the file even if a write fails
    with open(filename, 'w') as out:
        for fitness, reliance in zip(fitnesses, self_reliences):
            out.write(str(fitness) + ' ' + str(reliance))
            out.write('\n')

    plt.scatter(self_reliences, fitnesses)
    plt.ylabel('Fitness')
    plt.xlabel('self_reliences')
    plt.savefig('FitnessVSR.png')
    plt.close()

    plt.scatter(self_reliences_dead, life_times)
    plt.ylabel('LifeTimes')
    plt.xlabel('self_reliences')
    plt.savefig('LifeTimeVSR.png')
    plt.close()
# pyplot (not the top-level matplotlib package) provides scatter/hist/imshow.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm

# Scatter and histograms of the first two columns of Voters.csv.
# ``.values`` replaces ``.as_matrix()``, which was removed from pandas.
dat = pd.read_csv('Voters.csv').values
x = dat[:, 0]
y = dat[:, 1]
plt.scatter(x, y)
plt.show()
plt.hist(x)
plt.hist(y, bins=15)

#images
# Row 0 of test.csv, without its first column, is reshaped to 28x28.
train = pd.read_csv('test.csv')
M = train.values
im = M[0, 1:]
im = im.reshape(28, 28)
plt.imshow(im)
plt.show()
plt.imshow(im, cmap="gray")

# Normal-distribution density / cumulative values at a few points.
norm.pdf(0)
norm.pdf(0, loc=5, scale=10)
r = np.random.randn(10)
norm.pdf(r)
norm.cdf(r)
r = 10 * np.random.randn(10000) + 5
df = pd.DataFrame(ground_cricket_data)

# instantiate LinearRegression class
regr = linear_model.LinearRegression()

# define variables
x = df['Ground Temperature']
# pandas.Series.to_frame() returns a data frame
x = x.to_frame()
y = df['Chirps/Second']

# fit the object to the data
regr.fit(x, y)

# plot using equation
plt.scatter(x, y)
# use attributes to plot linear regression equation
# y = β0 + β1x where β0 is intercept and β1 is coefficient
plt.plot(x, (regr.intercept_ + (regr.coef_ * x)))

# plot using max/min as data for prediction
# create a new data frame of min and max feature values; the column must
# carry the same name the model was fitted with ('Ground Temperature'),
# otherwise recent scikit-learn rejects the mismatching feature name
df_new = pd.DataFrame({
    'Ground Temperature':
    [df['Ground Temperature'].min(), df['Ground Temperature'].max()]
})
plt.scatter(x, y)
# plot the new frame against the prediction
plt.plot(df_new, regr.predict(df_new))

# calculate the r-squared score (or coefficient of determination)
regr.score(x, y)
###Note, this function will auto change, it acts global. seqDB, data_genes, data_isoforms = qcf.bioReplicateSelfCorrelate(seqDB,data_genes, data_isoforms, excludeSelf=1) ###Exclude self avoids self comparison # pairs = np.zeros([len(seqDB),2]) #xToPlot= 'pearsonCorrToMTT' #yToPlot = 'pearsonCorrToMeanReplicate' xToPlot= 'spearmanCorrToMeanReplicate' yToPlot = 'pearsonCorrToMeanReplicate' for rowId in range(len(seqDB)): pairs[rowId,0] = seqDB.loc[rowId,xToPlot] #seqDB['num_cells'] ###X AXIS pairs[rowId,1] = seqDB.loc[rowId,yToPlot] ####Y AXIS plt.scatter(pairs[:,0],pairs[:,1]) plt.xlabel(xToPlot) plt.ylabel(yToPlot) axes = plt.gca() axes.set_xlim([0.8,1.05]) axes.set_ylim([0.8,1.05]) fig = plt.gcf() fig.set_size_inches(15,10) # In[12]: seqDB # In[21]:
# In[67]: #4 categoricalFreq_rel=categoricalFreq.div(categoricalFreq.sum(1).astype(float)) categoricalFreq_rel categoricalFreq_rel.plot(kind='barh', stacked=True) title('MPG by Efficiency(stacked)') savefig('stacked', dpi=400, bbox_inches='tight') # In[64]: #5 plt.scatter(carData['barrels08'], carData['highway08']) plt.title('Barrel Consumption vs Highway MPG') # In[2]: #Part 2 import scipy as sp import sklearn as sk # In[3]: #2 medicalData=pd.read_table('Medical.csv', header=False, sep=',') medicalData
import numpy as np
# pyplot (not the top-level matplotlib package) provides scatter()/show().
import matplotlib.pyplot as plt
import sys
import string
import os

n_archivos = len(sys.argv)
# sys.argv[0] is the script itself, so the data files start at index 1.
# (The original loop also lacked its colon and reused ``i`` in the inner loop.)
for ruta in sys.argv[1:]:
    # ndmin=2 keeps a single-row file two-dimensional so column indexing works
    datos = np.loadtxt(ruta, ndmin=2)
    # one call per file: first column on x, second column on y
    plt.scatter(datos[:, 0], datos[:, 1])
plt.show()
# Derive helper columns from the "lastupdated" timestamp strings:
# characters 11-12 and 14-15 are concatenated into "time" (presumably HHMM
# -- verify the timestamp format).
df["time"] = [t[11:13] + t[14:16] for t in df["lastupdated"]]
df["day"] = [date2int(t) for t in df["lastupdated"]] # day 0 is May 17, 2014
df["dayofwk"] = [(t+6)%7 for t in df["day"]] # 0 indexed Sunday
df.head()

# <codecell>
plt.figure(figsize=(10,15))
im = plt.imread('chicago.png')
implot = plt.imshow(im)
# Scale the coordinates into image pixels; 477 and 798 are presumably the
# width and height of chicago.png -- TODO confirm.
x = (df['west'] - df['west'].min())*477/(df['east'].max() - df['west'].min())
y = 798-(df['north'] - df['south'].min())*798/(df['north'].max() - df['south'].min())
# Colour value: speed relative to the maximum speed in the frame.
s = df['currentspeed'] / df['currentspeed'].max()
plt.scatter(x,y,c=s,linewidth=0,s=1000,alpha=0.1)
#x0 = (df.ix[0]['west'] - df['west'].min())*477/(df['east'].max() - df['west'].min())
#y0 = 798-(df.ix[0]['north'] - df['south'].min())*798/(df['north'].max() - df['south'].min())
#plt.scatter(x0,y0,c='r',s=2000)
#x0 = (df.ix[0]['east'] - df['west'].min())*477/(df['east'].max() - df['west'].min())
#y0 = 798-(df.ix[0]['south'] - df['south'].min())*798/(df['north'].max() - df['south'].min())
#plt.scatter(x0,y0,c='r',s=2000)
plt.xlim(0,477)
# y limits are given high-to-low, so the y axis runs top-down like the image
plt.ylim(798,0)
plt.xticks([])
plt.yticks([])
#plt.plot([df['west'],df['west'],df['east'],df['east'],df['west']],[df['south'],df['north'],df['north'],df['south'],df['south']],linewidth=20,alpha=0.2)

# <codecell>
plt.show()

# Dendrogram
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage, dendrogram, fcluster, fclusterdata

distanceMatrix = pdist(data)
# The complete-linkage tree feeds both the dendrogram and the flat cluster
# assignment, so it is computed once instead of twice.
completeLinkage = linkage(distanceMatrix, method='complete')
dend = dendrogram(completeLinkage,
                  color_threshold=2,
                  leaf_font_size=10,
                  labels=df.yearID.tolist())
# cut the tree at distance 2 to obtain one cluster id per row
assignments = fcluster(completeLinkage, 2, 'distance')
cluster_output = pandas.DataFrame({'team': df.yearID.tolist(),
                                   'cluster': assignments})
cluster_output

plt.scatter(df.total_salaries, df.total_runs, s=60, c=cluster_output.cluster)

# Got the following error when I tried to improve the plot:
# AttributeError: 'int' object has no attribute 'view'
#colors = cluster_output.cluster
#colors[colors == 1] = 'b'
#colors[colors == 2] = 'g'
#colors[colors == 3] = 'r'
#
#plt.scatter(df.total_salaries, df.total_runs, s=100, c=colors, lw=0)

############################################################################
# Principal component analysis
def plotDistribution(dist):
    """Draw ``dist`` as a grid of square markers shaded with the 'Oranges' map.

    For every index label ``k`` one column of squares is drawn at x = ``k``,
    with the frame's column labels on the y axis.  The colour value of each
    square is the corresponding cell multiplied by 100, normalised to the
    range 0..1.
    """
    rows = dist.columns
    for k in dist.index:
        alpha = np.array([dist[x][k] for x in dist]) * 100
        # every square of this column shares the same x position
        x = [k] * len(alpha)
        # The Normalize instance already fixes the 0..1 range; matplotlib
        # rejects ``norm`` combined with explicit vmin/vmax, so those
        # duplicate arguments were dropped.
        plt.scatter(x,
                    rows,
                    c=alpha,
                    marker='s',
                    linewidths=0,
                    cmap='Oranges',
                    norm=pltcolors.Normalize(vmin=0, vmax=1),
                    edgecolors=None)
pd.set_option('display.max_rows', 3000)
pd.set_option('display.width', 100000)

df = pd.read_csv('../input/2013_NCAA_Game.csv')
# scatter_matrix lives in pandas.plotting; the top-level pd.scatter_matrix
# alias was removed from pandas.
pd.plotting.scatter_matrix(df)
pd.plotting.scatter_matrix(df, diagonal='kde')
hist(df['Team Avg Scoring Margin'])
plt.scatter(df['Team Score'], df['Team Margin'])

# player data: drop exact duplicate rows, then average per team
pf = pd.read_csv('../input/clean_player_data.csv')
pf = pf.drop_duplicates()
tt = pf.groupby('Team').mean()
import sys
import numpy as np
# pyplot (not the top-level matplotlib package) provides the plotting calls.
import matplotlib.pyplot as plt
import math

a = sys.argv[1]
data = np.loadtxt(a)
x = data[:, 3]
y = data[:, 4]

# The plotting expression used five coefficients (powers 4..0), so a
# degree-4 polynomial is fitted; polyfit is a NumPy function, not a
# matplotlib one.
figura = np.polyfit(x, y, 4)

plt.scatter(x, y)
# Evaluate the fit at the x values (sorted so the curve is drawn left to
# right).  The previous expression used y as the variable, referenced the
# undefined names ``i`` and ``fit`` and separated terms with commas.
orden = np.argsort(x)
plt.plot(x[orden], np.polyval(figura, x[orden]))
# keyword is ``fontsize`` (was ``frontsize``); function is ``title``
plt.xlabel("Pasos (x)", fontsize=20)
plt.ylabel("Distancia (y)", fontsize=20)
plt.title("Numero de Pasos VS Distancia con la Regresion", fontsize=15)
plt.savefig('ajuste.png')
initialCon.append(-0.01 / 300.0 * x) # Ensuring boundary conditions #initialCon[0] = 0 #initialCon[-1] = 0 yPrev = initialCon yCurrent = initialCon sample = [] for n in range(0,10): for i in range(0,len(x)): if i !=0 and i != 649: yNew = ((2 - 2*r**2 - 6 * eps * r**2 * N**2)*yCurrent[i] - yPrev[i] + r**2*(1 + 4*eps*N**2)*(yCurrent[i+1] + yCurrent[i-1]) - eps*r**2*N**2 *(yCurrent[i+2] + yCurrent[i-2])) yPrev = copy(yCurrent) yCurrent = copy(yNew) plt.scatter(yNew) plt.draw() plt.pause() plt.clf()
#%% USPS version
# Project X onto 5 principal components and append the labels as column 5.
pca = PCA(n_components=5)
X_trans = pca.fit_transform(X)
data = np.hstack((X_trans, np.matrix(y).T))
#%%
# Shuffle the rows in place and keep the first 500 as the plotting sample.
np.random.shuffle(data)
sample = data[:500,:]
#%%
pl.figure()
# One figure per ordered pair of components, coloured by the label column.
for i in range(5):
    for j in range(5):
        try:
            #pl.subplot(5,5,((j)*5)+i)
            pl.figure()
            # positional arguments: marker size 20, colour values = labels
            pl.scatter(sample[:,i].A1, sample[:,j].A1, 20, sample[:,5].A1)
            pl.show()
        except IndexError:
            # formatted call prints "i j" on both Python 2 and Python 3
            print("{} {}".format(i, j))
# Histogram of release years, one bin per year from 1950 to 2012.
plt.hist(data.year, bins=np.arange(1950, 2013), color='#cccccc')
plt.xlabel("Release Year")
remove_border()
# Received the following messages:
# Traceback (most recent call last):
#   File "<stdin>", line 1, in <module>
# AttributeError: 'module' object has no attribute 'hist'
# AND
# AttributeError: 'module' object has no attribute 'xlabel'
# NOTE(review): the import of ``plt`` is outside this excerpt; errors like
# these typically mean ``plt`` is bound to the top-level ``matplotlib``
# package instead of ``matplotlib.pyplot`` -- confirm.

# Histogram of the ratings.
plt.hist(data.score, bins=20, color='#cccccc')
plt.xlabel("IMDB rating")
remove_border()
# Again, AttributeError messages are raised. Is there an issue with
# matplotlib that prevents the histogram from being produced?

# Rating against release year; very transparent markers show the density.
plt.scatter(data.year, data.score, lw=0, alpha=.08, color='k')
plt.xlabel("Year")
plt.ylabel("IMDB Rating")
remove_border()
# Again, an AttributeError message is raised.

# Heavily voted but poorly rated titles, then the lowest- and highest-rated ones.
data[(data.votes > 9e4) & (data.score < 5)][['title', 'year', 'score', 'votes', 'genres']]
data[data.score == data.score.min()][['title', 'year', 'score', 'votes', 'genres']]
data[data.score == data.score.max()][['title', 'year', 'score', 'votes', 'genres']]

# Per-genre totals sorted in descending order.
genre_count = np.sort(data[genres].sum())[::-1]
pd.DataFrame({'Genre Count': genre_count})
# The genres were not listed alongside the counts: np.sort returns a plain
# array, so the genre labels of the summed Series are lost.
# Run princomp in R through the ``r`` bridge object and pull the scores back.
# print() with a single argument behaves the same on Python 2 and 3.
print(r("summary(rdata)"))
r("?princomp")
r("p = princomp(rdata)")
print(r("names(p)"))
# the closing parenthesis of head(...) was missing inside the R expression
print(r("head(p$scores, n=6)"))

# NOTE(review): five column names are supplied; confirm that p$scores
# really has five columns for this data set.
irisPd = pd.DataFrame(r.get("p$scores"),
                      columns=['pc1', 'pc2', 'pc3', 'pc4', 'pc5'])
irisPd.head()

# scatter() does not return an object with title()/xlabel()/ylabel(), so
# those are separate calls; the frame is ``irisPd`` and its columns are
# named pc1/pc2 (the names irisPY/Comp1/Comp2 were never defined).
mat.scatter(irisPd.pc1, irisPd.pc2)
mat.title('Iris')
mat.xlabel('Primary Component 1')
mat.ylabel('Primary Component 2')
mat.show()

colors = ['red', 'green', 'blue']
labels = ['Setosa', 'Virginica', 'Versicolor']
fig = mat.figure()
# add_subplot is a Figure method, not a pyplot function
ax = fig.add_subplot(1, 1, 1)
ax.set_xlabel('Primary Component 1')
ax.set_ylabel('Primary Component 2')
ax.set_title('Species Data')
# Group the points by their assigned centre and plot every cluster.
# NOTE(review): the source was flattened; the nesting below is reconstructed
# -- confirm against the original.
tama = []   # running end offsets of each cluster inside the flat arrays
tam = 0
clustersx = np.empty((0))
clustersy = np.empty((0))
for i in range(n_centros):
    # collect the coordinates of every point assigned to centre i
    arr_x = np.empty((0))
    arr_y = np.empty((0))
    for j in range(n_puntos):
        if (int(minimos[j])==i):
            arr_x = np.append(arr_x,posx[j])
            arr_y = np.append(arr_y,posy[j])
    tam += np.size(arr_x)
    tama.append(tam)
    clustersx = np.append(clustersx,arr_x)
    clustersy = np.append(clustersy,arr_y)
# Split the flat arrays back into per-cluster pieces at the recorded
# offsets; the last offset equals the total length, so np.split also
# returns one trailing empty piece.
clustersx = np.split(clustersx,tama)
clustersy = np.split(clustersy,tama)
for i in range(n_centros):
    plt.xlabel("Puntos en x")
    plt.ylabel("Puntos en y")
    plt.title("K-means clustering")
    # cluster members, then the centre itself as a larger marker
    plt.scatter(clustersx[i],clustersy[i],c=colores[i])
    plt.scatter(centrosx[i],centrosy[i],c=colores[i],s=110)
plt.show()
import numpy as np
# pyplot (not the top-level matplotlib package) provides scatter().
import matplotlib.pyplot as pl

# 1024 points drawn from a standard normal distribution in x and y
n = 1024
X = np.random.normal(0, 1, n)
Y = np.random.normal(0, 1, n)

pl.scatter(X, Y)
# without show() a script run never displays the figure
pl.show()
# pyplot (not the top-level matplotlib package) provides title/scatter/show/cm.
import matplotlib.pyplot as plt
import numpy as np
from sklearn import datasets, linear_model


def generate_data():
    """Return the two-moons toy data set (200 samples, noise 0.20, seed 0)."""
    np.random.seed(0)
    X, y = datasets.make_moons(200, noise=0.20)
    return X, y


class Config:
    """Network and training constants."""
    nn_input_dim = 2
    nn_output_dim = 2
    epsilon = 0.01
    reg_lambda = 0.01


def visualize(X, y, model):
    """Plot the decision boundary of ``model`` over the data.

    NOTE(review): ``predict`` is not defined in this excerpt -- confirm it
    exists elsewhere in the module.
    """
    plot_decision_boundary(lambda x: predict(model, x), X, y)
    plt.title("Logistic regression")


def plot_decision_boundary(pred_func, X, y):
    """Placeholder; not implemented yet."""
    pass


# The guard compared against "__mail__", so the script body never ran.
if __name__ == "__main__":
    X, y = generate_data()
    plt.scatter(X[:, 0], X[:, 1], s=40, c=y, cmap=plt.cm.Spectral)
    plt.show()
def read():
    """Load and print the two objects stored in ``pickled_data.pkl``.

    The file is expected to hold two consecutive pickles (``t`` then ``v``).
    """
    # Pickle data is binary, so the file must be opened in 'rb'; ``with``
    # also closes it, which the original never did.
    # NOTE: pickle.load must only be used on trusted files.
    with open("pickled_data.pkl", "rb") as pickle_file:
        t = pickle.load(pickle_file)
        v = pickle.load(pickle_file)
    # single-argument print() behaves the same on Python 2 and 3
    print(t)
    print(v)


# NOTE(review): the source was flattened; everything from here on is assumed
# to be module-level script code (it uses s, dt and n, which read() never
# defines) -- confirm against the original.
initialize(v, s, t, dt, n)
calculate(v, s, t, dt, n)
store(v, t, n)

#plot
plt.figure(1)
# upper panel: velocity over time
plt.subplot(211)
plt.plot(t, v,"g-", linewidth=2.0)
plt.scatter(t, v)
plt.title('The Velocity of a Free Falling Object')
plt.xlabel('Time($t$)', fontsize=14)
plt.ylabel('Velocity($m/s$)', fontsize=14)
plt.text(3,-60,r'$g = 9.8 m/s^2$', fontsize=16)
plt.grid(True)
# lower panel: displacement over time
plt.subplot(212)
plt.plot(t, s,"g-", linewidth=2.0)
plt.scatter(t, s)
plt.title('The Displacement of a Free Falling Object')
plt.xlabel('Time($t$)', fontsize=14)
plt.ylabel('Displacement($m$)', fontsize=14)
plt.text(3,-300,r'$g = 9.8 m/s^2$', fontsize=16)
plt.grid(True)