def histo_datalink(mode, lambda_val, attribute, ci=95, datalinks=range(0, NUM_DATA_LINK), save=False):
    stats = scalar_stats(scalar_parse(mode, lambda_val))
    for u in datalinks:
        attr = attribute + '-' + str(u)
        bar = stats['mean'][attr]
        error = np.array([
            bar - stats['ci' + str(ci) + '_l'][attr],
            stats['ci' + str(ci) + '_h'][attr] - bar
        ]).reshape(2, 1)
        plt.bar('User ' + str(u), bar, yerr=error, align='center',
                alpha=0.95, ecolor='k', capsize=7)

    # Show graphic
    plt.title(attribute + ": " + MODE_DESCRIPTION[mode])
    if save:
        plt.savefig("histousers_" + attribute + "_" + mode + "_" + lambda_val + ".pdf",
                    bbox_inches="tight")
        plt.clf()
    else:
        plt.show()
    return
def visualize(self):  # Aadesh
    fig = plt.figure()
    data1 = pd.read_csv("DSBDA.csv", names=['Name'])
    data2 = pd.read_csv(self.directory + "/DSBDA.csv", names=['Name'])
    df = pd.merge(data1, data2, how='left', indicator='Submitted')
    df['Submitted'] = np.where(df.Submitted == 'both', True, False)
    # print(df)

    y = []
    for i in range(df.index.min(), df.index.max() + 1):
        if df['Submitted'][i] == True:
            y.append(1)
        elif df['Submitted'][i] == False:
            y.append(0)

    x = list(df.index + 1)
    barWidth = 0.85

    # Create green bars
    plt.bar(x, y, color='green', edgecolor='white', width=barWidth, label='On Time')
    plt.savefig(self.directory + "/assigenmet1.pdf")
    print('Showing Plot')
    plt.show()
def plot_stacked_barchart(self, dataframe, sort, title, xlable, ylable, kurs):
    x = []
    tutor = []
    y = []
    for i in dataframe['tutor']:
        if i not in tutor:
            tutor.append(i)
            y.append([])

    for i, elem in enumerate(dataframe[sort]):
        print(y, elem)
        if elem in x:
            y[tutor.index(dataframe['tutor'][i])][x.index(elem)] += 1
        else:
            x.append(elem)
            for j, elem2 in enumerate(tutor):
                y[j].append(0)
            y[tutor.index(dataframe['tutor'][i])][x.index(elem)] += 1

    for i, elem in enumerate(y):
        plt.bar(range(len(elem)), elem, label=tutor[i])

    plt.xlabel(xlable)
    plt.ylabel(ylable)
    plt.legend(loc="best")
    plt.savefig('./PDFcreater/Plots/{}/{}.png'.format(kurs, title))
    # plt.show()
    # clear the plot for the next plot
    plt.clf()
    plt.cla()
    plt.close()
def print_pred_distrib_figure(filename, bins, histo, dx, J_opt):
    assert isinstance(filename, str), 'filename must be a string'
    filename = os.path.splitext(filename)[0] + '.png'

    matplotlib = _try_import_matplotlib()
    if matplotlib is None:
        return
    else:
        from matplotlib import pyplot as plt

    figure = plt.figure(figsize=(7, 7))
    plt.bar(bins[:-1], histo[0], width=dx, align='edge',
            color='blue', alpha=0.7, label='neutral')
    plt.bar(bins[:-1], histo[1], width=dx, align='edge',
            color='red', alpha=0.7, label='deleterious')
    plt.axvline(x=J_opt, color='k', ls='--', lw=1)
    plt.ylabel('distribution')
    plt.xlabel('predicted score')
    plt.legend()
    figure.savefig(filename, format='png', bbox_inches='tight')
    plt.close()
    plt.rcParams.update(plt.rcParamsDefault)
    LOGGER.info(f'Predictions distribution saved to {filename}')
def chartData(data, keyword):
    # Takes the results of the FTS search and the phrase that was searched for,
    # and creates a bar chart of occurrences by book for the phrase.
    from matplotlib import pyplot as plt
    import numpy as np
    import math

    data = sorted(data, key=lambda x: x[1], reverse=True)
    info = []   # number of occurrences
    books = []  # list of books
    for d in data:
        info.append(d[1])
        b = d[0]
        # Use only the book number for the label, since the name is the same for all books
        books.append(b[b.find('LIBER'):])

    # create chart
    plt.close()
    fig = plt.figure()
    width = 0.4
    ind = np.arange(len(books))
    plt.bar(ind, info, width=width)
    plt.xticks(ind + width / 2., books)
    plt.yticks(np.arange(0, max(info) * 2, math.ceil(max(info) / 5)))
    plt.ylabel('Number of Occurrences')
    plt.xlabel('Books')
    plt.title('Occurrences of "' + keyword + '" by book in Curtius Rufus (Latin)')
    fig.autofmt_xdate()
    plt.show(block=False)  # display plot, and continue with program
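# A minimal usage sketch for chartData (not part of the original code): the tuples
# below stand in for hypothetical full-text-search results of the form
# (book_title, occurrence_count), and 'rex' is an arbitrary example keyword.
sample_results = [
    ('Q. Curtius Rufus LIBER III', 12),
    ('Q. Curtius Rufus LIBER IV', 7),
    ('Q. Curtius Rufus LIBER V', 19),
]
chartData(sample_results, 'rex')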
def musicType_plot(rap, randb, classical, indie, pop, df):
    plt.figure()
    # declaring testing data
    xs = [1, 2, 3, 4, 5]
    ys = [randb, rap, classical, pop, indie]
    # setting range
    xrng = np.arange(len(xs))
    yrng = np.arange(0, max(ys) + 60, 50)
    # labeling data
    plt.xlabel('Music Type')
    plt.ylabel('Music volume')
    # spacing and declare bar chart
    plt.bar(xrng, ys, 0.45, align="center")
    # labeling
    plt.xticks(xrng, ["randb", "rap", "classical", "pop", "indie"])
    plt.yticks(yrng)
    plt.grid(True)
    plt.show()
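# A minimal usage sketch for musicType_plot (not part of the original code): the
# counts are made-up listening totals, and None is passed for df because the
# function never uses that argument. Assumes the numpy/matplotlib imports used
# elsewhere in these snippets.
musicType_plot(12, 8, 3, 5, 20, None)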
def print_path_prob_figure(filename, bins, histo, dx, path_prob,
                           smooth_path_prob, cutoff=200):
    assert isinstance(filename, str), 'filename must be a string'
    filename = os.path.splitext(filename)[0] + '.png'

    matplotlib = _try_import_matplotlib()
    if matplotlib is None:
        return
    else:
        from matplotlib import pyplot as plt

    figure = plt.figure(figsize=(7, 7))
    s = np.sum(histo, axis=0)
    v1 = np.where(s >= cutoff, path_prob, 0)
    v2 = np.where(s < cutoff, path_prob, 0)
    v3 = np.where(s >= cutoff, smooth_path_prob, 0.)
    plt.bar(bins[:-1], v1, width=dx, align='edge', color='red', alpha=1)
    plt.bar(bins[:-1], v2, width=dx, align='edge', color='red', alpha=0.7)
    plt.plot(bins[:-1] + dx / 2, v3, color='orange')
    plt.ylabel('pathogenicity prob.')
    plt.xlabel('predicted score')
    plt.ylim((0, 1))
    figure.savefig(filename, format='png', bbox_inches='tight')
    plt.close()
    plt.rcParams.update(plt.rcParamsDefault)
    LOGGER.info(f'Pathogenicity plot saved to {filename}')
def evaluation(X, y, model, n_preds=10, random=True, show_graph=True):
    n_steps = X.shape[1]
    max_random_int = len(y) - n_steps
    y_true, y_pred, prediction_accuracy, slices = [], [], [], []

    for i in range(n_preds):
        if random == True:
            position = np.random.randint(0, max_random_int)
        else:
            position = i

        y_hat = model.predict(X[position:position + 1])[0][0]
        y_pred.append(y_hat)
        y_true.append(y[position])
        y_current = y[position]

        # If we predict returns (the series contains negatives), compare against 0;
        # otherwise compare against the previous value in the sequence.
        if y.min() < 0:
            c = 0
        else:
            c = y[position - 1]

        if ((y_hat > c) & (y_current > c)) or ((y_hat < c) & (y_current < c)):
            acc = 1
        else:
            acc = 0
        prediction_accuracy.append(acc)

        slices.append((list(y[position - n_steps:position + 1]),
                       list(y[position - n_steps:position]) + [y_hat],
                       acc))

    if show_graph == True:
        plt.rcParams['figure.dpi'] = 227
        plt.style.use('seaborn-whitegrid')
        plt.figure(figsize=(16, 6))
        plt.bar(range(n_preds), y_true[:], width=.7, alpha=.6,
                color="#4ac2fb", label="True")
        plt.bar(range(n_preds), y_pred[:], width=.7, alpha=.6,
                color="#ff4e97", label="Predicted")
        plt.axhline(0, color="#333333", lw=.8)
        plt.legend(loc=1)
        plt.title('Daily Return Prediction', fontsize=15)
        plt.show()

    print('MSE:', mean_squared_error(y_true, y_pred))
    print('Accuracy: {}%'.format(
        round((sum(prediction_accuracy) / len(prediction_accuracy)) * 100, 2)))

    return slices, np.array(y_true), np.array(y_pred)
def char_positive():
    x_axis = Category
    y_axis = statistic_positive()
    y_pos = np.arange(len(y_axis))
    plt.figure(figsize=(10, 5))
    plt.bar(y_pos, y_axis)
    plt.xticks(y_pos, x_axis)
    plt.title('What categories we choose the most')
    plt.show()
def plot_boring_barchart(self, dataframe, x, y, title, xlable, ylable, kurs):
    plt.bar(x, y, color='blue')
    # plt.title(title)
    plt.xlabel(xlable)
    plt.ylabel(ylable)
    plt.savefig('./PDFcreater/Plots/{}/{}.png'.format(kurs, title))
    # clear the plot for the next plot
    plt.clf()
    plt.cla()
    plt.close()
def getHistogramPGN(df):
    y = df.iloc[:, -1]
    perc_win = y.sum() / y.count()
    height = [1 - perc_win, perc_win]
    bars = ('win=0 (%)', 'win=1 (%)')
    y_pos = np.arange(len(bars))
    plt.bar(y_pos, height)
    plt.xticks(y_pos, bars)
    plt.title("labels histogram")
    path = WORKING_PATH + "/___histogram.png"
    plt.savefig(path)
    return path
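# A minimal usage sketch for getHistogramPGN (not part of the original code): the
# tiny DataFrame below is hypothetical, with the binary win label in the last column,
# and WORKING_PATH (a global of the original snippet) is pointed at the current
# directory purely for illustration. Assumes the usual pandas/numpy/matplotlib imports.
WORKING_PATH = '.'
demo_games = pd.DataFrame({'move_count': [40, 62, 35, 51],
                           'win': [1, 0, 1, 1]})
print(getHistogramPGN(demo_games))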
def _plot_lengths(self, lens, prec, figsize):
    '''Plots a list of file lengths displaying prec digits of precision'''
    rounded = [round(i, prec) for i in lens]
    rounded_count = Counter(rounded)
    plt.figure(num=None, figsize=figsize, dpi=80, facecolor='w', edgecolor='k')
    labels = sorted(rounded_count.keys())
    values = [rounded_count[i] for i in labels]
    width = 1
    plt.bar(labels, values, width)
    xticks = np.linspace(int(min(rounded)), int(max(rounded)) + 1, 10)
    plt.xticks(xticks)
    plt.show()
def plot_hist(a, b):
    v = np.random.beta(a, b, 10000)
    s = np.zeros(v.shape[0])
    for i in range(v.shape[0]):
        s[i] = np.random.binomial(20, v[i])
    plt.figure()
    # np.unique replaces the removed scipy.stats.itemfreq: unique values and their counts
    center, hist = np.unique(s, return_counts=True)
    plt.bar(center, hist, align='center', width=0.7)
    plt.savefig('../Figures/a' + str(a) + '_b' + str(int(b)) + '.pdf')
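# A minimal usage sketch for plot_hist (not part of the original code): it draws
# 10000 Beta(a, b) samples, simulates Binomial(20, p) counts and saves the histogram;
# the '../Figures' output directory comes from the snippet and must already exist.
plot_hist(2, 5)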
def plotDisparityHistogram(network=None):
    assert network is not None, "Network is not initialised! Visualising failed."
    import matplotlib.pyplot as plt
    from NetworkBuilder import sameDisparityInd

    spikesPerDisparityMap = []
    for d in range(0, maxDisparity - minDisparity + 1):
        cellsOut = [network[x][1] for x in sameDisparityInd[d]]
        spikesPerDisparityMap.append(sum([sum(x.get_spike_counts().values()) for x in cellsOut]))

    print(spikesPerDisparityMap)

    plt.bar(range(0, maxDisparity - minDisparity + 1), spikesPerDisparityMap, align='center')
    plt.show()
def _Learning(descriptor_list, n_clusters, n_images, train_labels, ret=None, std=None):
    # set up model
    kmeans_obj = KMeans(n_clusters=n_clusters)

    # format the data (descriptor list)
    vStack = np.array(descriptor_list[0])
    for remaining in descriptor_list[1:]:
        vStack = np.vstack((vStack, remaining))
    descriptor_vstack = vStack.copy()

    # perform clustering
    kmeans_ret = kmeans_obj.fit_predict(descriptor_vstack)

    # develop vocabulary
    mega_histogram = np.array([np.zeros(n_clusters) for i in range(n_images)])
    old_count = 0
    for i in range(n_images):
        l = len(descriptor_list[i])
        for j in range(l):
            if ret is None:
                idx = kmeans_ret[old_count + j]
            else:
                idx = ret[old_count + j]
            mega_histogram[i][idx] += 1
        old_count += l

    # display trained vocabulary
    vocabulary = mega_histogram
    x_scaler = np.arange(n_clusters)
    y_scalar = np.array([abs(np.sum(vocabulary[:, h], dtype=np.int32)) for h in range(n_clusters)])
    plt.bar(x_scaler, y_scalar)
    plt.xlabel("Visual Word Index")
    plt.ylabel("Frequency")
    plt.title("Complete Vocabulary Generated")
    plt.xticks(x_scaler + 0.4, x_scaler)
    plt.show()

    # standardize
    if std is None:
        scale = prep.StandardScaler().fit(mega_histogram)
        mega_histogram = scale.transform(mega_histogram)
    else:
        mega_histogram = std.transform(mega_histogram)

    # train -- uses SVC
    clf = SVC()
    clf.fit(mega_histogram, train_labels)
def color_info(color_dist, inner_deck_list):
    color_dict = color_dist(inner_deck_list, "deck")
    print("colors:")
    pp.pprint(color_dict)

    fig, ax = plt.subplots()
    ax.set_xlabel('Colors')
    ax.set_ylabel('Count')
    ax.set_title('Distribution of Colored cards in deck')

    x_val = []
    y_val = []
    for i in color_dict.items():
        x_val.append(i[0])
        y_val.append(i[1])

    plt.bar(x_val, y_val)
    plt.show()
def myplot(feat, i, j, df):
    df = df[i:j + 1]
    df = df[df["rate"] > 3.8]
    n = df[feat].value_counts()
    l = list(zip(list(n), list(n.keys())))
    l.sort(key=lambda tup: tup[1])
    height = [x[0] for x in l]
    bars = [x[1] for x in l]
    y_pos = np.arange(len(bars))
    plt.bar(y_pos, height, color=sns.color_palette())
    plt.xticks(y_pos, bars, rotation="vertical")
    plt.xlabel("Suitable values")
    plt.ylabel("Count")
    plt.title("Values for " + feat)
    plt.show()
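# A minimal usage sketch for myplot (not part of the original code): the DataFrame
# and its 'cuisine'/'rate' columns are invented for illustration; rows 0-5 are kept
# and then filtered to ratings above 3.8 before counting values of 'cuisine'.
# Assumes the module-level pandas/numpy/seaborn/matplotlib imports the snippet relies on.
demo_restaurants = pd.DataFrame({
    'cuisine': ['Italian', 'Thai', 'Italian', 'Mexican', 'Thai', 'Italian'],
    'rate': [4.2, 3.9, 4.5, 3.5, 4.1, 4.8],
})
myplot('cuisine', 0, 5, demo_restaurants)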
def fnctn(a, c):
    df2 = pd.DataFrame(data.groupby(a)[c].count())
    df2 = df2.astype(float)
    # plt.bar takes positional x/height arguments and returns a BarContainer,
    # so the tick labels are styled through the current axes instead
    plt.bar(df2.index, df2.iloc[:, 0])
    plt.xticks(rotation=30, fontsize=6)
    plt.savefig("./static/q18.jpg")
    plt.close()
def draw_bar(savename):
    import matplotlib.pyplot as plt
    size = 3
    x = np.arange(size)
    a = [34.89, 33.87, 72.37]
    b = [551.72, 552.87, 698.34]
    xl = ['training time \n on MNIST', 'training time on \n Fashion-MNIST',
          'training time \n on CIFAR-10']

    total_width, n = 0.54, 2
    width = total_width / n
    x = x - (total_width - width)

    plt.figure()
    plt.bar(x, a, width=width)
    plt.bar(x + width, b, width=width)
    plt.ylabel('time(s)')
    plt.xticks(x, xl)
    plt.legend()
    plt.savefig('../eps/' + savename)
    plt.show()
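# A minimal usage sketch for draw_bar (not part of the original code): only the
# output file name is needed, but the hard-coded '../eps/' directory from the snippet
# must exist for the savefig call to succeed. Assumes numpy is imported as np.
draw_bar('training_time_comparison.eps')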
def print_feat_imp_figure(filename, feat_imp, featset):
    assert isinstance(filename, str), 'filename must be a string'
    filename = os.path.splitext(filename)[0] + '.png'

    matplotlib = _try_import_matplotlib()
    if matplotlib is None:
        return
    else:
        from matplotlib import pyplot as plt

    fig = plt.figure(figsize=(7, 7))
    n = len(feat_imp)
    plt.bar(range(n), feat_imp, align='center', tick_label=featset)
    plt.xticks(rotation='vertical')
    plt.ylabel('feat. importance')
    fig.savefig(filename, format='png', bbox_inches='tight')
    plt.close()
    plt.rcParams.update(plt.rcParamsDefault)
    LOGGER.info(f'Feat. importance plot saved to {filename}')
def drawTKY():
    """ Function plots the 10 most popular venue categories in Tokyo. """
    # Import the Tokyo dataset from the root directory
    TKY = pd.read_csv('dataset_TSMC2014_TKY.csv')
    TKY = TKY[["venueCategory", "venueCategoryId"]]
    grouped2 = TKY.groupby(["venueCategory"]).count()
    grouped2 = grouped2.sort_values('venueCategoryId')
    grouped2 = grouped2[237:247]  # keep the last 10 rows, i.e. the 10 most frequent categories

    # Plot bars of most popular venue categories
    plt.figure(figsize=(16, 6))
    plt.style.use('fivethirtyeight')
    plt.bar(grouped2.index, grouped2["venueCategoryId"])
    plt.title("10 Most Popular Venue Categories \n Tokyo: 2012-2013", fontsize=14, color='black')
    plt.ylabel("Check-ins per Venue Category", fontsize=14)
    plt.show()
def drawNYC():
    """ Function plots the 10 most popular venue categories in New York. """
    # Import the New York dataset from the root directory
    NYC = pd.read_csv('dataset_TSMC2014_NYC.csv')
    NYC = NYC[["venueCategory", "venueCategoryId"]]
    grouped = NYC.groupby(["venueCategory"]).count()
    grouped = grouped.sort_values('venueCategoryId')
    grouped = grouped[241:251]  # keep the last 10 rows, i.e. the 10 most frequent categories

    # Plot bars of most popular venue categories
    plt.figure(figsize=(16, 6))
    plt.style.use('fivethirtyeight')
    plt.bar(grouped.index, grouped["venueCategoryId"])
    plt.title("10 Most Popular Venue Categories \n New York: 2012-2013", fontsize=14, color='black')
    plt.ylabel("Check-ins per Venue Category", fontsize=14)
    plt.show()
def visual_importnance(X, forest):
    importances = forest.feature_importances_
    std = np.std([tree.feature_importances_ for tree in forest.estimators_], axis=0)
    indices = np.argsort(importances)[::-1]

    # Print the feature ranking
    print("Feature ranking:")
    for f in range(X.shape[1]):
        print("%d. feature %d (%f)" % (f + 1, indices[f], importances[indices[f]]))

    # Plot the feature importances of the forest
    plt.figure()
    plt.title("Feature importances")
    plt.bar(range(X.shape[1]), importances[indices], color="r",
            yerr=std[indices], align="center")
    plt.xticks(range(X.shape[1]), indices)
    plt.xlim([-1, X.shape[1]])
    plt.show()
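# A minimal usage sketch for visual_importnance (not part of the original code): the
# synthetic dataset and forest settings below are illustrative choices only.
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier

X_demo, y_demo = make_classification(n_samples=200, n_features=6, random_state=0)
forest_demo = RandomForestClassifier(n_estimators=50, random_state=0).fit(X_demo, y_demo)
visual_importnance(X_demo, forest_demo)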
def sunSat_plot(saturday_mean, sunday_mean, df):
    plt.figure()
    # declaring values to be plotted
    xs = [1, 2]
    ys = [saturday_mean, sunday_mean]
    # setting range
    xrng = np.arange(len(xs))
    yrng = np.arange(0, max(ys) + 50, 50)
    # making bar chart and aligning items properly
    plt.bar(xrng, ys, 0.45, align="center")
    # labeling
    plt.xticks(xrng, ["Saturday", "Sunday"])
    plt.yticks(yrng)
    plt.grid(True)
    plt.show()
def make_hists(self, X, y):
    colors = [
        'red', 'tan', 'lime', 'orange', 'black', 'yellow', 'green',
        'pink', 'red', 'brown', 'grey', 'purple', 'navy'
    ]
    features = (set(self.train_df.keys()) - {'Vote'}) - set(categorical_features)
    for feature in features:
        df = pd.DataFrame({
            "x": X[feature].values,
            "class": y.values.flatten()
        })
        _, edges = np.histogram(df["x"], bins=15)
        histdata = []
        labels = []
        for n, group in df.groupby("class"):
            histdata.append(np.histogram(group["x"], bins=edges)[0])
            labels.append(n)
        hist = np.array(histdata)
        histcum = np.cumsum(hist, axis=0)

        plt.bar(edges[:-1], hist[0, :], width=np.diff(edges)[0],
                label=labels[0], align="edge")
        for i in range(1, len(hist)):
            plt.bar(edges[:-1], hist[i, :], width=np.diff(edges)[0],
                    bottom=histcum[i - 1, :], color=colors[i],
                    label=labels[i], align="edge")

        plt.legend(title="class")
        plt.savefig('hists_label/' + feature + '.jpeg')
def scree_plot(self):
    total = sum(self.eig_vals)
    explained_var = [(i / total) * 100 for i in sorted(self.eig_vals, reverse=True)]
    cum_var = np.cumsum(explained_var)

    with plt.style.context('seaborn-darkgrid'):
        plt.bar(range(len(explained_var)), explained_var, align='center',
                label='individual explained variance')
        plt.step(range(len(explained_var)), cum_var, where='mid',
                 label='cumulative explained variance', color="red")
        plt.ylabel("Explained variance (%)")
        plt.xlabel("Principal components")
        plt.tight_layout()
        plt.legend(loc='best')
        plt.show()
def plotAmod(self):
    # Set up state space (FSM1, 2, 3, Loc and Vel)
    x_val = [1, 2, 3, 4, 5]
    y_val = [
        self.FSM1_elapsed, self.FSM2_elapsed, self.FSM3_elapsed,
        self.Loc_elapsed, self.Vel_elapsed
    ]
    # Compare separate states (MECE) and wheels active/non-active
    y_label = [
        'Intersection Active', 'State Estimator Active',
        'Indefinite Navigation Active', 'Localization Active', 'Wheels Active'
    ]
    # Plot values upon shutdown
    plt.bar(x_val, y_val, tick_label=y_label, width=0.5, color=['blue', 'green'])
    plt.xlabel('Duckiebot states')
    plt.ylabel('Duration (s)')
    plt.title('Time spent by %s per state' % self.veh_name)
def plot_value_array(i, predictions_array, true_label, number_of_classes=3):
    predictions_array, true_label = predictions_array, true_label[i]
    plt.style.use(['classic'])
    plt.grid(False)
    plt.xticks(range(number_of_classes))
    plt.yticks([])
    thisplot = plt.bar(range(number_of_classes), 1, color="#FFFFFF")
    plt.ylim([0, 1])
    predicted_label = np.argmax(predictions_array)
    # print(true_label[0])
    # print(predicted_label)
    thisplot[predicted_label].set_color('red')
    thisplot[true_label].set_color('blue')
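# A minimal usage sketch for plot_value_array (not part of the original code): the
# softmax-style scores and labels below are made up for a 3-class problem.
demo_predictions = np.array([0.1, 0.7, 0.2])
demo_true_labels = np.array([2, 1, 0])
plot_value_array(1, demo_predictions, demo_true_labels, number_of_classes=3)
plt.show()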
def make_bar_graph(data=housing):
    '''
    Draw a 3x4 grid of subplots, one bar chart per column of the housing data.
    :param data: DataFrame to plot
    :return: None
    '''
    mp.figure('Bar', facecolor='lightgray')
    mp.title('Bar', fontsize=20)
    gs = mg.GridSpec(3, 4)
    i, j = 0, 0
    for column in data.columns:
        # create the subplot
        mp.subplot(gs[i, j])
        # add text inside the plot: position, content, alignment, size, color, alpha
        mp.text(0.5, 0.5, str(i) + '+' + str(j), ha='center', va='center',
                size=35, color='red', alpha=0.5)
        # remove boundary ticks
        mp.xticks(())
        mp.yticks(())
        # draw the bar chart: count values falling into 10 equal-width bins
        single_data = data[column]
        min_data, max_data = single_data.min(), single_data.max()
        step = (max_data - min_data) / 10
        y = []
        for left in np.arange(min_data, max_data, step):
            y.append(int(((single_data >= left) & (single_data < left + step)).sum()))
        x = np.arange(len(y))
        mp.bar(x, y, 0.4, color='dodgerblue', label=column, alpha=0.75)
        # advance to the next subplot position
        j += 1
        i = i + (j // 4)
        j = j % 4
    # switch to a tight layout
    mp.tight_layout()
def musicCategory_plt(randb_ser, rap_ser, classical_ser, pop_ser, indie_ser, df):
    plt.figure()
    # define x and y values
    xs = [1, 2, 3, 4, 5]
    ys = [len(randb_ser), len(rap_ser), len(classical_ser), len(pop_ser), len(indie_ser)]
    # setting range
    xrng = np.arange(len(xs))
    yrng = np.arange(0, max(ys) + 60, 50)
    # labeling data
    plt.xlabel('Music Type')
    plt.ylabel('Days listened')
    # define bar chart and spacing
    plt.bar(xrng, ys, 0.45, align="center")
    # more labeling
    plt.xticks(xrng, ["randb", "rap", "classical", "pop", "indie"])
    plt.yticks(yrng)
    plt.grid(True)
    plt.show()
def plot_response_time_variousT():
    dataframe = pd.DataFrame()
    index = []
    for k in capacity_change_time:
        for mode in modes:
            index.append(f"{mode},t={k}s")
    dataframe['index'] = index

    for mode in modes:
        index = []
        meanResponseTime = []
        for i in capacity_change_time:
            df = scalar_df_parse(
                f"C:\\Users\\Leonardo Poggiani\\Documents\\GitHub\\PECSNproject\\csv\\pool_classico_varia_T\\{mode}{i}.csv"
            )
            response = df[df.name == "responseTime"]
            meanResponseTime.append(response.value.mean())
            index.append(i)

        plt.bar(index, meanResponseTime)
        plt.title(f"{mode}")
        plt.rcParams["figure.figsize"] = (12, 10)
        plt.xticks(rotation=25)
        plt.show()

    plt.xlabel("Value of t")
    plt.ylabel("Response time")
    plt.title("Comparison of various values of t")
    plt.legend(loc='best')
    plt.savefig(
        "C:\\Users\\Leonardo Poggiani\\Documents\\GitHub\\PECSNproject\\analysis\\variandoT\\responseTimeAlVariareDiT2.png"
    )
fig = plt.gcf()
fig.set_size_inches(16, 10)

count_by_occupation = user_fields.map(lambda fields: (fields[3], 1)).reduceByKey(lambda x, y: x + y).collect()
x_axis1 = np.array([c[0] for c in count_by_occupation])
y_axis1 = np.array([c[1] for c in count_by_occupation])
x_axis = x_axis1[np.argsort(y_axis1)]
y_axis = y_axis1[np.argsort(y_axis1)]

pos = np.arange(len(x_axis))
width = 1.0
ax = plt.axes()
ax.set_xticks(pos + (width / 2))
ax.set_xticklabels(x_axis)

plt.bar(pos, y_axis, width, color='lightblue')
plt.xticks(rotation=30)
fig = plt.gcf()
fig.set_size_inches(16, 10)

count_by_occupation2 = user_fields.map(lambda fields: fields[3]).countByValue()
print("Map-reduce approach:")
print(dict(count_by_occupation))
print("")
print("countByValue approach:")
print(dict(count_by_occupation2))

''' explore movie data '''
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

Data = pd.read_csv("wikipedia_by_month.txt", sep='\t', header=None, names=['Date', 'Count'])

fig = plt.figure()
ind = np.arange(len(Data['Date']))
plt.bar(ind, Data['Count'])
fig.autofmt_xdate()
plt.xticks(ind, Data['Date'], rotation='vertical')
plt.savefig("figure.pdf")