def log_plot():
    """Draw a log-scaled horizontal bar chart of timings and save it as a PNG.

    Takes no arguments and returns nothing.  Reads the module-level names
    ``median``, ``err``, ``pos``, ``size``, ``language``, ``relative``,
    ``prefix``, ``bar_color``, ``error_bar_color`` and ``pid``.  The figure is
    written to ``plots/<pid>.png`` and the current figure is cleared.
    """
    # Decade range covered by the data, shifted by 3 so that i == 0 is 1e-3.
    # log10 is exact at powers of ten, unlike log(x) / log(10).
    start = int(np.floor(np.log10(min(median)))) + 3
    end = int(np.ceil(np.log10(max(median)))) + 3

    xs = []
    ticks = []
    for i in range(start, end + 1):
        xs.append(10 ** (i - 3))
        if i % 3 == 0:
            # Floor division keeps the index an int on Python 3 as well.
            ticks.append('{}s'.format(prefix[i // 3]))
        else:
            ticks.append(str(10 ** (i % 3)))

    plt.barh(pos, median, align='center', height=0.25, left=1e-3,
             color=bar_color, lw=0)
    # 'none' (the string) is the supported way to draw error bars only.
    plt.errorbar(median, pos, ecolor=error_bar_color, fmt='none', xerr=err)
    plt.grid(True)
    plt.xlabel('Time')
    plt.xlim(min(xs), max(xs))
    plt.xscale('log')
    plt.xticks(xs, ticks)
    plt.ylim(ymax=size)
    plt.yticks(pos, language)

    # Second y axis on the right carrying the ``relative`` labels.
    plt.twinx()
    plt.ylim(ymax=size)
    plt.yticks(pos, relative)

    plt.savefig('plots/{}.png'.format(pid), bbox_inches='tight')
    plt.clf()
def as_pyplot_figure(self, label=1, **kwargs):
    """Render the explanation for one label as a horizontal bar chart.

    matplotlib is imported on call, so this raises if it is not installed.

    Args:
        label: label to plot; it is passed to ``self.as_list`` and used to
            index ``self.class_names``.
        kwargs: keyword arguments forwarded to ``self.as_list``.

    Returns:
        pyplot figure (barchart).
    """
    import matplotlib.pyplot as plt

    pairs = self.as_list(label, **kwargs)
    figure = plt.figure()
    # Reverse both columns so the first pair is drawn at the top.
    weights = [pair[1] for pair in pairs][::-1]
    labels = [pair[0] for pair in pairs][::-1]
    bar_colors = []
    for weight in weights:
        if weight > 0:
            bar_colors.append('green')
        else:
            bar_colors.append('red')
    rows = np.arange(len(pairs)) + .5
    plt.barh(rows, weights, align='center', color=bar_colors)
    plt.yticks(rows, labels)
    plt.title('Local explanation for class %s' % self.class_names[label])
    return figure
def plot_errsh():
    """Plot train and test errors as paired horizontal bars, then save and show.

    Reads the module-level names ``Control_results``, ``ind``, ``width``,
    ``opacity`` and ``Datasets``.  Error bars are the stored standard
    deviations divided by sqrt(10).  The figure is saved as ``errs.png``.
    """
    results = Control_results
    fig, ax = plt.subplots()

    # Train bars sit at ``ind``; test bars are shifted up by one bar width.
    rects_train = plt.barh(
        ind,
        results['train_errs'],
        width,
        color='b',
        alpha=opacity,
        xerr=results['train_errs_std'] / np.sqrt(10),
        label='$train$',
    )
    rects_test = plt.barh(
        ind + width,
        results['test_errs'],
        width,
        color='r',
        alpha=opacity,
        xerr=results['test_errs_std'] / np.sqrt(10),
        label='test',
    )

    plt.ylabel('Performance (Error)')
    plt.title('Error (MSE)')
    plt.yticks(ind + width, Datasets)
    plt.legend()

    # Lay out, write to disk, then display.
    plt.tight_layout()
    plt.savefig('errs.png')
    plt.show()
def plot(data, reverse=False):
    """Show a horizontal bar chart of ``(label, value)`` pairs.

    Args:
        data: sequence of pairs; element 0 is the tick label and element 1
            the bar length.
        reverse: when true, the pairs are drawn in reverse order.  The
            caller's sequence is no longer reversed in place.
    """
    # Work on a copy so that plotting never reorders the caller's data.
    rows = list(reversed(data)) if reverse else list(data)
    positions = range(len(rows))
    plt.barh(positions, [row[1] for row in rows])
    plt.yticks(positions, [row[0] for row in rows])
    plt.show()
def make_entity_plot(filename, title, fixed_noip, fixed_ip, dynamic_noip, dynamic_ip):
    """Plot the four settings as horizontal bars with error bars and save it.

    Each of the four sample arguments is passed to ``conf_stats``, whose
    two-element result is used as (bar length, error).  The figure is saved
    to ``output_file(filename)`` and the current figure is then cleared.
    """
    plt.figure(figsize=(12, 5))
    plt.title("Settings comparison - " + title)
    plt.xlabel('Time (ms)', fontsize=12)
    plt.xlim([0, 62000])

    # Same evaluation order as the arguments: fixed first, then dynamic.
    values = []
    errs = []
    for samples in (fixed_noip, fixed_ip, dynamic_noip, dynamic_ip):
        mean, conf = conf_stats(samples)
        values.append(mean)
        errs.append(conf)

    tick_labels = [
        "Fixed | no I.P.",
        "Fixed | I.P.",
        "Dynamic | no I.P.",
        "Dynamic | I.P.",
    ]
    y_pos = numpy.arange(len(values))
    plt.barh(y_pos, values, xerr=errs, align='center',
             color=['r', 'b', 'r', 'b'], ecolor='black', alpha=0.7)
    plt.yticks(y_pos, tick_labels)

    plt.savefig(output_file(filename))
    plt.clf()
def pylot_show():
    """Query the ``douban`` table and show a bar chart of counts per category.

    Uses the module-level cursor ``cur``.  Column 1 of each row is taken as
    the category name and column 2 as its count.
    """
    sql = 'select * from douban;'
    cur.execute(sql)
    rows = cur.fetchall()  # read every row of the table

    counts = [int(row[2]) for row in rows]  # number of entries per category
    categories = [row[1] for row in rows]   # category names

    rows_y = np.arange(len(categories))  # one y position per category
    plt.barh(rows_y, counts, color='y', align='center', alpha=0.4)  # alpha: fill opacity, 0..1
    plt.yticks(rows_y, categories)  # label each bar with its category
    plt.grid(axis='x')

    # Write each count just past the end of its bar.
    for amount, row_y in zip(counts, rows_y):
        plt.text(amount + 3, row_y, amount,
                 horizontalalignment='center',
                 verticalalignment='center',
                 weight='bold')

    plt.ylim(+28.0, -2.0)  # visible y range (inverted, first row on top)
    plt.title('douban_top250')  # chart title
    plt.ylabel('movie category')  # y-axis label
    plt.subplots_adjust(bottom=0.15)
    plt.xlabel('count')  # x-axis label
    plt.show()
def followsPicture(mp):
    """Show a horizontal bar chart of the (up to) ten largest values in ``mp``.

    Args:
        mp: mapping of label -> value.  Values are ranked as stored and
            converted with ``int`` for plotting.
    """
    # A stable descending sort picks the same entries, in the same order, as
    # repeatedly removing the first maximum -- and works on Python 3 dict
    # views, which have no index()/pop().
    top = sorted(mp.items(), key=lambda item: item[1], reverse=True)[:10]
    label = [name for name, _ in top]
    val = [int(amount) for _, amount in top]

    # One slot per selected entry; a fixed arange(10) fails for smaller maps.
    pos = np.arange(len(val)) + .5
    plt.figure(1)
    plt.barh(pos, val, align='center')
    plt.yticks(pos, label)
    plt.xlabel(u'粉丝数目')
    string = u"统计人数:" + str(len(mp))
    plt.title(string)
    plt.show()
def draw_who_by_others():
    """Plot mention counts relative to TIOBE value per language and save it.

    Reads ``analysis/who_by_others.csv`` (language in column 0, mention
    value in column 1) and the module-level mappings ``tiobe_values`` and
    ``subreddit_has_alias``.  Languages without a TIOBE entry, with a falsy
    TIOBE value, or with a falsy alias flag are skipped.  The chart is
    written to ``img/mentions_relative_to_tiobe.png``.

    Ported to Python 3: print is a function and the CSV is opened in text
    mode.
    """
    table = []
    # The csv module on Python 3 needs a text file opened with newline=''.
    with open('analysis/who_by_others.csv', newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            if not row:
                continue
            lang = row[0]
            mention_val = float(row[1])
            if lang not in tiobe_values:
                continue
            tiobe_val = tiobe_values[lang]
            if not tiobe_val:
                # Also guards the division below against zero.
                continue
            if not subreddit_has_alias[lang]:
                continue
            table.append((lang, mention_val / tiobe_val))

    print(table)
    # Reverse alphabetical order, so the first language ends up on top.
    table.sort(key=operator.itemgetter(0), reverse=True)
    print(table)

    langs, vals = zip(*table)
    y_pos = np.arange(len(langs))
    plt.barh(y_pos, vals, align='center', alpha=0.4)
    plt.yticks(y_pos, langs)
    plt.title('mentioned by others relative to tiobe value')
    plt.savefig('img/mentions_relative_to_tiobe.png', bbox_inches='tight')
def barGraph(namesByYear):  # Bargraph generator
    """Show a horizontal bar chart from a sequence of (name, births) pairs.

    The first pair gets the highest y position, so it is drawn at the top.
    """
    plt.title('Births By Name For Input Year')
    plt.xlabel('Births')

    # Positions count down from len(namesByYear) to 1.
    slots = range(len(namesByYear), 0, -1)
    names = [pair[0] for pair in namesByYear]
    plt.yticks(slots, names)

    births = [pair[1] for pair in namesByYear]
    plt.barh(slots, births)
    plt.show()
def StyleBar(cur, styleval, stylethresh):
    """Plot mean rating per beer style and return the statistics as a table.

    The data comes from ``StyleRatings(cur, styleval, stylethresh)`` and the
    statistics from ``RatingsArrayIntoStats``.

    Returns:
        ``1`` (after printing a hint) when fewer than two styles come back;
        otherwise a DataFrame indexed by style with columns Mean, StDev,
        Count, Min and Max, where Mean and StDev are formatted as strings
        with two decimals.
    """
    ratingsarray, BeerStyles, BeerStyle_count = StyleRatings(
        cur, styleval, stylethresh)

    if len(BeerStyles) < 2:
        print("Too few things are rated, decrease stylethresh")
        return 1

    stats = RatingsArrayIntoStats(ratingsarray)
    style_averages, style_std, style_min, style_max = stats

    # Bars are spaced 1.5 apart to leave room for the large tick labels.
    rows = 1.5 * np.arange(len(BeerStyles))
    plt.figure(figsize=(18, 8))
    plt.barh(rows, style_averages, xerr=style_std, align='center', alpha=0.4)
    plt.yticks(rows, BeerStyles, fontsize=16)
    plt.xticks(fontsize=16)
    plt.xlabel('Rating', fontsize=18)
    plt.title(styleval + ' Style Ratings', fontsize=20)
    plt.show()

    # One row per statistic, transposed below so each style becomes a row.
    stacked = np.vstack(
        (style_averages, style_std, BeerStyle_count, style_min, style_max))
    beerstyle_df = pd.DataFrame(
        data=stacked.T,
        index=BeerStyles,
        columns=['Mean', 'StDev', 'Count', 'Min', 'Max'],
    )
    for column in ('StDev', 'Mean'):
        beerstyle_df[column] = beerstyle_df[column].map('{:,.2f}'.format)
    return beerstyle_df
def plot_hist_over_category(category_names_avgs_sem_triple_list, plt_number,
                            x_label, groups, printse):
    """Draw one horizontal bar per category and save the figure to disk.

    Args:
        category_names_avgs_sem_triple_list: sequence of triples; element 0
            is the tick label, element 1 the bar length and element 2 the
            error-bar size.
        plt_number: number used in the output file name.
        x_label: x-axis label; also the sub-directory under the module-level
            ``plot_path`` where the figure is written.
        groups: value used in the output file name.
        printse: when true, draw error bars from element 2.
    """
    triples = category_names_avgs_sem_triple_list
    count = len(triples)
    height_factor = 0.5
    ind = np.linspace(0, count * height_factor, num=count)

    fig = plt.figure(figsize=(15.5, 10), dpi=800)
    axes = fig.add_subplot(111)
    for axis_name in ('y', 'x'):
        axes.tick_params(axis=axis_name, which='major', labelsize=10)

    # Fixed 20-colour palette, repeated when there are more categories.
    palette = cycle(["#CCD64B", "#C951CA", "#CF4831", "#90D0D2", "#33402A",
                     "#513864", "#C84179", "#DA983D", "#CA96C4", "#53913D",
                     "#CEC898", "#70D94C", "#CB847E", "#796ACB", "#74D79C",
                     "#60292F", "#6C93C4", "#627C76", "#865229", "#838237"])
    color = [next(palette) for _ in range(count)]

    averages = [triple[1] for triple in triples]
    bar_options = dict(color=color, align='center', height=height_factor)
    if printse:
        bar_options['xerr'] = [triple[2] for triple in triples]
    plt.barh(ind, averages, **bar_options)

    plt.yticks(ind, [triple[0] for triple in triples])
    plt.xlabel(x_label)
    plt.ylabel("Categories")
    plt.subplots_adjust(bottom=0.15, left=0.14, right=0.95, top=0.95)
    plt.ylim([ind.min() - height_factor, ind.max() + height_factor])
    plt.xlim(min(averages) - height_factor, max(averages) + height_factor)

    # Create the target directory; an already existing one is fine.
    target_dir = plot_path + x_label
    try:
        os.makedirs(target_dir)
    except OSError as exception:
        if exception.errno != errno.EEXIST:
            raise

    target = target_dir + "/" + str(plt_number) + "groups_" + str(groups)
    print("da wirds gespeichert:")
    print(target)
    plt.savefig(target)
    plt.close()
def pylot_show():
    """Query the ``douban`` table and save a bar chart of counts per category.

    Uses the module-level cursor ``cur``.  Column 1 of each row is taken as
    the category name and column 2 as its count.  The intermediate lists are
    printed and the figure is written to ``douban.png``.
    """
    sql = 'select * from douban;'
    cur.execute(sql)
    rows = cur.fetchall()

    counts = [int(row[2]) for row in rows]
    categories = [row[1] for row in rows]
    print(counts)
    rows_y = np.arange(len(categories))
    print(rows_y)
    print(categories)
    # Not used for drawing; kept because the call advances NumPy's global RNG.
    colors = np.random.rand(len(counts))

    plt.barh(rows_y, counts, align='center', alpha=0.4)
    plt.yticks(rows_y, categories)
    # Print each count centred on the end of its bar.
    for amount, row_y in zip(counts, rows_y):
        plt.text(amount, row_y, amount,
                 horizontalalignment='center',
                 verticalalignment='center',
                 weight='bold')
    plt.ylim(+28.0, -1.0)
    plt.title(u'豆瓣电影250')
    plt.ylabel(u'电影分类')
    plt.subplots_adjust(bottom=0.15)
    plt.xlabel(u'分类出现次数')
    plt.savefig('douban.png')
def plot_feature_importances_cancer(model):
    """Draw ``model.feature_importances_`` as horizontal bars.

    Feature count and tick labels come from the module-level ``cancer``
    dataset object (``cancer.data`` and ``cancer.feature_names``).
    """
    feature_count = cancer.data.shape[1]
    plt.barh(range(feature_count), model.feature_importances_, align='center')
    plt.yticks(np.arange(feature_count), cancer.feature_names)
    # Leave one empty slot below the first and above the last bar.
    plt.ylim(-1, feature_count)
    plt.xlabel("특성 중요도")
    plt.ylabel("특성")
def get_feature_importance_figure(estimator, feature_names):
    """Return a 12x8 figure with one horizontal bar per feature importance.

    Bars are drawn from ``estimator.feature_importances_`` and labelled with
    ``feature_names`` in the same order.
    """
    figure, axes = plt.subplots(figsize=(12, 8))
    rows = range(len(feature_names))
    # plt.barh draws on the axes created just above (the current axes).
    plt.barh(rows, estimator.feature_importances_)
    axes.set_yticks(rows)
    axes.set_yticklabels(feature_names, fontsize=14)
    return figure
def PlotFeaturesImportance(X, y, featureNames, dataName):
    '''
    Plot the relative contribution/importance of the features.

    Fits a GradientBoostingClassifier (40 estimators) on X, y, scales its
    feature importances so the largest is 100, and draws them sorted as
    horizontal bars.  The figure is shown in interactive mode and saved as
    "<dataName>TopFeatures.png".

    Best to reduce to top X features first - for interpretability.
    Code example from:
    http://bugra.github.io/work/notes/2014-11-22/an-introduction-to-supervised-learning-scikit-learn/
    '''
    gbc = GradientBoostingClassifier(n_estimators=40)
    gbc.fit(X, y)

    # Importances from the classifier, scaled relative to the largest one.
    feature_importance = gbc.feature_importances_
    feature_importance = 100 * (feature_importance / feature_importance.max())
    sorted_idx = numpy.argsort(feature_importance)
    pos = numpy.arange(sorted_idx.shape[0]) + 4.5

    plt.figure(figsize=(14, 9), dpi=250)
    plt.barh(pos, feature_importance[sorted_idx], align='center',
             color='#7A68A6')
    plt.yticks(pos, numpy.asanyarray(featureNames)[sorted_idx])
    plt.xlabel('Relative Importance')
    plt.title('%s: Top Features' % (dataName))
    # The string 'off' is truthy and switches the grid ON in current
    # matplotlib; False is the portable way to disable it.
    plt.grid(False)
    # Interactive mode makes show() non-blocking, so the figure still exists
    # when savefig runs.
    plt.ion()
    plt.show()
    plt.savefig(str(dataName) + 'TopFeatures.png', dpi=200)
def graph_combine_plot(combined_dictionary, folder_path, figure_size,
                       color_code, sensitive_features, figure_name, font,
                       title_font, target):
    """Plot combined feature rankings as horizontal bars and save a PDF.

    Args:
        combined_dictionary: mapping of feature name -> sequence whose
            element 0 is the importance and element 1 the error value.
        folder_path: directory the PDF is written to.
        figure_size: pair (width, height) stored in
            ``rcParams['figure.figsize']``.
        color_code: bar colour for non-sensitive features.
        sensitive_features: container of lower-case feature names drawn in
            grey.
        figure_name: file name without the ``.pdf`` extension.
        font: font dict for the tick labels and x label.
        title_font: font dict for the title.
        target: unused; kept for interface compatibility.

    Returns:
        The string ``"plotted"``.
    """
    # list(...) so the names can be indexed on Python 3 as well.
    feature_names = list(combined_dictionary.keys())
    feature_importance = []
    feature_error_values = []
    for tp in combined_dictionary.values():
        feature_importance.append(tp[0])
        feature_error_values.append(tp[1])

    # Ascending by importance, so the most important feature is drawn on top.
    sorted_idx = np.argsort(feature_importance)
    final_column_list = []
    truncated_feature_importance = []
    final_error_values = []
    for idx in sorted_idx:
        name = feature_names[idx]
        final_column_list.append(name[0].upper() + name[1:])
        truncated_feature_importance.append(feature_importance[idx])
        final_error_values.append(feature_error_values[idx])

    rcParams['figure.figsize'] = figure_size[0], figure_size[1]
    y_pos = np.arange(len(final_column_list)) + 0.5

    color_format = []
    for idx, column in zip(sorted_idx, final_column_list):
        if column.lower() in sensitive_features:
            # Report the feature that matched.  The original indexed the
            # unsorted name list with a sorted position, printing an
            # unrelated name.
            print(feature_names[idx])
            color_format.append('grey')
        else:
            color_format.append(color_code)

    plt.barh(y_pos, truncated_feature_importance, align='center',
             color=color_format)
    plt.yticks(y_pos, final_column_list, **font)
    plt.xlabel('Combined Attribute Ranking', fontdict=font)
    plt.title('Combined Feature Importance \n across all Methodologies',
              fontdict=title_font)
    # The upper x limit leaves room for the largest error value.
    max_x = (np.max(np.array(truncated_feature_importance))
             + np.max(final_error_values))
    plt.xlim([-1, max_x])
    plt.savefig(folder_path + "/" + "{0}.pdf".format(figure_name),
                bbox_inches='tight')
    plt.clf()
    return "plotted"
def plot(results, total_a, total_b, label_a, label_b, outputFile=None):
    """Draw paired horizontal bars comparing rule support in two groups.

    Args:
        results: iterable of dicts with keys ``'item'``, ``'count_a'`` and
            ``'count_b'``.
        total_a, total_b: denominators turning counts into percentages.
        label_a, label_b: legend labels; a leading underscore gets a space
            prepended so that matplotlib does not hide the entry.
        outputFile: path to save to; when ``None`` the figure is shown.
    """
    def ordering(rule):
        share_a = rule['count_a'] / total_a
        share_b = rule['count_b'] / total_b
        return (-len(rule['item']),
                round(abs(share_a - share_b), 2),
                round(share_a, 2))

    all_rules = sorted(results, key=ordering)
    values_a = [100 * rule['count_a'] / total_a for rule in all_rules]
    values_b = [100 * rule['count_b'] / total_b for rule in all_rules]

    plt.rc('figure', autolayout=True)
    plt.rc('font', size=22)
    fig, ax = plt.subplots(figsize=(24, 18))

    index = range(len(all_rules))
    bar_width = 0.35
    shifted = [i + bar_width for i in index]

    if label_a.startswith('_'):
        label_a = ' ' + label_a
    if label_b.startswith('_'):
        label_b = ' ' + label_b

    bar_a = plt.barh(index, values_a, bar_width, color='b', label=label_a)
    bar_b = plt.barh(shifted, values_b, bar_width, color='r', label=label_b)

    plt.xlabel('Support')
    plt.ylabel('Rule')
    plt.title('Most interesting deviations')
    plt.yticks(shifted, [rule_to_str(rule['item']) for rule in all_rules])

    if all_rules:
        plt.legend(handles=[bar_b, bar_a], loc='best')

    if outputFile is None:
        plt.show()
    else:
        plt.savefig(outputFile)
    plt.close(fig)
def plot_silhouette(km, filename='tmp.png', fast=False):
    """
    Saves a silhouette plot to filename, showing the distributions of
    silhouette scores in clusters.

    Scores of each cluster are sorted and drawn as consecutive horizontal
    bars; clusters are separated by three zero-length bars.

    :param km: a k-means clustering object (``km.k`` and ``km.clusters``
        are read).
    :type km: :class:`KMeans`
    :param filename: name of output plot.
    :type filename: string
    :param fast: if True use :func:`score_fast_silhouette` to compute scores
        instead of :func:`score_silhouette`
    :type fast: boolean.
    """
    import matplotlib.pyplot as plt
    plt.figure()
    scoring = score_fast_silhouette if fast else score_silhouette

    scores = [[] for _ in range(km.k)]
    for i, c in enumerate(km.clusters):
        scores[c].append(scoring(km, i))

    # Build the flat bar list and the tick position of every cluster in one
    # pass.  This replaces the Python 2-only map-slicing / builtin reduce.
    gap = 3
    bars = []
    cpositions = []
    for cluster_scores in scores:
        bars.extend([0] * gap)
        # Tick in the middle of the cluster; // keeps it an integer.
        cpositions.append(len(bars) + len(cluster_scores) // 2)
        bars.extend(sorted(cluster_scores))

    plt.barh(range(len(bars)), bars, linewidth=0, color='c')
    plt.yticks(cpositions, [str(i) for i in range(km.k)])
    plt.ylabel('Cluster')
    plt.xlabel('Silhouette value')
    plt.savefig(filename)
def myplot_hbar_group(df, cols, colors=None, legend_suffix=None, ylabel=None,
                      xlabel=None, show=False, exp_prefix=None):
    """Draw grouped horizontal bars, one group per row of ``df``.

    Args:
        df: DataFrame; its index supplies the tick labels and its last
            index label the title.
        cols: column names to plot, one bar series each.
        colors: optional sequence of colours, one per column.  ``None``
            uses matplotlib's default colour cycle.
        legend_suffix: optional sequence of strings appended to each column
            name in the legend.  ``None`` appends nothing.
        ylabel, xlabel: axis labels; ``None`` leaves the axis unlabelled.
        show: when true, display the figure.
        exp_prefix: when not ``None``, save to ``exp_prefix + "hbar_grp.png"``.
    """
    import numpy as np

    plt.figure()
    index = np.arange(df.shape[0])
    bar_width = 0.30

    for col_ix, col in enumerate(cols):
        # The defaults are None, so indexing them unconditionally crashed.
        color = colors[col_ix] if colors is not None else None
        suffix = legend_suffix[col_ix] if legend_suffix is not None else ''
        plt.barh(index + bar_width * col_ix, df[col], bar_width,
                 color=color, label=col + suffix)

    if ylabel is not None:
        plt.ylabel(ylabel)
    if xlabel is not None:
        plt.xlabel(xlabel)
    plt.yticks(index + bar_width, list(df.index), fontsize=4)
    plt.legend(loc='best', shadow=True, fontsize='xx-small')
    plt.title(list(df[-1:].index)[0], fontsize='x-small')

    if show:
        sys.stderr.write("*** displaying a plot...\n")
        plt.show()

    if exp_prefix is not None:
        exp_filename = exp_prefix + "hbar_grp" + '.png'
        print(" exporting plot:{0} ...".format(exp_filename))
        plt.savefig(exp_filename, dpi=200)
def basic_training(clf, x_train, x_test, y_train, y_test, plot_importance=False):
    """Fit ``clf``, print its test RMSE and timing, optionally plot importances.

    Args:
        clf: estimator with ``fit`` and ``predict``; needs
            ``feature_importances_`` when ``plot_importance`` is true.
        x_train, y_train: training data.  ``x_train.columns`` supplies the
            tick labels of the importance plot.
        x_test, y_test: data used for the reported score.
        plot_importance: when true, show a bar chart of the importances
            scaled so the largest is 100.

    Returns:
        The predictions for ``x_test``.
    """
    print('----------------------')
    print('Basic training')
    print(clf)

    start = time()
    clf.fit(x_train, y_train)
    y_pred = clf.predict(x_test)
    print("RMSE: {}".format(performance_metric(y_test, y_pred)))
    # The reported time covers fit, predict and the metric computation.
    end = time()
    print("Trained model in {:.4f} seconds".format(end - start))

    if plot_importance:
        importance = clf.feature_importances_
        importance = 100.0 * (importance / importance.max())
        sorted_idx = np.argsort(importance)
        pos = np.arange(sorted_idx.shape[0]) + .5
        plt.figure()
        plt.barh(pos, importance[sorted_idx], align='center')
        plt.yticks(pos, x_train.columns[sorted_idx])
        plt.xlabel('Relative Importance')
        plt.title('Variable Importance')
        plt.show()

    print('Done basic training!')
    return y_pred
def plot_feature_importance(estimator, columns, n=50):
    """
    Plots feature importance

    Parameters
    ----------
    estimator : fitted estimator exposing ``feature_importances_``
    columns : names of all features, in the estimator's feature order
    n : int, number of important features to plot; clamped to the number of
        available features

    Return
    ------
    None.  Draws on figure number ``CURR_FIGURE`` and increments that
    module-level counter.
    """
    global CURR_FIGURE

    # Normalize the importances so that they sum to 100.
    feature_importance = np.asarray(estimator.feature_importances_)
    feature_importance = (100.0 * feature_importance) / feature_importance.sum()

    # Asking for more bars than there are features used to fail with a
    # shape mismatch between positions and values.
    n = min(n, len(feature_importance))
    index = np.argsort(feature_importance)[::-1][:n]

    # asarray lets plain lists be indexed with ``index`` as well.
    feature_names = np.asarray(columns)

    plt.figure(CURR_FIGURE)
    # Reversed positions put the most important feature on top.
    pos = (np.arange(n) + .5)[::-1]
    plt.barh(pos, feature_importance[index], align='center')
    plt.yticks(pos, feature_names[index])
    plt.xlabel('Relative Importance')
    plt.title(str(n) + ' Most Important Features')
    CURR_FIGURE += 1
def model_metrics(classifiers, var_names):
    """Print and plot the mean Gini importance over several ensembles.

    Args:
        classifiers: fitted ensembles exposing ``feature_importances_`` and
            ``estimators_`` (each estimator also with
            ``feature_importances_``).
        var_names: feature names, in the classifiers' feature order.

    For each feature the importance is averaged over the classifiers; the
    error bar is the mean over classifiers of the per-tree standard
    deviation.  Features with zero mean importance are dropped.
    """
    print('Gini Importances:')
    shape = (len(classifiers), len(var_names))
    importances = np.zeros(shape=shape)
    importances_std = np.zeros(shape=shape)
    for i, classifier in enumerate(classifiers):
        importances[i, :] = classifier.feature_importances_
        importances_std[i, :] = np.std(
            [tree.feature_importances_ for tree in classifier.estimators_],
            axis=0)
    mean_importances = np.mean(importances, axis=0)
    std_importances = np.mean(importances_std, axis=0)

    # Keep only features that matter.
    feats = [feat
             for feat in zip(var_names, mean_importances, std_importances)
             if feat[1] > 0.0]

    # Table: most important first.
    feats.sort(reverse=True, key=lambda x: x[1])
    print(tabulate(feats, headers=['Variable', 'Mean', 'Std']))

    # Plot: ascending, so the most important feature is drawn on top.
    feats.sort(reverse=False, key=lambda x: x[1])
    # zip() is an iterator on Python 3 and cannot be indexed; unpack once.
    names, means, stds = zip(*feats) if feats else ((), (), ())

    plt.figure()
    plt.title("Gini Importance")
    y_pos = np.arange(len(feats))
    plt.barh(y_pos, width=means, height=0.5, color='r', xerr=stds,
             align="center")
    plt.yticks(y_pos, names)
    plt.show()
def plot_zrtt_treshold(data, output_path):
    """Plot the ZRTT of every hop as horizontal bars and save the figure.

    Args:
        data: iterable of ``(ip, pais, zrtt)`` triples, one per hop.  ``ip``
            and ``pais`` are joined into a two-line tick label; ``zrtt`` is
            converted with ``float``.
        output_path: file the figure is written to.

    The first hop is drawn at the top.  A dashed vertical line marks the
    threshold (fixed at 1).
    """
    threshold = 1
    gateways, zrtts = [], []
    for hop in data:
        ip, pais, zrtt = hop
        gateways.append(ip + "\n" + pais)
        zrtts.append(float(zrtt))
    # Reverse so that the first hop gets the highest y position.
    gateways.reverse()
    zrtts.reverse()

    fig = plt.figure()
    y_pos = np.arange(len(gateways))
    plt.barh(y_pos, zrtts, align='center', alpha=0.4)
    plt.yticks(y_pos, gateways, horizontalalignment='right', fontsize=9)
    plt.title('ZRTTs para cada hop')
    plt.xlabel('ZRTT')
    plt.ylabel('Hop')

    # Vertical line at x=0
    plt.vlines(0, -1, len(gateways), alpha=0.4)
    # ZRTT threshold
    plt.vlines(threshold, -1, len(gateways), linestyle='--', color='b',
               alpha=0.4)
    plt.text(threshold, len(gateways) - 1, 'Umbral', rotation='vertical',
             verticalalignment='top', horizontalalignment='right')

    fig.set_size_inches(6, 9)
    plt.tight_layout()
    # The keyword is bbox_inches; the misspelt 'box_inches' is not a savefig
    # option.
    plt.savefig(output_path, dpi=1000, bbox_inches='tight')
def plot_unique_by_date(alignment_summaries, metadata):
    """Plot the unique-alignment rate of each run, newest run first.

    Args:
        alignment_summaries: alignment summary identifiers.  Each is passed
            to ``simpleseq.sam.get_alignment_metadata`` and also used as the
            label matched against the metadata index.
        metadata: CSV file whose first column is the index and which has a
            ``'Date Produced'`` column.

    Only runs present in both inputs are drawn.  A dashed vertical line
    marks the rate stored under the label ``'Klein_in_drop'``.
    """
    plt.figure(figsize=(8, 5.5))
    # Replacement for the removed DataFrame.from_csv (same defaults).
    df_meta = pd.read_csv(metadata, index_col=0, parse_dates=True)
    df_meta['Date Produced'] = pd.to_datetime(df_meta['Date Produced'])

    alndata = [simpleseq.sam.get_alignment_metadata(summary)
               for summary in alignment_summaries]
    unique = pd.Series(np.array([s['uniq_rate'] for s in alndata]),
                       index=alignment_summaries)

    # plot unique alignments
    index = df_meta.index.intersection(unique.index)
    # sort_values / .loc replace the removed DataFrame.sort and .ix.
    order = df_meta.loc[index].sort_values(
        by='Date Produced', ascending=False).index
    bottoms = np.arange(len(index))
    rates = unique.loc[order]
    thickness = 0.9
    # Ticks sit at +0.5 and the y range is [0, n], i.e. the layout assumes
    # edge-aligned bars (the barh default before matplotlib 2.0).
    plt.barh(bottoms, rates, thickness, align='edge')
    plt.yticks(bottoms + 0.5, order, fontsize=10)
    ymin, ymax = 0, len(bottoms)
    plt.ylim((ymin, ymax))
    plt.xlabel('percentage')
    plt.title('comparative alignment summary')
    plt.ylabel('time (descending)')

    # plot klein in-drop line
    plt.vlines(unique['Klein_in_drop'], ymin, ymax, color='indianred',
               linestyles='--')
    sns.despine()
    plt.tight_layout()
def plot_freqs(freqs, n=30):
    """Show the ``n`` most frequent words as horizontal bars, largest on top.

    Args:
        freqs: mapping of word -> frequency.
        n: number of words to plot; clamped to ``len(freqs)``.

    Uses the module-level helpers ``beautify_plot`` and ``t20``.
    """
    n = min(n, len(freqs))

    # Top n words in decreasing order of frequency.
    words_sorted = sorted(freqs, key=freqs.get, reverse=True)[:n]
    freqs_sorted = [freqs[word] for word in words_sorted]

    fig = plt.figure(figsize=(6, 4))
    beautify_plot(fig)
    plt.ylim(0, n)

    # Centre bar k on k + 0.5 so the n unit-height bars exactly fill [0, n].
    # The most frequent word gets the highest position.
    bar_width = 1.0
    bar_locs = np.arange(n - 1, -1, -1) + bar_width / 2
    plt.barh(bar_locs, freqs_sorted, height=bar_width, align='center',
             color=t20[0], alpha=0.8, linewidth=0)

    # Label each bar with its word.  Ticks reuse the bar centres and the
    # label list has exactly n entries, so both always line up.
    plt.yticks(bar_locs, words_sorted)
    plt.xlabel('Word Frequency (per billlion)')
    plt.title('Top ' + str(n) + ' words used in Billboard 100 Songs')
    plt.show()
def lookAtVoltages(voltagetraces, startRec, plotTime):
    """Plot voltage traces, their average and a side histogram of all samples.

    Args:
        voltagetraces: sequence of traces, one per entry of the module-level
            ``voltList``.
        startRec: time of the first plotted sample (x-axis offset).
        plotTime: number of samples plotted per trace.

    Returns:
        ``(fig, meanV, stdDev)``: the figure, the mean of the per-trace
        means and the standard deviation of the per-trace means.
    """
    voltage_means = zeros(len(voltagetraces))
    for n in range(len(voltagetraces)):
        voltage_means[n] = mean(voltagetraces[n])
    print('mean voltages (mean,std.dev):')
    meanV = mean(voltage_means)
    stdDev = sqrt(var(voltage_means))
    print(meanV, stdDev)

    n_samples = int(plotTime)
    times = range(int(startRec), n_samples + int(startRec))

    fig = plt.figure()
    ax1 = fig.add_axes([.15, .1, .7, .8])
    plotVolts = zeros([len(voltList), n_samples])
    for i in range(len(voltList)):
        plotVolts[i, :] = voltagetraces[i][0:n_samples]
        plt.plot(times, voltagetraces[i][0:n_samples], color='0.75',
                 label=str(voltList[i]))
    # Average over all traces, drawn on top in red.
    plt.plot(times, sum(plotVolts, 0) / len(voltList), 'r', linewidth=3)
    plt.ylabel("rate [Hz]")
    plt.xlabel("time [ms]")

    # 50 equal-width bins spanning the plotted value range.
    bins = arange(plotVolts.min(), plotVolts.max(),
                  (plotVolts.max() - plotVolts.min()) / 50)
    # Counts are additive, so one histogram over all samples equals the sum
    # of the per-column histograms.  The removed numpy keywords new= and
    # normed= are dropped.
    hist = histogram(plotVolts, bins)[0].astype(float)
    # Dotted zero line.
    plt.plot(times, zeros(n_samples), 'k:', linewidth=3)

    # Narrow axes on the right holding the sideways histogram.
    ax2 = fig.add_axes([.85, .1, .1, .8])
    ax2.set_axis_off()
    plt.barh(bins[:-1], hist[:], height=(bins[1] - bins[0]), edgecolor='b')
    ax1.set_ylim(plotVolts.min(), plotVolts.max())
    ax2.set_ylim(ax1.get_ylim())
    return fig, meanV, stdDev
def plot_feature_importance(regressor, params, X_test, y_test):
    """Show training/test loss per boosting stage next to feature importances.

    Args:
        regressor: fitted boosting model exposing ``staged_predict``,
            ``loss_``, ``train_score_`` and ``feature_importances_``.
        params: dict; only ``params['n_estimators']`` is read.
        X_test, y_test: held-out data used for the per-stage test loss.

    Tick labels for the importance panel come from the module-level
    ``feature_cols``.
    """
    n_stages = params['n_estimators']
    test_score = np.zeros((n_stages,), dtype=np.float64)
    for stage, y_pred in enumerate(regressor.staged_predict(X_test)):
        test_score[stage] = regressor.loss_(y_test, y_pred)

    plt.figure(figsize=(12, 6))

    # Left panel: loss against boosting iteration (1-based).
    plt.subplot(1, 2, 1)
    plt.title('MAE Prediction vs. Actual (USD) ')
    plt.plot(np.arange(n_stages) + 1, regressor.train_score_, 'b-',
             label='Training set Deviance')
    plt.plot(np.arange(n_stages) + 1, test_score, 'r-',
             label='Test set deviance')
    plt.legend(loc='upper right')
    plt.xlabel('Boosting Iterations')
    plt.ylabel('Mean absolute error')

    # Right panel: importances scaled so the largest is 100, ascending.
    raw = regressor.feature_importances_
    scaled = 100.0 * (raw / raw.max())
    ranking = np.argsort(scaled)
    rows = np.arange(ranking.shape[0]) + .5
    plt.subplot(1, 2, 2)
    plt.barh(rows, scaled[ranking], align='center')
    labels = np.array(feature_cols)
    plt.yticks(rows, labels[ranking])
    plt.xlabel('Relative importance')
    plt.title('Variable Importance')
    plt.show()
def bii_hbar(group, code, in_data):
    """Plot the mean of eight score categories as horizontal bars and save it.

    Args:
        group: when truthy, standard-deviation error bars are added.
        code: title suffix.  A non-string of length 2 is joined with a
            space.
        in_data: nine sequences in the order trust, res, div, bel, collab,
            resall, comfort, iz, score.

    A vertical green line marks the mean of ``score``.  The figure is
    saved to ``static/hbar``.
    """
    [trust, res, div, bel, collab, resall, comfort, iz, score] = in_data
    plt.figure()

    # ``basestring`` only exists on Python 2; fall back to str on Python 3.
    try:
        string_types = basestring
    except NameError:
        string_types = str
    if len(code) == 2 and not isinstance(code, string_types):
        code = code[0] + " " + code[1]

    categories = [trust, res, div, bel, collab, resall, comfort, iz]
    # Reversed so the first category is drawn at the top.
    val = [mean(category) for category in categories][::-1]
    pos = arange(8)  # the bar centers on the y axis

    plt.plot((mean(score), mean(score)), (-1, 8), 'g', label='Average',
             linewidth=3)
    plt.barh(pos, val, align='center', label='Score')
    if group:
        err = [std(category) for category in categories][::-1]
        plt.errorbar(val, pos, xerr=err, label="St Dev", color='r', fmt='o')

    lgd = plt.legend(loc='upper center', shadow=True, fontsize='x-large',
                     bbox_to_anchor=(1.1, 1.1), borderaxespad=0.)
    plt.yticks(pos, ('Tru', 'Res', 'Div', 'Ment Str', 'Collab', 'Res All',
                     'Com Zone', 'In Zone')[::-1])
    plt.title('Results for ' + code, fontweight='bold', y=1.01)
    # The earlier plt.xlabel('Score') was always overwritten by this call
    # and has been removed.
    plt.xlabel(r'$\mathrm{Total \ Innovation \ Index \ Score:}\ %.3f$'
               % (mean(score)), fontsize='18')
    axes = plt.gca()
    axes.set_xlim([0, 10])

    file_name = "hbar"
    path_name = "static/%s" % file_name
    # Pass the legend explicitly so the tight bounding box includes it.
    plt.savefig(path_name, bbox_extra_artists=(lgd,), bbox_inches='tight')
def barh_plot():
    """
    Show a grouped horizontal bar chart of fixed sample scores.

    Uses the module-level font object ``myfont`` for the title.
    """
    # Sample data
    means_men = (20, 35, 30, 35, 27)
    means_women = (25, 32, 34, 20, 25)

    # Title
    plt.title("横向柱状图", fontproperties=myfont)

    # Layout parameters
    index = np.arange(len(means_men))
    bar_height = 0.35

    # Horizontal bars; the women series is shifted up by one bar height
    plt.barh(index, means_men, height=bar_height, alpha=0.2, color="b",
             label="Men")
    plt.barh(index + bar_height, means_women, height=bar_height, alpha=0.8,
             color="r", label="Women")
    plt.legend(loc="upper right", shadow=True)

    # Value label next to the end of every bar
    for row, score in zip(index, means_men):
        plt.text(score + 0.3, row, score, ha="left", va="center")
    for row, score in zip(index, means_women):
        plt.text(score + 0.3, row + bar_height, score, ha="left", va="center")

    # Axis range, axis names and tick labels
    plt.xlim(0, 45)
    plt.xlabel("Scores")
    plt.ylabel("Group")
    plt.yticks(index + (bar_height / 2), ("A", "B", "C", "D", "E"))

    # Display the figure
    plt.show()
    return
def visualize_silhouette_score(X, y_km):
    """Show a silhouette plot: sorted per-sample coefficients for each cluster.

    Args:
        X: sample data passed to ``metrics.silhouette_samples``.
        y_km: cluster label of every sample.

    Clusters are stacked bottom to top in sorted label order.  A red dashed
    line marks the mean coefficient over all samples.  Tick labels are the
    cluster labels plus one.
    """
    cluster_labels = np.unique(y_km)
    n_clusters = cluster_labels.shape[0]
    silhouette_vals = metrics.silhouette_samples(X, y_km, metric='euclidean')

    lower = 0
    yticks = []
    for i, c in enumerate(cluster_labels):
        members = silhouette_vals[y_km == c]
        members.sort()
        upper = lower + len(members)
        plt.barh(range(lower, upper), members, height=1.0, edgecolor='none',
                 color=cm.jet(i / n_clusters))
        # Tick in the middle of this cluster's band.
        yticks.append((lower + upper) / 2)
        lower = upper

    silhouette_avg = np.mean(silhouette_vals)
    plt.axvline(silhouette_avg, color="red", linestyle="--")
    plt.yticks(yticks, cluster_labels + 1)
    plt.ylabel('Cluster')
    plt.xlabel('Silhouette coefficient')
    plt.show()
'Inclination', 'Asc Node Longitude', 'Perihelion Distance', 'Perihelion Arg', 'Perihelion Time', 'Mean Anomaly' ]] # Using Random Forest Feature importance to select the most important features from sklearn.ensemble import RandomForestRegressor model = RandomForestRegressor(random_state=1, max_depth=10) model.fit(X, y) features = X.columns importances = model.feature_importances_ indices = np.argsort(importances)[-1:-4:-1] plt.title('Feature Importances') plt.barh(range(len(indices)), importances[indices], color='b', align='center') plt.yticks(range(len(indices)), [features[i] for i in indices]) plt.xlabel('Relative Importance') plt.show() # Well we can clearly see in the feature importance graph that there are just three # variables which contributes by more than 96% to the target then all the other # variables, the other variables contribute with less than 1%, so we will just # keep only those three variables: # (Minimum Orbit Intersection, Est Dia in M(average) and Absolute Magnitude) # Plotting the Data since we have only three features X = X.iloc[:, indices] def plotData2D(X=X, y_=y):
plt.savefig('mytable.png') # INSPECTION ====================================================================================== pylab.rcParams['figure.figsize'] = (14.5, 6.0) crimes_rating = crimeData['Category'].value_counts() print('San Francisco Crimes\n') print('Category\t\tNumber of occurences') print(crimes_rating) top = 18 y_pos = np.arange(len(crimes_rating[0:top].keys())) plt.barh(y_pos, crimes_rating[0:top].get_values(), align='center', alpha=0.4, color='blue') plt.yticks(y_pos, [x.title() for x in crimes_rating[0:top].keys()], fontsize=11) plt.xlabel('Number of occurences', fontsize=14) plt.title('San Francisco Crime Classification', fontsize=26) plt.ticklabel_format(style='sci', axis='x', scilimits=(0, 0)) plt.savefig("crimes_occurences.png") """ Add new features to the dataset: Weekday (Monday, Tuesday, ...) Hour of day Month Year Day of month
df.set_index(['所在区域'], inplace=True)
# Group the rows by district
groupby_obj = df.groupby("所在区域")
# Mean total price per district.  The column is selected before averaging,
# so other (possibly non-numeric) columns are never averaged.
total_list = groupby_obj["总价"].mean()
print(total_list)
# The district names (the index) serve as tick labels
total_index_list = total_list.index
print(total_list, total_index_list)
# Tick positions
y_ticks = range(len(total_index_list))
x_ticks = range(0, 260, 10)
# Create the canvas
plt.figure(figsize=(20, 9), dpi=80)
# Draw the bars
plt.barh(y=y_ticks, width=total_list, height=0.2, color='orange')
# y-axis ticks
plt.yticks(ticks=y_ticks, labels=total_index_list)
# x-axis ticks
plt.xticks(ticks=x_ticks)
# Axis labels and title
plt.xlabel("区域平均价格(单位:万元)")
plt.ylabel("区域名称")
plt.title("成都2020年2月份各区平均房价一览图")
# Turn on the grid
plt.grid()
# Save the figure
plt.savefig(to_path)
# Show the figure
# Generating feature importance data for the Random Forest model built
importances = trained_model.feature_importances_
indices = np.argsort(importances)[::-1]
# Index.drop takes (labels, errors); the stray positional 1 was being passed
# as ``errors``, so it is dropped here.
trainlabels = list(train_x.columns.drop('msno'))
importanceList = np.array((importances)).tolist()
featureList = {}
for i in range(len(trainlabels)):
    featureList[trainlabels[i]] = importanceList[i]

# Getting top ten features as per feature importance generated above
# (dict views cannot be sorted in place on Python 3)
vallist = sorted(featureList.values())
import operator
sorted_d = sorted(featureList.items(), key=operator.itemgetter(1), reverse=True)

# Plotting feature importance data for the Random Forest model built
plt.figure()
plt.rcParams['figure.figsize'] = 17, 12
plt.title("Feature importances")
plt.yticks(range(train_x.shape[1] - 1), trainlabels)
plt.barh(indices, importances[indices], color="b", align="center")
# NOTE(review): the original ``plt.xlim([-1, ])`` passes a one-element list,
# which matplotlib cannot unpack into (left, right).  The features are on the
# y axis here, so the limit is applied there, from -1 to the number of
# features -- confirm this matches the intended framing.
plt.ylim([-1, train_x.shape[1] - 1])
plt.xlabel('Features importance score')
plt.show()
"智取威虎山", "大闹天竺", "金刚狼3:殊死一战", "蜘蛛侠:英雄归来", "悟空传", "银河护卫队2", "情圣", "新木乃伊", ] ## 票房前20的电影数据(即y轴数据),单位:亿 y = [ 56.01, 26.94, 17.53, 16.49, 15.45, 12.96, 11.8, 11.61, 11.28, 11.12, 10.49, 10.3, 8.75, 7.55, 7.32, 6.99, 6.88, 6.86, 6.58, 6.23 ] ## 设置图形大小与清晰度 pyplot.figure(figsize=(20, 8), dpi=80) ## 绘制条形图(横状) # 注: 因为不能使用字符串,所以使用可迭代的数字数据 # 参数height设置条形高度,参数color设置条形颜色 pyplot.barh(range(len(x)), y, height=0.3, color='orange') ## 设置y轴刻度间距,字符串,中文显示,显示旋转角度 pyplot.yticks(range(len(x)), x) ## 设置网格,alpha设置网格线透明度 pyplot.grid(alpha=0.3) ## 展示条形图 pyplot.show()
# This looks like a numeric variable, so convert it to a float fraction:
loan_stats_data['revol_util'] = (
    loan_stats_data['revol_util'].str.rstrip('%').astype('float') / 100
)

print('zip_code:')
print(loan_stats_data.zip_code.unique())
print("Too many categories. We won't use this variable in further analysis.")

# Find numeric variables:
numeric_data = loan_stats_data._get_numeric_data()

# Correlation of every numeric column with loan_status_bool.  The last
# row/column is skipped below ([:-1]).
r = numeric_data.corr(method="pearson")
plt.figure(figsize=(20, 10))
plt.barh(r.columns[:-1], r['loan_status_bool'][:-1])
plt.axvline(color='black')
plt.xlim(-0.4, 0.4)
plt.xticks(fontsize=20)
plt.yticks(fontsize=20)
plt.ylabel('Parameter', fontsize=25)
plt.xlabel('Correlation coefficient (r)', fontsize=25)
plt.title(
    'Correlations between all numerical\nparameters and loan status',
    fontsize=30,
)
plt.show()

# Keep only numeric parameters with an absolute correlation greater than 0.1:
r = r['loan_status_bool'][:-1]
corr_params = r.values
is_corr = np.where(np.abs(corr_params) > 0.1)
selected_numeric_params = numeric_data[r.index[is_corr]]
def traffic_plot_bar(X, column):
    """Show how often each value of ``X[column]`` occurs, as horizontal bars.

    Bars follow the order returned by ``value_counts`` and are labelled
    with the corresponding values.
    """
    counts = X[column].value_counts()
    labels = tuple(counts.index)
    rows = np.arange(len(labels))
    plt.barh(rows, counts, tick_label=labels)
    plt.show()
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import pyodbc as db

# Connect to the db_final database on the local SQL Server instance using
# Windows authentication.
con = db.connect(
    'DRIVER={ODBC Driver 13 for SQL Server};SERVER=.;Trusted_Connection=yes;DATABASE=db_final'
)
try:
    cursor = con.cursor()
    cursor.execute(
        "SELECT distinct [Fakultas], YEAR(tanggal), COUNT(mahasiswa) FROM [db_final].[dbo].[MHS] GROUP BY [Fakultas],YEAR(tanggal) ORDER BY COUNT(mahasiswa) DESC"
    )
    rows = cursor.fetchall()
finally:
    # Always release the connection, even if the query fails.
    con.close()

# Name the columns up front so the frame has the expected schema even when
# the query returns no rows (renaming afterwards would leave it column-less).
df = pd.DataFrame([list(row) for row in rows],
                  columns=['Fakultas', 'Tahun', 'Jumlah'])

# Horizontal bar chart: student count ('Jumlah') per faculty ('Fakultas').
y = df['Jumlah']
x = df['Fakultas']
plt.barh(x, y)
plt.show()
df
print('#', 50 * "-") # ----------------------- data = np.random.rand(1024, 2) plt.scatter(data[:, 0], data[:, 1]) plt.show() print('#', 50 * "-") # ----------------------- data = [5, 10, 30, 8] plt.bar(range(len(data)), data) plt.show() print('#', 50 * "-") # ----------------------- plt.bar(range(len(data)), data, width=1.) plt.show() plt.barh(range(len(data)), data) plt.show() print('#', 50 * "-") # ----------------------- data = np.random.rand(3, 4) x8 = np.arange(4) plt.bar(x8 + 0.00, data[0], color='b', width=0.25) plt.bar(x8 + 0.25, data[1], color='g', width=0.25) plt.bar(x8 + 0.50, data[2], color='r', width=0.25) plt.show() print('#', 50 * "-") # ----------------------- a = np.random.rand(4) b = np.random.rand(4) x9 = np.arange(4) plt.bar(x9, a, color='b')
# Vertical bar chart of language popularity.
plt.style.use('fivethirtyeight')
plt.bar(lang, popularity)
plt.title('Popularity of Languages')
plt.xlabel('languages')
plt.ylabel('Popularity')
plt.tight_layout()
plt.show()
# In[37]:
# Horizontal bar chart of the same data using barh.
plt.style.use('fivethirtyeight')
# Reverse both lists so the first entry ends up as the top bar.
# NOTE: list.reverse() mutates `lang` and `popularity` in place, so running
# this cell a second time flips the order back.
lang.reverse()
popularity.reverse()
plt.barh(lang, popularity)
plt.title('Popularity of Languages')
# With horizontal bars the value axis is x, so only the x label is set.
#plt.xlabel('languages')
plt.xlabel('Popularity')
plt.tight_layout()
plt.show()
# In[ ]:
def anual_comparison(personas=0, graphs=True):
    """Compare yearly energy use and cost of several water-heating scenarios.

    For each scenario the per-day / per-month energy consumption and the
    per-month cost are aggregated, optionally plotted against the base
    scenario ('Esc. Base'), and the yearly totals are written to two CSV
    files in the ``generated_csvs`` directory.

    Args:
        personas: forwarded unchanged to ``gen_yeardata``.
        graphs: when true, the comparison figures are drawn; also forwarded
            to ``gen_yeardata``.

    Returns:
        ``[ordenadaE, ordenadaP]``: two DataFrames indexed by scenario name
        ('Sistema'), holding the rounded yearly energy ('kWh/año') and the
        rounded yearly cost ('$/año'), each sorted in descending order.
    """
    import os  # local import: only needed to build the CSV output paths

    def _sum_by(frame, group_col, value_col):
        # Total of value_col per group_col, returned as a flat DataFrame.
        return frame.groupby([group_col])[value_col].sum().reset_index()

    def _annotated_barh(table, column, title, xlabel, label_pos, label_size):
        # Draw one horizontal bar per scenario and annotate each bar with its
        # size relative to the base scenario. Returns the axes used.
        plt.figure()
        axis = plt.subplot(1, 1, 1)
        plt.title(title, fontsize=28, color='c')
        plt.barh(range(len(table)), table.iloc[:, 0], align='center')
        plt.yticks(range(len(table)), table.index, fontsize=20)
        plt.xlabel(xlabel, fontsize=24)
        plt.xticks(fontsize=20)
        axis.set_axisbelow(True)
        plt.grid(False, axis='y')
        base = table.at['Esc. Base', column]
        # Iterate the 1-D column (not the 2-D `.values`) so int() receives a
        # scalar instead of a one-element array.
        porc = [int((value / base) * 100) for value in table.iloc[:, 0]]
        bar_labels = [str(p) + '% del Esc. Base' for p in porc]
        for rect, label in zip(axis.patches, bar_labels):
            width = rect.get_width()
            axis.text(width * label_pos, rect.get_y(), label,
                      ha='center', va='bottom', fontsize=label_size,
                      color='r', weight='bold')
        return axis

    yeardata = gen_yeardata(personas, graphs)
    key = {1: 'DOY', 2: 'MES'}  # parameter a: grouping column
    key2 = {1: 'Econs [kWh]', 2: '$'}  # parameter b: value column
    tit = {
        1: 'Calefón A ',
        2: 'Bomba de calor ',
        3: 'Gas con Alm. ',
        4: 'Gas Inst. '
    }
    for b in range(1, 3):
        for a in range(1, 3):
            # The cost ('$') series is only evaluated per month.
            if b == 2 and a == 1:
                continue
            group_col = key[a]
            value_col = key2[b]
            if b == 1:
                es_base = _sum_by(calefon(yeardata, 'B'), group_col, value_col)
                es_caleA = _sum_by(calefon(yeardata, 'A'), group_col,
                                   value_col)
                es_caleA_col = _sum_by(calefon(yeardata, 'A', True),
                                       group_col, value_col)
                es_bba = _sum_by(bbacalor(yeardata), group_col, value_col)
                es_bba_col = _sum_by(bbacalor(yeardata, True), group_col,
                                     value_col)
                es_gasal = _sum_by(gas_almacen(yeardata), group_col,
                                   value_col)
                es_gasal_col = _sum_by(gas_almacen(yeardata, True), group_col,
                                       value_col)
                es_gasin = _sum_by(gas_inst(yeardata), group_col, value_col)
                es_gasin_col = _sum_by(gas_inst(yeardata, True), group_col,
                                       value_col)
                esc = {1: es_caleA, 2: es_bba, 3: es_gasal, 4: es_gasin}
                esc3 = {
                    1: es_caleA_col,
                    2: es_bba_col,
                    3: es_gasal_col,
                    4: es_gasin_col
                }
                labels = {1: '', 2: '', 3: '', 4: ''}
                ylab = 'Consumo kWh/día'
                if a == 2:
                    ylab = 'Consumo kWh/mes'
                # Yearly energy totals; insertion order matches the scenario
                # order: base, without collector, with collector.
                totalE = {'Esc. Base': es_base[value_col].sum()}
                for n in range(1, 5):
                    totalE[tit[n]] = esc[n][value_col].sum()
                for n in range(1, 5):
                    totalE[tit[n] + ' con Col.'] = esc3[n][value_col].sum()
            else:
                ylab = 'Consumo $/mes'
                # Tariff functions applied to each scenario's data.
                tar = {1: ressimple, 2: resdoble, 3: gasres, 4: supergas}
                es_base = ressimple(calefon(yeardata, 'B'))
                es_caleA_rs = tar[1](calefon(yeardata, 'A'))
                es_caleA_rs_col = tar[1](calefon(yeardata, 'A', True))
                es_caleA_dh = tar[2](calefon(yeardata, 'A'))
                es_caleA_dh_col = tar[2](calefon(yeardata, 'A', True))
                es_bba_rs = tar[1](bbacalor(yeardata))
                es_bba_rs_col = tar[1](bbacalor(yeardata, True))
                es_bba_dh = tar[2](bbacalor(yeardata))
                es_bba_dh_col = tar[2](bbacalor(yeardata, True))
                es_gasal_gn = tar[3](gas_almacen(yeardata))
                es_gasal_gn_col = tar[3](gas_almacen(yeardata, True))
                es_gasal_glp = tar[4](gas_almacen(yeardata))
                es_gasal_glp_col = tar[4](gas_almacen(yeardata, True))
                es_gasin_gn = tar[3](gas_inst(yeardata))
                es_gasin_gn_col = tar[3](gas_inst(yeardata, True))
                es_gasin_glp = tar[4](gas_inst(yeardata))
                es_gasin_glp_col = tar[4](gas_inst(yeardata, True))
                esc = {
                    1: es_caleA_rs,
                    2: es_bba_rs,
                    3: es_gasal_gn,
                    4: es_gasin_gn
                }
                esc2 = {
                    1: es_caleA_dh,
                    2: es_bba_dh,
                    3: es_gasal_glp,
                    4: es_gasin_glp
                }
                esc3 = {
                    1: es_caleA_rs_col,
                    2: es_bba_rs_col,
                    3: es_gasal_gn_col,
                    4: es_gasin_gn_col
                }
                esc4 = {
                    1: es_caleA_dh_col,
                    2: es_bba_dh_col,
                    3: es_gasal_glp_col,
                    4: es_gasin_glp_col
                }
                labels = {
                    1: 'Res. Simp.',
                    2: 'Res. Simp.',
                    3: 'Gas Nat.',
                    4: 'Gas Nat.'
                }
                labels2 = {
                    1: 'Doble Hor.',
                    2: 'Doble Hor.',
                    3: 'GLP',
                    4: 'GLP'
                }
                # Yearly cost totals, grouped by tariff and collector usage.
                totalP = {'Esc. Base': es_base[value_col].sum()}
                for n in range(1, 5):
                    totalP[tit[n] + labels[n]] = esc[n][value_col].sum()
                for n in range(1, 5):
                    totalP[tit[n] + labels2[n]] = esc2[n][value_col].sum()
                for n in range(1, 5):
                    totalP[tit[n] + 'con colect ' +
                           labels[n]] = esc3[n][value_col].sum()
                for n in range(1, 5):
                    totalP[tit[n] + 'con colect ' +
                           labels2[n]] = esc4[n][value_col].sum()
            if graphs:
                plt.figure()
                style.use('fivethirtyeight')
                maxy = int(round(es_base[value_col].max() * 1.3))
                # A zero step would make range() raise for very small maxima.
                tick_step = max(1, round(maxy / 5))
                y = list(range(0, maxy, tick_step))
                # Each subplot is created exactly once; creating them up
                # front and again inside the loop stacks duplicate axes on
                # current matplotlib versions.
                ax = {}
                for i in range(1, 5):
                    if i == 1:
                        ax[i] = plt.subplot(4, 1, 1)
                    else:
                        ax[i] = plt.subplot(4, 1, i, sharex=ax[1])
                    plt.plot(es_base[group_col],
                             es_base[value_col],
                             label='Esc. Base',
                             color='r',
                             linewidth=1.5)
                    plt.plot(esc[i][group_col],
                             esc[i][value_col],
                             label=labels[i] + ' sin colector',
                             color='b',
                             linewidth=1.5)
                    plt.plot(esc3[i][group_col],
                             esc3[i][value_col],
                             label=labels[i] + ' con colector',
                             color='tab:orange',
                             linewidth=1.5)
                    shaded = [esc[i]]
                    if b == 2:
                        plt.plot(esc2[i][group_col],
                                 esc2[i][value_col],
                                 label=labels2[i] + ' sin colector',
                                 color='m',
                                 linewidth=1.5)
                        plt.plot(esc4[i][group_col],
                                 esc4[i][value_col],
                                 label=labels2[i] + ' con colector',
                                 color='tab:gray',
                                 linewidth=1.5)
                        shaded = [esc2[i], esc[i]]
                    # Green where the scenario is below the base scenario,
                    # yellow where it is above.
                    for other in shaded:
                        plt.fill_between(
                            es_base[group_col],
                            es_base[value_col],
                            other[value_col],
                            where=(es_base[value_col] > other[value_col]),
                            color='g',
                            alpha=0.2)
                        plt.fill_between(
                            es_base[group_col],
                            other[value_col],
                            es_base[value_col],
                            where=(es_base[value_col] < other[value_col]),
                            color='y',
                            alpha=0.2)
                    if i == 3:
                        plt.ylabel(ylab, fontsize=24)
                    plt.yticks(y, fontsize=20)
                    plt.ylim(-maxy * 0.03, maxy)
                    plt.legend(fontsize=10, loc=2)
                    plt.setp(ax[i].get_xticklabels(), visible=False)
                    plt.grid(True)
                    plt.title('Esc. ' + tit[i], fontsize=28, color='c')
                    if i == 4:
                        # Only the bottom subplot shows the shared x ticks.
                        plt.setp(ax[i].get_xticklabels(), visible=True)
                        x = list(range(1, 13))
                        plt.xlabel("Mes del año", fontsize=24)
                        if a == 1:
                            x = list(range(15, 366, 15))
                            x.insert(0, 1)
                            plt.xlabel("día del año", fontsize=24)
                        plt.xticks(x, fontsize=20)
                plt.subplots_adjust(hspace=0.15)
    ordenadaE = pd.DataFrame(totalE, index=['kWh/año'
                                            ]).transpose().sort_values(
                                                by='kWh/año', ascending=False)
    ordenadaP = pd.DataFrame(totalP, index=['$/año'
                                            ]).transpose().sort_values(
                                                by='$/año', ascending=False)
    if graphs:
        _annotated_barh(ordenadaE, 'kWh/año',
                        'Consumo calentamiento de agua caliente anual',
                        'kWh / año', 0.8, 20)
        ax = _annotated_barh(ordenadaP, '$/año',
                             'Costo calentamiento de agua caliente anual',
                             'miles de $ / año', 0.9, 18)
        # Show the cost axis in thousands of $.
        scale_x = 1000
        ticks_x = ticker.FuncFormatter(
            lambda x, pos: '{0:g}'.format(x / scale_x))
        ax.xaxis.set_major_formatter(ticks_x)
        plt.subplots_adjust(left=0.18)
    ordenadaP.index.name = 'Sistema'
    ordenadaE.index.name = 'Sistema'
    ordenadaP = ordenadaP.round(0)
    ordenadaE = ordenadaE.round(0)
    hoy = dt.date.today().strftime("%d-%m-%y")
    # Build the paths portably: a literal backslash is only a directory
    # separator on Windows.
    os.makedirs('generated_csvs', exist_ok=True)
    ordenadaE.to_csv(
        os.path.join('generated_csvs', 'Consumo anual Energia ' + hoy + '.csv'))
    ordenadaP.to_csv(
        os.path.join('generated_csvs', 'Costo anual ' + hoy + '.csv'))
    return [ordenadaE, ordenadaP]
#print(f"{type(flat_info['id'])}, {type(flat_info['rooms'])}, {type(flat_info['type'])}, {type(flat_info['price'])}") subway_dict[subway].append(flat_info) #print(subway_dict.keys()) # TODO 2: подсчитайте и выведите на печать количество новостроек, расположенных рядом с каждым из метро. Используйте вариант прохода по словарю, который вам больше нравится #list(subway_dict) #print(type(subway_dict)) Chart = dict() for k, v in subway_dict.items(): # print(v) col_novostroy = 0 for v1 in v: if v1["type"] == "новостройка": #print(v1) col_novostroy += 1 print(f"{k} - {col_novostroy}") Chart.update({k: col_novostroy}) plt.barh(range(len(Chart)), list(Chart.values()), align='center') plt.yticks(range(len(Chart)), list(Chart.keys())) plt.xlabel('Кол-во новостроек рядом с метро') plt.title('Анализ расположения новостроек относительно станций метро') plt.show() # In[ ]:
from matplotlib import pyplot as plt
import csv
from collections import Counter

plt.style.use('fivethirtyeight')

# Tally how often each language appears in the semicolon-separated
# 'LanguagesWorkedWith' column of data.csv.
lc = Counter()
with open('data.csv') as file:
    csv_reader = csv.DictReader(file)  # yields one dict per CSV row
    for row in csv_reader:
        lc.update(row['LanguagesWorkedWith'].split(';'))

# Split the 15 most common (language, count) pairs into parallel lists.
top_languages = lc.most_common(15)
lang = [name for name, _ in top_languages]
fcount = [count for _, count in top_languages]

plt.barh(lang, fcount)
plt.title("Most popular languages")
plt.xlabel('Frequency')
plt.tight_layout()
plt.show()
random_state=0) y_km = km.fit_predict(X) cluster_labels = np.unique(y_km) n_clusters = cluster_labels.shape[0] silhouette_vals = silhouette_samples(X, y_km, metric='euclidean') y_ax_lower, y_ax_upper = 0, 0 yticks = [] for i, c in enumerate(cluster_labels): c_silhouette_vals = silhouette_vals[y_km == c] c_silhouette_vals.sort() y_ax_upper += len(c_silhouette_vals) color = cm.jet(float(i) / n_clusters) plt.barh(range(y_ax_lower, y_ax_upper), c_silhouette_vals, height=1.0, edgecolor='none', color=color) yticks.append((y_ax_lower + y_ax_upper) / 2.) y_ax_lower += len(c_silhouette_vals) silhouette_avg = np.mean(silhouette_vals) plt.axvline(silhouette_avg, color="red", linestyle="--") plt.yticks(yticks, cluster_labels + 1) plt.ylabel('Cluster') plt.xlabel('Silhouette coefficient') plt.tight_layout() #plt.savefig('images/11_04.png', dpi=300)
fast_mimic=True, random_state=RANDOM_STATE) t_aft = time.time() clock_time[3] = t_aft - t_bef MIMIC_fitness_curve = best_fitness_curve # print("----------------------------------") # print("MIMIC") # print(best_state) # print(best_fitness) # print(fitness_curve) # print("----------------------------------") # Clock time different algorithms plt.figure() plt.barh(algorithms, clock_time, align='center') plt.title("Randomized Optimization", fontsize=16, fontweight='bold') plt.suptitle("Knapsack (50 samples)", fontsize=10) plt.ylabel('Algorithm') plt.xlabel('Time (seconds)') plt.savefig('knapsack_time50.png', bbox_inches="tight") # Fitness curve for different algorithms plt.figure() temp = max(len(RHC_fitness_curve), len(SA_fitness_curve), len(GA_fitness_curve), len(MIMIC_fitness_curve)) x_1 = np.linspace(1, temp, len(RHC_fitness_curve)) x_2 = np.linspace(1, temp, len(SA_fitness_curve)) x_3 = np.linspace(1, temp, len(GA_fitness_curve)) x_4 = np.linspace(1, temp, len(MIMIC_fitness_curve)) y_1 = RHC_fitness_curve
from matplotlib import pyplot as plt
import matplotlib
# Draw a horizontal bar chart of the 20 highest-grossing movies.
"""绘制条形图"""
font = {'family': 'MicroSoft YaHei'}
matplotlib.rc('font', **font)  # use a font that can render Chinese text
# Movie titles (y-axis categories) and their box-office takings.
x = ["战狼2","速度与激情8","功夫瑜伽","西游伏妖篇","变形金刚5:最后的骑士","摔跤吧!爸爸","加勒比海盗5:死无对证","金刚:骷髅岛","极限特工:终极回归","生化危机6:终章","乘风破浪","神偷奶爸3","智取威虎山","大闹天竺","金刚狼3:殊死一战","蜘蛛侠:英雄归来","悟空传","银河护卫队2","情圣","新木乃伊",]
y = [56.01,26.94,17.53,16.49,15.45,12.96,11.8,11.61,11.28,11.12,10.49,10.3,8.75,7.55,7.32,6.99,6.88,6.86,6.58,6.23]
plt.figure(figsize=(20, 8), dpi=80)  # set the figure size
# Vertical variant, kept for reference (bar thickness is set by `width`):
# plt.bar(range(len(x)), y, width=0.3)
# Horizontal bars: the bar thickness is controlled by `height`.
plt.barh(range(len(x)), y, height=0.3, color='orange')
# Use the movie titles as tick labels on the y axis.
plt.yticks(range(len(x)),x)
plt.grid(alpha=0.3)  # add a light grid
plt.ylabel('电影名称')
plt.xlabel('票房')
plt.title('票房前20的电影')
plt.savefig('./01.png')
plt.show()
plt.legend(loc="upper center",
           bbox_to_anchor=(0.5, 1.14,),
           ncol=3,
           borderaxespad=0.)
# Save the current figure once per output format.
for ext in ["png", "svg"]:
    fname = os.path.join(options.outputdir,
                         "{0}.{1}".format(options.suffix, ext))
    plt.savefig(fname, dpi=100)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print("Saved {0}".format(fname))

# Build the bar chart data from every row of `data` except the first
# (row 0 is skipped; presumably it is a header row - confirm).
barlabels = []
barvalues = []
splitOn = 'VM'
for n in range(1, len(data)):
    # Put a line break in front of every 'VM' so long labels wrap.
    l = ('\n%s' % splitOn).join(data[n][0].split(splitOn))
    barlabels.append(l)
    barvalues.append(float(data[n][1]))

plt.clf()
# Positions and widths are passed positionally: the `bottom=` keyword of
# barh() was removed from matplotlib, while the positional order
# (positions, widths) is valid in both old and new versions.
plt.barh(range(0, len(data) - 1), barvalues, height=0.5, align='center')
plt.yticks(numpy.arange(len(data) - 1), barlabels)
plt.grid(True)
plt.title('Network Performance - Testcase {0}'.format(options.suffix))
plt.xlabel("Testcase {0} - Mbits/sec".format(options.suffix))
for ext in ["png", "svg"]:
    fname = os.path.join(options.outputdir,
                         "{0}.bar.{1}".format(options.suffix, ext))
    plt.savefig(fname, dpi=100)
    print("Saved {0}".format(fname))
def draw_plot_func(dictionary, n_classes, window_title, plot_title, x_label,
                   output_path, to_show, plot_color, true_p_bar):
    """Draw a horizontal bar chart of ``dictionary`` and save it to disk.

    Args:
        dictionary: mapping of class name to numeric value; one bar per key.
        n_classes: number of bars / y ticks to draw.
        window_title: title of the figure window.
        plot_title: title drawn above the axes.
        x_label: label of the x axis.
        output_path: path the figure is saved to.
        to_show: when true, the figure is also displayed.
        plot_color: bar and text colour used in the single-colour mode.
        true_p_bar: ``""`` for the single-colour mode; otherwise a mapping of
            class name to true-positive count, which switches to the stacked
            false-positive / true-positive mode.
    """
    # Sort the dictionary by increasing value, into a list of tuples, so the
    # largest bar is drawn last (at the top).
    sorted_dic_by_value = sorted(dictionary.items(),
                                 key=operator.itemgetter(1))
    # Unpack the list of tuples into two parallel tuples.
    sorted_keys, sorted_values = zip(*sorted_dic_by_value)
    last_index = len(sorted_values) - 1

    if true_p_bar != "":
        # Special case, drawn in two colours:
        #   - green -> TP: True Positives (object detected and matches
        #     ground-truth)
        #   - red -> FP: False Positives (object detected but does not match
        #     ground-truth)
        fp_sorted = []
        tp_sorted = []
        for key in sorted_keys:
            fp_sorted.append(dictionary[key] - true_p_bar[key])
            tp_sorted.append(true_p_bar[key])
        plt.barh(range(n_classes), fp_sorted, align='center',
                 color='crimson', label='False Positive')
        # The TP segment starts where the FP segment ends.
        plt.barh(range(n_classes), tp_sorted, align='center',
                 color='forestgreen', label='True Positive', left=fp_sorted)
        plt.legend(loc='lower right')
        # Write the numbers beside each bar.
        fig = plt.gcf()
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            fp_str_val = " " + str(fp_sorted[i])
            tp_str_val = fp_str_val + " " + str(tp_sorted[i])
            # Trick to paint two colours with an offset: first paint the
            # whole text in green, then repaint the first number in red.
            t = plt.text(val, i, tp_str_val, color='forestgreen',
                         va='center', fontweight='bold')
            plt.text(val, i, fp_str_val, color='crimson', va='center',
                     fontweight='bold')
            if i == last_index:  # largest bar
                adjust_axes(r, t, fig, axes)
    else:
        plt.barh(range(n_classes), sorted_values, color=plot_color)
        # Write the number beside each bar.
        fig = plt.gcf()
        axes = plt.gca()
        r = fig.canvas.get_renderer()
        for i, val in enumerate(sorted_values):
            str_val = " " + str(val)  # add a space before
            if val < 1.0:
                str_val = " {0:.2f}".format(val)
            t = plt.text(val, i, str_val, color=plot_color, va='center',
                         fontweight='bold')
            # Re-set the axes so the number fits inside the figure.
            if i == last_index:  # largest bar
                adjust_axes(r, t, fig, axes)

    # Set the window title. Newer matplotlib exposes this on the figure
    # manager only (the canvas method was removed); fall back to the canvas
    # method for old versions.
    manager = getattr(fig.canvas, 'manager', None)
    if manager is not None and hasattr(manager, 'set_window_title'):
        manager.set_window_title(window_title)
    else:
        fig.canvas.set_window_title(window_title)

    # Write the class names on the y axis.
    tick_font_size = 12
    plt.yticks(range(n_classes), sorted_keys, fontsize=tick_font_size)

    # Re-scale the figure height so every class label fits.
    init_height = fig.get_figheight()
    dpi = fig.dpi
    height_pt = n_classes * (tick_font_size * 1.4)  # 1.4 adds some spacing
    height_in = height_pt / dpi
    top_margin = 0.15  # fraction of the figure height
    bottom_margin = 0.05  # fraction of the figure height
    figure_height = height_in / (1 - top_margin - bottom_margin)
    # Only ever grow the figure, never shrink it.
    if figure_height > init_height:
        fig.set_figheight(figure_height)

    plt.title(plot_title, fontsize=14)
    plt.xlabel(x_label, fontsize='large')
    # Adjust the layout, save, optionally show, and release the figure.
    fig.tight_layout()
    fig.savefig(output_path)
    if to_show:
        plt.show()
    plt.close()
else: mmin.append(float(row[1])) mmax.append(float(tmmax - tm)) m.append(float(row[1])) merror = [mmin, mmax] y_pos = np.arange(len(x)) plt.xlim(10, 90) plt.ylim() # Create bars fig1 = plt.barh(y_pos, l, xerr=lerror, height=0.20, label='Mixed noise (4000 samples)') fig2 = plt.barh(y_pos + 0.20, m, xerr=merror, height=0.20, label='Mixed noise (2000 samples)') fig3 = plt.barh(y_pos + 0.40, k, xerr=kerror, height=0.20, label='Enviromental noise') fig3 = plt.barh(y_pos + 0.60, y, xerr=yerror, height=0.20, label='No noise') # Create names on the x-axis
# Predict on the first 20 test images and inspect the shape of the result.
predictions = seq_model.predict(test_images[:20])
predictions.shape
# In[27]:
# Show test image `i` next to a horizontal bar chart of its prediction
# vector, with one bar per entry of `classes`.
figsize(12,3)
i = 0
plt.subplot(121)
plt.imshow(test_images[i,:,:,0], cmap=plt.cm.gray)
plt.axis("off")
plt.subplot(122)
plt.barh(np.arange(10), predictions[i,:])
plt.yticks(np.arange(10), classes)
plt.grid(True);
# You can write the model to disk with `Model.save()` and load it again
# later. Historically this used HDF5, but TF2 is moving towards the
# TensorFlow-specific `SavedModel` format; HDF5 has trouble with custom
# objects and nested models.
# In[28]:
seq_model.save("seq_model.h5")
# In[ ]:
import matplotlib.pyplot as plt
import numpy as np
from matplotlib import font_manager, rc
from matplotlib import style

# Register the Malgun Gothic font so Korean text renders correctly.
font_props = font_manager.FontProperties(fname="c:/Windows/Fonts/malgun.ttf")
font_name = font_props.get_name()
rc('font', family=font_name)
style.use('ggplot')

# Categories and the value plotted for each of them.
industry = ['Insect', 'Reptile', 'Aquatic', 'Bird', 'Mammal']
fluctuations = [3, 3, 5, 4, 2]

fig = plt.figure(figsize=(8, 4))
ax = fig.add_subplot(111)

# One horizontal bar per category, centred on integer y positions.
ypos = np.arange(5)
rects = plt.barh(ypos, fluctuations, height=0.5, align='center')
plt.yticks(ypos, industry)
plt.xlabel('등락률')
plt.show()
# Order the rows by the 'count_M' column and use 'LinkType' as the index so
# it can serve as the y tick labels below.
df_5 = df_5.sort_values(by='count_M', ascending=True)
df_5.set_index(['LinkType'], inplace=True)
# In[3]:
# Draw a bidirectional (back-to-back) bar chart.
index = np.arange(len(df_5))
# RGBA colour tuples.
lColor = (1/256, 1/256, 256/256, 3/3) # blue
rColor = (256/256, 1/256, 1/256, 3/3) # red
plt.figure(figsize=(16,6))
# Both series share the same y positions; 'count_F' is negated so its bars
# extend to the left of the origin while 'count_M' extends to the right.
plt.barh( index, df_5['count_M'], color = lColor )
plt.barh( index, -df_5['count_F'], color = rColor )
# Relabel the x ticks so values left of the origin are shown as positive.
plt.xticks([-40000, -20000, 0, 20000, 40000], ['40000', '20000', '0', '20000', '40000'])
plt.yticks(index, df_5.index)
plt.legend(['M', 'F'])
plt.show()
# In[ ]:
def plot_feature_importances(clf, feature_names):
    """Draw the feature importances of ``clf`` as horizontal bars.

    Args:
        clf: fitted estimator exposing ``feature_importances_``.
        feature_names: sequence of names, one per feature, used as the
            y tick labels.

    The bars are added to the current pyplot axes; the figure is not shown.
    """
    n_features = len(feature_names)
    positions = range(n_features)
    plt.barh(positions, clf.feature_importances_)
    # Label each bar with its feature name.
    plt.yticks(numpy.arange(n_features), feature_names)
    plt.ylabel("Feature name")
    plt.xlabel("Feature importance")
# Compare the maximum and the mean player value in one two-bar chart.
plt.figure(figsize=(16, 2))
plt.title(
    'Mean vs Max Value',
    fontsize=20,
    fontweight='bold',
    y=1.05,
)
plt.xlabel('Value [M€]')
max_value = dataset['ValueNum'].max()
objects = ('Max Value', 'Mean Value')
y_pos = np.arange(len(objects))
# Divide by 1,000,000 to match the M€ unit of the x axis label.
performance = [max_value / 1000000, mean_value / 1000000]
plt.barh(y_pos, performance, align='center', alpha=0.5)
plt.yticks(y_pos, objects)
plt.show()
# <b>NOTE:</b> The mean value of a player in FIFA 18 is about €2.4M. That seems like a lot of money, but it is small compared to €123M, the value of Neymar. Moreover, player values are unequally distributed: only 23.2% of players exceed the mean value.
# ### 3.5. Over or Under Mean Wage
# <a id="over_or_under_mean_wage"></a>
# In[41]:
# round(..., -3) rounds the mean wage to the nearest thousand.
print('Mean wage of player in FIFA 18 is around: €' +
      str(round(mean_wage, -3)) + '.')
# In[42]:
markeredgecolor='black', markerfacecolor='firebrick') plt.boxplot(part_4.result_5, meanprops=meanpoint, meanline=False, showmeans=True) plt.ylabel('Temperature') plt.title('Average temperatures measured outside') #9 part_4.result_9.sort_values().plot(kind='bar', color='g') plt.ylim([0, 10300]) plt.xlabel('Temperature') plt.ylabel('Appearances') plt.title('Top 3 appearance') plt.vlines(-1, 0, 10500, colors='k', linestyles='dashed') plt.vlines(3, 0, 10500, colors='k', linestyles='dashed') plt.xlim(-2, 4) plt.legend('③❸') #6 y = (data.groupby("temp")["temp"]) count = (data.groupby("temp")["temp"].count()) y_pos = np.arange(21, 52) plt.barh(y_pos, count, alpha=0.9) plt.xlabel('Data amount') plt.ylabel('Temperature') plt.title('Data amount by temperature') plt.legend() plt.show()
def index():
    """Render the dashboard page with summary statistics and four plots.

    Builds the value-box / table statistics from ``playstore``, draws a bar
    plot, a scatter plot, a histogram and a content-rating bar plot, and
    passes the statistics plus the four base64-encoded PNG images to the
    ``index.html`` template.

    Returns:
        The result of ``render_template('index.html', ...)``.
    """

    def _encode_png_and_close(fig):
        # Render `fig` as PNG, return it as base64 text for embedding in
        # HTML, and close the figure so it is not kept alive by pyplot after
        # the request has been served.
        figfile = BytesIO()
        fig.savefig(figfile, format='png')
        figfile.seek(0)
        encoded = base64.b64encode(figfile.getvalue()).decode('ascii')
        plt.close(fig)
        return encoded

    df2 = playstore.copy()

    # Statistics
    top_category = pd.crosstab(index=df2['Category'],
                               columns='Jumlah').sort_values(
                                   'Jumlah', ascending=False).reset_index()
    # `stats` holds the values shown in the value boxes and in the table.
    stats = {
        'most_categories':
        top_category['Category'][0],
        'total':
        top_category['Jumlah'][0],
        'rev_table':
        playstore.groupby(['Category', 'App']).agg({
            'Reviews': 'mean',
            'Rating': 'mean',
        }).sort_values('Reviews', ascending=False).head(10).reset_index().
        to_html(classes=[
            'table thead-light table-striped table-bordered table-hover table-sm'
        ])
    }

    ## Bar plot: number of apps in the largest categories
    cat_order = df2.groupby('Category').agg({
        'App': 'count'
    }).rename({
        'Category': 'Total'
    }, axis=1).sort_values('App', ascending=False).head()
    X = cat_order.index
    Y = cat_order['App']
    # One colour per bar. This must be a sequence of colour codes: a single
    # multi-character string is not a valid colour specification.
    my_colors = list('rgbkymc')
    # Create the canvas/figure.
    fig = plt.figure(figsize=(8, 3), dpi=300)
    fig.add_subplot()
    # Draw the bar plot.
    plt.barh(X, Y, color=my_colors)
    # Save the plot as a PNG image.
    plt.savefig('cat_order.png', bbox_inches="tight")
    # Convert the PNG to base64 so it can be shown in the HTML template.
    result = _encode_png_and_close(fig)

    ## Scatter plot: reviews vs. rating, marker size scaled by installs
    X = df2['Reviews'].values  # x axis
    Y = df2['Rating'].values  # y axis
    area = playstore['Installs'].values / 10000000  # marker size
    fig = plt.figure(figsize=(5, 5))
    fig.add_subplot()
    plt.scatter(x=X, y=Y, s=area, alpha=0.3)
    plt.xlabel('Reviews')
    plt.ylabel('Rating')
    plt.savefig('rev_rat.png', bbox_inches="tight")
    result2 = _encode_png_and_close(fig)

    ## Histogram: size distribution
    X = (playstore['Size'] / 1000000).values
    fig = plt.figure(figsize=(5, 5))
    fig.add_subplot()
    plt.hist(X, bins=100, density=True, alpha=0.75)
    plt.xlabel('Size')
    plt.ylabel('Frequency')
    plt.savefig('hist_size.png', bbox_inches="tight")
    result3 = _encode_png_and_close(fig)

    ## Extra insight plot: number of apps per content rating
    contentRat = df2.groupby('Content Rating').agg({
        'App': 'count'
    }).rename({
        'Category': 'Total'
    }, axis=1).sort_values('App', ascending=False).head(5)
    X = contentRat.index
    Y = contentRat['App']
    my_colors = list('rgbkymc')
    fig = plt.figure(figsize=(10, 5), dpi=300)
    fig.add_subplot()
    plt.bar(X, Y, color=my_colors)
    plt.savefig('Content_Rating.png', bbox_inches="tight")
    result4 = _encode_png_and_close(fig)

    # Pass the statistics and the encoded plots to the template.
    return render_template(
        'index.html',
        stats=stats,
        result=result,
        result2=result2,
        result3=result3,
        result4=result4,
    )
def predict_new_image(img_url, model_nn, metainfo):
    """Download an image, classify it and plot a prediction report.

    The image is downloaded to ``temp_img.jpg``, centre-cropped to a square,
    resized to 224x224, run through a MobileNet feature extractor and
    classified with ``model_nn['model']``. The report shows the class
    probabilities beside the image.

    Args:
        img_url: URL of the image to download.
        model_nn: mapping whose ``'model'`` entry provides
            ``decision_function``.
        metainfo: mapping whose ``'class_names'`` entry lists the class
            names in the order of the model outputs.

    Returns:
        None. If the downloaded file cannot be opened as an image, a message
        is printed and the function returns without plotting.
    """
    # Download the image
    print('Downloading image.')
    response = requests.get(img_url, stream=True)
    with open('temp_img.jpg', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)

    # Load image and convert to RGB color scheme
    try:
        im = Image.open('temp_img.jpg').convert(mode='RGB', colors=256)
    except IOError:
        print('Image cannot be loaded.')
        # Nothing can be predicted without an image; continuing would only
        # fail later on the unbound `im`.
        return

    # Centre-crop to a square whose side is the smaller image dimension.
    width, height = im.size
    min_dim = min(width, height)
    offset_x = width - min_dim
    offset_y = height - min_dim
    im_squared = im.crop((offset_x / 2., offset_y / 2.,
                          offset_x / 2. + min_dim, offset_y / 2. + min_dim))

    # Resize image (resample=1 is the numeric filter code, kept as-is).
    im_resized = im_squared.resize((224, 224), resample=1)

    # Put image into numpy array, scaled to [0, 1]
    img = np.array(im_resized) / 255

    # Extract features using Mobilenet
    print('Feature Extraction.')
    module_url = 'https://tfhub.dev/google/imagenet/mobilenet_v2_100_224/feature_vector/4'
    m = tf.keras.Sequential([hub.KerasLayer(module_url, trainable=False)])
    m.build([None, 224, 224, 3])
    nn_features = m.predict(img[None, ...])

    # Compute class prediction probabilities (softmax, in percent).
    print('Plotting report.')
    sigm = model_nn['model'].decision_function(nn_features).squeeze()
    # Subtracting the maximum leaves the softmax unchanged mathematically
    # but keeps exp() from overflowing on large scores.
    exp_scores = np.exp(sigm - np.max(sigm))
    probability = exp_scores / np.sum(exp_scores) * 100

    # Get class names
    class_names = metainfo['class_names']

    # Get the predicted label of the image
    predicted_idx = np.argmax(probability)
    predicted_prob = probability[predicted_idx]
    predicted_label = class_names[predicted_idx]

    # Plot overview figure
    fig = plt.figure(figsize=(13, 6))
    gs = gridspec.GridSpec(1, 2, width_ratios=[1, 1])

    # Plot prediction probabilities
    ax = plt.subplot(gs[0])
    plt.title('Prediction Probability')
    y_pos = np.arange(len(probability))
    plt.barh(y_pos, probability, color='#BFBFBF')

    # Set y-label text
    y_label_text = [
        '{}: {:5.1f}%'.format(e, probability[i])
        for i, e in enumerate(class_names)
    ]
    ax.set_yticks(y_pos)
    ax.set_yticklabels(y_label_text)
    # Dotted line at the uniform (chance-level) probability; the y limits
    # are restored afterwards so the line does not rescale the axis.
    ylim = list(plt.ylim())
    plt.vlines(1 / len(probability) * 100, *ylim, linestyles=':', linewidth=2)
    plt.ylim(ylim)

    # Plot image
    ax = plt.subplot(gs[1])
    plt.title('Image')
    plt.imshow(img)
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])

    # Add information text to image
    info_txt = '\nThis is to {:.02f}% a {}!'.format(predicted_prob,
                                                    predicted_label)
    plt.xlabel(info_txt, fontdict={'size': 21})
    plt.tight_layout()
    plt.show()
eafn = len(x['eaf']) ismn = len(x['ism']) isfn = len(x['isf']) objects = ('Greater European Male', 'Greater European Female', 'Jewish Male', 'Jewish Female', 'Greater African Male', 'Greater African Female', 'Hispanic Male', 'Hispanic Female', 'East Asian Male', 'East Asian Female', 'Indian Subcontinent Male', 'Indian Subcontinent Female') y_pos = np.arange(len(objects)) avgSalary = [gem, gef, jm, jf, gam, gaf, hm, hf, eam, eaf, ism, isf] num = [gemn, gefn, jmn, jfn, gamn, gafn, hmn, hfn, eamn, eafn, ismn, isfn] print(avgSalary) print(num) plt.barh(y_pos, avgSalary, align='center', alpha=0.5) plt.yticks(y_pos, objects) plt.xlabel('Average Yearly Salary') plt.ylabel('Race and Gender') plt.title('Average Salary by Race and Gender') plt.show() with open('finalData/all_data.csv', 'r') as read_file: csv_reader = reader(read_file) count = 0 x = {'male': [], 'female': []} for line in csv_reader: if count != 0: salary = float(line[1]) if line[0] == 'male':
plt.ylabel('True label')

# Confusion matrix plot for the LR classifier.
fig = plt.figure(4)
ax = fig.add_subplot()
cax = ax.matshow(cf_matrix_lr)
plt.title('Confusion matrix of SubTask B (LR)')
fig.colorbar(cax)
# Fix the tick positions before assigning labels; labelling whatever ticks
# the default locator picked can put the labels on the wrong cells.
ax.set_xticks(range(len(labels)))
ax.set_yticks(range(len(labels)))
ax.set_xticklabels(labels)
ax.set_yticklabels(labels)
plt.xlabel('Predicted label')
plt.ylabel('True label')

# Accuracy comparison of all classifiers.
classifier_list = ['SVM', 'DT', 'KNN', 'LR']
accuracy_list = [accuracy_svm, accuracy_dt, accuracy_knn, accuracy_lr]
fig = plt.figure(5)
ax = fig.add_subplot()
plt.title("Accuracy Comparison for SubTask B")
plt.ylabel('Classifier')
plt.xlabel('Accuracy (%)')
plt.barh(classifier_list, accuracy_list)
# Write each accuracy value just to the right of its bar.
for i, j in enumerate(accuracy_list):
    ax.text(j + 1, i + .10, str(j), color='blue')
plt.show()

# Persist the classifier names and their accuracies. The `with` block
# guarantees both files are flushed and closed.
with open('classifier_dump_b', 'wb') as file, \
        open('accuracy_list_dump_b', 'wb') as file1:
    pl.dump(classifier_list, file)
    pl.dump(accuracy_list, file1)
importances[indices], color=random_colors, align="center") plt.xticks(range(X.shape[1]), header1, rotation=90) plt.xlim([-1, X.shape[1]]) plt.show() ##################################################### # # # Models Accuracy # # # ##################################################### from collections import OrderedDict model_accuracy = OrderedDict(sorted(model_accuracy.items(), key=lambda t: t[1])) plt.figure() plt.gcf().subplots_adjust(left=0.22) plt.title("Models Accuracy") plt.barh(range(len(model_accuracy)), model_accuracy.values(), align='center', color='#009688') plt.yticks(range(len(model_accuracy)), model_accuracy.keys(), rotation=0) axes = plt.gca() axes.set_xlim([0.8, 1.0]) plt.xlabel("Accuracy") plt.ylabel('Classifier') plt.show()