def present_compared_data(match_full_data_df, name, results_dir):
    """Render the expected-vs-actual frequency comparison as a PNG table.

    One row per distinct ``ref_e`` value is kept, a trailing ``sum`` row with
    the column totals is appended, the three frequency columns are formatted
    as percentages and the table is saved as
    ``<results_dir>/<name>_emirge_smurf_compare.png``.

    :param match_full_data_df: DataFrame with at least the columns ``ref_e``,
        ``Header.ref_id + '_e'``, ``count``, ``actual_prior``, ``prior_e`` and
        ``actual_prior_diff``.
    :param name: prefix of the output file name.
    :param results_dir: directory the image is written to.
    """
    def as_percent(val):
        return "{0:.2f}%".format(val * 100)

    match_full_data_df = match_full_data_df.drop_duplicates('ref_e')
    presented_data = match_full_data_df[[Header.ref_id + '_e',
                                         'count',
                                         'actual_prior',
                                         'prior_e',
                                         'actual_prior_diff']].copy()
    presented_data.rename(columns={'count': '# of refs',
                                   'actual_prior': 'actual frequency',
                                   'prior_e': 'expected frequency',
                                   'actual_prior_diff': 'frequency difference'},
                          inplace=True)
    presented_data.index = range(1, len(presented_data) + 1)

    # Totals row: the reference id and the difference have no meaningful sum.
    presented_data.loc[len(presented_data) + 1] = [
        None,
        sum(presented_data['# of refs']),
        sum(presented_data['actual frequency']),
        sum(presented_data['expected frequency']),
        None,
    ]

    for column in ('expected frequency', 'actual frequency', 'frequency difference'):
        presented_data[column] = presented_data[column].apply(as_percent)

    # range() is not a list on Python 3, so materialise it before appending
    # the label of the totals row.
    presented_data.index = list(range(1, len(presented_data))) + ['sum']

    ax = plt.subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis
    table(ax, presented_data, loc='center')

    res_name = name + '_emirge_smurf_compare.png'
    plt.tight_layout()
    im_path = os.path.join(results_dir, res_name)
    plt.savefig(im_path, bbox_inches='tight')
    plt.clf()
    logging.info("saved results to {}".format(im_path))
def draw_tabular():
    """Render the currencies that no country uses as a table image.

    Queries the module-level ``db`` connection for the distinct
    ``currencySymbol`` / ``currencyName`` pairs whose ``currencyId`` does not
    appear in ``countries.currencyId1``, draws them as a table, saves the
    figure and then shows it.  If a ``ValueError`` is raised the whole
    procedure is retried once, passing ``index=False`` to ``table``.
    """
    query = (
        "SELECT DISTINCT currencySymbol , currencyName FROM currencies "
        "WHERE currencies.currencyId Not IN "
        "(SELECT DISTINCT countries.currencyId1 FROM countries) "
    )
    out_path = 'C:/Users/MoniSingh/Desktop/cervello/tabular.png'

    def render(**table_kwargs):
        """Run the query, draw the table and write/show the figure."""
        df = pd.read_sql_query(query, db)
        ax = plt.subplot(111, frame_on=False)  # no visible frame
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.yaxis.set_visible(False)  # hide the y axis
        table(ax, df, loc='center', **table_kwargs)
        # Save before show(): once a blocking show() returns the figure is
        # gone and savefig() would write an empty image.
        plt.savefig(out_path)
        plt.show()

    try:
        render()
    except ValueError:
        render(index=False)
def data_acq(key,acq_freq,upload_freq, cnt):
    """Acquire temperature readings, redraw the table image, then upload.

    Each pass reads one (celsius, fahrenheit) pair from ``tempRead()``,
    appends it together with a timestamp to the module-level lists ``c``,
    ``f`` and ``date``, rebuilds a DataFrame from those lists, prints it and
    saves it as ``datatemp.png``.  The loop sleeps ``upload_freq`` seconds
    per pass and stops after ``acq_freq`` passes.

    :param key: forwarded unchanged to ``data_upload``.
    :param acq_freq: number of passes before the loop ends.
    :param upload_freq: sleep time per pass, in seconds (``time.sleep``).
    :param cnt: ignored; it is reset to 0 on entry.
    """
    # NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    # source; confirm that data_upload() belongs after the loop.
    cnt = 0
    while cnt != acq_freq:
        cel,fah = tempRead()
        _date = datetime.now()
        _date =str( _date.strftime('%d-%m-%Y %H:%M:%S'))
        # c, date and f are not defined in this function; they accumulate
        # across passes (and across calls).
        c.append(cel)
        date.append(_date)
        f.append(fah)
        df_cel = pd.DataFrame({'celsius':c})
        df_fah = pd.DataFrame({'fahrenheit':f})
        df_date = pd.DataFrame({'Date_Time':date})
        df = combineFrame(combineFrame(df_date,df_cel),df_fah) # dataframe with datetime and temperature
        print df
        ax = plt.subplot(111, frame_on=False)
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        table(ax, df, loc='upper right')
        plt.savefig('datatemp.png', transparent=True) # save the dataframe as a png file.
        time.sleep(upload_freq)
        cnt = cnt+1
    data_upload(key, acq_freq, upload_freq) # upload the data recorded so far
def sk_kurt(data):
    """Classify the skewness of each numeric CSV column and save it as a table.

    Every numeric column of the CSV is labelled ``'positively skewed'``,
    ``'negatively skewed'`` or ``'symmetric'`` according to the sign of its
    sample skewness, and the resulting two-column table is written to
    ``skewness.png``.

    :param data: path or buffer accepted by ``pandas.read_csv``.
    """
    def label(value):
        if value > 0:
            return 'positively skewed'
        if value < 0:
            return 'negatively skewed'
        # Zero and NaN both end up here, as in the original comparison chain.
        return 'symmetric'

    # The original call passed ``header`` (None) as the third positional
    # argument, which pandas binds to ``delimiter`` - so the header row was
    # always inferred.  Keep that effective behaviour, spelled with keywords
    # so that it also works on pandas versions with keyword-only options.
    df = pd.read_csv(data, sep=',')
    skewness = df.skew(numeric_only=True)

    records = [
        {'column_name': column, 'skewness': label(value)}
        for column, value in zip(skewness.keys(), skewness)
    ]
    empdf = pd.DataFrame(records)

    ax = plt.subplot(111)
    table(ax, empdf, loc='center')
    ax.set_axis_off()
    plt.savefig('skewness.png')
def top_sources(self, human, bot, path):
    """Save a table with the bot/human percentage of the top sources.

    ``human`` and ``bot`` are concatenated column-wise, the three largest
    entries of each column are selected (missing values replaced by 0) and
    the share of bots and humans per selected source is written, formatted
    with two decimals, to the image at ``path``.

    :param human: per-source values for humans.
    :param bot: per-source values for bots.
    :param path: output image path.
    """
    nb_sources = 3
    sources = pd.concat([human, bot], axis=1)
    sources.columns = ['humans', 'bots']

    def leaders(kind):
        # top ``nb_sources`` rows ranked by the given column
        ranked = sources.sort_values(by=kind, ascending=False)
        return ranked.head(nb_sources).fillna(0)

    mixed_sources = pd.concat([leaders('humans'), leaders('bots')])

    total = mixed_sources['bots'] + mixed_sources['humans']
    bots_perc = mixed_sources['bots'] / total * 100
    mixed_sources['% bots'] = bots_perc
    mixed_sources['% humans'] = 100 - bots_perc

    formatted = mixed_sources.applymap(lambda x: '%.2f' % x)
    sources = formatted.drop(["humans", "bots"], axis=1)

    pl.figure(figsize=(15, 5))
    ax1 = plt.subplot(111, frame_on=False)
    ax1.set_title("Top {} sources per type.".format(nb_sources))
    for hidden_axis in (ax1.xaxis, ax1.yaxis):
        hidden_axis.set_visible(False)
    table(ax1, sources, loc="center")
    pl.savefig(path)
def graphical_analysis(data, dv, path='', regression=True):
    """Save one exploratory plot per column of ``data`` against ``dv``.

    Columns are split by ``numerical_categorical_division``.  With
    ``regression=True`` numerical columns are plotted against ``dv`` and
    categorical columns get a box plot of ``dv`` grouped by the column.
    Otherwise numerical columns get a box plot grouped by ``dv`` and
    categorical columns get a table of column-wise percentages of the
    ``dv`` x column cross tabulation.  Each image is written to
    ``path + <column> + '.png'`` at 1000 dpi.

    :param data: DataFrame to analyse.
    :param dv: name of the dependent variable column (skipped as a feature).
    :param path: prefix prepended to every output file name.
    :param regression: selects the plot family as described above.
    """
    numerical, categorical = numerical_categorical_division(data)

    def save_axes(ax, col):
        ax.get_figure().savefig(path + col + '.png', dpi=1000)

    if regression:
        for col in numerical:
            if col != dv:
                save_axes(data.plot(col, dv), col)
        for col in categorical:
            if col != dv:
                save_axes(data.boxplot(dv, by=col), col)
        return

    for col in numerical:
        if col != dv:
            save_axes(data.boxplot(col, by=dv), col)

    for col in categorical:
        if col == dv:
            continue
        f = pd.crosstab(data[dv], data[col], dropna=False)
        # Column-wise percentages in one vectorised step; the sum of each
        # column is computed once instead of once per cell.
        f = f / f.sum() * 100

        # A dedicated figure per table: drawing on the current axes would
        # stack every table on top of the previous one (and on the last
        # box plot).
        fig = plt.figure()
        ax = fig.add_subplot(111, frame_on=False)  # no visible frame
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.yaxis.set_visible(False)  # hide the y axis
        table(ax, f)
        fig.savefig(path + col + '.png', dpi=1000)
        plt.close(fig)
def make_table(self, value, ax, metric_suppression=''):
    '''
    Draw the ``describe()`` statistics of ``value`` as a table on ``ax``.

    :param value: measured data; must provide ``describe()``
    :param ax: axes the table is drawn on (its axis decorations are hidden)
    :param metric_suppression: label(s) dropped from the ``describe()``
        output before drawing; nothing is dropped when falsy
    '''
    summary = value.describe()
    if metric_suppression:
        summary = summary.drop(metric_suppression)

    the_table = table(ax, np.round(summary, 2), loc='center')

    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    ax.axis('off')

    the_table.set_fontsize(12)
    the_table.scale(1, 1.2)
def tournament(mygame, nplayers, rounds, S_self, m0, p_one):
    """Play a round-robin tournament and save the score table as an image.

    Players ``0 .. nplayers - 1`` each meet every higher-numbered player and
    one extra opponent, stored at index ``nplayers``, whose moves come from
    ``mygame.player(S_self, ...)``.  The summed scores of every pairing are
    rendered (rounded to one decimal, with an ``average`` column) into a new
    PNG under ``static/``; PNG files already present there are removed.

    :returns: ``"<plotfile>,<s0> <s1> ..."`` where ``s_i`` is the score the
        extra opponent obtained against player ``i``.
    """
    size = nplayers + 1
    scores_pair = np.zeros((size, size))

    for i in range(nplayers):
        for j in range(i + 1, size):
            # index ``nplayers`` stands for the S_self strategy
            opponent = j if j < nplayers else S_self
            scoresA, scoresB = [], []
            movesA, movesB = [], []
            for _ in range(rounds):
                moveA = mygame.player(i, movesB, movesA, m0, p_one)
                moveB = mygame.player(opponent, movesA, movesB, m0, p_one)
                scoreA, scoreB = mygame.onegame(moveA, moveB)
                movesA.append(moveA)
                movesB.append(moveB)
                scoresA.append(scoreA)
                scoresB.append(scoreB)
            scores_pair[i][j] = np.cumsum(scoresA)[-1]
            scores_pair[j][i] = np.cumsum(scoresB)[-1]

    plt.figure(figsize=(6, 2))
    df = pd.DataFrame(scores_pair)
    df['average'] = df.sum(axis=1)
    df.loc[:, 'average'] *= 1.0 / nplayers

    ax = plt.subplot(111, frame_on=False)  # no visible frame
    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    table(ax, np.round(df, 1), loc='center')

    if os.path.isdir('static'):
        # drop the images of earlier runs
        for filename in glob.glob(os.path.join('static', '*.png')):
            os.remove(filename)
    else:
        os.mkdir('static')

    plotfile = os.path.join('static', str(time.time()) + '.png')
    plt.savefig(plotfile)

    final_score = [scores_pair[nplayers][i] for i in range(nplayers)]
    score_text = ' '.join(str(score) for score in final_score)
    return plotfile + "," + score_text
def __df_to_png(self, df, file_path):
    """Draw ``df`` as a table on a frameless axes and save it to ``file_path``.

    :param df: DataFrame to render.
    :param file_path: destination passed to ``savefig``.
    :returns: ``self.response``.
    """
    # Start from a cleared sub plot so earlier tables do not show through.
    subplots(clear=True)
    matplotlib.rc('figure', dpi=160)

    axes = plt.subplot(111, frame_on=False)
    for hidden_axis in (axes.xaxis, axes.yaxis):
        hidden_axis.set_visible(False)

    table(axes, df, loc='center')
    savefig(file_path)
    return self.response
def save_as_table(df):
    """Render ``df`` as a table and save it as ``mytable.png``.

    :param df: DataFrame to render.
    """
    import matplotlib.pyplot as plt
    try:
        from pandas.plotting import table
    except ImportError:
        # pandas < 0.20 only ships the helper under its old location.
        from pandas.tools.plotting import table

    ax = plt.subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis

    table(ax, df)
    plt.savefig('mytable.png')
def plottable(df, path='/Users/razzak_lebbai/junk/test.png'):
    """Draw ``df`` as a table, save the figure to ``path`` and show it.

    :param df: DataFrame to render.
    :param path: output image path.
    """
    axes = plt.subplot(111, frame_on=False)  # frameless axes
    for hidden_axis in (axes.xaxis, axes.yaxis):
        hidden_axis.set_visible(False)

    table(axes, df, loc='top')

    plt.tight_layout()
    plt.savefig(path)
    plt.show()
def plot_table(df, width=0.3, height=0.15, size=12):
    """Draw ``df`` as a centred table on the current axes.

    :param df: DataFrame to render.
    :param width: width applied to every cell.
    :param height: height applied to every cell.
    :param size: font size applied to every cell.
    :returns: the most recently added table of the current axes.
    """
    axes = plt.gca()
    table(axes, df, loc='center')

    # The table just drawn is the last one registered on the axes.
    the_table = axes.tables[-1]
    for cell in the_table.get_celld().values():
        cell.set_height(height)
        cell.set_width(width)
        cell.set_fontsize(size)
    return the_table
def calculate_illnesses(age, gender, product_category, symptoms_list):
    """Predict outcome probabilities and save them as a table image.

    The four arguments are converted with ``to_array`` and fed to the
    pickled model's ``predict_proba``; the probabilities of the first
    prediction row are scaled to percent, paired with the outcome labels
    and drawn as a table.

    :returns: path of the saved figure.
    """
    figure_filepath = get_graphs_filepath('prob_table_df.png')
    prediction_array = to_array(age, gender, product_category, symptoms_list)

    # Outcome labels as an explicitly ordered sequence.  They used to be the
    # keys of a dict, whose iteration order is not guaranteed before
    # Python 3.7, so labels and probabilities could be paired wrongly.
    # NOTE(review): assumes this order matches the column order of
    # predict_proba - confirm against the fitted model's classes.
    outcome_names = (
        'death',
        'life_threatening',
        'serious_injuries_illness',
        'disability',
        'other_serious__important_medical_events_',
        'congenital_anomaly',
        'req_intervention_to_prvnt_perm_imprmnt',
        'hospitalization',
        'visited_an_er',
        'visited_a_health_care_provider',
    )

    # NOTE: pickle.load executes arbitrary code; the model file must come
    # from a trusted location.
    with open(get_pickle_filepath('forest_tuned_lowbias_fitted.pkl'), 'rb') as picklefile:
        forest_tuned_lowbias_fitted = pickle.load(picklefile)

    illness_probabilities = forest_tuned_lowbias_fitted.predict_proba(prediction_array)
    illness_probabilities_series = pd.Series(list(illness_probabilities[0])) * 100

    possible_illnesses_table = pd.DataFrame({
        'Possible Outcome': list(outcome_names),
        'Probability': illness_probabilities_series,
    })

    ax = plt.subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis
    table(ax, possible_illnesses_table, loc='center')
    plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=1.0)
    plt.savefig(figure_filepath)
    return figure_filepath
def _make_table(self, ax, df, title, height=None):
    """Draw ``df`` as a table on ``ax`` with shaded index/column headers.

    When ``df`` is None the axes is simply hidden.  Otherwise the index is
    moved into the frame via ``self._insert_index``, header cells are shaded
    grey, the corner cells and the row-label column are hidden, and every
    cell gets the same height.

    :param ax: axes to draw on.
    :param df: DataFrame to render, or None.
    :param title: axes title.
    :param height: cell height; defaults to ``1 / (rows + 1)``.
    """
    if df is None:
        ax.set_visible(False)
        return

    import pandas.tools.plotting as plotting

    # Level counts must be taken before the index is inserted as columns.
    idx_nlevels = df.index.nlevels
    col_nlevels = df.columns.nlevels
    df = self._insert_index(df)

    tb = plotting.table(ax, df, loc=9)
    tb.set_fontsize(self.font_size)

    if height is None:
        height = 1.0 / (len(df) + 1)

    cells = tb.properties()['celld']
    for (r, c), cell in compat.iteritems(cells):
        in_header_rows = r < col_nlevels
        in_index_cols = c < idx_nlevels
        if c == -1 or (in_header_rows and in_index_cols):
            # row-label column and the top-left corner stay empty
            cell.set_visible(False)
        elif in_header_rows or in_index_cols:
            cell.set_facecolor('#AAAAAA')
        cell.set_height(height)

    ax.set_title(title, size=self.font_size)
    ax.axis('off')
def plot_table(title, cells, column_names, row_names, save=None):
    """Draw ``cells`` as a labelled table and save or show the figure.

    :param title: figure title.
    :param cells: data accepted by ``pandas.DataFrame``.
    :param column_names: column labels.
    :param row_names: row labels.
    :param save: output path; when None the figure is shown instead.
    """
    f, ax = matplotlib.pyplot.subplots(1)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    ax.set_frame_on(False)
    f.suptitle(title, fontsize=14, fontweight='bold')

    df = pandas.DataFrame(cells)
    df.columns = column_names
    df.index = row_names

    tab = table(
        ax=ax,
        data=df,
        colLabels=column_names,
        rowLabels=row_names,
        loc='upper right',
    )
    tab.auto_set_font_size(False)
    tab.set_fontsize(12)
    print(df)

    if save is not None:
        print('Saving table into: {}'.format(save))
        directory = os.path.dirname(save)
        if directory:  # a bare file name has no directory to create
            try:
                os.makedirs(directory)
            except OSError:
                # Best effort: the directory usually exists already; any
                # real problem surfaces in savefig() just below.
                pass
        matplotlib.pyplot.savefig(save)
    else:
        matplotlib.pyplot.show()
    matplotlib.pyplot.close(f)
def makefile():
    """Compute the AOI flow matrices and dump each one to a JSON file.

    Loads ``fixations.json`` and ``abst_info.json``, passes them to
    ``calcFlow`` and writes matrix ``m`` of task ``t`` (0-based) to
    ``../src/flows/task<t + 1>/<m>.json``.  A table figure is built for
    every matrix and closed again without being saved.

    Also (re)assigns the module-level ``origin``, ``outpath`` and
    ``srcpath`` paths.
    """
    global origin, outpath, srcpath
    origin = '../src/Analyze/'
    outpath = '../src/trajectory/'
    srcpath = '../src/data/'

    with open('../src/trajectory/fixations.json') as fixations_file:
        data = json.load(fixations_file)
    with open('../src/trajectory/abst_info.json') as abst_file:
        abst_data = json.load(abst_file)

    matrixes = calcFlow(data, abst_data)
    print(len(matrixes[0]))

    for task, task_matrixes in enumerate(matrixes):
        for index, flow in enumerate(task_matrixes):
            labels = ['AOI ' + str(i) for i in range(len(flow))]

            fig, ax = plt.subplots(1, 1)
            flow_table = plotting.table(ax, pd.DataFrame(flow),
                                        rowLabels=labels,
                                        colLabels=labels,
                                        loc='center')
            flow_table.scale(1, 1)
            ax.axis('off')
            plt.close(fig)

            out_name = '../src/flows/task' + str(task + 1) + '/' + str(index) + '.json'
            with open(out_name, 'w') as f:
                json.dump(flow.tolist(), f,
                          ensure_ascii=False,
                          indent=4,
                          sort_keys=True,
                          separators=(',', ': '))
def plot_good_ratio(target, feature, bins=10, accumulative=False, ascending=True, show_table=True, **kwargs):
    """Plot the probability P(target = 1) computed by good_ratio.

    :param target: Series indicating good / bad.
    :param feature: feature Series whose distribution is computed.
    :param bins: number of bins for the feature. default: 10
    :param accumulative: whether to compute the cumulative probability.
        If False only the probability within each bin is computed, if True
        the cumulative probability is computed. default: False
    :param ascending: when accumulative == True, decides whether to
        accumulate from the small values (True) or from the large values
        (False).
    :param show_table: whether to display the data table per feature bin.
    """
    outcome = good_ratio(target, feature, bins, accumulative, ascending)
    if accumulative:
        # the cumulative variant returns one extra, unused element
        base, _, ratio_df = outcome
        title_str = "good ratio of %s / accumulative / ascending = %s" % (feature.name, ascending)
    else:
        base, ratio_df = outcome
        title_str = "good ratio of %s" % (feature.name)

    ax = ratio_df["ratio"].plot(**kwargs)
    ax.axhline(base)
    ax.set_title(title_str)
    ax.set_xlabel("%s_cut" % feature.name)
    ax.set_ylabel("good ratio")

    if not show_table:
        return

    # The table takes the place of the x axis labelling.
    ax.set_xlabel("")
    ax.get_xaxis().set_ticklabels([])
    ratio_df["ratio"] = ratio_df["ratio"].round(3)
    summary = ratio_df[["ratio", "good", "count"]].T
    t = table(ax, summary)
    t.scale(1.0, 2.0)
def summarize_week(week):
    """Build the three-panel weekly summary figure.

    For every entry of ``week['collected']`` the positive counts, negative
    counts and descriptive statistics returned by ``summarizeCOMMA`` are
    joined into one frame each (one column per ``category``).  The figure
    shows the positive series, the negative series and the statistics table.

    :param week: mapping with the keys ``'collected'`` and ``'_id'``.
    :returns: the matplotlib figure (it is not saved here).
    """
    import matplotlib.dates as mdates

    matplotlib.style.use('ggplot')
    dfPos = pd.DataFrame()
    dfNeg = pd.DataFrame()
    descf = pd.DataFrame()

    for comma in week['collected']:
        pos, neg, dates, describe = summarizeCOMMA(comma)
        column = [comma['category']]
        dfPos = dfPos.join(pd.DataFrame(data=pos, index=dates, columns=column), how='outer')
        dfNeg = dfNeg.join(pd.DataFrame(data=neg, index=dates, columns=column), how='outer')
        # dict views are not sequences on Python 3; materialise them so that
        # numpy and pandas see plain lists.
        descf = descf.join(
            pd.DataFrame(data=np.round(list(describe.values()), 2),
                         index=list(describe.keys()),
                         columns=column),
            how='outer')

    fig, axes = plt.subplots(3, 1)
    titles_dict = {
        'fontsize': 14,
        'fontweight': 8,
        'verticalalignment': 'baseline',
        'horizontalalignment': 'center'
    }

    # Panel 1: positive tweets.
    dfPos.plot.area(stacked=False, ax=axes[0])
    axes[0].set_title("Tweets week " + str(week['_id']), fontdict=titles_dict)
    axes[0].set_ylabel('Positive')
    axes[0].legend(fontsize='xx-small')
    axes[0].get_xaxis().set_visible(False)

    # Panel 2: negative tweets, carrying the shared date axis.
    dfNeg.plot.area(stacked=False, ax=axes[1], figsize=(6, 9))
    axes[1].set_ylabel('Negative')
    axes[1].legend(fontsize='xx-small')
    axes[1].set_xlabel('Weekly Summary')
    axes[1].set_xticklabels(dfNeg.index.values, rotation='horizontal')
    axes[1].get_xaxis().set_major_locator(mdates.DayLocator(interval=1))
    axes[1].get_xaxis().set_minor_formatter(mdates.DateFormatter(''))
    axes[1].get_xaxis().set_major_formatter(mdates.DateFormatter('%b %d\n'))

    # Panel 3: statistics table.
    axes[2].axis('tight')
    axes[2].axis('off')
    t = table(axes[2], descf,
              gid=str('   Weekly Summary\n'),
              cellLoc='center',
              loc='center')
    t.auto_set_font_size(False)
    t.set_fontsize(7)

    fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0)
    return fig
def pie_chart(self, data, title, column=""):
    """Save a pie chart of ``data['Assets']`` next to a table of ``data``.

    The index of ``data`` is replaced in place by the values of
    ``data[column]``.  The figure is saved to ``self.p(title)``.

    :param data: DataFrame with an ``'Assets'`` column and ``column``.
    :param title: chart title, also passed to ``self.p`` for the output path.
    :param column: column whose values become the index (pie labels).
    """
    data.index = list(data[column])

    plt.figure(figsize=(16, 8))

    # left half: the pie
    pie_axes = plt.subplot(121, aspect='equal')
    pie_options = dict(kind='pie', ax=pie_axes, autopct='%1.1f%%',
                       startangle=90, shadow=False, legend=False,
                       fontsize=16, title=title, subplots=True)
    data['Assets'].plot(**pie_options)

    # right half: the transposed data as a table
    table_axes = plt.subplot(122)
    plt.axis('off')
    tbl = table(table_axes, data.T, loc='center right')
    tbl.auto_set_font_size(False)
    tbl.set_fontsize(8)

    plt.savefig(self.p(title))
def QualityofFittables(DataElement, pdf, outpath, truncate, nameStr=''):
    """Add one quality-of-fit page per grouping category to ``pdf``.

    For each of the categories ``'class'`` and ``'targetstrat'`` the trade
    data is grouped, ``reporter_impl.QualityofFitgrouptables`` is run per
    group, and the rounded estimates are shown as a table together with a
    bar chart of ``gamma (est)``.

    :param DataElement: model object; its trade data is replaced group by
        group through ``setTradedata``.
    :param pdf: object whose ``savefig()`` stores the current figure.
    :param outpath: not used by this function.
    :param truncate: forwarded to ``QualityofFitgrouptables``.
    :param nameStr: optional suffix appended to each group name.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # confirm the table/plot section runs once per category.
    # NOTE(review): '/home/charles/' is hard-coded while ``outpath`` is
    # unused - confirm which one is intended.
    try:
        from pandas.plotting import table
    except ImportError:
        # pandas < 0.20 only ships the helper under its old location.
        from pandas.tools.plotting import table

    groupCats = ['class', 'targetstrat']
    tradedata = DataElement.data.tradedata.copy()
    mod = DataElement
    for groupCat in groupCats:
        r = pd.DataFrame()
        grouped = tradedata.groupby(groupCat)
        for name, data in grouped:
            mod.setTradedata(data)
            nameStrLoc = name
            if (len(nameStr) > 0):
                nameStrLoc = name + "_" + nameStr
            r = pd.concat([
                r,
                reporter_impl.QualityofFitgrouptables(mod, '/home/charles/',
                                                      truncate=truncate,
                                                      nameStr=nameStrLoc)
            ])
        try:
            r = np.round(
                r[[
                    'Group', 'const (est)', 'beta (est)', 'gamma (est)',
                    'lin r^2', 'pow_law r^2'
                ]], 4)
        except KeyError:
            # No constant term was estimated; fall back to the other columns.
            r = np.round(
                r[[
                    'Group', 'beta (est)', 'gamma (est)', 'lin r^2',
                    'pow_law r^2'
                ]], 4)
        fig, ax = plt.subplots(1, 1)
        table(ax, r, loc='upper right',
              colWidths=[0.1, 0.15, 0.15, 0.15, 0.15, 0.15])
        df = pd.DataFrame(r['gamma (est)'])
        df.index = r['Group']
        df.plot(kind='bar', color='y', ax=ax, ylim=(0, 2), legend=False)
        plt.ylabel('Gamma (est)')
        plt.title('Coefficient by ' + groupCat)
        pdf.savefig()
        plt.close()
def cross_tab(data):
    """Save a cross tabulation image for every pair of non-numeric columns.

    Column ``i`` is cross tabulated with every column ``j > i`` and the
    result is written to ``img<i><j>.png``.

    :param data: path or buffer accepted by ``pandas.read_csv``.
    """
    # The original call passed ``header`` (None) as the third positional
    # argument, which pandas binds to ``delimiter`` - so the header row was
    # always inferred.  Keep that effective behaviour, spelled with keywords.
    df = pd.read_csv(data, sep=',')

    ap = df.select_dtypes(exclude=['number'])
    first_col = list(ap)
    arr = [np.array(ap.iloc[:, n]) for n in range(len(first_col))]
    print(type(arr))

    for i in range(len(first_col) - 1):
        for j in range(i + 1, len(first_col)):
            ddf = pd.crosstab(arr[i], arr[j],
                              rownames=[first_col[i]],
                              colnames=[first_col[j]])
            # Create the figure first and draw on *its* axes: creating it
            # after the axes made savefig() write a new, empty figure.
            fig = plt.figure(figsize=(9, 11))
            ax = fig.add_subplot(111)
            table(ax, ddf, loc='center')
            ax.set_axis_off()
            fig.savefig('img' + str(i) + str(j) + '.png', bbox_inches='tight')
            plt.close(fig)
def stats(data):
    """Save the ``describe()`` summary of the numeric CSV columns as an image.

    The image is written to ``stat0.png`` inside the module-level ``fpath``
    directory (joined with a backslash).

    :param data: path or buffer accepted by ``pandas.read_csv``.
    """
    # keep only the numeric columns of the file
    numeric_part = pd.read_csv(data)._get_numeric_data()
    summary = numeric_part.describe()

    axes = plt.subplot(111)
    table(axes, summary, loc='center')
    axes.set_axis_off()

    f = 0
    target = fpath + '\\' + 'stat' + str(f) + '.png'
    plt.savefig(target)
def RaRi(RecVlist, fdname):
    """Compute Ra and Rinput for each recording and store table + image.

    For every recording number the trace is read with ``readtraces``; the
    baseline is the mean of the first 5 ms, ``Ra`` is derived from the
    trace minimum and ``Rinput`` from the mean of the 19-20 ms window.
    The table is pickled under ``Analysis/<fdname>/aligned/Isteps`` and
    drawn to ``Pics/<fdname>/Isteps/RaRinput.jpeg``.

    :param RecVlist: recording numbers to process.
    :param fdname: folder name used for reading and for both output paths.
    :returns: DataFrame with the columns ``RecN``, ``Ra`` and ``Rinput``.
    """
    try:
        from pandas.plotting import table
    except ImportError:
        # pandas < 0.20 only ships the helper under its old location.
        from pandas.tools.plotting import table

    def ensure_dir(directory):
        """Create ``directory`` unless it already exists."""
        try:
            os.makedirs(directory)
        except OSError as e:
            if e.errno != errno.EEXIST:
                raise

    R = np.zeros((len(RecVlist), 3))
    for n, i in enumerate(RecVlist):
        t, v, pho = readtraces(fdname, i)
        dt = t[1] - t[0]
        baseline = np.mean(v[:int(0.005 / dt)])
        Ra = 10 / abs(min(v) - baseline) * 1000
        Rinpt = 10 / abs(
            np.mean(v[int(0.019 / dt):int(0.020 / dt)]) - baseline) * 1000
        R[n][0] = i
        R[n][1] = Ra
        R[n][2] = Rinpt

    Rdf = pd.DataFrame(R)
    Rdf.columns = ['RecN', 'Ra', 'Rinput']

    # save the table
    dir_aligned = 'Analysis/%s' % fdname + '/aligned/Isteps'
    ensure_dir(dir_aligned)
    Rdf.to_pickle(dir_aligned + '/RaRinput.ASCII')

    # figure height grows with the number of recordings
    plt.figure(figsize=[10, 1 + 0.1 * len(RecVlist)])
    ax = plt.subplot(111, frame_on=False)  # no visible frame
    ax.xaxis.set_visible(False)  # hide the x axis
    ax.yaxis.set_visible(False)  # hide the y axis
    table(ax, Rdf, loc='center')

    dir_pic = 'Pics/%s' % fdname + '/Isteps'
    ensure_dir(dir_pic)
    plt.savefig(dir_pic + '/RaRinput.jpeg')
    plt.close()
    return (Rdf)
def plot_statistic(self, error, statistic='mean'):
    """Plot one statistic of ``error`` per method, with a value table below.

    Reads ``statistics.csv`` from ``self.save_dir_statistics`` (three header
    rows, first column as index), draws one line per entry of
    ``self.methods`` and saves the figure as
    ``plot_<error>_<statistic>.png`` in the same directory before showing it.

    :param error: label selected on the second column level.
    :param statistic: ``'min'`` or ``'mean'``; for ``'mean'`` a band of one
        standard deviation (square root of the ``'var'`` entries) is shaded
        around each line.  Any other value draws nothing.
    """
    # NOTE(review): nesting reconstructed from a whitespace-collapsed source;
    # as written, table() is called once per method - confirm intent.
    # NOTE(review): DataFrame.sortlevel is not available in recent pandas
    # releases - confirm the targeted pandas version.
    statistics = pd.read_csv(self.save_dir_statistics+'statistics.csv', header=[0, 1, 2], index_col=[0])
    statistics.sortlevel(axis=0, inplace=True, sort_remaining=True)
    statistics.sortlevel(axis=1, inplace=True, sort_remaining=True)
    # infinite entries are treated as missing
    statistics[statistics == np.inf] = np.nan

    # index for sorting dataframe methods
    idx = np.argsort(np.argsort(self.methods))  # second argsort to get the idx for undoing the sorting

    # change color cycle: one colour of the 'jet' map per method
    cmap = pl.get_cmap('jet')
    colors = cmap(np.linspace(0.1, 0.9, len(self.methods)))

    fig = pl.figure(figsize=(9, 8))
    ax = fig.add_subplot(111)
    ax.set_prop_cycle(cycler('color', colors))
    for method in self.methods:
        if statistic == 'min':
            statistic_min = statistics.loc[(slice(None)), (slice(None), error, statistic)]
            ax.plot(statistics.index, statistic_min[method], label=method)
            table(ax, statistic_min[idx].transpose().apply(lambda x: x.map(lambda y: "%.6f" % y)), rowLabels=self.methods, loc='bottom', bbox=[0, -0.7, 1, 0.55])
        elif statistic == 'mean':
            statistic_mean = statistics.loc[(slice(None)), (slice(None), error, statistic)]
            statistic_std = np.sqrt(statistics.loc[(slice(None)), (slice(None), error, 'var')])
            base_line, = ax.plot(statistics.index, statistic_mean[method], label=method)
            # shaded band of +/- one standard deviation in the line's colour
            ax.fill_between(statistics.index.values, (statistic_mean[method].values - statistic_std[method].values).flatten(), (statistic_mean[method].values + statistic_std[method].values).flatten(), facecolor=base_line.get_color(), alpha=0.1)
            table(ax, statistic_mean[idx].transpose().apply(lambda x: x.map(lambda y: "%.6f" % y)), rowLabels=self.methods, loc='bottom', bbox=[0, -0.7, 1, 0.55])
    # leave room below the axes for the table
    pl.tight_layout(rect=[0.15, 0.36, 1.0, 1.0])
    ax.set_xticks(statistics.index)
    ax.set_xticklabels(statistics.index)
    pl.legend(fontsize=12)
    pl.ylim([0, None])
    pl.xlabel(statistics.index.name)
    pl.ylabel(statistic + ' ' + error)
    pl.savefig(self.save_dir_statistics+'/plot_'+error+'_'+statistic+'.png')
    pl.show()
def create_key(alt_csv, file, dir):
    """Save ``alt_csv`` as a transparent key-table image.

    The image is written to
    ``ana_dir/tree_dir/key_dir/<dir>/<file minus its last 4 chars>_key.png``
    where the first three components are module-level names.

    :param alt_csv: DataFrame to render; its length sets the figure height.
    :param file: source file name; its last four characters are dropped.
    :param dir: sub-directory of the key directory.
    """
    figure_size = (66, len(alt_csv) / 5.2)
    fig, ax = plt.subplots(figsize=figure_size)
    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    ax.set_frame_on(False)

    column_widths = [0.15] * len(alt_csv.columns)
    tabla = table(ax, alt_csv, loc='upper right', colWidths=column_widths)
    tabla.auto_set_font_size(False)
    tabla.set_fontsize(5)
    tabla.scale(.8, .8)

    key_name = file[:len(file) - 4] + "_key.png"
    target = os.path.join(ana_dir, tree_dir, key_dir, dir, key_name)
    plt.savefig(target, transparent=True)
def tablegen(dict):
    """Draw the MSE / QL values in ``dict`` as a table on a new figure.

    :param dict: mapping of column name to a two-element sequence holding
        the ``'MSE'`` and ``'QL'`` values (in that order).
    """
    df = pd.DataFrame(dict, index=['MSE', 'QL'])

    fig, ax = plt.subplots(figsize=(14, 2))
    # table only: no axes, no frame
    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    ax.set_frame_on(False)

    column_widths = [0.17] * len(df.columns)
    tabla = table(ax, df.round(3), loc='center', colWidths=column_widths)
    # font size is set manually; enlarge colWidths when increasing it
    tabla.auto_set_font_size(False)
    tabla.set_fontsize(12)
    tabla.scale(1, 1)
def compare_specific_reference(actual_res_path, expected_res_path, result_dir, test_name, max_allowed_mismatch,
                               ref_expected_id=None):
    """Save, per expected reference, a table of the test references mapped to it.

    Test and expected results are loaded, matched with
    ``get_expected_test_map_df`` and merged per region; every matched pair
    gets a ``mismatch_score``.  One image named
    ``emirge_smurf_<test_name>_reference_id_<id>.png`` is written to
    ``result_dir`` for each selected expected reference.

    :param actual_res_path: path passed to ``get_test_df``.
    :param expected_res_path: path passed to ``get_expected_df``.
    :param result_dir: directory the images are written to.
    :param test_name: part of every output file name.
    :param max_allowed_mismatch: forwarded to ``get_expected_test_map_df``.
    :param ref_expected_id: a single expected reference id or an iterable of
        ids; when falsy every expected reference is processed.
    """
    test_df = get_test_df(actual_res_path)
    expected_df = get_expected_df(expected_res_path)
    match_df = get_expected_test_map_df(test_df, expected_df, max_allowed_mismatch)
    match_df = match_df[[Header.ref_id + "_e", Header.ref_id + "_t"]].drop_duplicates()

    test_df = test_df.rename(columns={Header.ref_id: Header.ref_id + '_t',
                                      Header.sequence: Header.sequence + '_t',
                                      Header.prior: Header.prior + '_t'})
    expected_df = expected_df.rename(columns={Header.ref_id: Header.ref_id + '_e',
                                              Header.sequence: Header.sequence + '_e',
                                              Header.prior: Header.prior + '_e'})

    scored_merge_df = pd.merge(test_df, match_df, on=[Header.ref_id + '_t'])
    scored_merge_df = pd.merge(expected_df, scored_merge_df, on=[Header.region, Header.ref_id + '_e'])
    scored_merge_df['mismatch_score'] = scored_merge_df.apply(
        lambda r: calc_mismatch_score(r[Header.sequence + '_t'], r[Header.sequence + '_e']), axis=1)

    if not ref_expected_id:
        ref_expected_ids = scored_merge_df[Header.ref_id + '_e'].drop_duplicates().tolist()
    elif isinstance(ref_expected_id, (str, bytes)):
        # list("abc") would split a single id into its characters
        ref_expected_ids = [ref_expected_id]
    else:
        try:
            ref_expected_ids = list(ref_expected_id)
        except TypeError:
            # a single non-iterable id, e.g. an int
            ref_expected_ids = [ref_expected_id]

    def as_percent(val):
        return "{0:.2f}%".format(val * 100)

    for current_id in ref_expected_ids:
        mapped_data = scored_merge_df[(scored_merge_df[Header.ref_id + '_e'] == current_id)]
        presented_data = mapped_data[[Header.ref_id + '_e',
                                      Header.ref_id + '_t',
                                      Header.region,
                                      'prior_e',
                                      'prior_t',
                                      'mismatch_score']].copy()
        # DataFrame.sort was removed from pandas; sort_values is its
        # replacement (available since pandas 0.17).
        presented_data = presented_data.sort_values(
            [Header.ref_id + '_e', Header.ref_id + '_t', 'prior_t', Header.region],
            ascending=False)
        presented_data.index = range(1, len(presented_data) + 1)
        presented_data['prior_e'] = presented_data['prior_e'].apply(as_percent)
        presented_data['prior_t'] = presented_data['prior_t'].apply(as_percent)

        ax = plt.subplot(111, frame_on=False)  # no visible frame
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.yaxis.set_visible(False)  # hide the y axis
        table(ax, presented_data, loc='center')

        res_name = 'emirge_smurf_' + test_name + '_reference_id_' + str(current_id) + '.png'
        im_path = os.path.join(result_dir, res_name)
        plt.tight_layout()
        plt.savefig(im_path, bbox_inches='tight')
        plt.clf()
        logging.info("saving results to: {}".format(im_path))
def metrics(self, return_type=None, ax=None):
    '''Return the fit metrics as a list, a DataFrame or a drawn table.

    The metrics are, in order: target mean, correlation, R2, MSE, RMSE,
    minimum / median / maximum of the bootstrap slopes, the low and high
    confidence-interval values and the feature units.  The confidence
    intervals are computed first when ``self.fit_conf_int`` is False.

    :param return_type: ``'df'`` returns a one-column DataFrame,
        ``'img'`` draws that DataFrame on ``ax`` and returns the table,
        anything else returns the plain list.
    :param ax: axes to draw on; required for ``return_type='img'``.
    :raises AssertionError: if ``return_type == 'img'`` and ``ax`` is None.
    '''
    if self.fit_conf_int == False:
        self.confidence_intervals()

    data = [
        self.tgt_mean(),
        self.corr(),
        self.r2(),
        self.mse(),
        self.rmse(),
        self.bs_slopes.min(),
        np.median(self.bs_slopes),
        self.bs_slopes.max(),
        self.conf_intervals_values[0],
        self.conf_intervals_values[1],
        self.feature_units,
    ]
    df = pd.DataFrame(data,
                      columns=['Values'],
                      index=[
                          'Target Mean', 'Correlation', 'R2', 'MSE', 'RMSE',
                          'Increase Min', 'Increase Median', 'Increase Max',
                          'CI_Low', 'CI_High', 'Feature Units'
                      ])

    if return_type == 'df':
        return df

    if return_type == 'img':
        assert ax is not None, "Please pass an axes object to plot img data on"
        try:
            from pandas.plotting import table
        except ImportError:
            # pandas < 0.20 only ships the helper under its old location.
            from pandas.tools.plotting import table
        ax.xaxis.set_visible(False)  # hide the x axis
        ax.yaxis.set_visible(False)  # hide the y axis
        ax.set_frame_on(False)  # no visible frame
        tabla = table(ax, df, loc='upper right',
                      colWidths=[0.17] * len(df.columns))
        return tabla

    return data
def plot_agg_table(agg_tbl, oName, meter):
    """Save ``agg_tbl`` as a table image titled ``'<meter> meter grid'``.

    :param agg_tbl: DataFrame to render (seven column widths are supplied).
    :param oName: output image path.
    :param meter: text placed in front of ``' meter grid'`` in the title.
    """
    fig = plt.figure(figsize=(6, 1.5))
    ax2 = fig.add_subplot(111)
    ax2.xaxis.set_visible(False)
    ax2.yaxis.set_visible(False)

    # Paint the frame white and push it behind the table.  ``values()``
    # works on Python 2 and 3, unlike ``itervalues()``.
    for sp in ax2.spines.values():
        sp.set_color('w')
        sp.set_zorder(0)

    the_table = table(ax2, agg_tbl, loc='upper center',
                      colWidths=[0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1])
    the_table.set_fontsize(10)

    plt.suptitle(meter + ' meter grid')
    plt.tight_layout()
    plt.savefig(oName, dpi=600)
def main_compare_length():
    """Compare the four result variants of each mock set in one table image.

    For every index the ``static``, ``static_weight``, ``weight`` and basic
    result files are compared against the expected results, the four
    comparisons are merged on ``ref_e`` / ``expected frequency`` and the
    reference counts and frequency differences are saved as
    ``<index>_emirge_smurf_full_compare.png``.
    """
    res_dir = "/home/vered/EMIRGE/EMIRGE-data/mock_for_noam_test/results"
    expected_dir = "/home/vered/EMIRGE/EMIRGE-data/mock_for_noam_test/"
    STATIC = "_static.csv"
    STATIC_WEIGHT = "_static_weight.csv"
    WEIGHT = "_weight.csv"
    BASIC = ".csv"
    FINAL_RES = "final_results_"
    merge_keys = ['ref_e', 'expected frequency']

    def load_result(index, file_suffix):
        return get_test_df(os.path.join(res_dir, FINAL_RES + index + file_suffix))

    # only the 15-sequence mock set is processed
    indexes = ['15']
    for i in indexes:
        static = load_result(i, STATIC)
        static_weight = load_result(i, STATIC_WEIGHT)
        weight = load_result(i, WEIGHT)
        basic = load_result(i, BASIC)
        expected = get_expected_df(os.path.join(expected_dir, "mock_" + i + "seq/reads/expected_res.csv"))

        static_compare = get_presented_data(compare(static, expected, i + "_static"))
        static_weight_compare = get_presented_data(compare(static_weight, expected, i + "_static_weight"))
        weight_compare = get_presented_data(compare(weight, expected, i + "_weight"))
        basic_compare = get_presented_data(compare(basic, expected, i + "_basic"))

        static_side = pd.merge(static_compare, static_weight_compare,
                               on=merge_keys, suffixes=(" s", " s+w"))
        weight_side = pd.merge(weight_compare, basic_compare,
                               on=merge_keys, suffixes=(" w", "-"))
        full_comparison = pd.merge(static_side, weight_side)

        suffixes = [" s+w", " w", " s", "-"]
        ordered_columns = (["# of refs" + s for s in suffixes]
                           + ['expected frequency']
                           + ['freq diff' + s for s in suffixes])
        full_comparison = full_comparison[ordered_columns]

        fig = plt.figure(figsize=(10, 4), dpi=300)
        ax = fig.add_subplot(111, frame_on=False)  # no visible frame
        for hidden_axis in (ax.xaxis, ax.yaxis):
            hidden_axis.set_visible(False)
        the_table = table(ax, full_comparison, loc='center')
        the_table.set_fontsize(18)

        res_name = i + '_emirge_smurf_full_compare.png'
        plt.tight_layout()
        plt.savefig(os.path.join('/home/vered/EMIRGE/EMIRGE-data/', res_name), bbox_inches='tight')
        plt.clf()
        fig.clear()
def plot(df, name):
    """Save ``df`` as a table image named ``<name>.png``.

    :param df: DataFrame to render.
    :param name: output path without the ``.png`` extension.
    """
    # NOTE(review): tight_layout() runs before the figure below is created,
    # so it acts on whatever figure is current at that point - confirm intent.
    plt.tight_layout()

    fig, ax = plt.subplots()
    # table only: no axes, no frame
    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    ax.set_frame_on(False)

    tab = table(ax, df, loc='upper right')
    # fixed font size instead of automatic scaling
    tab.auto_set_font_size(False)
    tab.set_fontsize(8)

    plt.savefig(name + '.png', dpi=199, bbox_inches="tight")
def KNN(vol_data, k=1, warmup=100, filename=None, Timedt=None, method=[3]):
    """Evaluate ``KNNcalc`` for k = 1..20 and tabulate MSE and QL.

    For every entry of ``method`` and every k from 1 to 20 ``KNNcalc`` is
    run on the second column of ``vol_data``.  MSE and QL are plotted
    against k and the rounded results are drawn as a table.

    :param vol_data: DataFrame with a ``Date`` column; the second column
        holds the input series.
    :param k: not used; k is swept from 1 to 20.
    :param warmup: forwarded to ``KNNcalc``.
    :param filename: forwarded to ``KNNcalc`` and used as plot title.
    :param Timedt: forwarded to ``KNNcalc``.
    :param method: methods to evaluate, each forwarded to ``KNNcalc``.
    :returns: DataFrame with the columns ``k``, ``MSE`` and ``QL``.
    """
    vol_data_input = vol_data.iloc[:, 1]
    dates = pd.Series(vol_data.Date)

    kval, mse, ql = [], [], []
    for m in method:
        for ks in np.linspace(1, 20, 20):
            outcome = KNNcalc(vol_data=vol_data_input,
                              dates=dates,
                              k=ks,
                              warmup=warmup,
                              filename=filename,
                              Timedt=Timedt,
                              method=m)
            kval.append(int(ks))
            mse.append(outcome[0])
            ql.append(outcome[1])

    one_method_result = pd.DataFrame(np.transpose([kval, mse, ql]),
                                     columns=['k', 'MSE', 'QL'])

    for metric in ('MSE', 'QL'):
        one_method_result.plot('k', metric, figsize=[12, 7]).set_title(filename)

    # the results as a table on a figure of their own
    fig, ax = plt.subplots()
    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    ax.set_frame_on(False)

    column_widths = [0.2] * len(one_method_result.columns)
    tabla = table(ax, one_method_result.round(7), loc='center',
                  colWidths=column_widths)
    tabla.auto_set_font_size(False)
    tabla.set_fontsize(10)
    tabla.scale(1, 1)

    return one_method_result
def fig_creator(data_frame):
    """Save ``data_frame`` as ``images/<today's date>_losers.png``.

    The date comes from the module-level ``today`` object.

    :param data_frame: DataFrame to render.
    """
    fig, ax = plt.subplots(figsize=(12, 4))
    # table only: no axes, no frame
    for hidden_axis in (ax.xaxis, ax.yaxis):
        hidden_axis.set_visible(False)
    ax.set_frame_on(False)

    column_widths = [0.12] * len(data_frame.columns)
    tabla = table(ax, data_frame, loc='upper left', colWidths=column_widths)
    # font size is set manually; enlarge colWidths when increasing it
    tabla.auto_set_font_size(False)
    tabla.set_fontsize(15)
    tabla.scale(1.5, 1.5)

    save_options = dict(transparent=True, bbox_inches='tight', dpi=300, pad_inches=0)
    plt.savefig(f'images/{today.date()}_losers.png', **save_options)
def tabulate_data(fig, table_info):
    """Prepare a matplotlib table using provided table info and add it to the figure.

    Parameters
    ----------
    fig : matplotlib figure
        Receives a bold suptitle and one new subplot holding the table.
    table_info :
        Object exposing ``title`` (the suptitle text) and ``df`` (the
        DataFrame to render, rounded to 2 decimals; its index must be
        string-like because it is padded with spaces).

    ``table_info.df`` is left untouched: the index padding is applied to a
    copy, so repeated calls no longer accumulate extra spaces on the
    caller's frame.
    """
    fig.suptitle(table_info.title, fontsize=20, fontweight='bold')
    ax = fig.add_subplot(111)

    # configure table colors
    tableau20 = get_tableau_colors()
    color_1 = tableau20[0]
    color_2 = tableau20[1]
    dark_color_1 = [channel * 0.65 for channel in color_1]

    # Copy first: assigning df.index on the original would mutate the caller's data.
    df = table_info.df.copy()
    # The label column is fixed width, so pad the labels with spaces.
    df.index = ' ' + df.index + ' '

    # Centre the table in the axes, sized from the number of rows and columns.
    nrows, ncols = df.shape
    colwidth = 0.16
    rowheight = 0.1
    table_width = ncols * colwidth
    table_height = nrows * rowheight
    tab = table(
        ax,
        np.round(df, 2),
        loc='upper center',
        bbox=[.5 - table_width / 2, .5 - table_height / 2, table_width, table_height],
    )

    for (row, col), cell in tab.get_celld().items():
        text = cell.get_text()  # public accessor for the cell's Text artist

        # common cell properties
        text.set_size(14)
        cell.set_edgecolor('w')
        cell.set_linestyle('-')
        cell.set_linewidth(1)

        # alternate colors between even and odd rows
        if row % 2 == 0:
            cell.set_facecolor(color_1)
            text.set_color('w')
        else:
            cell.set_facecolor(color_2)
            text.set_color(dark_color_1)

        # header row (row 0) and index column (col -1) override the striping
        if row == 0 or col == -1:
            text.set_color('w')
            text.set_weight('bold')
            cell.set_facecolor(dark_color_1)
        if row == 0:
            cell.set_height(cell.get_height() * 1.4)  # make the header row a bit taller

    ax.axis('off')
# Flatten the May classification array into a one-column frame, drop the
# missing cells and attach the class name produced by assign_class.
may_df = (
    pd.DataFrame(may_sed_class.flatten())
    .rename(columns={0: 'sed5class'})
    .dropna()
)
may_df['sed5name'] = may_df.apply(assign_class, axis=1)

print('Now plotting distributions...')

# --- bottom-left panel: August class frequencies plus share table ---
ax1 = plt.subplot2grid((5, 2), (4, 0))
aug_df.groupby('sed5name').size().plot(kind='bar', ax=ax1, rot=45)
ax1.set_xlabel('Substrate Type')
ax1.set_ylabel('Frequency')

table_aug = pd.pivot_table(aug_df, index=['sed5name'], values=['sed5class'], aggfunc='count')
# Despite the column name this is a fraction (class count / total count), not x100.
table_aug['Percent_Area'] = table_aug['sed5class'] / aug_df.sed5name.count()
table_aug = table_aug[['Percent_Area']]
table1 = table(ax1, np.round(table_aug, 3), loc='upper right', colWidths=[0.2])

# --- bottom-right panel: May, sharing the y axis with August ---
ax = plt.subplot2grid((5, 2), (4, 1), sharey=ax1)
may_df.groupby('sed5name').size().plot(kind='bar', ax=ax, rot=45)
ax.set_xlabel('Substrate Type')
ax.set_ylabel('Frequency')

table_may = pd.pivot_table(may_df, index=['sed5name'], values=['sed5class'], aggfunc='count')
table_may['Percent_Area'] = table_may['sed5class'] / may_df.sed5name.count()
table_may = table_may[['Percent_Area']]
table2 = table(ax, np.round(table_may, 3), loc='upper right', colWidths=[0.2])

plt.tight_layout()
print('Now Saving figure...')
plt.savefig(r"C:\workspace\Reach_4a\Multibeam\mb_sed_class\output\mb_aug_may_comparison_diverging_cmap.png", dpi=1000)
# plt.show()
# One row of dBW summary statistics per substrate class.
tbl['substrate'] = ['sand', 'gravel', 'boulders']
tbl = tbl.set_index('substrate')

for substrate_name, substrate_df in (('sand', s_df), ('gravel', g_df), ('boulders', b_df)):
    dbw = substrate_df['dBW']
    summary = substrate_df.describe()
    tbl.loc[substrate_name] = pd.Series({
        'mean': np.mean(dbw),
        'std': np.std(dbw),
        # NOTE(review): 'CV' is computed as mean/std; the coefficient of
        # variation is conventionally std/mean - confirm which is intended.
        'CV': np.mean(dbw) / np.std(dbw),
        # rows 4, 5 and 6 of describe() supply the three quartile columns
        '25%': float(summary.iloc[4].values),
        '50%': float(summary.iloc[5].values),
        '75%': float(summary.iloc[6].values),
        'kurt': float(substrate_df.kurtosis().values),
        'skew': float(substrate_df.skew().values),
    })

tbl = tbl.applymap(lambda x: round(x, 3))

# Release the per-substrate frames, including the loop temporaries that
# would otherwise keep the last one alive.
del substrate_name, substrate_df, dbw, summary
del s_df, g_df, b_df

# Draw the statistics as a table on a blank axes (axis lines hidden, spines white).
fig = plt.figure()
ax = fig.add_subplot(111)
ax.xaxis.set_visible(False)
ax.yaxis.set_visible(False)
for sp in ax.spines.values():
    sp.set_color('w')
    sp.set_zorder(0)
the_table = table(ax, tbl.round(3), loc='best', colWidths=[0.1] * 8)
the_table.set_fontsize(12)
plt.tight_layout()
plt.savefig(r"c:\workspace\Texture_Classification\output\substrate_stat_plots\visual_agg_distribution_table_spet_14.png")
del tbl

# Pixel counts per polygon of the segmentation shapefile over the raster.
in_shp = r"C:\workspace\Merged_SS\window_analysis\shapefiles\tex_seg_800_3class.shp"
ss_raster = r"C:\workspace\Merged_SS\window_analysis\raster\ss_10_rasterclipped.tif"
z_stats_46 = zonal_stats(in_shp, ss_raster, stats=['count'], raster_out=True)

# Let's get the substrate codes
ds = ogr.Open(in_shp)
lyr = ds.GetLayer(0)
a = []
# Proxy markers for the legend, one per remaining substrate colour
# (colors[1] .. colors[5]).
circ2, circ3, circ4, circ5, circ6 = (
    Line2D([0], [0], linestyle="none", marker="o", markersize=10,
           markerfacecolor=colors[color_idx], alpha=a_val)
    for color_idx in range(1, 6)
)

# Top-right cell of the 3x2 grid holds only the legend, so hide the axis
# lines and paint the spines white.
# ax1 = fig.add_subplot(2,2,2)
ax1 = plt.subplot2grid((3, 2), (0, 1))
ax1.xaxis.set_visible(False)
ax1.yaxis.set_visible(False)
for sp in ax1.spines.values():
    sp.set_color('w')
    sp.set_zorder(0)

legend_handles = (circ1, circ2, circ3, circ4, circ5, circ6)
legend_labels = ("sand", "sand/gravel", "gravel/sand", "gravel", "gravel/boulders", "boulders")
ax1.legend(legend_handles, legend_labels, numpoints=1, loc='center left',
           borderaxespad=0.)  # bbox_to_anchor=(0.3, 0.9),

# Second-row cell shows the pivot table on an equally blank axes.
# ax2 = fig.add_subplot(2,2,3)
ax2 = plt.subplot2grid((3, 2), (1, 1), colspan=2)
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
for sp in ax2.spines.values():
    sp.set_color('w')
    sp.set_zorder(0)

the_table = table(ax2, pivot_table.round(3), loc='center left', colWidths=[0.1] * 8)
the_table.set_fontsize(10)

plt.tight_layout(w_pad=10)
plt.savefig(r"C:\workspace\Merged_SS\window_analysis\10_percent_shift\output\ss_visual_seg_2014_09_R01767.png", dpi=1000)
# plt.show()
def plot2D(self):
    """Draw method for current data.

    Relies on pandas plot functionality if possible. Only the options
    listed in ``valid`` for the selected plot kind are forwarded to
    ``self._doplot``.

    Reads ``self.data``, ``self.mplopts.kwds`` and ``self.labelopts.kwds``,
    draws on ``self.fig`` / ``self.ax`` and finishes by redrawing
    ``self.canvas``. Returns ``None`` in every case; problems are reported
    through ``self.showWarning`` and end the call early.

    When a grouping column (``by``, optionally ``by2``) is set, each group
    gets its own subplot in a near-square grid and one figure-level legend.
    """
    if not hasattr(self, 'data'):
        return

    # needs cleaning up
    valid = {'line': ['alpha', 'colormap', 'grid', 'legend', 'linestyle',
                      'linewidth', 'marker', 'subplots', 'rot', 'logx', 'logy',
                      'sharey', 'kind'],
             'scatter': ['alpha', 'grid', 'linewidth', 'marker', 'subplots', 's',
                         'legend', 'colormap', 'sharey', 'logx', 'logy', 'use_index',
                         'c', 'cscale', 'colorbar'],
             'pie': ['colormap', 'legend'],
             'hexbin': ['alpha', 'colormap', 'grid', 'linewidth'],
             'bootstrap': ['grid'],
             'bar': ['alpha', 'colormap', 'grid', 'legend', 'linewidth', 'subplots',
                     'sharey', 'logy', 'stacked', 'rot', 'kind'],
             'barh': ['alpha', 'colormap', 'grid', 'legend', 'linewidth', 'subplots',
                      'stacked', 'rot', 'kind', 'logx'],
             'histogram': ['alpha', 'linewidth', 'grid', 'stacked', 'subplots',
                           'colormap', 'sharey', 'rot', 'bins', 'logx', 'logy'],
             'heatmap': ['colormap', 'rot'],
             'area': ['alpha', 'colormap', 'grid', 'linewidth', 'legend', 'stacked',
                      'kind', 'rot', 'logx'],
             'density': ['alpha', 'colormap', 'grid', 'legend', 'linestyle',
                         'linewidth', 'marker', 'subplots', 'rot', 'kind'],
             'boxplot': ['rot', 'grid', 'logy', 'colormap', 'alpha', 'linewidth'],
             'scatter_matrix': ['alpha', 'linewidth', 'marker', 'grid', 's'],
             'contour': ['linewidth', 'colormap', 'alpha'],
             'imshow': ['colormap', 'alpha']
             }

    data = self.data
    # Kept as an equality test on purpose: only an explicit False aborts.
    if self._checkNumeric(data) == False:  # noqa: E712
        self.showWarning('no numeric data to plot')
        return

    # get all options from the mpl options object
    kwds = self.mplopts.kwds
    kind = kwds['kind']
    show_table = kwds['table']
    by = kwds['by']
    by2 = kwds['by2']
    errorbars = kwds['errorbars']
    useindex = kwds['use_index']

    # valid kwd args for this plot type
    kwargs = dict((k, kwds[k]) for k in valid[kind] if k in kwds)

    # initialise the figure
    self._initFigure()
    ax = self.ax
    # plt.style.use('dark_background')

    if by != '':
        # groupby needs to be handled per group so we can add all the axes
        # to our figure correctly
        if by not in data.columns:
            self.showWarning('the grouping column must be in selected data')
            return
        if by2 != '' and by2 in data.columns:
            by = [by, by2]
        g = data.groupby(by)
        if len(g) > 25:
            self.showWarning('too many groups to plot')
            return
        self.ax.set_visible(False)
        kwargs['subplots'] = False
        kwargs['legend'] = False  # remove axis legends

        # add_subplot needs integer grid dimensions; float() keeps the
        # division exact under Python 2 as well.
        size = len(g)
        nrows = max(int(round(np.sqrt(size))), 1)
        ncols = int(np.ceil(float(size) / nrows))

        handles, labels = [], []  # stay defined even if there are no groups
        for i, (n, df) in enumerate(g, start=1):
            ax = self.fig.add_subplot(nrows, ncols, i)
            d = df.drop(by, axis=1)  # remove grouping columns
            self._doplot(d, ax, kind, False, errorbars, useindex, kwargs)
            ax.set_title(n)
            handles, labels = ax.get_legend_handles_labels()
        self.fig.legend(handles, labels, loc='center right')  # , bbox_to_anchor=(1, 0.5))
        self.fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1, hspace=.25)
        axs = self.fig.get_axes()
        # self.canvas.draw()
    else:
        axs = self._doplot(data, ax, kind, kwds['subplots'], errorbars,
                           useindex, kwargs)

    if show_table == True:  # noqa: E712 - keep the original strictness
        # pandas moved this helper from pandas.tools.plotting to
        # pandas.plotting; support both. The alias avoids clobbering the flag.
        try:
            from pandas.plotting import table as draw_table
        except ImportError:
            from pandas.tools.plotting import table as draw_table
        if self.table.child is not None:
            tabledata = self.table.child.model.df
            # NOTE(review): in the grouped branch axs is a list of axes, which
            # table() presumably cannot draw on - confirm the intended target.
            draw_table(axs, np.round(tabledata, 2),
                       loc='upper right', colWidths=[0.1 for i in tabledata.columns])

    # set options general for all plot types
    # annotation options are separate
    lkwds = self.labelopts.kwds.copy()
    lkwds.update(kwds)

    self.setFigureOptions(axs, lkwds)
    scf = 12.0 / kwds['fontsize']  # float literal: no integer truncation on Python 2
    try:
        self.fig.tight_layout()
        self.fig.subplots_adjust(top=0.9)
    except Exception:
        # Fall back to fixed margins, scaled by the font size.
        self.fig.subplots_adjust(left=0.1, right=0.9, top=0.89,
                                 bottom=0.1, hspace=.4 / scf, wspace=.2 / scf)
        print('tight_layout failed')
    self.canvas.draw()
    return
# Remaining legend proxy markers (colors[3] .. colors[5]).
circ4, circ5, circ6 = (
    Line2D([0], [0], linestyle="none", marker="o", markersize=10,
           markerfacecolor=colors[color_idx], alpha=a_val)
    for color_idx in (3, 4, 5)
)

legend_handles = (circ1, circ2, circ3, circ4, circ5, circ6)
legend_labels = ("sand", "sand/gravel", "gravel/sand", "gravel", "gravel/boulders", "boulders")
ax.legend(legend_handles, legend_labels, numpoints=1, loc='best',
          borderaxespad=0.)  # bbox_to_anchor=(0.3, 0.9),

# Bottom-left cell of the 5x2 grid: blank axes carrying the pivot table.
# ax2 = fig.add_subplot(2,2,3)
ax2 = plt.subplot2grid((5, 2), (4, 0))
ax2.xaxis.set_visible(False)
ax2.yaxis.set_visible(False)
for sp in ax2.spines.values():
    sp.set_color('w')
    sp.set_zorder(0)
the_table = table(ax2, pivot_table.round(3), loc='upper right', colWidths=[0.1] * 8)
the_table.set_fontsize(10)
# plt.savefig(r"C:\workspace\Merged_SS\window_analysis\10_percent_shift\output\ss_visual_seg_2014_09_R01767.png", dpi=1000)

# Read band 1 of the raster and blank out the non-positive cells.
ss_raster = r"C:\workspace\Merged_SS\window_analysis\raster\ss_10_rasterclipped.tif"
ds = gdal.Open(ss_raster)
data = ds.GetRasterBand(1).ReadAsArray()
data[data <= 0] = np.nan
proj = ds.GetProjection()
gt = ds.GetGeoTransform()
xres, yres = gt[1], gt[5]
# get the edge coordinates and add half the resolution
# Overlay the remaining substrate patch groups, each in its own colour.
for patch_group, color_idx in ((sg_patch, 3), (g_patch, 2), (sr_patch, 1), (r_patch, 0)):
    ax.add_collection(
        PatchCollection(patch_group, facecolor=colors[color_idx], alpha=a_val,
                        edgecolor="none", zorder=10)
    )
ax.legend(
    (circ1, circ2, circ3, circ4, circ5),
    ("rock", "sand/rock", "gravel", "sand/gravel", "sand"),
    numpoints=1,
    loc="best",
)

print("Now plotting focal statistics...")

# Bottom row of the 5x2 grid: one statistics table per blank axes
# (tbl_28 on the left, tbl_31 on the right).
for grid_cell, stats_frame in (((4, 0), tbl_28), ((4, 1), tbl_31)):
    ax = plt.subplot2grid((5, 2), grid_cell)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    for sp in ax.spines.values():
        sp.set_color("w")
        sp.set_zorder(0)
    the_table = table(ax, stats_frame.round(3), loc="best", colWidths=[0.1] * 8)
    the_table.set_fontsize(12)

plt.tight_layout()
# plt.show()
print("Now Saving figure...")
plt.savefig(r"C:\workspace\Reach_4a\Multibeam\mb_sed_class\output\mb_sed_class_ground_truth_3m_agg_dist.png", dpi=600)
def _thousands_formatter():
    """Return a fresh tick formatter that shows value/1000 as a comma-grouped integer."""
    return FuncFormatter(lambda value, pos: '{:,}'.format(int(value / 1000)))


# Legend for the reported-spend axes, stretched across the top of the plot.
merchant_spend_legend = merchant_spend.legend(
    bbox_to_anchor=(0., 1.02, 1., .102), loc=3, ncol=2, mode="expand", borderaxespad=0.)

# MDB spend shares the x axis but gets its own y axis on the right.
mdb_spend = merchant_spend.twinx()
mdb_spend.set(ylabel=("MDB Spend(£000s)"))
mdb_spend.yaxis.set_major_formatter(_thousands_formatter())
mdb_spend_figures = test_file["MDB Spend Figures"]
mdb_spend_series = mdb_spend.plot(reporting_period, mdb_spend_figures, "r")
mdb_spend_legend = mdb_spend.legend(
    frameon=False, bbox_to_anchor=(0., 1.006, 1., .1), loc='center right', borderaxespad=0.)

# Second plot - data table showing the data with currency mark
data_table = fig.add_subplot(2, 2, 2)
data_table.axis('off')
currency_columns = test_file[["Reported Figures (£)", "MDB Spend Figures (£)"]]
the_table = table(data_table, currency_columns, rowLabels=xlabels, loc='center')
the_table.auto_set_font_size(False)
the_table.set_fontsize(10)

# Third plot - correlation scatter plot of reported spend vs mdb spend with line of best fit
correlation_graph = fig.add_subplot(2, 2, 3)
correlation_graph.set(title=("Correlation"), xlabel=("MDB Spend(£000s)"), ylabel=("Reported Spend(£000s)"))
correlation_graph.yaxis.set_major_formatter(_thousands_formatter())
correlation_graph.xaxis.set_major_formatter(_thousands_formatter())
correlation_graph.scatter(mdb_spend_figures, reported_figures)

# calc the trendline (linear)
z = np.polyfit(mdb_spend_figures, reported_figures, 1)
p = np.poly1d(z)
correlation_graph.plot(mdb_spend_figures, p(mdb_spend_figures), "r--")
r, p_value = pearsonr(mdb_spend_figures, reported_figures)
print "Total Datasets", len(datalist) print "Filenames", len(namelist) print "" if len(datalist) != len(namelist): print "Inconsitent length of data and corresponing names" sys.exit() for item in datalist: title = str(namelist[i]) + " " + str(len(item)) + " runs" print title, np.mean(item) fig, axs = pl.subplots(2,2) fig.suptitle(title, fontsize=14, fontweight='bold') item[['Decomp-SOMs','ticks']].plot(kind='hist', legend=True, bins=20, alpha=0.5,ax=axs[0][0]) item[['Decomp-SOMs','Necromass', 'ticks']].plot(kind='box',ax=axs[1][0]) table(axs[1][1], np.round(item[['Necromass', 'SOMs', 'ticks', 'Hotspots']].describe(), 0), loc='upper right', colWidths=[0.1, 0.1, 0.1, 0.1]) item.plot(kind='hexbin', x='SOMs', y='Necromass', C='ticks', reduce_C_function=np.max,gridsize=15, ax=axs[1][1]) item.plot(kind='hexbin', x='ticks', y='Necromass', C='SOMs', reduce_C_function=np.max,gridsize=20, ax=axs[0][1]) #bootstrap_plot(item['Necromass'], size=50, samples=500, color='grey') i += 1 #pl.show() print "" pl.show() '''setting for run which made the histogram for the PhD Seminar (pore like distribution) x = 0 y = 0 z = 4.5
def _hist_with_stats(axes, frame):
    """Draw the histogram of ``frame`` on ``axes``, add its describe() table and the y label."""
    frame.plot(ax=axes, kind='hist', bins=bin_s, legend=False)
    table(axes, np.round(frame.describe(), 3), loc='upper right', colWidths=[0.2])
    axes.set_ylabel('frequency')


# Non-positive texture values are treated as missing.
tex_data_160[tex_data_160 <= 0] = np.nan
del ds

# One frame per window size.
df_10 = convert_to_dataframe(tex_data_10)
df_20 = convert_to_dataframe(tex_data_20)
df_40 = convert_to_dataframe(tex_data_40)
df_80 = convert_to_dataframe(tex_data_80)
df_160 = convert_to_dataframe(tex_data_160)

# Histogram bin edges shared by every panel.
bin_s = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5,
         0.55, 0.60, 0.65, 0.70, 0.75, 0.80, 0.85, 0.90, 0.95, 1.0]

fig = plt.figure(figsize=(12, 3))

ax1 = fig.add_subplot(1, 5, 1)
_hist_with_stats(ax1, df_10)
ax1.set_xlabel('Texture Lengthscale (m)')
ax1.set_title('10 Pixel Window')

ax2 = fig.add_subplot(1, 5, 2)
_hist_with_stats(ax2, df_20)
ax2.set_xlabel('Texture Lengthscale (m)')
ax2.set_title('20 Pixel Window')

ax3 = fig.add_subplot(1, 5, 3)
_hist_with_stats(ax3, df_40)
def _add_window_panel(slot, frame, panel_title):
    """Add panel ``slot`` of the 1x8 grid: histogram, small-font describe() table, labels.

    Returns the new axes and the table so the caller can keep both handles.
    """
    axes = fig.add_subplot(1, 8, slot)
    frame.plot(ax=axes, kind='hist', bins=bin_s, legend=False)
    stats_table = table(axes, np.round(frame.describe(), 3), loc='upper right', colWidths=[0.2])
    stats_table.auto_set_font_size(False)
    stats_table.set_fontsize(4)
    axes.set_ylabel('frequency')
    axes.set_xlabel('Texture Lengthscale (m)')
    axes.set_title(panel_title)
    return axes, stats_table


# One frame per window size.
df_50 = convert_to_dataframe(tex_data_50)
df_55 = convert_to_dataframe(tex_data_55)
df_60 = convert_to_dataframe(tex_data_60)
df_65 = convert_to_dataframe(tex_data_65)
df_70 = convert_to_dataframe(tex_data_70)
df_80 = convert_to_dataframe(tex_data_80)
df_120 = convert_to_dataframe(tex_data_120)
df_160 = convert_to_dataframe(tex_data_160)

# Histogram bin edges shared by every panel: 0 to 3.2 in steps of 0.05.
bin_s = list(np.arange(0, 3.25, 0.05))

fig = plt.figure(figsize=(22, 3))
ax1, table2 = _add_window_panel(1, df_50, '50 Pixel Window')
ax1, table2 = _add_window_panel(2, df_55, '55 Pixel Window')
# Overlay each substrate patch group in its own colour.
for patch_group, color_idx in ((s_patch, 0), (sg_patch, 1), (g_patch, 2), (sr_patch, 3), (r_patch, 4)):
    ax.add_collection(
        PatchCollection(patch_group, facecolor=colors[color_idx], alpha=a_val,
                        edgecolor='none', zorder=10)
    )
# NOTE(review): the labels run rock -> sand while the patches above map
# s_patch (sand?) to colors[0] - verify that circ1..circ5 use the matching colours.
ax.legend((circ1, circ2, circ3, circ4, circ5),
          ('rock', 'sand/rock', 'Gravel', 'Sand/Gravel', 'sand'),
          numpoints=1, loc='best')

print('Now plotting focal statistics...')

# Bottom row of the 5x2 grid: one pivot table per blank axes
# (pivot_table_28 on the left, pivot_table_31 on the right).
for grid_cell, pivot_frame in (((4, 0), pivot_table_28), ((4, 1), pivot_table_31)):
    ax = plt.subplot2grid((5, 2), grid_cell)
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    # .values() works on Python 2 and 3; itervalues() exists only on Python 2 dicts.
    for sp in ax.spines.values():
        sp.set_color('w')
        sp.set_zorder(0)
    the_table = table(ax, pivot_frame.round(3), loc='center left', colWidths=[0.1] * 7)

plt.tight_layout()

# Save before showing: once the window opened by plt.show() is closed the
# figure is gone, and a later savefig would write an empty canvas.
print('Now Saving figure...')
plt.savefig(r"C:\workspace\Reach_4a\Multibeam\mb_sed_class\output\mb_aug_may_comparison_diverging_cmap.png", dpi=1000)
plt.show()
def plot2D(self):
    """Plot method for current data.

    Relies on pandas plot functionality if possible. Only the options
    listed in ``valid`` for the selected plot kind are forwarded to
    ``self._doplot``.

    Reads ``self.data``, ``self.mplopts.kwds`` and ``self.labelopts.kwds``,
    draws on ``self.fig`` / ``self.ax``, then redraws the annotations via
    ``self.labelopts.redraw()`` and the canvas. Returns ``None`` in every
    case; problems are reported through ``self.showWarning`` and end the
    call early.

    When a grouping column (``by``, optionally ``by2``) is set, each group
    gets its own subplot in a near-square grid and one figure-level legend.
    """
    if not hasattr(self, "data"):
        return

    # needs cleaning up
    valid = {
        "line": [
            "alpha", "colormap", "grid", "legend", "linestyle", "linewidth",
            "marker", "subplots", "rot", "logx", "logy", "sharey", "kind",
        ],
        "scatter": [
            "alpha", "grid", "linewidth", "marker", "subplots", "s", "legend",
            "colormap", "sharey", "logx", "logy", "use_index", "c", "cscale",
            "colorbar", "bw",
        ],
        "pie": ["colormap", "legend"],
        "hexbin": ["alpha", "colormap", "grid", "linewidth"],
        "bootstrap": ["grid"],
        "bar": [
            "alpha", "colormap", "grid", "legend", "linewidth", "subplots",
            "sharey", "logy", "stacked", "rot", "kind",
        ],
        "barh": ["alpha", "colormap", "grid", "legend", "linewidth", "subplots", "stacked", "rot", "kind", "logx"],
        "histogram": [
            "alpha", "linewidth", "grid", "stacked", "subplots", "colormap",
            "sharey", "rot", "bins", "logx", "logy",
        ],
        "heatmap": ["colormap", "rot"],
        "area": ["alpha", "colormap", "grid", "linewidth", "legend", "stacked", "kind", "rot", "logx"],
        "density": [
            "alpha", "colormap", "grid", "legend", "linestyle", "linewidth",
            "marker", "subplots", "rot", "kind",
        ],
        "boxplot": ["rot", "grid", "logy", "colormap", "alpha", "linewidth"],
        "scatter_matrix": ["alpha", "linewidth", "marker", "grid", "s"],
        "contour": ["linewidth", "colormap", "alpha"],
        "imshow": ["colormap", "alpha"],
    }

    data = self.data
    # Kept as an equality test on purpose: only an explicit False aborts.
    if self._checkNumeric(data) == False:  # noqa: E712
        self.showWarning("no numeric data to plot")
        return

    # get all options from the mpl options object
    kwds = self.mplopts.kwds
    kind = kwds["kind"]
    show_table = kwds["table"]
    by = kwds["by"]
    by2 = kwds["by2"]
    errorbars = kwds["errorbars"]
    useindex = kwds["use_index"]
    bw = kwds["bw"]

    # valid kwd args for this plot type
    kwargs = dict((k, kwds[k]) for k in valid[kind] if k in kwds)

    # initialise the figure
    self._initFigure()
    ax = self.ax
    # plt.style.use('dark_background')

    if by != "":
        # groupby needs to be handled per group so we can add all the axes
        # to our figure correctly
        if by not in data.columns:
            self.showWarning("the grouping column must be in selected data")
            return
        if by2 != "" and by2 in data.columns:
            by = [by, by2]
        g = data.groupby(by)
        if len(g) > 30:
            self.showWarning("too many groups to plot")
            return
        self.ax.set_visible(False)
        kwargs["subplots"] = False
        kwargs["legend"] = False  # remove axis legends

        # add_subplot needs integer grid dimensions; float() keeps the
        # division exact under Python 2 as well.
        size = len(g)
        nrows = max(int(round(np.sqrt(size))), 1)
        ncols = int(np.ceil(float(size) / nrows))

        handles, labels = [], []  # stay defined even if there are no groups
        for i, (n, df) in enumerate(g, start=1):
            ax = self.fig.add_subplot(nrows, ncols, i)
            d = df.drop(by, axis=1)  # remove grouping columns
            self._doplot(d, ax, kind, False, errorbars, useindex, bw=bw, kwargs=kwargs)
            ax.set_title(n)
            handles, labels = ax.get_legend_handles_labels()

        self.fig.legend(handles, labels, loc="center right")
        self.fig.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1, hspace=0.25)
        axs = self.fig.get_axes()
        # self.ax = axs[0]
    else:
        axs = self._doplot(data, ax, kind, kwds["subplots"], errorbars, useindex, bw=bw, kwargs=kwargs)

    if show_table == True:  # noqa: E712 - keep the original strictness
        # pandas moved this helper from pandas.tools.plotting to
        # pandas.plotting; support both. The alias avoids clobbering the flag.
        try:
            from pandas.plotting import table as draw_table
        except ImportError:
            from pandas.tools.plotting import table as draw_table
        if self.table.child is not None:
            tabledata = self.table.child.model.df
            # NOTE(review): in the grouped branch axs is a list of axes, which
            # table() presumably cannot draw on - confirm the intended target.
            draw_table(axs, np.round(tabledata, 2), loc="upper right", colWidths=[0.1 for i in tabledata.columns])

    # set options general for all plot types
    # annotation options are separate
    lkwds = self.labelopts.kwds.copy()
    lkwds.update(kwds)

    self.setFigureOptions(axs, lkwds)
    scf = 12.0 / kwds["fontsize"]  # float literal: no integer truncation on Python 2
    try:
        self.fig.tight_layout()
        self.fig.subplots_adjust(top=0.9)
    except Exception:
        # Fall back to fixed margins, scaled by the font size.
        self.fig.subplots_adjust(left=0.1, right=0.9, top=0.89, bottom=0.1, hspace=0.4 / scf, wspace=0.2 / scf)
        print("tight_layout failed")
    # redraw annotations
    self.labelopts.redraw()
    self.canvas.draw()
    return
# Load the training set from the competition folder, parsing 'Dates' and
# using it as the index.
workspace += 'Data Mining & Text Mining\\kaggle competitions\\san francisco crime'
os.chdir(workspace)
train = './train.csv'
crimeData = pd.read_csv(train, sep=',', index_col='Dates', parse_dates=['Dates'])

# Everything below runs from (and writes into) the project folder.
workspace = "C:\\Users\\Giammi\\OneDrive\\Università\\Machine Learning\\project"
os.chdir(workspace)

# Save the first five rows as an image: a frameless axes with both axis
# lines hidden, holding only the table.
head = crimeData.head(5)
ax = plt.subplot(4, 1, 1, frame_on=False)
ax.yaxis.set_visible(False)
ax.xaxis.set_visible(False)
table(ax, head)
plt.savefig('mytable.png')

# INSPECTION ======================================================================================
pylab.rcParams['figure.figsize'] = (14.5, 6.0)

# Number of rows per crime category (value_counts orders by frequency).
crimes_rating = crimeData['Category'].value_counts()
print('San Francisco Crimes\n')
print('Category\t\tNumber of occurences')
print(crimes_rating)

# Bar positions for the first `top` categories.
top = 18
y_pos = np.arange(len(crimes_rating[:top]))