def lcplot_withsummary(axs, lc, trace, summary_kwargs=None, lc_kwargs=None):
    """Draw a light-curve plot on ``axs[0]`` and a trace summary table on ``axs[1]``.

    Parameters
    ----------
    axs : indexable of matplotlib Axes
        ``axs[0]`` is handed to ``lcplot``; ``axs[1]`` gets its axes hidden
        and receives the table.
    lc : object
        Forwarded unchanged to ``lcplot``.
    trace : object
        Forwarded to ``lcplot`` (as ``trace=``) and to ``pm.summary``.
    summary_kwargs : dict, optional
        Extra keyword arguments for ``pm.summary``.
    lc_kwargs : dict, optional
        Extra keyword arguments for ``lcplot``.

    Returns
    -------
    The ``axs`` object that was passed in.
    """
    # ``None`` sentinels instead of shared mutable ``{}`` defaults.
    summary_kwargs = {} if summary_kwargs is None else summary_kwargs
    lc_kwargs = {} if lc_kwargs is None else lc_kwargs

    # Both kwargs dicts used to be accepted but silently ignored.
    lcplot(axs[0], lc, trace=trace, **lc_kwargs)

    axs[1].xaxis.set_visible(False)
    axs[1].yaxis.set_visible(False)
    table(axs[1], pm.summary(trace, **summary_kwargs).round(3))

    pl.tight_layout()
    return axs
def plot_point_sets(points_list, Div_mat, diversity_names):
    """Save one scatter plot per point set, annotated with its diversity values.

    For the ``cnt``-th entry of ``points_list`` the first two columns are
    scattered on the unit square, row ``cnt`` of ``Div_mat`` (rounded to two
    decimals) is drawn as a table in the upper-right corner, and the figure
    is written to ``'<cnt>.png'``.

    :param points_list: iterable of 2-D arrays; columns 0 and 1 are plotted
    :param Div_mat: data accepted by ``pd.DataFrame``; row ``cnt`` belongs to
        the ``cnt``-th point set
    :param diversity_names: column labels assigned to ``Div_mat``
    """
    df = pd.DataFrame(Div_mat)
    df.columns = diversity_names

    for cnt, points in enumerate(points_list):
        fig, ax = plt.subplots()
        try:
            table(ax, np.round(df.iloc[cnt, :], 2), loc='upper right', colWidths=[0.05])
            ax.scatter(points[:, 0], points[:, 1])
            ax.set_xlabel('x1')
            ax.set_ylabel('x2')
            ax.set_title(str(cnt))
            ax.set_xlim(0, 1)
            ax.set_ylim(0, 1)
            fig.savefig('{}.png'.format(cnt))
        finally:
            # One figure per point set: close it so long lists do not
            # accumulate open figures.
            plt.close(fig)
def PlotDFrames(df):
    """Render the first ten rows of ``df`` as a table image.

    Prints ``type(df)``, draws the table on a frameless subplot with both
    axes hidden, and saves the current figure as ``'LAT,LONG,DIRECTION.png'``.
    """
    print(type(df))

    axes = plt.subplot(111, frame_on=False)
    for hidden_axis in (axes.xaxis, axes.yaxis):
        hidden_axis.set_visible(False)

    preview = df.head(10)
    table(axes, preview)
    plt.savefig('LAT,LONG,DIRECTION.png')
async def display(self, ctx):
    """Displays the boys with their current stats"""
    # Materialise the query result once, then build the three columns.
    rows = list(boys.find({}))
    df = pd.DataFrame({
        'Names': [boy["name"] for boy in rows],
        'Ferda Points': [boy["points"] for boy in rows],
        'Bitch Cards': [boy["bitchcard"] for boy in rows],
    })
    ranked = df.sort_values(by=['Ferda Points'], ascending=False)

    # Use a dedicated figure and close it afterwards. Drawing on pyplot's
    # shared current figure made repeated invocations stack tables on top of
    # each other and kept the figure alive indefinitely.
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111, frame_on=False)  # no visible frame
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        # Blank row labels are passed to hide the DataFrame index.
        table(ax, ranked, rowLabels=[''] * df.shape[0], loc='center')
        fig.savefig("ferdatable.png")
    finally:
        plt.close(fig)

    await ctx.send(file=discord.File('ferdatable.png'))
def describe_dataframe(df, name):
    """
    Obtains the basic statistical descriptors for the given DataFrame.

    The result of ``df.describe()`` is written to ``<name>.csv`` and also
    rendered as a table image saved to ``<name>.png``.

    Parameters
    ----------
    df : pd.DataFrame
        DataFrame from which the information is obtained.
    name : str
        Name of the DataFrame; used as the stem of both output files.

    Returns
    -------
    None.
    """
    desc = df.describe()
    desc.to_csv(name + '.csv')

    fig = plt.figure(figsize=(30, 20))
    try:
        plot = fig.add_subplot(111, frame_on=False)
        plot.xaxis.set_visible(False)
        plot.yaxis.set_visible(False)
        table(plot, desc, loc='upper right')
        fig.savefig(name + '.png')
    finally:
        # The figure is large and only needed for the export; release it.
        plt.close(fig)
def plot_timeline_table(gf):
    """Plot dinosar inventory acquisitions as a timeline with a table.

    Scatters acquisitions of Sentinel-1A/1B (ascending drawn with
    ``facecolor='none'``, 1B with diamond markers) against ``orbitCode``,
    overlays a per-orbit summary table and saves the figure to
    ``timeline_with_table.pdf``.

    Parameters
    ----------
    gf : GeoDataFrame
        A geopandas GeoDataFrame

    """
    ascending = ' flightDirection == "ASCENDING" '
    descending = ' flightDirection == "DESCENDING" '
    s1a = gf.query('platform == "Sentinel-1A"')
    s1b = gf.query('platform == "Sentinel-1B"')

    # (subset, extra scatter keywords), listed in drawing order.
    layers = [
        (s1a.query(ascending), dict(facecolor='none', label='S1A')),
        (s1b.query(ascending), dict(facecolor='none', marker='d', label='S1B')),
        (s1a.query(descending), dict(label='S1A')),
        (s1b.query(descending), dict(marker='d', label='S1B')),
    ]

    orbits = gf.relativeOrbit.unique()

    # summary table: one row per relative orbit
    by_orbit = gf.groupby('relativeOrbit')
    scene_dates = by_orbit.sceneDateString
    dfS = gpd.pd.DataFrame(index=orbits)
    dfS['Start'] = scene_dates.min()
    dfS['Stop'] = scene_dates.max()
    dfS['Dates'] = scene_dates.nunique()
    dfS['Frames'] = scene_dates.count()
    dfS['Direction'] = by_orbit.flightDirection.first()
    dfS['UTC'] = by_orbit.utc.first()
    dfS.sort_index(inplace=True, ascending=False)
    dfS.index.name = 'Orbit'

    # Same colors as map
    colors = plt.cm.jet(np.linspace(0, 1, orbits.size))

    fig, ax = plt.subplots(figsize=(11, 8.5))
    for subset, extra in layers:
        codes = subset.orbitCode.values
        plt.scatter(subset.timeStamp.values, codes,
                    c=colors[codes], cmap='jet', s=60, **extra)

    plt.yticks(gf.orbitCode.unique(), orbits)

    table(ax, dfS,
          loc='top',
          zorder=10,
          fontsize=12,
          cellLoc='center',
          rowLoc='center',
          bbox=[0.1, 0.7, 0.6, 0.3])  # [left, bottom, width, height]

    ax.xaxis.set_minor_locator(MonthLocator())
    ax.xaxis.set_major_locator(YearLocator())

    plt.legend(loc='upper right')
    plt.ylim(-1, orbits.size+3)
    plt.ylabel('Orbit Number')
    fig.autofmt_xdate()
    plt.title('Sentinel-1 timeline')
    plt.savefig('timeline_with_table.pdf', bbox_inches='tight')
def PlotDFrames(df):
    """Render rows 10-19 of ``df`` as a table image.

    The table is drawn on a frameless subplot with both axes hidden and the
    current figure is saved as ``'LAT,LONG,DIRECTION.png'``.
    """
    axes = plt.subplot(111, frame_on=False)
    for hidden_axis in (axes.xaxis, axes.yaxis):
        hidden_axis.set_visible(False)

    # TODO: adjust which slice of the DataFrame is shown.
    window = df[10:20]
    table(axes, window)
    plt.savefig('LAT,LONG,DIRECTION.png')
def get_img(df):
    """Draw ``df`` as a table on a bare subplot and save it to ``'mytable.png'``."""
    canvas = plt.subplot(111, frame_on=False)
    # Hide both axes so only the table is visible.
    for hidden_axis in (canvas.xaxis, canvas.yaxis):
        hidden_axis.set_visible(False)

    table(canvas, df)
    plt.savefig('mytable.png')
def acfplot_withsummary(axs, lc, trace, summary_kwargs=None, acf_kwargs=None):
    """Draw an ACF plot on ``axs[0]`` and a trace summary table on ``axs[1]``.

    The summary from ``pm.summary`` gets an extra ``'mode'`` column filled
    from ``utils.modes(trace)`` and is rounded to three decimals.

    Parameters
    ----------
    axs : indexable of matplotlib Axes
        ``axs[0]`` is handed to ``plotacf``; ``axs[1]`` gets its axes hidden
        and receives the table.
    lc : object
        Forwarded unchanged to ``plotacf``.
    trace : object
        Forwarded to ``pm.summary`` and ``utils.modes``.
    summary_kwargs : dict, optional
        Extra keyword arguments for ``pm.summary``.
    acf_kwargs : dict, optional
        Extra keyword arguments for ``plotacf``.

    Returns
    -------
    The ``axs`` object that was passed in.
    """
    # ``None`` sentinels instead of shared mutable ``{}`` defaults.
    summary_kwargs = {} if summary_kwargs is None else summary_kwargs
    acf_kwargs = {} if acf_kwargs is None else acf_kwargs

    # ``summary_kwargs`` used to be accepted but never forwarded.
    summary = pm.summary(trace, **summary_kwargs)
    summary['mode'] = list(utils.modes(trace).values())

    plotacf(axs[0], lc, **acf_kwargs)

    axs[1].xaxis.set_visible(False)
    axs[1].yaxis.set_visible(False)
    table(axs[1], summary.round(3))

    pl.tight_layout()
    return axs
def print_summary(self, save_file=None):
    """Print ``pm.summary(self.trace)`` and optionally save it as a table image.

    When ``save_file`` is not ``None`` the summary is drawn in the upper-right
    of a frameless subplot and the current figure is saved to ``save_file``.
    """
    trace_summary = pm.summary(self.trace)
    print(trace_summary)

    if save_file is None:
        return

    axes = plt.subplot(111, frame_on=False)
    for hidden_axis in (axes.xaxis, axes.yaxis):
        hidden_axis.set_visible(False)
    table(axes, trace_summary, loc='upper right')
    plt.savefig(save_file)
def plot_tables(table_dict, filename):
    """Render ``table_dict`` as a table image, save it to ``filename`` and show it.

    The dictionary is converted with ``orient='index'``, i.e. its keys become
    the row labels.
    """
    frame = pd.DataFrame.from_dict(table_dict, orient='index')

    axes = plt.subplot(111, frame_on=False)
    for hidden_axis in (axes.xaxis, axes.yaxis):
        hidden_axis.set_visible(False)
    table(axes, frame)

    plt.savefig(filename, bbox_inches='tight', dpi=250)
    plt.show()
def make_image(df, name):
    """Draw ``df`` as a table and save it to ``'<name>_picture.png'``.

    The subplot has no frame, but its x/y axes are left visible. Returns the
    name of the file that was written.
    """
    file_name = f'{name}_picture.png'

    canvas = plt.subplot(111, frame_on=False)
    table(canvas, df)
    plt.savefig(file_name)

    return file_name
def __df_to_png(self, df, file_path):
    """Render ``df`` as a centred table, save it to ``file_path``, return ``self.response``."""
    # Start from a cleared figure so earlier drawings do not show through.
    subplots(clear=True)
    matplotlib.rc('figure', dpi=160)

    axes = plt.subplot(111, frame_on=False)
    for hidden_axis in (axes.xaxis, axes.yaxis):
        hidden_axis.set_visible(False)

    table(axes, df, loc='center')
    savefig(file_path)
    return self.response
def plot_text_to_png(data, path, figsize=(12, 10)):
    """Render ``data`` to an image file.

    A ``pd.DataFrame`` is drawn as a table anchored at the upper left; any
    other object is drawn as monospace text of ``str(data)``. The axes are
    switched off and the figure is saved to ``path``.

    :param data: DataFrame or any object convertible with ``str``
    :param path: destination passed to ``savefig``
    :param figsize: figure size in inches
    """
    fig, ax = plt.subplots(figsize=figsize)
    try:
        if isinstance(data, pd.DataFrame):
            table(ax, data, loc='upper left')
        else:
            ax.text(0.01, 0.05, str(data), {'fontsize': 10}, fontproperties='monospace')
        ax.axis('off')
        fig.tight_layout()
        fig.savefig(path)
    finally:
        # The figure only exists for the export; do not leave it open.
        plt.close(fig)
def getAsGif():
    """Render ``server_list`` as a table image and return it via ``static_file``.

    The table is saved to ``mytable.png``, which is then moved to
    ``mytable.gif`` and served.
    """
    import matplotlib.pyplot as plt
    from pandas.plotting import table

    # Dedicated figure, closed afterwards, so repeated calls neither draw on
    # top of the previous table nor keep figures alive.
    fig = plt.figure()
    try:
        ax = fig.add_subplot(111, frame_on=False)  # no visible frame
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        table(ax, server_list)
        fig.savefig('mytable.png')
    finally:
        plt.close(fig)

    # NOTE(review): this only changes the file extension; the bytes are still
    # PNG. A real GIF would need an image library to convert.
    # os.replace overwrites an existing target on every platform, whereas
    # os.rename fails on Windows once 'mytable.gif' already exists.
    os.replace('mytable.png', 'mytable.gif')
    return static_file('mytable.gif')
def to_table(df):
    """
    Plot ``df`` with its values attached as a table.

    The previous body called ``pandas.plotting.table`` without the required
    ``ax`` argument, so it raised ``TypeError`` before reaching the plot.
    Only the valid call is kept. The original author's note still applies:
    ``DataFrame.to_latex`` gives nicer tables.

    :param df: DataFrame to plot
    :return: None
    """
    df.plot(table=True)
def mcts_variance(game_state: GameState, cheater: bool,
                  options: MctsPlayerOptions, num_samples: int):
    """Box-plot the sampled data and save it with a descriptive-statistics table.

    The frame returned by ``_get_dataframe`` is described and printed, drawn
    as a box plot with the rounded statistics as a table (column headers
    blanked), and saved to ``mcts_variance.png``.
    """
    dataframe = _get_dataframe(game_state, cheater, options, num_samples)

    details = dataframe.describe()
    print(details)

    dataframe.boxplot()

    # Blank the table's column headers.
    details.columns = [""] * len(details.columns)
    axes = plt.gca()
    table(axes, np.round(details, 2))

    axes.xaxis.tick_top()
    plt.xticks(rotation=45, ha='left')
    plt.gcf().set_size_inches((5, 10))
    plt.tight_layout()
    plt.savefig("mcts_variance.png")
def add_raw_table(fig, df, mouse_name, day):
    """Add an axes below the figure showing the raw rows for one mouse and day.

    Rows are selected with ``df.xs((mouse_name, day), level=[1, 2])``; the
    ``event_type`` and ``error_code`` index levels are turned into columns.
    When the largest ``instances`` value exceeds 5, the axes bottom moves
    down and its height grows by the same amount.

    :param fig: matplotlib Figure that receives the new axes
    :param df: MultiIndex DataFrame with an ``instances`` column
    :param mouse_name: key for index level 1
    :param day: key for index level 2
    """
    tab = df.xs((mouse_name, day), level=[1, 2], drop_level=False)
    # Non-inplace reset: avoids mutating what may be a view of ``df``.
    tab = tab.reset_index(level=['event_type', 'error_code'])

    yb, h = -0.3, 0.3
    tot = tab.instances.values.max()
    if tot > 5:
        extra = 0.1 * (tot / 5.)
        yb -= extra
        h += extra

    # Use the computed height. It was calculated but a constant 0.3 was
    # passed, so larger tables were only shifted down, never enlarged.
    ax2 = fig.add_axes([0.2, yb, 0.5, h])
    table(ax2, tab, bbox=(0.2, 0, 0.8, 1))
    ax2.xaxis.set_visible(False)
    ax2.yaxis.set_visible(False)
def hangman_show_rating(bot, message):
    """Send the hangman leaderboard to the chat as a PNG table.

    All rows of the ``score`` table in ``hangman/final_score.bd`` are read.
    Column 3 of a row is used as the win percentage and column 2 as the
    number of games. The ten rows with the highest percentage are kept, and
    rows sharing a percentage are ordered by number of games, descending.
    The first four columns are rendered under the headers
    ``Name``/``W``/``G``/``%``, saved to ``hangman/results.png`` and sent
    with ``bot.send_photo`` to ``message.chat.id``.
    """
    conn = sqlite3.connect('hangman/final_score.bd')
    try:
        players = conn.execute('SELECT * FROM score').fetchall()
    finally:
        # Close the connection even if the query fails.
        conn.close()

    percent_of_wins = numpy.array([row[3] for row in players], dtype=float)
    N_games = numpy.array([row[2] for row in players], dtype=float)

    players = numpy.asarray(players)
    index = numpy.flip(numpy.argsort(percent_of_wins))
    players = players[index][:10]
    percent_of_wins = percent_of_wins[index][:10]
    N_games = N_games[index][:10]

    # Tie-break: within each run of equal percentages, more games first.
    for i in range(len(percent_of_wins)):
        if i != 0 and percent_of_wins[i] == percent_of_wins[i - 1]:
            continue  # this run was already handled at its first element
        sel = (percent_of_wins == percent_of_wins[i])
        index_flip = numpy.flip(numpy.argsort(N_games[sel]))
        players[sel] = players[sel][index_flip]

    columns = ['Name', 'W', 'G', '%']
    index = range(1, len(players) + 1, 1)
    dataframe = pandas.DataFrame(data=players[:, 0:4], index=index, columns=columns)

    # Create the figure only once the data is ready, and always close it.
    fig, ax = plt.subplots(1, frameon=False, figsize=(6, 4), edgecolor='black')
    try:
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        table(ax, dataframe, loc='center', colWidths=[0.2, 0.1, 0.1, 0.1])
        fig.tight_layout(rect=(-0.35, -0.3, 1.35, 1.3))
        fig.savefig('hangman/results.png', dpi=150)
    finally:
        plt.close(fig)

    # The ``with`` block closes the file; no explicit close() is needed.
    with open('hangman/results.png', 'rb') as result:
        bot.send_photo(message.chat.id, result)
def count_of_each_categories(self, y_train=None, y_test=None, technology_segment=None,
                             output_dir=None):
    """
    Shows the plot of the training and test split data

    For each of the two label collections the value counts are drawn as a
    line plot with a table in the upper-right corner. The figures are saved
    as ``train_data_split.png`` and ``test_data_split.png``.

    :param y_train: Takes the Y_train data. Used to train the model
    :param y_test: Takes the Y_test data. This is the data which will be further used to
    find the accuracy of the model.
    :param technology_segment: column name under which the labels are stored
    :param output_dir: directory for the two PNG files; defaults to the
    previously hard-coded location
    :return: saves the plot in a particular path
    """
    import os

    if output_dir is None:
        output_dir = (
            r"/Users/nitesh/OneDrive/Work/GE_Python_Workspace/ClassifierApproaches/Excel Documents/"
            r"Fintech")

    def save_split_plot(labels, file_name):
        """Plot the value counts of ``labels`` and save them as ``file_name``."""
        # Converting into proper pd dataframe
        frame = pd.DataFrame()
        frame[technology_segment] = labels

        # Value count of each category/technology segment
        freq = pd.DataFrame()
        freq['freq'] = frame[technology_segment].value_counts()

        fig, ax = plt.subplots(1, 1)
        try:
            table(ax, freq, loc='upper right', colWidths=[0.2, 0.2, 0.2])
            freq.plot(ax=ax)
            fig.savefig(os.path.join(output_dir, file_name))
        finally:
            # Each split gets its own figure; release it once written.
            plt.close(fig)

    save_split_plot(y_train, "train_data_split.png")
    save_split_plot(y_test, "test_data_split.png")
def plot_precision_recall_f1_table(self, description=None, df=None, save_image=True,
                                   file_name="precision_recall.png"):
    """Draw a precision/recall/F1 DataFrame as a table and plot or save it.

    When ``df`` is ``None`` it is obtained from
    ``self._get_precision_recall_f1_df()``. The drawing is handed to
    ``self.plot_or_save`` together with ``save_image`` and ``file_name``.
    """
    frame = self._get_precision_recall_f1_df() if df is None else df

    plt.figure()
    axes = plt.subplot(111, frame_on=False)
    for hidden_axis in (axes.xaxis, axes.yaxis):
        hidden_axis.set_visible(False)

    labels = description if isinstance(description, list) else [description]
    # NOTE(review): ``labels`` is passed as the third positional argument of
    # ``table``. Whether that is accepted, and what it means, depends on the
    # installed pandas version - confirm the intent.
    table(axes, frame, labels)

    self.plot_or_save(plt, save_image, file_name)
def _make_table(self, ax, df, title, height=None):
    """Draw ``df`` as a styled table on ``ax``.

    If ``df`` is ``None`` the axes is hidden and nothing is drawn. Otherwise
    ``df`` is first passed through ``self._insert_index``. Cells in column
    ``-1`` and cells in the corner where header rows meet index columns are
    hidden; the remaining header/index cells are shaded grey. Every cell
    gets the same height.

    :param ax: matplotlib Axes to draw on
    :param df: DataFrame to render, or ``None``
    :param title: axes title
    :param height: cell height; defaults to ``1 / (len(df) + 1)``
    """
    if df is None:
        ax.set_visible(False)
        return

    import pandas.plotting as plotting

    idx_nlevels = df.index.nlevels
    col_nlevels = df.columns.nlevels
    # must be convert here to get index levels for colorization
    df = self._insert_index(df)
    tb = plotting.table(ax, df, loc=9)
    tb.set_fontsize(self.font_size)

    if height is None:
        height = 1.0 / (len(df) + 1)

    # Plain dict iteration instead of the Python 2 ``compat.iteritems`` shim.
    for (r, c), cell in tb.get_celld().items():
        if c == -1:
            cell.set_visible(False)
        elif r < col_nlevels and c < idx_nlevels:
            cell.set_visible(False)
        elif r < col_nlevels or c < idx_nlevels:
            cell.set_facecolor('#AAAAAA')
        cell.set_height(height)

    ax.set_title(title, size=self.font_size)
    ax.axis('off')
def pca_figure(df, atts, compare_groups, saveloc=None):
    """Build the PCA figure, overlay per-group covariance patches and a loadings table.

    ``make_pca_figures`` produces the base plot. For every value of
    ``'Sample Type'`` the first two principal components are passed to
    ``plot_point_cov``. The rounded PCA component matrix is drawn as a table
    to the right of the axes.

    :param df: input DataFrame (its index is reset before use)
    :param atts: attributes forwarded to ``make_pca_figures``
    :param compare_groups: forwarded to ``make_pca_figures``
    :param saveloc: output path; the figure is saved both there and to the
        same path with ``'pdf'`` replaced by ``'-L_W.png'``. Nothing is
        saved when ``None``.
    :return: ``(g, dfx, pca)`` as returned by ``make_pca_figures``
    """
    from pandas.plotting import table

    print(atts)
    x = df.reset_index(drop=True)
    g, dfx, pca = make_pca_figures(x, atts, compare_groups)
    plt.gca().legend(loc='upper left')

    pc_columns = ['principal component 1', 'principal component 2']
    palette = itertools.cycle(sns.color_palette())
    for u in dfx['Sample Type'].unique():
        points = dfx.loc[dfx['Sample Type'] == u, pc_columns].values
        # NOTE(review): the hull is not used; the call is kept only so that
        # inputs ConvexHull rejects still fail here as before.
        ConvexHull(points)
        plot_point_cov(points, nstd=1.5, alpha=0.3, color=next(palette), ax=g.ax)

    print(pca.components_.T)
    d = np.around(
        pd.DataFrame(
            pca.components_.T,
            columns=['PC-1', 'PC-2'],
            index=['Width', 'Length', 'Depth', 'Volume', 'Surface A .']),
        2)

    mtable = table(plt.gca(), d, loc='right', colWidths=[0.2, 0.2, 0.2],
                   zorder=3, bbox=(1.2, 0.8, 0.3, 0.2))
    # ``get_celld`` replaces ``properties()['child_artists']``, a key that
    # current Matplotlib no longer provides.
    for cell in mtable.get_celld().values():
        cell.set_width(0.2)

    plt.subplots_adjust(right=0.7)
    plt.gcf().suptitle('')

    # ``saveloc`` defaults to None; previously that crashed on ``.replace``.
    if saveloc is not None:
        fig = plt.gcf()
        # this is modified for the extra atts
        fig.savefig(saveloc.replace('pdf', '-L_W.png'))
        fig.savefig(saveloc)

    return g, dfx, pca
def graph_most_upvoted_comments(commentsDataFrame):
    """Draw the ten highest-scored comments as a table in the lower grid cell.

    The ``author`` column is mapped through ``get_name``. The frame is
    ordered by ``score`` descending, limited to 10 rows and converted to
    pandas; ``body`` is wrapped at 140 characters. The table's first three
    columns get fixed widths.

    :param commentsDataFrame: Spark DataFrame with ``author``, ``body`` and
        ``score`` columns
    """
    udf_get_name = udf(get_name, StringType())
    converted = commentsDataFrame.withColumn('author', udf_get_name('author'))
    comments_by_score = converted.orderBy(desc('score')).limit(10).toPandas()
    comments_by_score['body'] = comments_by_score['body'].str.wrap(140)
    # The earlier ``rename(...)`` / ``set_index(...)`` calls discarded their
    # results and therefore had no effect; they have been removed.

    ax2 = plt.subplot2grid((8, 3), (4, 0), rowspan=4, colspan=3, frame_on=False)
    ax2.xaxis.set_visible(False)
    ax2.yaxis.set_visible(False)
    ax2.axis('off')

    tab = table(ax2, comments_by_score, loc='upper center', cellLoc='left')
    tab.auto_set_font_size(False)
    tab.set_fontsize(17)

    cell_dict = tab.get_celld()
    column_widths = (0.9, 0.06, 0.2)
    # Row 0 is the header. Iterate over the rows that actually exist, since
    # a hard-coded range(11) raised KeyError for fewer than 10 comments.
    for row in range(len(comments_by_score) + 1):
        row_height = 0.03 if row == 0 else 0.1
        for col, width in enumerate(column_widths):
            cell = cell_dict[(row, col)]
            cell.set_width(width)
            cell.set_height(row_height)
def plot_freqency(self, mapping):
    """Scatter repeated ``ip_id`` traffic and list the most frequent source IPs.

    Rows whose ``ip_src`` is one of three encoded honeypot addresses are
    dropped, and only rows with a duplicated ``ip_id`` are kept. The left
    subplot is a scatter of ``ip_src`` against ``self.xlabel``; the right
    subplot is a table of up to 30 rows with ``ip_src``/``country``/``ASN``.
    """
    honeypot = [
        self.ip_encoder(address)
        for address in ('sip: 160.26.57.181', 'sip: 160.26.57.192', 'sip:160.26.57.203')
    ]

    plt.figure(figsize=(12, 10))
    scatter_ax = plt.subplot(121, title=self.signature.split('/')[-1])

    fre_base = mapping.query(f'ip_src not in {honeypot}')
    repeated = fre_base['ip_id'].duplicated()
    fre_base = fre_base[repeated]
    fre_base.plot(kind='scatter', x=self.xlabel, y='ip_src', s=0.5, ax=scatter_ax)

    # Counts of the 30 most frequent decoded source addresses.
    frequent = fre_base['ip_src'].map(self.ip_decoder).value_counts()[:30]
    fre_base['ip_src'] = fre_base['ip_src'].map(self.ip_decoder)

    ftmp = fre_base.set_index('ip_src')
    ftmp = ftmp.join(frequent)
    ftmp = ftmp.sort_values('ip_src', ascending=False)

    ip_info = pd.concat([ftmp['ip_src'], ftmp['country'], ftmp['ASN']], axis=1)
    ip_info = ip_info[~ip_info.duplicated()].iloc[:30, :]

    table_ax = plt.subplot(122, title='freqency')
    plt.axis('off')
    table(table_ax, ip_info,
          cellLoc='center',
          loc='center',
          rowLoc='center',
          colWidths=[0.2, 0.3, 0.3],
          fontsize=13)
def file_reader(request):
    """Render the technology pivot chart and return it embedded in the page.

    ``file_data`` is pivoted by ``'Technology'`` (summing, zero fill) and
    plotted with the chart kind returned by ``radio(request)``, with the
    pivot table drawn below the plot. The PNG is base64-encoded and passed
    to the ``upload/question.html`` template as ``graphic``.
    """
    chart_type = radio(request)
    pivot_data = file_data.pivot_table(index='Technology', aggfunc=[sum], fill_value=0)

    p = pivot_data.plot(kind=chart_type)
    fig = plt.gcf()
    try:
        plt.axis('off')
        tbl = table(p, pivot_data, loc='bottom')
        tbl.auto_set_font_size(False)
        tbl.set_fontsize(14)
        plt.tight_layout()
        print('\n')
        plt.title('Job Opening In Jan According To Technologies')
        plt.ylabel('No Of Opening')
        plt.xlabel('Technology Name')
        plt.legend().set_visible(False)

        with io.BytesIO() as buffer:
            plt.savefig(buffer, format='png')
            image_png = buffer.getvalue()
    finally:
        # Close the figure on every request. The old code never closed it
        # and also opened a second, unused figure after saving.
        plt.close(fig)

    graphic = base64.b64encode(image_png).decode('utf-8')
    return render(request, 'upload/question.html', {'graphic': graphic})
def globalSalesTrend(merged_data, result_path):
    """
    Saves the best-selling genre and its average yearly global sales
    (in millions) as ``most_selling_genre.png``, and a 3-by-4 grid of line
    graphs showing the global sales trend per game genre as
    ``global_sales_trend.png``.

    It takes a merged dataset, merged_data, and the output directory,
    result_path. Both files are written inside result_path, with or without
    a trailing path separator.
    """
    import os

    global_sales = merged_data[['Genre', 'Year_of_Release', 'Global_Sales']]
    sum_global_sales = global_sales.groupby(['Genre', 'Year_of_Release']).sum()
    ave_global_sales = sum_global_sales.groupby(['Genre']).mean()
    top_sales = ave_global_sales.Global_Sales.max()
    most_selling_genre = ave_global_sales[ave_global_sales.Global_Sales == top_sales]

    fig, ax = plt.subplots(figsize=(4, 1))
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    # Separate name for the Table so the DataFrame variable is not rebound.
    genre_table = table(ax, most_selling_genre, loc='center right',
                        colWidths=[0.75] * len(most_selling_genre.columns))
    genre_table.auto_set_font_size(False)
    genre_table.set_fontsize(10)
    # Both outputs are joined the same way. Previously one path was built as
    # ``result_path + name`` and the other as ``result_path + '/' + name``.
    plt.savefig(os.path.join(result_path, 'most_selling_genre.png'), transparent=True)

    fig, ax = plt.subplots(nrows=3, ncols=4, figsize=(25, 10))
    sum_global_sales.unstack('Genre').plot(subplots=True, ax=ax,
                                           xlim=[1995, 2016], ylim=[0, 300])
    plt.suptitle('Global Sales trend (in millions) from 1996 to 2015)', fontsize=20)
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    plt.savefig(os.path.join(result_path, 'global_sales_trend.png'), bbox_inches='tight')
def userRatingTrend(merged_data, result_path):
    """
    Saves the highest-rated genre and its average user score as
    ``highest_rating_genre.png``, and a 3-by-4 grid of line graphs showing
    the user rating trend per game genre as ``user_rating_trend.png``.

    It takes a merged dataset, merged_data, and the output directory,
    result_path. Both files are written inside result_path, with or without
    a trailing path separator.
    """
    import os

    user_ratings = merged_data[['Genre', 'Year_of_Release', 'User_Score']]
    ave_user_ratings = user_ratings.groupby(['Genre', 'Year_of_Release']).mean()
    genre_ratings = ave_user_ratings.groupby(['Genre']).mean()
    top_score = genre_ratings.User_Score.max()
    highest_rating_genre = genre_ratings[genre_ratings.User_Score == top_score]

    fig, ax = plt.subplots(figsize=(4, 1))
    ax.xaxis.set_visible(False)
    ax.yaxis.set_visible(False)
    # Separate name for the Table so the DataFrame variable is not rebound.
    genre_table = table(ax, highest_rating_genre, loc='center right',
                        colWidths=[0.6] * len(highest_rating_genre.columns))
    genre_table.auto_set_font_size(False)
    genre_table.set_fontsize(10)
    # os.path.join keeps the files inside ``result_path`` even when the
    # caller omits the trailing separator that plain concatenation required.
    plt.savefig(os.path.join(result_path, 'highest_rating_genre.png'), transparent=True)

    fig, ax = plt.subplots(nrows=3, ncols=4, figsize=(25, 10))
    ave_user_ratings.unstack('Genre').plot(subplots=True, ax=ax,
                                           xlim=[1995, 2016], ylim=[1, 10])
    plt.suptitle('User Ratings (1-10) trend from 1996 to 2015', fontsize=20)
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    plt.savefig(os.path.join(result_path, 'user_rating_trend.png'), bbox_inches='tight')
def est_jug(df, jug):
    """
    Build a table comparing a player's statistics with the team average
    in points, rebounds and assists.

    The rows of ``df`` whose ``nombre`` equals ``jug`` are followed by one
    ``"MEDIA EQUIPO"`` row holding the mean ``pts``/``reb``/``ast`` (rounded
    to two decimals) of all players sharing the player's
    ``iniciales_equipo``. The table is saved to
    ``output/graficos/est_jug_equipo.png``.

    :param df: DataFrame with ``nombre``, ``iniciales_equipo``, ``pts``,
        ``reb`` and ``ast`` columns
    :param jug: player name to look up
    :raises IndexError: if no row matches ``jug``
    """
    columns = ['nombre', 'iniciales_equipo', 'pts', 'reb', 'ast']

    # Reset the index first so the row appended below can never collide with
    # the player's original label. The old ``jugador.loc[1] = ...`` replaced
    # the player's own row whenever that label was 1.
    jugador = df.loc[df.nombre == jug, columns].reset_index(drop=True)
    team = jugador.iniciales_equipo.iloc[0]
    equip = df.loc[df.iniciales_equipo == team, columns]

    jugador.loc[len(jugador)] = [
        "MEDIA EQUIPO",
        team,
        round(equip.pts.mean(), 2),
        round(equip.reb.mean(), 2),
        round(equip.ast.mean(), 2),
    ]

    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        ax.xaxis.set_visible(False)
        ax.yaxis.set_visible(False)
        ax.set_frame_on(False)
        tab = table(ax, jugador, loc='center')
        tab.auto_set_font_size(False)
        tab.set_fontsize(9)
        fig.savefig('output/graficos/est_jug_equipo.png')
    finally:
        plt.close(fig)
def apdex(df):
    """Print, plot and save a per-data-center request breakdown.

    Adds a ``total_requests`` column to ``df`` in place (``fast + slow +
    fail``) and prints the frame as a grid. The left half of the figure is a
    pie chart of ``total_requests`` labelled by ``data_center``; the right
    half is a table of the counts. The figure is written to ``apdex.png``
    and then shown.

    :param df: DataFrame with ``data_center``, ``fast``, ``slow`` and
        ``fail`` columns; it is modified in place
    """
    df['total_requests'] = df['fast'] + df['slow'] + df['fail']
    print(tabulate(df, headers='keys', tablefmt="grid"))

    # Create only the two axes that are used. ``plt.subplots()`` also made a
    # full-figure axes that was left in the figure behind the two subplots.
    fig = plt.figure(figsize=(16, 8))

    ax1 = fig.add_subplot(121, aspect='equal')
    df.plot(kind='pie', y='total_requests', ax=ax1, autopct='%1.1f%%',
            startangle=90, shadow=False, labels=df['data_center'],
            legend=False, fontsize=14)

    # plot table
    ax2 = fig.add_subplot(122)
    ax2.axis('off')
    tbl = table(ax2, df[['data_center', 'total_requests', 'fast', 'slow', 'fail']],
                loc='center')
    tbl.scale(1.5, 1.5)
    tbl.auto_set_font_size(False)
    tbl.set_fontsize(14)

    # Save before showing, so the file does not depend on what the backend
    # does to the figure once the window is closed.
    fig.savefig('apdex.png')
    plt.show()
def create_table_of_data(data, sig=False, index='identifier', save_name=None,
                         plot=False, write_latex=False):
    """
    Creates a summary table of data.

    Counts the unique non-null ``index`` values per ``exp_method`` (rows)
    and ``sample_id`` (columns). Missing counts are shown as ``'-'``, and a
    ``'Total Unique Across'`` column gives the number of unique values per
    experimental method.

    Parameters
    ----------
    data : ExperimentalData
    sig: bool
        Flag to summarize significant species only
    save_name: None, str
        Name to save csv and .tex file
    index: str
        Index to create counts
    plot: bool
        If you want to create a plot of the table
    write_latex: bool
        Create latex file of table

    Returns
    -------
    pandas.DataFrame

    """
    species = data.species.sig.copy() if sig else data.species.copy()

    count_table = species.pivot_table(
        values=index,
        index=exp_method,
        columns=sample_id,
        fill_value=np.nan,
        aggfunc=lambda x: x.dropna().nunique(),
    )

    # Show counts as ints rather than floats, with '-' where a cell is empty.
    for column in count_table.columns:
        as_int = count_table[column].fillna(-1).astype(int)
        count_table[column] = as_int.replace(-1, '-')

    def _n_unique(method):
        subset = data[method].sig if sig else data[method]
        return len(set(subset[index].values))

    unique_col = {method: _n_unique(method) for method in data.exp_methods}
    count_table['Total Unique Across'] = pd.Series(unique_col, index=count_table.index)

    has_name = save_name is not None

    if plot:
        ax = plt.subplot(111, frame_on=False)
        table(ax, count_table, loc='center')
        for hidden_axis in (ax.xaxis, ax.yaxis):
            hidden_axis.set_visible(False)
        plt.tight_layout()
        if has_name:
            plt.savefig('{}.png'.format(save_name), dpi=300, bbox_inches='tight')

    if has_name:
        count_table.to_csv('{}.csv'.format(save_name))
        if write_latex:
            _write_to_latex(pd_table=count_table, save_name=save_name)

    return count_table