def production_envelope(self, dataframe, grid=None, width=None, height=None, title=None, points=None,
                        points_colors=None, palette=None, x_axis_label=None, y_axis_label=None):
    """Build a production-envelope area plot from ``dataframe``.

    Args:
        dataframe: Table exposing a ``strain`` attribute (one colour is
            requested per unique strain) and mapped through the ``lb``,
            ``ub`` and ``value`` columns.
        grid, height, points, points_colors: Accepted for interface
            compatibility; not used by this implementation.
        width: Resolved from ``self.get_option('width')`` when ``None``; the
            value is not applied to the plot here.
        title: Optional plot title.
        palette: Palette handed to ``self._palette``; defaults to
            ``self.get_option('palette')``.
        x_axis_label: Optional name for the x axis.
        y_axis_label: Optional name for the y axis.

    Returns:
        The composed plot object.
    """
    palette = self.get_option('palette') if palette is None else palette
    # Resolved like in the original code path; the value is not used below.
    width = self.get_option('width') if width is None else width
    colors = self._palette(palette, len(dataframe.strain.unique()))

    # NOTE(review): the plot is rooted at ``aes(data=...)`` rather than at a
    # ``ggplot(...)`` object - confirm this composes with the installed ggplot.
    plot = aes(data=dataframe, ymin="lb", ymax="ub", x="value",
               color=scale_colour_manual(colors)) + geom_area()

    if title:
        # A title is attached with ggtitle(); geom_tile() is a heat-map layer
        # and was being handed the title text as if it were a mapping.
        plot += ggtitle(title)
    if x_axis_label:
        plot += scale_x_continuous(name=x_axis_label)
    if y_axis_label:
        plot += scale_y_continuous(name=y_axis_label)

    return plot
def histogram(self, dataframe, bins=100, width=None, height=None, palette=None, title='Histogram', values=None,
              groups=None, legend=True):
    """Build a grouped histogram of the ``values`` column of ``dataframe``.

    Args:
        dataframe: Data handed to ``ggplot``.
        bins: Passed to ``geom_histogram`` as ``breaks``.
        width, height, legend: Accepted for interface compatibility; unused.
        palette: Palette handed to ``self._palette``; when ``None`` the
            ``'palette'`` entry of ``self.__default_options__`` is used.
        title: Plot title.
        values: Column mapped to x; also interpolated into the y axis name.
        groups: Column mapped to both fill and colour.

    Returns:
        The composed plot object.
    """
    if palette is None:
        palette = self.__default_options__.get('palette', None)

    mapping = aes(x=values, fill=groups, color=groups)
    chart = ggplot(dataframe, mapping)
    chart = chart + geom_histogram(alpha=0.6, breaks=bins, position="fill")
    # whatever self._palette returns is added to the plot like any other layer
    chart = chart + self._palette(palette)
    chart = chart + ggtitle(title)
    chart = chart + scale_y_continuous(name="Count (%s)" % values)
    return chart
def signature_data_plot(sd):
    """Return a scatter plot of ``not_exp`` against ``set_exp`` from ``sd``.

    Points are coloured by the ``pearson_r`` column on a yellow-to-red
    gradient. Log scales and explicit continuous limits are added for both
    axes, in the same order as before.
    """
    import ggplot as gg

    mapping = gg.aes(x='set_exp', y='not_exp', color='pearson_r')

    figure = gg.ggplot(mapping, data=sd)
    figure = figure + gg.geom_point(size=15)
    figure = figure + gg.scale_color_gradient(low='yellow', high='red')
    # x axis: log scale followed by explicit limits
    figure = figure + gg.scale_x_log()
    figure = figure + gg.scale_x_continuous(limits=(0.5, 10000))
    # y axis: log scale followed by explicit limits
    figure = figure + gg.scale_y_log()
    figure = figure + gg.scale_y_continuous(limits=(0.05, 10000))
    return figure
def scatter(self, dataframe, x=None, y=None, width=None, height=None, color=None, title='Scatter',
            xaxis_label=None, yaxis_label=None):
    """Build a scatter plot of column ``y`` against column ``x``.

    Args:
        dataframe: Data handed to ``ggplot``.
        x: Column mapped to the x axis.
        y: Column mapped to the y axis.
        width: Resolved from the ``'width'`` default option when ``None``;
            the value is not applied to the plot here.
        height: Accepted for interface compatibility; unused.
        color: Point colour; when ``None`` the ``'palette'`` entry of
            ``self.__default_options__`` is used.
        title: Plot title.
        xaxis_label: Optional name for the x axis.
        yaxis_label: Optional name for the y axis.

    Returns:
        The composed plot object.
    """
    color = self.__default_options__.get('palette', None) if color is None else color
    width = self.__default_options__.get('width', None) if width is None else width

    plot = ggplot(dataframe, aes(x, y)) + geom_point(color=color, alpha=0.6) + ggtitle(title)

    if xaxis_label:
        plot += scale_x_continuous(name=xaxis_label)
    if yaxis_label:
        # The y axis must carry its own label; it used to receive xaxis_label.
        plot += scale_y_continuous(name=yaxis_label)

    return plot
def histogram(self, dataframe, bins=100, width=None, height=None, palette=None, title='Histogram', values=None,
              groups=None, legend=True):
    """Return a histogram of ``values`` split by ``groups``.

    ``palette`` falls back to the ``'palette'`` entry of
    ``self.__default_options__`` when it is ``None``. ``bins`` is forwarded to
    ``geom_histogram`` as ``breaks``. ``width``, ``height`` and ``legend`` are
    accepted but not used.
    """
    if palette is None:
        palette = self.__default_options__.get('palette', None)

    y_axis_name = "Count (%s)" % values

    figure = ggplot(dataframe, aes(x=values, fill=groups, color=groups))
    figure = figure + geom_histogram(alpha=0.6, breaks=bins, position="fill")
    # the object returned by self._palette is added as a layer of the plot
    figure = figure + self._palette(palette)
    figure = figure + ggtitle(title)
    figure = figure + scale_y_continuous(name=y_axis_name)
    return figure
def _plot_and_save_local_ancestry(df, kmer, image_filename, num_chromosomes, id_vars, x_axis, y_scale):
    """Plot per-chromosome ancestry as rows of points and save the image.

    ``df`` is melted to long form (``id_vars`` are kept, the remaining column
    names go to ``chromosome`` and their values to ``estimated_ancestry``).
    The names ``test_<i>`` / ``true_<i>`` for ``i`` in
    ``1..num_chromosomes`` are then replaced by numeric y positions so each
    pair is drawn on adjacent rows. ``kmer`` is accepted but not used.
    """
    print('saving plot as: {}'.format(image_filename))

    track_column = 'chromosome'
    long_df = pd.melt(df, id_vars=id_vars, var_name=track_column, value_name='estimated_ancestry')

    # y position of every track: the "test" row sits y_scale below the "true" row
    y_positions = {}
    for chrom in range(1, num_chromosomes + 1):
        for prefix, weight in (('test', 2), ('true', 1)):
            y_positions['{}_{}'.format(prefix, chrom)] = 2*chrom - weight * y_scale

    for track_name, position in y_positions.items():
        long_df.replace(track_name, position, inplace=True)

    figure = ggplot.ggplot(ggplot.aes(x=x_axis, y=track_column, color='estimated_ancestry'), data=long_df)
    figure = figure + ggplot.geom_point()
    # label each numeric row with the track name it replaced
    figure = figure + ggplot.scale_y_continuous(labels=list(y_positions.keys()),
                                                breaks=list(y_positions.values()))
    figure = figure + ggplot.scale_color_manual(values=['#FF0000', '#0000FF', '#73008C'])
    # TODO: this should depend on scale
    figure = figure + ggplot.theme(plot_margin={'top':0.7, 'bottom':0.3})

    figure.save(image_filename)
def scatter(self, dataframe, x=None, y=None, width=None, height=None, color=None, title='Scatter',
            xaxis_label=None, yaxis_label=None, label=None):
    """Return a scatter plot of column ``y`` against column ``x``.

    Args:
        dataframe: Data handed to ``ggplot``.
        x: Column mapped to the x axis.
        y: Column mapped to the y axis.
        width: Resolved from the ``'width'`` default option when ``None``;
            the value is not applied to the plot here.
        height, label: Accepted for interface compatibility; unused.
        color: Point colour; when ``None`` the ``'palette'`` entry of
            ``self.__default_options__`` is used.
        title: Plot title.
        xaxis_label: Optional name for the x axis.
        yaxis_label: Optional name for the y axis.

    Returns:
        The composed plot object.
    """
    if color is None:
        color = self.__default_options__.get('palette', None)
    if width is None:
        width = self.__default_options__.get('width', None)

    gg = ggplot(dataframe, aes(x, y)) + geom_point(color=color, alpha=0.6) + ggtitle(title)

    if xaxis_label:
        gg += scale_x_continuous(name=xaxis_label)
    if yaxis_label:
        # Name the y axis with yaxis_label; the x label was used here by mistake.
        gg += scale_y_continuous(name=yaxis_label)

    return gg
p += ggtitle("sarimax coefficient magnitude distribution") p += facet_wrap("feature", ncol=3, scales="free") p += labs(x=" ", y=" ") # visuals t = theme_gray() t._rcParams['font.size'] = 10 t._rcParams['font.family'] = 'monospace' p += t p.save("arima_1/" + "histogram.png") # boxplot p = ggplot(aes(x='variable', y='value'), data=master_df) p += geom_boxplot() p += scale_y_continuous(limits=(-25, 25)) p += ggtitle("sarimax coefficient magnitudes") p += facet_wrap("feature", ncol=3) p += labs(x=" ", y=" ") # visuals t = theme_gray() t._rcParams['font.size'] = 10 t._rcParams['font.family'] = 'monospace' p += t p.save("arima_1/" + "boxplot.png") for feature in [ "home_goal", "away_goal", "home_yellow", "away_yellow", "home_red", "away_red"
# Generate a new dataframe with the analyses performed per window, then plot
# the per-window maximum minor-variant frequency and nucleotide diversity (Pi).
# NOTE(review): the explicit ``== True`` is kept so that only a literal True
# (or 1) enables plotting, exactly as before.
if options.graphics == True:
    # print() with one parenthesised argument behaves the same on Python 2 and 3.
    print("Analysing by "+ str(windowsize) +"sliding windows and generating plots")

    # Unique window ids in ascending order; computed once instead of three times.
    sorted_windows = sorted(set(vcfdf['window']))
    # Each helper still receives its own list, as it did before.
    windowed_df = pd.DataFrame({'window': sorted_windows,
                                'MaxMinor': windowMax(list(sorted_windows)),
                                'Pi': windowPi(list(sorted_windows))})

    # x axis label shared by all three plots
    position_label = "Genome Position (bp; windowsize="+ str(windowsize) +")"

    # Maximum minor variant frequency along the genome
    p_MaxMinor = (gg.ggplot(gg.aes('window', 'MaxMinor'), data=windowed_df)
                  + gg.geom_point()
                  + gg.theme_bw()
                  + gg.labs(x=position_label, y="Minor Variant Frequency (%)")
                  + gg.ggtitle(vcfoutput + "\n Valid Minor Variant Sites :" + str(len(minorvar))))

    # Nucleotide diversity (Pi) along the genome; the y axis starts at 0 and
    # ends just above the largest observed Pi.
    p_pi = (gg.ggplot(gg.aes('window', 'Pi'), data=windowed_df)
            + gg.geom_point()
            + gg.theme_bw()
            + gg.labs(x=position_label, y="Mean nucleotide diversity (" + u"\u03c0" +")")
            + gg.scale_y_continuous(expand=(0,0), limits=(0, windowed_df['Pi'].max(axis=0)+0.001))
            + gg.ggtitle(vcfoutput + "\n Genome-wide Mean Nucleotide Diversity (" +u"\u03c0"+ ") :" +str(round(gw_Pi,6))))

    # Facetted plot of both measures (y axis labels are still not sorted)
    windowed_df_melt = pd.melt(windowed_df, id_vars=['window'])
    p_combi = gg.ggplot(gg.aes('window', 'value', colour='variable'), data=windowed_df_melt)
    p_combi = (p_combi
               + gg.geom_point(colour='variable')
               + gg.facet_grid('variable', scales='free_y')
               + gg.theme_bw()
               + gg.labs(x=position_label))

    # Write the graphs to .png files next to the input
    p_combi.save(vcfinput + ".MinorVar_combo.png")
    p_MaxMinor.save(vcfinput + ".MinorVar.png")
    p_pi.save(vcfinput + ".Pi-diversity.png")
def new_plot_ancestry_with_correct_results(test, true, y_scale=0.5, image_filename=None):
    """Plot estimated ancestry next to the true ancestry, one row pair per column.

    Every column of ``test`` that is not in the ignore list below is treated
    as an ancestry column and must also be present in ``true``. The two
    frames are merged on ``POS`` as ``<name>_test`` / ``<name>_true`` columns,
    melted to long form, and each of those names is replaced by a numeric y
    position so that the pair is drawn on adjacent rows.

    Args:
        test: DataFrame with a ``POS`` column and the estimated ancestry columns.
        true: DataFrame holding the same ancestry columns with the true values.
        y_scale: Vertical offset factor between the rows of a pair.
        image_filename: Where to save the plot; when ``None`` the plot is
            shown instead.

    Raises:
        KeyError: If an ancestry column of ``test`` is missing from ``true``.
    """
    # we want only 'POS' and the ancestry columns
    non_ancestry_columns = ['POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
    sample_ids = [name for name in test.columns if name not in non_ancestry_columns]

    side_by_side = pd.DataFrame(test['POS'])
    for sample_id in sample_ids:
        if sample_id not in true:
            raise KeyError('true ancestry dataframe is missing ancestry for id: {}'.format(sample_id))
        side_by_side[sample_id+'_test'] = test[sample_id]
        side_by_side[sample_id+'_true'] = true[sample_id]

    # Wide to long, e.g. from
    #
    #   POS  sample1_test  sample1_true  sample2_test  sample2_true
    #   111  pop1          pop1          pop2          pop1
    #   124  pop1          pop1          pop2          pop1
    #
    # to
    #
    #   POS  chromosome    ancestry
    #   111  sample1_test  pop1
    #   124  sample1_test  pop1
    #   111  sample1_true  pop1
    #   124  sample1_true  pop1
    #   111  sample2_test  pop2
    #   ...
    long_form = pd.melt(side_by_side, id_vars=['POS'], var_name='chromosome', value_name='ancestry')

    # numeric y position of every "<name>_test" / "<name>_true" row
    row_positions = {}
    for index, sample_id in enumerate(sample_ids):
        row_positions[sample_id+'_test'] = 2*index - 2 * y_scale
        row_positions[sample_id+'_true'] = 2*index - 1 * y_scale

    # Swap the row names in the long table for their numeric positions, so the
    # 'chromosome' column holds numbers instead of names from here on.
    for row_name, position in row_positions.items():
        long_form.replace(row_name, position, inplace=True)

    figure = ggplot.ggplot(ggplot.aes(x='POS', y='chromosome', color='ancestry'), data=long_form)
    figure = figure + ggplot.geom_point()
    figure = figure + ggplot.scale_y_continuous(labels=list(row_positions.keys()),
                                                breaks=list(row_positions.values()))
    figure = figure + ggplot.scale_color_manual(values=['#FF0000', '#0000FF', '#73008C'])
    # TODO: this should depend on scale
    figure = figure + ggplot.theme(plot_margin={'top':0.7, 'bottom':0.3})

    if image_filename is None:
        figure.show()
    else:
        figure.save(image_filename)
def _firms_plot_path(stem, decision, title_pop_val):
    """Return the output PNG path for the plot identified by *stem*."""
    file_name = '%s_%s_%s_%s.png' % (stem, decision, title_pop_val, parameters.final_Time)
    return os.path.join(parameters.OUTPUT_PATH, file_name)


def _remove_stale_plot(path):
    """Delete a previously generated plot file at *path*, if one exists."""
    if os.path.isfile(path):
        os.remove(path)


def _save_firms_line_plot(data, column, title, y_label, path, x_breaks, x_labels, width, height):
    """Draw *column* against ``time`` as a ggplot line chart and save it to *path*."""
    plot = (gg.ggplot(data, gg.aes('time', column)) + gg.geom_line()
            + gg.scale_y_continuous(breaks=11)
            + gg.scale_x_discrete(breaks=x_breaks, labels=x_labels)
            + gg.ggtitle(title) + gg.xlab('Years') + gg.ylab(y_label) + gg.theme_bw())
    _remove_stale_plot(path)
    gg.ggsave(plot, path, width=width, height=height, units="in")


def _save_firms_comparison_plot(data, columns, title, y_label, path, x_ticks, x_tick_labels,
                                width, height, dpi):
    """Plot *columns* on shared axes with pandas and save the figure to *path*."""
    axes = data[columns].plot(title=title)
    axes.set_xlabel('Years')
    axes.set_ylabel(y_label)
    # legend outside the axes, vertically centred on the right edge
    axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    axes.set_xticks(x_ticks)
    axes.set_xticklabels(x_tick_labels)
    # set_axis_bgcolor() no longer exists in current matplotlib; prefer
    # set_facecolor() and fall back to the old name on old installations.
    if hasattr(axes, 'set_facecolor'):
        axes.set_facecolor('w')
    else:
        axes.set_axis_bgcolor('w')
    figure = axes.get_figure()
    figure.set_size_inches(width, height)
    _remove_stale_plot(path)
    figure.savefig(path, dpi=dpi)


def firms_dynamics_plot(decision):
    """Read the general firms output for *decision* and save its time-series plots.

    The input is the comma-separated file
    ``temp_general_firms_pop_<pop_redutor>_decision_<decision>_time_<final_Time>.txt``
    in ``parameters.OUTPUT_PATH``. When ``parameters.time_to_cut_plots`` is
    positive, rows with an earlier ``time`` are dropped. Six line charts are
    saved through ggplot and two two-series comparison charts through
    pandas/matplotlib, all as PNG files in ``parameters.OUTPUT_PATH``. An
    existing file with the same name is removed before saving.

    Args:
        decision: Value interpolated into the input and output file names.
    """
    input_path = os.path.join(
        parameters.OUTPUT_PATH,
        "temp_general_firms_pop_%s_decision_%s_time_%s.txt"
        % (parameters.pop_redutor, decision, parameters.final_Time))
    data = pd.read_csv(input_path, sep=",", header=None, decimal=",").astype(float)

    # the file has no header row, so name the columns here
    data.columns = [
        'time', 'total_firms', 'average_output', 'average_age', 'average_size',
        'new_firms', 'exit_firms', 'max_size', 'total_effort', 'average_effort'
    ]

    # optionally discard the initial (burn-in) periods
    if parameters.time_to_cut_plots > 0:
        data = data.loc[data['time'].astype(int) >= parameters.time_to_cut_plots, :]

    # value used in the output file names
    title_pop_val = float(parameters.pop_redutor) * 100

    # one x tick every 12 time steps, plus a closing tick one step past the
    # last observation; the labels are the tick positions divided by 12
    list_of_years_division = list(
        range(int(data['time'].min()), int(data['time'].max()), 12)) + [data['time'].max() + 1]
    list_of_years = [int(i / 12) for i in list_of_years_division]

    # graph parameters
    dpi_var_plot = 700
    width_var_plot = 15
    height_var_plot = 10

    # (column, file name stem, title, y axis label) of every ggplot line chart
    line_plots = (
        ('total_firms', 'temp_general_total_firms', 'Total firms', 'Total of Firms'),
        ('average_output', 'temp_general_average_output', 'Average of output', 'Units'),
        ('average_age', 'temp_general_average_age', 'Average of age of firms', 'Age of Firms'),
        ('average_size', 'temp_general_average_size', 'Average of size of firms', 'Units'),
        ('new_firms', 'temp_general_number_of_new_firms', 'Number of new firms', 'Units'),
        ('exit_firms', 'temp_general_number_of_firms_out', 'Number of firms out', 'Units'),
    )
    for column, stem, title, y_label in line_plots:
        _save_firms_line_plot(
            data, column, title, y_label,
            _firms_plot_path(stem, decision, title_pop_val),
            list_of_years_division, list_of_years,
            width_var_plot, height_var_plot)

    # (columns, file name stem, title, y axis label) of every pandas chart.
    # NOTE(review): the first chart is titled "maximum effort" but plots the
    # 'total_effort' column - confirm which one is intended.
    comparison_plots = (
        (['average_effort', 'total_effort'], 'temp_average_and_maximum_effort_of_firms_out',
         'Average and maximum effort of employees', 'Values units of effort'),
        (['average_size', 'max_size'], 'temp_average_size_and_maximum_of_firms_out',
         'Average and maximum size firms', 'Number of employees'),
    )
    for columns, stem, title, y_label in comparison_plots:
        _save_firms_comparison_plot(
            data, columns, title, y_label,
            _firms_plot_path(stem, decision, title_pop_val),
            list_of_years_division, list_of_years,
            width_var_plot, height_var_plot, dpi_var_plot)
def plot_vol(dates, x, cp, my_domain):
    """Build a plot of message volume over time with change points and segment means.

    Args:
        dates: Indexable sequence of objects providing ``toordinal()``, aligned
            with ``x``.
        x: NumPy array of volumes (``x.shape[0]`` is read).
        cp: Sequence of change-point records. Index 0 of a record is read as a
            1-based position into ``dates`` and index 2 as the value drawn for
            that segment.
        my_domain: Name interpolated into the plot title.

    Returns:
        The composed plot object.
    """
    # -------------------- Prepare for Plotting -------------------------- #
    # One legend label per observation, giving rows of [date, volume, 'Volume'].
    volume_labels = ['Volume'] * x.shape[0]
    ndf_vol = np.transpose(np.array([dates, x, volume_labels]))
    df_vol = pd.DataFrame(ndf_vol, columns=['Date', 'Volume', 'Data'])

    xmin_list = []      # left end of each mean segment (date ordinal)
    xmax_list = []      # right end of each mean segment (date ordinal)
    yint_list = []      # value drawn as the segment mean
    cp_date_list = []   # date of each change point
    cp_value_list = []  # value drawn at each change point

    ref_idx = 0  # first index of the segment currently being described
    for change_point in cp:
        cp_idx = change_point[0] - 1  # -1 because the position is 1-indexed
        # ordinals are used so the coordinates match what ggplot draws for dates
        xmin_list.append(dates[ref_idx].toordinal())
        xmax_list.append(dates[cp_idx].toordinal())
        yint_list.append(change_point[2])
        cp_date_list.append(dates[cp_idx])
        cp_value_list.append(change_point[2])
        ref_idx = cp_idx + 1  # the next segment starts right after this change point

    # Rows for the change points, plus one dummy row whose only purpose is to
    # put 'Volume Mean' into the legend. The dummy values are added to copies,
    # so the lists used by the geoms below stay untouched.
    cp_lbl = ['Change Point'] * len(yint_list) + ['Volume Mean']
    ndf_cp = np.transpose(np.array([cp_date_list + [dates[0]],
                                    yint_list + [x[0]],
                                    cp_lbl]))
    df_cp = pd.DataFrame(ndf_cp, columns=['Date', 'Volume', 'Data'])
    df_plot = pd.concat((df_vol, df_cp), axis=0)

    # head-room above (and a little below) the data on the y axis
    margin = 0.10 * (np.max(x) - np.min(x))
    p = ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot) + \
        ggplot.geom_line(color='blue', size=2) + \
        ggplot.geom_point(x=xmax_list, y=cp_value_list, color='black',
                          shape='D', size=50) + \
        ggplot.geom_hline(xmin=xmin_list,
                          xmax=xmax_list,
                          yintercept=yint_list, color="red", size=3) + \
        ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week") + \
        ggplot.scale_colour_manual(values=["black", "blue", "red"]) + \
        ggplot.scale_y_continuous(labels='comma') + \
        ggplot.ylim(low=np.min(x)-margin/4.0, high=np.max(x)+margin) + \
        ggplot.xlab("Week (Marked on Mondays)") + \
        ggplot.ylab("Message Vol") + \
        ggplot.ggtitle("%s\nMessage Volume by Week" % my_domain) + \
        ggplot.theme_seaborn()
    return p
def _read_track_file(filename):
    """Parse *filename* with the first track reader that accepts it."""
    try:
        return read_mtrackj_mdf(filename)
    except ValueError:
        pass
    try:
        return read_mtrack2(filename)
    except Exception:
        # Deliberate best-effort fallback: any failure of the MTrack2 reader
        # means the file is handed to the manual-track reader instead.
        return read_manual_track(filename)


def main():
    """Command-line entry point: centre tracks, draw displacement plots, print stats.

    For every input file the tracks are read, centred and written to
    ``<file>.centered``. Unless ``--no-plots`` is given, a displacement plot
    is saved as ``<file>.<imagetype>``. Per-object statistics for all files
    are then written to stdout as CSV, preceded by comment lines describing
    the run. With ``--summary`` a per-file summary is written to that path.
    """
    parser = argparse.ArgumentParser(description="Draws displacement plots.",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--limits', type=int,
                        help="Maximum extent of the axes")
    parser.add_argument('--no-plots', action='store_true',
                        help="Don't save plots")
    parser.add_argument('--summary',
                        help='Save summary stats by file')
    parser.add_argument('--imagetype', '-i', default='png',
                        help="Extension to use for plots")
    parser.add_argument('--pixels-per-micron', '--pixels', '-p', default=1.51, type=float,
                        help="Pixels per µm (length scale of tracked images)")
    parser.add_argument('--minutes-per-frame', '--minutes', '-m', default=10, type=float,
                        help="Minutes between each frame of the tracked images")
    parser.add_argument('--plot-titles', type=argparse.FileType('r'),
                        help="CSV file with filename and title columns")
    parser.add_argument('--style', action='append', default=[],
                        choices=['theme-bw', 'no-terminal-dot'],
                        help='Change style options for the plot.')
    parser.add_argument('--tick-breaks', '--ticks', '-t', nargs=3, type=int,
                        metavar=('start', 'end', 'step'),
                        help="Beginning and end tick breaks on displacement plots")
    parser.add_argument('--plot-text', type=int, default=8,
                        help='Plot text size (pt)')
    parser.add_argument('--plot-height', type=float, default=1.81,
                        help='Plot height (in)')
    parser.add_argument('--plot-width', type=float, default=2.5,
                        help='Plot width (in)')
    parser.add_argument('infile', nargs='+', help="File(s) to process.")
    args = parser.parse_args()

    # every selected style option becomes a True flag
    style = {argument: True for argument in args.style}

    plot_titles = pd.read_csv(args.plot_titles, index_col="filename") if args.plot_titles else None

    all_dfs = []
    for filename in args.infile:
        df = _read_track_file(filename)
        centered = center(df)
        centered.to_csv(filename + '.centered')

        if not args.no_plots:
            g = displacement_plot(centered, limits=args.limits, style=style)
            g += gg.theme(axis_text=gg.element_text(size=args.plot_text))
            g += gg.labs(x='px', y='px')
            if args.tick_breaks:
                g += gg.scale_x_continuous(breaks=range(*args.tick_breaks))
                g += gg.scale_y_continuous(breaks=range(*args.tick_breaks))
            if plot_titles is not None and filename in plot_titles.index:
                # label-based lookup; DataFrame.ix no longer exists in pandas >= 1.0
                g += gg.labs(title=plot_titles.loc[filename, 'title'])
            g.save('{}.{}'.format(filename, args.imagetype),
                   width=args.plot_width, height=args.plot_height)

        # tag the rows after the .centered file is written, so the extra
        # column only appears in the combined table
        centered['filename'] = filename
        all_dfs.append(centered)

    mega_df = pd.concat(all_dfs, ignore_index=True)

    def stats_for(group):
        """Apply ``stats`` with the length and time scales from the command line."""
        return stats(group,
                     length_scale=args.pixels_per_micron,
                     time_scale=args.minutes_per_frame)

    obj_stats = (mega_df.groupby('filename', sort=False)
                        .apply(stats_for)
                        .reset_index())
    summary_by_file = obj_stats.groupby('filename').apply(summary)
    if args.summary:
        summary_by_file.to_csv(args.summary, index=False)

    print("# Produced by {} at {}".format(' '.join(sys.argv), time.ctime()))
    print("# {} pixels per micron, {} minutes per frame".
          format(args.pixels_per_micron, args.minutes_per_frame))
    print("# distance units are microns; velocity units are microns/hour")
    obj_stats.to_csv(sys.stdout, index=False)