def bar_chart(self, conn, column1, column2, table_chosen, title):
    # Since this is a bar chart, only two columns are needed.
    data_df = dfile.double_selector(conn=conn, table=table_chosen,
                                    col1=column1, col2=column2)
    bar_plot = ggplot(aes(x=column1, weight=column2), data=data_df) + \
        geom_bar() + labs(title=title)
    print(bar_plot)
def plot_matches(df_in, date, filename_out,
                 x_var='date_time', y_var="shorthand_search_vol"):
    """
    Plot y-var and save based on specified variables.
    Assumes that df has already been filtered using dplyr's sift mechanism.
    Also assumes that a date has been passed in.
    """
    # basic data processing for viz
    df_in['date_time'] = date + " " + df_in['time'].astype(str)
    df_in['date_time'] = pd.to_datetime(df_in['date_time'], errors="coerce",
                                        infer_datetime_format=True)

    # build layers for plot
    p = ggplot(aes(x=x_var, y=y_var, group="match_id", color="match_id"),
               data=df_in)
    p += geom_line(size=2)

    # informative
    p += labs(x="time (gmt)", y="search volume (scaled to 100)")
    # p += ggtitle("man. city (h) vs. chelsea (a)\naug. 8 '16, etihad stadium")
    p += scale_x_date(labels=date_format("%H:%M:%S"), date_breaks="30 minutes")

    # visual
    t = theme_gray()
    t._rcParams['font.size'] = 8
    t._rcParams['font.family'] = 'monospace'
    p += t

    # done
    p.save(filename_out, width=16, height=8)
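# Usage sketch (illustrative only): the CSV path and pre-built columns below are
# assumptions; plot_matches() itself needs a frame with 'time', 'match_id' and
# 'shorthand_search_vol' columns, plus the date string it should prepend.
def _example_plot_matches():
    matches_df = pd.read_csv("search_volume_2016-08-08.csv")  # hypothetical input file
    plot_matches(matches_df, "2016-08-08", "matches_2016-08-08.png")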
def two_var_intr_effects(self, target, vars, nval=100, plot=True):
    """
    Loads first level interactions.

    Args:
        target - Variable identifier (column name or number) specifying the
                 target variable
        vars - List of variable identifiers (column names or numbers)
               specifying other selected variables. Must not contain target
        nval - Number of evaluation points used for calculation.
        plot - Determines whether or not to plot results.

    Returns:
        Pandas dataframe of interaction effects
    """
    # Check if null.models have already been generated
    check_str = """
    function(){
        if(exists("null.models")){
            return(T)
        } else {
            return(F)
        }
    }
    """
    if not robjects.r(check_str)()[0]:
        self.logger.info(
            'Null models not generated, generating null models (n=10)')
        self._generate_interaction_null_models(10, quiet=False)

    int_str = """
    function(target, vars, nval){
        interactions <- twovarint(tvar=target, vars=vars, null.models,
                                  nval=nval, plot=F)
    }
    """
    # Check the input type. If int, add one; if string, do nothing.
    target = target if type(target) is str else target + 1
    vars = [var if type(var) is str else var + 1 for var in vars]

    r_interact = robjects.r(int_str)(target,
                                     robjects.Vector(np.array(vars)), nval)
    interact = pd.DataFrame(
        {
            'interact_str': list(r_interact[0]),
            'exp_null_int': list(r_interact[1]),
            'std_null_int': list(r_interact[2])
        },
        index=vars)

    if plot:
        int_effects = interact.reset_index().rename(columns={'index': 'vars'})
        int_effects_m = pd.melt(int_effects, id_vars='vars',
                                value_vars=['interact_str', 'exp_null_int'])
        p = gg.ggplot(gg.aes(x='vars', fill='variable', weight='value'),
                      data=int_effects_m) \
            + gg.geom_bar() \
            + gg.labs(title='Two-var interaction effects - {}'.format(target))
        print(p)

    return interact
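# Usage sketch (illustrative only): `model` stands in for whatever R-backed
# wrapper object this method is defined on, and the column names are made up.
def _example_two_var_interactions(model):
    interactions = model.two_var_intr_effects(
        target="age", vars=["income", "education"], nval=100, plot=False)
    # strongest observed interactions first, alongside their null expectations
    return interactions.sort_values("interact_str", ascending=False)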
def plotAverageLatency(self):
    averages = [d.averageLatency() for d in self.data]
    dat = {"device": range(1, len(averages) + 1), "average": averages}
    dataframe = pandas.DataFrame(dat)
    chart = ggplot.ggplot(ggplot.aes(x="device", weight="average"), dataframe) \
        + ggplot.labs(title="Average Latency Per Device") \
        + ggplot.ylab("Average Latency (ms)") \
        + ggplot.xlab("Device Number") \
        + ggplot.geom_bar(stat="identity")
    chart.show()
def plot_timeline(scenes):
    # Plot character vs scene timeline.
    # NB: due to limitations in Python ggplot we need to plot with scene on the
    # y-axis in order to label x-ticks by character;
    # scale_x_continuous and scale_y_continuous behave slightly differently.
    print(gg.ggplot(gg.aes(y='scene', x='character_code'), data=scenes) +
          gg.geom_point() +
          gg.labs(x='Character', y='Scene') +
          gg.scale_x_continuous(
              labels=scenes['character'].cat.categories.values.tolist(),
              breaks=range(len(scenes['character'].cat.categories))) +
          gg.theme(axis_text_x=gg.element_text(angle=30, hjust=1, size=10)))
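# Usage sketch (illustrative only): plot_timeline() expects a categorical
# 'character' column, its integer codes in 'character_code', and a numeric
# 'scene' column; the sample rows below are made up (assumes pandas as pd).
def _example_plot_timeline():
    scenes = pd.DataFrame({
        'character': pd.Categorical(['ROMEO', 'JULIET', 'ROMEO']),
        'scene': [1, 1, 2],
    })
    scenes['character_code'] = scenes['character'].cat.codes
    plot_timeline(scenes)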
def boxplot(self, conn, column, table_chosen, title):
    a = datetime.datetime.now()  # start time, used by the elapsed-time print below
    data_df = dfile.single_selector(conn=conn, table=table_chosen,
                                    column=column)
    box_plot = ggplot(aes(x=column), data=data_df) + geom_boxplot() + \
        theme_gray() + labs(title=title)
    now = datetime.datetime.now()
    b = now
    print(b)
    print(b - a)
    print(box_plot)
def line_chart(self, conn, column1, column2, table_chosen, title):
    a = datetime.datetime.now()  # start time, used by the elapsed-time print below
    data_df = dfile.double_selector(conn=conn, table=table_chosen,
                                    col1=column1, col2=column2)
    line_plot = ggplot(aes(y=column2, x=column1), data=data_df) + geom_line() + \
        theme_gray() + labs(title=title)
    now = datetime.datetime.now()
    b = now
    print(b)
    print(b - a)
    print(line_plot)
def plot_predictions(date_times, actual_values, predictions, match_id,
                     feature_set_in, filename):
    """
    Plot actual vs. predicted search volume for a single match and save the
    figure. Assumes date_times, actual_values and predictions are aligned.
    """
    actual_df = pd.DataFrame()
    actual_df['date_time'] = pd.to_datetime(date_times, errors="coerce",
                                            infer_datetime_format=True)
    actual_df['search_vol'] = actual_values
    actual_df['match_id'] = "actual" + match_id

    predict_df = pd.DataFrame()
    predict_df['date_time'] = pd.to_datetime(date_times, errors="coerce",
                                             infer_datetime_format=True)
    predict_df['search_vol'] = list(predictions)
    predict_df['match_id'] = "predictedby_" + str(feature_set_in) + match_id

    plotting_df = pd.concat([actual_df, predict_df], axis=0, ignore_index=True)

    # build layers for plot
    p = ggplot(aes(x='date_time', y='search_vol', group="match_id",
                   color="match_id"), data=plotting_df)
    p += geom_line(size=2)

    # informative
    p += labs(x="time (gmt)", y="search volume (scaled to 100)")
    # p += ggtitle("man. city (h) vs. chelsea (a)\naug. 8 '16, etihad stadium")
    p += scale_x_date(labels=date_format("%H:%M:%S"), date_breaks="30 minutes")

    # visual
    t = theme_gray()
    t._rcParams['font.size'] = 8
    t._rcParams['font.family'] = 'monospace'
    p += t

    # done
    p.save(filename, width=16, height=8)
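# Usage sketch (illustrative only): the match id and feature-set label are made
# up; plot_predictions() just needs aligned timestamps, actuals and predictions.
def _example_plot_predictions(date_times, y_true, y_pred):
    plot_predictions(date_times, y_true, y_pred,
                     match_id="mci_che_2016-08-08",
                     feature_set_in="baseline",
                     filename="predictions_mci_che.png")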
def graph1(score_data):
    """
    Average score as time goes on;
    Creates and returns graph 1, a line graph.
    """
    date_column = score_data[0][find_time_stamp(score_data)]
    data = DataFrame(score_data[1:], columns=score_data[0])

    # Get all columns that are numerical questions so we know what to graph
    num_questions = data.select_dtypes(include=['int64']).columns.values

    # Melt data so that each question is in a separate row
    new_data = pd.melt(data, id_vars=date_column, value_vars=num_questions,
                       var_name="Question", value_name="Score")

    # Convert date string into an actual date type
    new_data[date_column] = pd.to_datetime(new_data[date_column],
                                           format="%m/%d/%Y")

    # Group all rows with the same date and question, then take the average.
    new_data = new_data.groupby([date_column, 'Question']).mean().reset_index()
    new_data['All'] = "Individual Questions"

    new_data2 = new_data.groupby(date_column).mean().reset_index()
    new_data2['Question'] = "All Questions"
    new_data2['All'] = "Average of All Questions"

    new_data = pd.concat([new_data, new_data2])
    new_data[date_column] = new_data[date_column].astype('int64')

    # Create time graph with separate lines for each question
    ret = ggplot.ggplot(ggplot.aes(x=date_column, y="Score", colour="Question"),
                        new_data) +\
        ggplot.geom_point() +\
        ggplot.geom_line() +\
        ggplot.facet_grid("All") +\
        ggplot.scale_x_continuous(labels=[""], breaks=0) +\
        ggplot.labs(x="Time", y="Average Question Score") +\
        ggplot.ggtitle("Question Scores Over Time")
    return ret
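# Usage sketch (illustrative only): graph1() takes raw rows with the header row
# first and integer question scores; the sample data and column names are made up.
def _example_graph1():
    score_data = [
        ["Timestamp", "Q1", "Q2"],
        ["01/05/2017", 4, 5],
        ["01/12/2017", 3, 4],
    ]
    print(graph1(score_data))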
def area_chart(self, conn, column1, column2, table_chosen, title):
    a = datetime.datetime.now()  # start time, used by the elapsed-time print below
    data_df = dfile.double_selector(conn=conn, table=table_chosen,
                                    col1=column1, col2=column2)
    ymin = float(input("Enter the minimum value that should be plotted: "))
    ymax = float(input("Enter the maximum value that should be plotted: "))
    area_plot = ggplot(aes(x=column2, ymin=ymin, ymax=ymax), data=data_df) + \
        geom_area() + theme_gray() + labs(title=title)
    now = datetime.datetime.now()
    b = now
    print(b)
    print(b - a)
    print(area_plot)
    }
    df = pd.DataFrame.from_dict(values_dict, orient='index')
    df = df.transpose()
    df = pd.melt(df)
    df['feature'] = feature
    dfs_to_concat.append(df)

master_df = pd.concat(dfs_to_concat)

# histogram
p = ggplot(aes(x='value', fill='variable', color='variable'), data=master_df)
p += geom_histogram(bins=25, alpha=0.5)
p += scale_x_continuous(limits=(-25, 25))
p += ggtitle("sarimax coefficient magnitude distribution")
p += facet_wrap("feature", ncol=3, scales="free")
p += labs(x=" ", y=" ")

# visuals
t = theme_gray()
t._rcParams['font.size'] = 10
t._rcParams['font.family'] = 'monospace'
p += t
p.save("arima_1/" + "histogram.png")

# boxplot
p = ggplot(aes(x='variable', y='value'), data=master_df)
p += geom_boxplot()
p += scale_y_continuous(limits=(-25, 25))
p += ggtitle("sarimax coefficient magnitudes")
p += facet_wrap("feature", ncol=3)
def point_chart(self, conn, column1, column2, table_chosen, title):
    data_df = dfile.double_selector(conn=conn, table=table_chosen,
                                    col1=column1, col2=column2)
    point_plot = ggplot(aes(x=column1, y=column2), data=data_df) + \
        geom_point() + theme_gray() + labs(title=title)
    print(point_plot)
def hist_chart(self, conn, column, table_chosen, title):
    data_df = dfile.single_selector(conn=conn, table=table_chosen,
                                    column=column)
    hist_plot = ggplot(aes(x=column), data=data_df) + geom_histogram() + \
        theme_gray() + labs(title=title)
    print(hist_plot)
                                      (vcfdf['TestBias'] == 'Pass') &
                                      (vcfdf['CHROM'] == reference)]['Pi']))
    return testwindows

# Generate new dataframe with analyses performed per window
if options.graphics == True:
    print("Analysing by " + str(windowsize) + " sliding windows and generating plots")
    windowed_df = pd.DataFrame({'window': sorted(list(set(vcfdf['window']))),
                                'MaxMinor': windowMax(sorted(list(set(vcfdf['window'])))),
                                'Pi': windowPi(sorted(list(set(vcfdf['window']))))})

    # Now try and plot graph
    p_MaxMinor = gg.ggplot(gg.aes('window', 'MaxMinor'), data=windowed_df) \
        + gg.geom_point() \
        + gg.theme_bw() \
        + gg.labs(x="Genome Position (bp; windowsize=" + str(windowsize) + ")",
                  y="Minor Variant Frequency (%)") \
        + gg.ggtitle(vcfoutput + "\n Valid Minor Variant Sites :" + str(len(minorvar)))

    # Plot Nucleotide Diversity (Pi) along genome
    p_pi = gg.ggplot(gg.aes('window', 'Pi'), data=windowed_df) \
        + gg.geom_point() \
        + gg.theme_bw() \
        + gg.labs(x="Genome Position (bp; windowsize=" + str(windowsize) + ")",
                  y="Mean nucleotide diversity (" + u"\u03c0" + ")") \
        + gg.scale_y_continuous(expand=(0, 0),
                                limits=(0, windowed_df['Pi'].max(axis=0) + 0.001)) \
        + gg.ggtitle(vcfoutput + "\n Genome-wide Mean Nucleotide Diversity ("
                     + u"\u03c0" + ") :" + str(round(gw_Pi, 6)))
    # p_pi

    # Facetted plot (still not sorted y axes labels yet)
    windowed_df_melt = pd.melt(windowed_df, id_vars=['window'])
    p_combi = gg.ggplot(gg.aes('window', 'value', colour='variable'),
                        data=windowed_df_melt)
    p_combi = p_combi + gg.geom_point(colour='variable') \
        + gg.facet_grid('variable', scales='free_y') \
        + gg.theme_bw() \
        + gg.labs(x="Genome Position (bp; windowsize=" + str(windowsize) + ")")

    # Print graphs to .png
    p_combi.save(vcfinput + ".MinorVar_combo.png")
    p_MaxMinor.save(vcfinput + ".MinorVar.png")
def density_chart(self, conn, column, table_chosen, title):
    data_df = dfile.single_selector(conn=conn, table=table_chosen,
                                    column=column)
    density_plot = ggplot(aes(x=column), data=data_df) + geom_density() + \
        theme_gray() + labs(title=title)
    print(density_plot)
def main():
    parser = argparse.ArgumentParser(
        description="Draws displacement plots.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--limits', type=int, help="Maximum extent of the axes")
    parser.add_argument('--no-plots', action='store_true', help="Don't save plots")
    parser.add_argument('--summary', help='Save summary stats by file')
    parser.add_argument('--imagetype', '-i', default='png',
                        help="Extension to use for plots")
    parser.add_argument('--pixels-per-micron', '--pixels', '-p', default=1.51,
                        type=float,
                        help="Pixels per µm (length scale of tracked images)")
    parser.add_argument('--minutes-per-frame', '--minutes', '-m', default=10,
                        type=float,
                        help="Minutes between each frame of the tracked images")
    parser.add_argument('--plot-titles', type=argparse.FileType('r'),
                        help="CSV file with filename and title columns")
    parser.add_argument('--style', action='append', default=[],
                        choices=['theme-bw', 'no-terminal-dot'],
                        help='Change style options for the plot.')
    parser.add_argument('--tick-breaks', '--ticks', '-t', nargs=3, type=int,
                        metavar=('start', 'end', 'step'),
                        help="Beginning and end tick breaks on displacement plots")
    parser.add_argument('--plot-text', type=int, default=8,
                        help='Plot text size (pt)')
    parser.add_argument('--plot-height', type=float, default=1.81,
                        help='Plot height (in)')
    parser.add_argument('--plot-width', type=float, default=2.5,
                        help='Plot width (in)')
    parser.add_argument('infile', nargs='+', help="File(s) to process.")
    args = parser.parse_args()

    style = {argument: True for argument in args.style}
    plot_titles = (pd.read_csv(args.plot_titles, index_col="filename")
                   if args.plot_titles else None)

    all_dfs = []
    for filename in args.infile:
        # there has to be a better pattern for this
        try:
            df = read_mtrackj_mdf(filename)
        except ValueError:
            try:
                df = read_mtrack2(filename)
            except Exception:
                df = read_manual_track(filename)
        centered = center(df)
        centered.to_csv(filename + '.centered')
        if not args.no_plots:
            g = displacement_plot(centered, limits=args.limits, style=style)
            g += gg.theme(axis_text=gg.element_text(size=args.plot_text))
            g += gg.labs(x='px', y='px')
            if args.tick_breaks:
                g += gg.scale_x_continuous(breaks=range(*args.tick_breaks))
                g += gg.scale_y_continuous(breaks=range(*args.tick_breaks))
            if plot_titles is not None and filename in plot_titles.index:
                g += gg.labs(title=plot_titles.ix[filename, 'title'])
            g.save('{}.{}'.format(filename, args.imagetype),
                   width=args.plot_width, height=args.plot_height)
        centered['filename'] = filename
        all_dfs.append(centered)

    mega_df = pd.concat(all_dfs, ignore_index=True)
    stats_for = lambda x: stats(x, length_scale=args.pixels_per_micron,
                                time_scale=args.minutes_per_frame)
    obj_stats = (mega_df.groupby('filename', sort=False)
                 .apply(stats_for)
                 .reset_index())
    summary_by_file = obj_stats.groupby('filename').apply(summary)
    if args.summary:
        summary_by_file.to_csv(args.summary, index=False)

    print("# Produced by {} at {}".format(' '.join(sys.argv), time.ctime()))
    print("# {} pixels per micron, {} minutes per frame".format(
        args.pixels_per_micron, args.minutes_per_frame))
    print("# distance units are microns; velocity units are microns/hour")
    obj_stats.to_csv(sys.stdout, index=False)
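# Example invocation (illustrative only: the script and track file names are
# made up; the flags come from the parser above):
#   python displacement_plots.py --limits 200 --tick-breaks 0 300 100 \
#       --pixels 1.51 --minutes 10 --summary summary.csv track01.mdf track02.mdf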