def plot_timeline(scenes):
    """Print a scatter plot of characters against the scenes they appear in.

    NB: due to limitations in Python ggplot the scene is plotted on the
    y-axis so that the x ticks can be labelled by character;
    scale_x_continuous and scale_y_continuous behave slightly differently.

    Args:
        scenes: DataFrame providing the ``scene`` and ``character_code``
            columns that are plotted, plus a categorical ``character``
            column whose categories supply the x tick labels.
    """
    timeline = gg.ggplot(gg.aes(y='scene', x='character_code'), data=scenes)
    timeline = timeline + gg.geom_point()
    timeline = timeline + gg.labs(x='Character', y='Scene')

    # One tick per category (0..n-1), labelled with the category name.
    # Presumably character_code holds the matching category codes -- confirm.
    categories = scenes['character'].cat.categories
    timeline = timeline + gg.scale_x_continuous(
        labels=categories.values.tolist(),
        breaks=range(len(categories)))

    # Tilt the tick labels so that long character names stay readable.
    timeline = timeline + gg.theme(
        axis_text_x=gg.element_text(angle=30, hjust=1, size=10))
    print(timeline)
def _plot_and_save_local_ancestry(df, kmer, image_filename, num_chromosomes, id_vars, x_axis, y_scale):
    """Plot estimated ancestry per chromosome and save the image.

    The wide frame ``df`` is melted to long form, the ``test_<i>`` /
    ``true_<i>`` labels are swapped for numeric y positions, and the
    resulting scatter plot is written to ``image_filename``.

    Args:
        df: wide DataFrame; presumably its non-id columns are named
            ``test_<i>`` / ``true_<i>`` (those are the labels replaced
            below) -- confirm against the caller.
        kmer: not used by this function.
        image_filename: path the plot is saved to.
        num_chromosomes: number of ``test_``/``true_`` pairs, numbered from 1.
        id_vars: identifier columns handed to ``pd.melt``.
        x_axis: name of the column mapped to the x aesthetic.
        y_scale: vertical offset factor; ``test_<i>`` is placed at
            ``2*i - 2*y_scale`` and ``true_<i>`` at ``2*i - y_scale``.
    """
    print('saving plot as: {}'.format(image_filename))

    chromosome_col = 'chromosome'
    long_df = pd.melt(df, id_vars=id_vars, var_name=chromosome_col,
                      value_name='estimated_ancestry')

    # Label -> y position. Insertion order matters: it fixes the order of
    # the tick labels and breaks handed to the y scale below.
    y_positions = {}
    for chrom in range(1, num_chromosomes + 1):
        base = 2 * chrom
        y_positions['test_{}'.format(chrom)] = base - 2 * y_scale
        y_positions['true_{}'.format(chrom)] = base - y_scale

    # replace() is applied to the whole frame, one label at a time.
    for label, position in y_positions.items():
        long_df.replace(label, position, inplace=True)

    plot = ggplot.ggplot(
        ggplot.aes(x=x_axis, y=chromosome_col, color='estimated_ancestry'),
        data=long_df)
    plot = plot + ggplot.geom_point()
    plot = plot + ggplot.scale_y_continuous(
        labels=list(y_positions), breaks=list(y_positions.values()))
    plot = plot + ggplot.scale_color_manual(
        values=['#FF0000', '#0000FF', '#73008C'])
    # TODO: this should depend on scale
    plot = plot + ggplot.theme(plot_margin={'top': 0.7, 'bottom': 0.3})
    plot.save(image_filename)
) group by pod_id_location """) qry_job = bqclient.query(qry_str, location='EU', job_config=job_config) #save result as dataframe df = qry_job.to_dataframe() df_long = df.melt(id_vars=['pod_str', 'pod_idx'], value_vars=['p05', 'p25', 'med', 'p75', 'p95'], var_name='yparam', value_name='value') #plots #plt1 = gg.ggplot(df, gg.aes(x='date_UTC',y='no2_ppb'))+gg.geom_line()+gg.xlab('Time')+gg.ylab('NO2 (ppb)')+gg.theme_bw()+gg.facet_wrap('pod_id_location',scales='free_y') #plt1.save(filename = r'.\charts\ulezpodts.png', width=None, height=None, dpi=200) plt2 = gg.ggplot(df_long, gg.aes( x='pod_str', y='value', color='yparam')) + gg.geom_point() + gg.xlab( 'pod') + gg.ylab('NO2 (as % of median)') + gg.theme_bw() + gg.theme( figure_size=(12, 6)) + gg.scale_x_discrete() plt2.save(filename=r'.\charts\ulezpodvar.png', width=10, height=6, dpi=200) #repeat for mobile data using segid instead of podid where N = 10 and N = 40 #repeat for stationary data at mobile times qry_str = (""" with cte0 as ( --all data, ULEZ pods with 6000 hrs select date_UTC, a.pod_id_location, no2_ppb from AQMesh.NO2_scaled_hightimeres_ppb_20180901_20190630 a join AQMesh.NO2_site_metadata_v2_1_20180901_20190630 b on a.pod_id_location=b.pod_id_location where ULEZ = true and no2_ppb <> -999 and a.pod_id_location in --limit to pods with at least 6000 hours (select pod_id_location from (select pod_id_location,count(date) as hr_ct
def new_plot_ancestry_with_correct_results(test, true, y_scale=0.5, image_filename=None):
    """Plot test and true ancestry side by side for every sample.

    Args:
        test: DataFrame with a ``POS`` column plus one ancestry column per
            sample id (the VCF-style bookkeeping columns are ignored).
        true: DataFrame holding the true ancestry under the same sample ids.
        y_scale: vertical offset factor; for the i-th sample (0-based) the
            ``_test`` row sits at ``2*i - 2*y_scale`` and the ``_true`` row
            at ``2*i - y_scale``.
        image_filename: where to save the plot; when ``None`` the plot is
            shown instead.

    Raises:
        KeyError: if ``true`` lacks a column for one of the sample ids
            found in ``test``.
    """
    # Only 'POS' and the per-sample ancestry columns are of interest.
    non_ancestry = ('POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT')
    sample_ids = [name for name in test.columns if name not in non_ancestry]

    combined = pd.DataFrame(test['POS'])
    for sample in sample_ids:
        if sample not in true:
            raise KeyError('true ancestry dataframe is missing ancestry for id: {}'.format(sample))
        combined[sample + '_test'] = test[sample]
        combined[sample + '_true'] = true[sample]

    # Wide -> long. A frame such as
    #
    #     POS  sample1_test  sample1_true  sample2_test  sample2_true
    #     111  pop1          pop1          pop2          pop1
    #     124  pop1          pop1          pop2          pop1
    #
    # becomes one row per (POS, chromosome) pair:
    #
    #     POS  chromosome    ancestry
    #     111  sample1_test  pop1
    #     124  sample1_test  pop1
    #     111  sample1_true  pop1
    #     124  sample1_true  pop1
    #     111  sample2_test  pop2
    #     ...
    long_form = pd.melt(combined, id_vars=['POS'], var_name='chromosome',
                        value_name='ancestry')

    # Column label -> numeric y position. Insertion order fixes the order
    # of the tick labels and breaks passed to the y scale below.
    y_positions = {}
    for index, sample in enumerate(sample_ids):
        base = 2 * index
        y_positions[sample + '_test'] = base - 2 * y_scale
        y_positions[sample + '_true'] = base - y_scale

    # Swap every label for its y position so 'chromosome' can be plotted on
    # a continuous axis; replace() is applied to the whole frame.
    for label, position in y_positions.items():
        long_form.replace(label, position, inplace=True)

    plot = ggplot.ggplot(
        ggplot.aes(x='POS', y='chromosome', color='ancestry'), data=long_form)
    plot = plot + ggplot.geom_point()
    plot = plot + ggplot.scale_y_continuous(
        labels=list(y_positions), breaks=list(y_positions.values()))
    plot = plot + ggplot.scale_color_manual(
        values=['#FF0000', '#0000FF', '#73008C'])
    # TODO: this should depend on scale
    plot = plot + ggplot.theme(plot_margin={'top': 0.7, 'bottom': 0.3})

    if image_filename is None:
        plot.show()
    else:
        plot.save(image_filename)
def visualize_segmentation(X, var):
    """Build a ggplot scatter of ``var`` coloured by segment.

    Args:
        X: DataFrame containing the column ``var`` and a ``segmento``
            column; its index supplies the x values.
        var: name of the column plotted on the y-axis.

    Returns:
        The ggplot object (this function does not display it itself).
    """
    frame = pandas.DataFrame(index=X.index)
    frame['fecha'] = X.index.values
    frame[var] = X[var]
    # Cast the segment id to str before it is used as the colour aesthetic.
    frame['Segmento'] = X['segmento'].astype(str)

    plot = ggplot(aes(x="fecha", y=var, color="Segmento"), frame)
    plot = plot + geom_point()
    plot = plot + xlab("Fecha")
    plot = plot + ylab(var)
    plot = plot + ggtitle('Segmentacion de la variable "' + var + '"')
    # NOTE(review): [0,0,0,0] looks like an RGBA colour with zero alpha,
    # i.e. the x tick labels are made invisible -- confirm.
    plot = plot + theme(axis_text_x=element_text(color=[0, 0, 0, 0]))
    return plot
def visualize_clusters(X, var, color='cluster'):
    """Build a ggplot scatter of ``var`` coloured by cluster.

    Args:
        X: DataFrame containing the column ``var`` and the column named by
            ``color``; its index supplies the x values.
        var: name of the column plotted on the y-axis.
        color: name of the column in ``X`` holding the cluster assignment.

    Returns:
        The ggplot object (this function does not display it itself).
    """
    frame = pandas.DataFrame()
    frame['fecha'] = X.index
    frame.index = X.index
    frame[var] = X[var]
    frame['Cluster'] = X[color]

    plot = ggplot(aes(x='fecha', y=var, color='Cluster'), frame)
    plot = plot + geom_point()
    # NOTE(review): the x-axis is labelled with `var` although x maps to
    # 'fecha' (and the y-axis, which shows `var`, is labelled "Valor") --
    # confirm this is intended.
    plot = plot + xlab(var)
    plot = plot + ylab("Valor")
    plot = plot + ggtitle('Clustering de la variable "' + var + '"')
    # NOTE(review): [0,0,0,0] looks like an RGBA colour with zero alpha,
    # i.e. the x tick labels are made invisible -- confirm.
    plot = plot + theme(axis_text_x=element_text(color=[0, 0, 0, 0]))
    return plot
def _read_tracks(filename):
    """Read a track file, trying each supported reader in turn.

    ``read_mtrackj_mdf`` is tried first; if it raises ``ValueError``,
    ``read_mtrack2`` is tried, and if that fails for any reason
    ``read_manual_track`` is used as the last resort (its errors propagate).
    """
    try:
        return read_mtrackj_mdf(filename)
    except ValueError:
        try:
            return read_mtrack2(filename)
        except Exception:
            return read_manual_track(filename)


def main():
    """Command-line entry point: draw displacement plots and print stats.

    For every input file the tracks are read and centered, the centered
    table is written next to the input as ``<filename>.centered`` and,
    unless ``--no-plots`` is given, a displacement plot is saved as
    ``<filename>.<imagetype>``. Per-object statistics for all files are
    written to stdout as CSV, preceded by ``#`` header lines; with
    ``--summary`` a per-file summary is saved as well.
    """
    parser = argparse.ArgumentParser(
        description="Draws displacement plots.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--limits', type=int,
                        help="Maximum extent of the axes")
    parser.add_argument('--no-plots', action='store_true',
                        help="Don't save plots")
    parser.add_argument('--summary',
                        help='Save summary stats by file')
    parser.add_argument('--imagetype', '-i', default='png',
                        help="Extension to use for plots")
    parser.add_argument('--pixels-per-micron', '--pixels', '-p',
                        default=1.51, type=float,
                        help="Pixels per µm (length scale of tracked images)")
    parser.add_argument('--minutes-per-frame', '--minutes', '-m',
                        default=10, type=float,
                        help="Minutes between each frame of the tracked images")
    parser.add_argument('--plot-titles', type=argparse.FileType('r'),
                        help="CSV file with filename and title columns")
    parser.add_argument('--style', action='append', default=[],
                        choices=['theme-bw', 'no-terminal-dot'],
                        help='Change style options for the plot.')
    parser.add_argument('--tick-breaks', '--ticks', '-t', nargs=3, type=int,
                        metavar=('start', 'end', 'step'),
                        help="Beginning and end tick breaks on displacement plots")
    parser.add_argument('--plot-text', type=int, default=8,
                        help='Plot text size (pt)')
    parser.add_argument('--plot-height', type=float, default=1.81,
                        help='Plot height (in)')
    parser.add_argument('--plot-width', type=float, default=2.5,
                        help='Plot width (in)')
    parser.add_argument('infile', nargs='+', help="File(s) to process.")
    args = parser.parse_args()

    # Each requested style option becomes a True flag in the style dict.
    style = dict.fromkeys(args.style, True)

    # argparse has already opened the titles file; close it once it is read.
    plot_titles = None
    if args.plot_titles:
        with args.plot_titles as titles_file:
            plot_titles = pd.read_csv(titles_file, index_col="filename")

    all_dfs = []
    for filename in args.infile:
        df = _read_tracks(filename)
        centered = center(df)
        centered.to_csv(filename + '.centered')
        if not args.no_plots:
            g = displacement_plot(centered, limits=args.limits, style=style)
            g += gg.theme(axis_text=gg.element_text(size=args.plot_text))
            g += gg.labs(x='px', y='px')
            if args.tick_breaks:
                g += gg.scale_x_continuous(breaks=range(*args.tick_breaks))
                g += gg.scale_y_continuous(breaks=range(*args.tick_breaks))
            if plot_titles is not None and filename in plot_titles.index:
                # Label-based lookup; `.ix` was removed in pandas 1.0.
                g += gg.labs(title=plot_titles.loc[filename, 'title'])
            g.save('{}.{}'.format(filename, args.imagetype),
                   width=args.plot_width, height=args.plot_height)
        # Tag the rows with their source file only after the .centered CSV
        # has been written, so that file does not get the extra column.
        centered['filename'] = filename
        all_dfs.append(centered)

    mega_df = pd.concat(all_dfs, ignore_index=True)

    def stats_for(group):
        # Apply the command-line length and time scales to the stats.
        return stats(group,
                     length_scale=args.pixels_per_micron,
                     time_scale=args.minutes_per_frame)

    obj_stats = (mega_df.groupby('filename', sort=False)
                        .apply(stats_for)
                        .reset_index())
    summary_by_file = obj_stats.groupby('filename').apply(summary)
    if args.summary:
        summary_by_file.to_csv(args.summary, index=False)

    print("# Produced by {} at {}".format(' '.join(sys.argv), time.ctime()))
    print("# {} pixels per micron, {} minutes per frame".
          format(args.pixels_per_micron, args.minutes_per_frame))
    print("# distance units are microns; velocity units are microns/hour")
    obj_stats.to_csv(sys.stdout, index=False)