Exemple #1
0
def plot_timeline(scenes):
    """Print a scatter plot of scenes against characters.

    Args:
        scenes: DataFrame providing a ``scene`` column, a numeric
            ``character_code`` column, and a categorical ``character``
            column whose categories supply the x tick labels.

    The plot is printed (which is what renders it); nothing is returned.
    """
    # NB (from the original author): because of limitations in Python ggplot
    # the scene goes on the y-axis so that the x ticks can be labelled by
    # character; scale_x_continuous and scale_y_continuous behave slightly
    # differently.
    character_names = scenes['character'].cat.categories

    timeline = (
        gg.ggplot(gg.aes(y='scene', x='character_code'), data=scenes)
        + gg.geom_point()
        + gg.labs(x='Character', y='Scene')
        # One tick per category, in category order.
        + gg.scale_x_continuous(
            labels=character_names.values.tolist(),
            breaks=range(len(character_names)))
        + gg.theme(axis_text_x=gg.element_text(angle=30, hjust=1, size=10))
    )
    print(timeline)
Exemple #2
0
def _plot_and_save_local_ancestry(df, kmer, image_filename, num_chromosomes, id_vars, x_axis, y_scale):
    """Melt ``df`` to long form, plot estimated ancestry per chromosome, and save it.

    Args:
        df: wide DataFrame; every column not listed in ``id_vars`` is melted
            into the ``chromosome`` / ``estimated_ancestry`` pair.
        kmer: not used by this function.
        image_filename: path passed to the plot's ``save`` method.
        num_chromosomes: labels ``test_1``/``true_1`` .. ``test_N``/``true_N``
            are mapped to numeric y positions.
        id_vars: identifier columns kept as-is by ``pd.melt``.
        x_axis: name of the column plotted on the x axis.
        y_scale: offset factor used when computing the y positions.
    """
    print('saving plot as: {}'.format(image_filename))
    chromosome_col = 'chromosome'

    long_df = pd.melt(
        df,
        id_vars=id_vars,
        var_name=chromosome_col,
        value_name='estimated_ancestry',
    )

    # Numeric y position for every label, in the order
    # test_1, true_1, test_2, true_2, ...
    y_positions = {
        '{}_{}'.format(kind, number): 2 * number - weight * y_scale
        for number in range(1, num_chromosomes + 1)
        for kind, weight in (('test', 2), ('true', 1))
    }

    # Swap each label for its numeric position so the y axis can be continuous.
    for label, position in y_positions.items():
        long_df.replace(label, position, inplace=True)

    plot = (
        ggplot.ggplot(
            ggplot.aes(x=x_axis, y=chromosome_col, color='estimated_ancestry'),
            data=long_df)
        + ggplot.geom_point()
        + ggplot.scale_y_continuous(
            labels=list(y_positions.keys()),
            breaks=list(y_positions.values()))
        + ggplot.scale_color_manual(values=['#FF0000', '#0000FF', '#73008C'])
        # TODO: this should depend on scale
        + ggplot.theme(plot_margin={'top':0.7, 'bottom':0.3})
    )

    plot.save(image_filename)
    )
    group by pod_id_location
""")
# Run the query and pull the result into a DataFrame.
qry_job = bqclient.query(qry_str, location='EU', job_config=job_config)
df = qry_job.to_dataframe()

# Long form: one row per (pod, percentile column) pair.
df_long = df.melt(
    id_vars=['pod_str', 'pod_idx'],
    value_vars=['p05', 'p25', 'med', 'p75', 'p95'],
    var_name='yparam',
    value_name='value',
)

# Plots
# Disabled per-pod time-series plot, kept for reference:
#plt1 = gg.ggplot(df, gg.aes(x='date_UTC',y='no2_ppb'))+gg.geom_line()+gg.xlab('Time')+gg.ylab('NO2 (ppb)')+gg.theme_bw()+gg.facet_wrap('pod_id_location',scales='free_y')
#plt1.save(filename = r'.\charts\ulezpodts.png', width=None, height=None, dpi=200)
plt2 = (
    gg.ggplot(df_long, gg.aes(x='pod_str', y='value', color='yparam'))
    + gg.geom_point()
    + gg.xlab('pod')
    + gg.ylab('NO2 (as % of median)')
    + gg.theme_bw()
    + gg.theme(figure_size=(12, 6))
    + gg.scale_x_discrete()
)
plt2.save(filename=r'.\charts\ulezpodvar.png', width=10, height=6, dpi=200)

#repeat for mobile data using segid instead of podid where N = 10 and N = 40
#repeat for stationary data at mobile times
qry_str = ("""
    with cte0 as (
    --all data, ULEZ pods with 6000 hrs
    select date_UTC, a.pod_id_location, no2_ppb
    from AQMesh.NO2_scaled_hightimeres_ppb_20180901_20190630 a
    join AQMesh.NO2_site_metadata_v2_1_20180901_20190630 b
    on a.pod_id_location=b.pod_id_location
    where ULEZ = true and no2_ppb <> -999
    and a.pod_id_location in 
      --limit to pods with at least 6000 hours
      (select pod_id_location from (select pod_id_location,count(date) as hr_ct
Exemple #4
0
def new_plot_ancestry_with_correct_results(test, true, y_scale=0.5, image_filename=None):
    """Plot estimated ("test") and reference ("true") ancestry side by side.

    Args:
        test: DataFrame with a ``POS`` column plus one ancestry column per
            sample; the other VCF-style columns (ID, REF, ALT, QUAL, FILTER,
            INFO, FORMAT) are ignored.
        true: DataFrame that must contain every ancestry column of ``test``.
        y_scale: offset factor used to compute each row's y position.
        image_filename: when given, the plot is saved there; otherwise it is
            shown.

    Raises:
        KeyError: if ``true`` lacks one of the ancestry columns of ``test``.

    The data is reshaped from wide form, e.g.::

        POS  sample1_test  sample1_true  sample2_test  sample2_true
        111      pop1          pop1          pop2          pop1
        124      pop1          pop1          pop2          pop1

    to long form with one row per (POS, chromosome) pair::

        POS   chromosome    ancestry
        111   sample1_test    pop1
        124   sample1_test    pop1
        111   sample1_true    pop1
        ...

    after which every ``chromosome`` label is replaced by a numeric y
    position (``2*i - 2*y_scale`` for ``_test`` and ``2*i - 1*y_scale`` for
    ``_true``, where ``i`` is the sample's 0-based position).
    """
    # Everything that is not one of these columns is treated as ancestry data.
    non_ancestry = ['POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
    ancestry_cols = [name for name in test.columns if name not in non_ancestry]

    merged = pd.DataFrame(test['POS'])
    for sample in ancestry_cols:
        if sample not in true:
            raise KeyError('true ancestry dataframe is missing ancestry for id: {}'.format(sample))
        merged[sample + '_test'] = test[sample]
        merged[sample + '_true'] = true[sample]

    melted = pd.melt(merged, id_vars=['POS'], var_name='chromosome', value_name='ancestry')

    # Label -> y position, in the order s0_test, s0_true, s1_test, s1_true, ...
    spacing = {
        sample + suffix: 2 * position - weight * y_scale
        for position, sample in enumerate(ancestry_cols)
        for suffix, weight in (('_test', 2), ('_true', 1))
    }

    # Swap each label for its numeric position so the y axis can be continuous.
    for label, y_value in spacing.items():
        melted.replace(label, y_value, inplace=True)

    plot = (
        ggplot.ggplot(ggplot.aes(x='POS', y='chromosome', color='ancestry'), data=melted)
        + ggplot.geom_point()
        + ggplot.scale_y_continuous(
            labels=list(spacing.keys()),
            breaks=list(spacing.values()))
        + ggplot.scale_color_manual(values=['#FF0000', '#0000FF', '#73008C'])
        # TODO: this should depend on scale
        + ggplot.theme(plot_margin={'top':0.7, 'bottom':0.3})
    )

    if image_filename is None:
        plot.show()
    else:
        plot.save(image_filename)
def visualize_segmentation(X, var):
    """Build a ggplot scatter of ``X[var]`` over the index of ``X``, coloured by segment.

    Args:
        X: DataFrame containing the column ``var`` and a ``segmento`` column.
        var: name of the column plotted on the y axis (also used in the title).

    Returns:
        The ggplot object; it is returned, not printed.
    """
    frame = pandas.DataFrame(index = X.index)

    frame['fecha'] = X.index.values
    frame[var] = X[var]
    # Segment labels are converted to strings before being mapped to colour.
    frame['Segmento'] = X['segmento'].astype(str)

    title = "Segmentacion de la variable \"" + var + "\""
    # NOTE(review): color=[0,0,0,0] is presumably a fully transparent RGBA
    # value used to hide the x tick labels; confirm.
    hidden_x_ticks = theme(axis_text_x  = element_text(color=[0,0,0,0]))

    return (
        ggplot(aes(x="fecha", y=var, color="Segmento"), frame)
        + geom_point()
        + xlab("Fecha")
        + ylab(var)
        + ggtitle(title)
        + hidden_x_ticks
    )
def visualize_clusters(X, var, color = 'cluster'):
    """Build a ggplot scatter of ``X[var]`` over the index of ``X``, coloured by cluster.

    Args:
        X: DataFrame containing the column ``var`` and the column named by
            ``color``.
        var: name of the column plotted on the y axis (also used in the title).
        color: name of the column in ``X`` whose values are mapped to colour.

    Returns:
        The ggplot object; it is returned, not printed.
    """
    aux = pandas.DataFrame()

    aux['fecha'] = X.index
    aux.index = X.index

    aux[var] = X[var]
    aux['Cluster'] = X[color]

    # NOTE(review): color=[0,0,0,0] is presumably a fully transparent RGBA
    # value used to hide the x tick labels; confirm.
    hidden_x_ticks = theme(axis_text_x  = element_text(color=[0,0,0,0]))

    return (
        ggplot(aes(x='fecha', y=var, color='Cluster'), aux)
        + geom_point()
        # The x aesthetic is 'fecha', so the axis is labelled accordingly
        # (it was previously labelled with the name of the y variable).
        + xlab("Fecha")
        + ylab("Valor")
        + ggtitle("Clustering de la variable \"" + var + "\"")
        + hidden_x_ticks
    )
Exemple #7
0
def main():
    """Command-line entry point: center tracks, draw displacement plots, print stats.

    For every input file the tracks are read (trying ``read_mtrackj_mdf``,
    then ``read_mtrack2``, then ``read_manual_track``), passed through
    ``center`` and written to ``<infile>.centered``. Unless ``--no-plots`` is
    given, a displacement plot is saved as ``<infile>.<imagetype>``. The
    per-object statistics of all files are written as CSV to standard output,
    preceded by ``#`` header lines; when ``--summary`` is given, a per-file
    summary is also written to that path.
    """
    parser = argparse.ArgumentParser(description="Draws displacement plots.",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--limits', type=int, help="Maximum extent of the axes")
    parser.add_argument('--no-plots', action='store_true', help="Don't save plots")
    parser.add_argument('--summary', help='Save summary stats by file')
    parser.add_argument('--imagetype', '-i', default='png', help="Extension to use for plots")
    parser.add_argument('--pixels-per-micron', '--pixels', '-p', default=1.51, type=float,
                        help="Pixels per µm (length scale of tracked images)")
    parser.add_argument('--minutes-per-frame', '--minutes', '-m', default=10, type=float,
                        help="Minutes between each frame of the tracked images")
    parser.add_argument('--plot-titles', type=argparse.FileType('r'),
                        help="CSV file with filename and title columns")
    parser.add_argument('--style', action='append', default=[],
                        choices=['theme-bw', 'no-terminal-dot'],
                        help='Change style options for the plot.')
    parser.add_argument('--tick-breaks', '--ticks', '-t', nargs=3, type=int,
                        metavar=('start', 'end', 'step'),
                        help="Beginning and end tick breaks on displacement plots")
    parser.add_argument('--plot-text', type=int, default=8,
                        help='Plot text size (pt)')
    parser.add_argument('--plot-height', type=float, default=1.81,
                        help='Plot height (in)')
    parser.add_argument('--plot-width', type=float, default=2.5,
                        help='Plot width (in)')
    parser.add_argument('infile', nargs='+', help="File(s) to process.")
    args = parser.parse_args()

    # Each requested style option becomes a flag set to True.
    style = {argument: True for argument in args.style}

    plot_titles = pd.read_csv(args.plot_titles, index_col="filename") if args.plot_titles else None

    all_dfs = []
    for filename in args.infile:
        # Try the readers in turn; the first one that succeeds wins.
        try:
            df = read_mtrackj_mdf(filename)
        except ValueError:
            try:
                df = read_mtrack2(filename)
            except Exception:
                # Deliberately broad: any failure of the second reader falls
                # through to the last one, whose errors propagate.
                df = read_manual_track(filename)
        centered = center(df)
        centered.to_csv(filename + '.centered')
        if not args.no_plots:
            g = displacement_plot(centered, limits=args.limits, style=style)
            g += gg.theme(axis_text=gg.element_text(size=args.plot_text))
            g += gg.labs(x='px', y='px')
            if args.tick_breaks:
                g += gg.scale_x_continuous(breaks=range(*args.tick_breaks))
                g += gg.scale_y_continuous(breaks=range(*args.tick_breaks))
            if plot_titles is not None and filename in plot_titles.index:
                # Label-based lookup; the old `.ix` indexer no longer exists
                # in current pandas.
                g += gg.labs(title=plot_titles.loc[filename, 'title'])
            g.save('{}.{}'.format(filename, args.imagetype),
                   width=args.plot_width, height=args.plot_height)
        # Tag the rows with their source file so they can be grouped below.
        centered['filename'] = filename
        all_dfs.append(centered)
    mega_df = pd.concat(all_dfs, ignore_index=True)

    def stats_for(x):
        # Apply the command-line length and time scales to every group.
        return stats(x, length_scale=args.pixels_per_micron,
                     time_scale=args.minutes_per_frame)

    # sort=False keeps the files in the order they were given.
    obj_stats = (mega_df.groupby('filename', sort=False)
                        .apply(stats_for)
                        .reset_index())
    summary_by_file = obj_stats.groupby('filename').apply(summary)
    if args.summary:
        summary_by_file.to_csv(args.summary, index=False)
    print("# Produced by {} at {}".format(' '.join(sys.argv), time.ctime()))
    print("# {} pixels per micron, {} minutes per frame".
          format(args.pixels_per_micron, args.minutes_per_frame))
    print("# distance units are microns; velocity units are microns/hour")
    obj_stats.to_csv(sys.stdout, index=False)