Example #1
0
    def production_envelope(self,
                            dataframe,
                            grid=None,
                            width=None,
                            height=None,
                            title=None,
                            points=None,
                            points_colors=None,
                            palette=None,
                            x_axis_label=None,
                            y_axis_label=None):
        """Build a ggplot area chart of a production envelope.

        The area spans the ``lb`` .. ``ub`` columns of ``dataframe`` along the
        ``value`` column.

        Parameters
        ----------
        dataframe : object with ``strain``, ``value``, ``lb`` and ``ub`` columns
            One colour is requested from the palette per unique ``strain``.
        palette : optional
            Palette passed to ``self._palette``; falls back to the ``palette``
            option when ``None``.
        title : str, optional
            Plot title; no title layer is added when falsy.
        x_axis_label, y_axis_label : str, optional
            Axis names; the default axis names are kept when falsy.
        grid, width, height, points, points_colors
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        The assembled ggplot object.
        """
        palette = self.get_option('palette') if palette is None else palette
        # Resolved as in the original implementation, but currently unused here.
        width = self.get_option('width') if width is None else width
        colors = self._palette(palette, len(dataframe.strain.unique()))

        # A plot has to start from ggplot(data, aes(...)); an aes mapping alone
        # cannot carry the data or accept layers.
        # NOTE(review): colour is mapped to "strain" because the palette is sized
        # by the number of unique strains -- confirm this is the intended grouping.
        plot = ggplot(dataframe,
                      aes(x="value", ymin="lb", ymax="ub", color="strain")) + geom_area()
        plot += scale_colour_manual(values=colors)
        if title:
            # ggtitle sets the title; geom_tile is a heat-map layer.
            plot += ggtitle(title)
        if x_axis_label:
            plot += scale_x_continuous(name=x_axis_label)
        if y_axis_label:
            plot += scale_y_continuous(name=y_axis_label)

        return plot
Example #2
0
    def histogram(self, dataframe, bins=100, width=None, height=None, palette=None, title='Histogram', values=None,
                  groups=None, legend=True):
        """Return a ggplot histogram of the ``values`` column, filled and outlined by ``groups``.

        ``palette`` falls back to the ``palette`` entry of ``__default_options__``
        when it is ``None``. ``width``, ``height`` and ``legend`` are accepted
        but not used here.
        """
        if palette is None:
            palette = self.__default_options__.get('palette', None)

        # Layers are added one by one, in the same order as a single chained expression.
        plot = ggplot(dataframe, aes(x=values, fill=groups, color=groups))
        plot = plot + geom_histogram(alpha=0.6, breaks=bins, position="fill")
        plot = plot + self._palette(palette)
        plot = plot + ggtitle(title)
        plot = plot + scale_y_continuous(name="Count (%s)" % values)
        return plot
Example #3
0
def signature_data_plot(sd):
    """Return a scatter plot of ``not_exp`` against ``set_exp``, coloured by ``pearson_r``.

    ``sd`` is the data handed to ggplot; it must provide the ``set_exp``,
    ``not_exp`` and ``pearson_r`` columns. Both axes get a log scale followed
    by explicit continuous limits.
    """
    import ggplot as gg

    mapping = gg.aes(x='set_exp', y='not_exp', color='pearson_r')
    plot = gg.ggplot(mapping, data=sd)
    plot = plot + gg.geom_point(size=15)
    plot = plot + gg.scale_color_gradient(low='yellow', high='red')
    # x axis: log scale, then limits
    plot = plot + gg.scale_x_log()
    plot = plot + gg.scale_x_continuous(limits=(0.5, 10000))
    # y axis: log scale, then limits
    plot = plot + gg.scale_y_log()
    plot = plot + gg.scale_y_continuous(limits=(0.05, 10000))
    return plot
Example #4
0
def signature_data_plot(sd):
    """Scatter ``set_exp`` (x) against ``not_exp`` (y) from ``sd``, coloured by ``pearson_r``.

    The colour runs from yellow to red, and each axis receives a log scale
    followed by fixed continuous limits. Returns the ggplot object.
    """
    import ggplot as gg

    point_mapping = gg.aes(x='set_exp', y='not_exp', color='pearson_r')
    figure = gg.ggplot(point_mapping, data=sd)

    figure = figure + gg.geom_point(size=15)
    figure = figure + gg.scale_color_gradient(low='yellow', high='red')

    figure = figure + gg.scale_x_log()
    figure = figure + gg.scale_x_continuous(limits=(0.5, 10000))

    figure = figure + gg.scale_y_log()
    figure = figure + gg.scale_y_continuous(limits=(0.05, 10000))

    return figure
Example #5
0
    def scatter(self, dataframe, x=None, y=None, width=None, height=None, color=None, title='Scatter', xaxis_label=None,
                yaxis_label=None):
        """Return a ggplot scatter plot of column ``y`` against column ``x``.

        Parameters
        ----------
        dataframe : data handed to ``ggplot``.
        x, y : column names mapped to the horizontal and vertical axes.
        color : point colour; falls back to the ``palette`` entry of
            ``__default_options__`` when ``None``.
        title : plot title.
        xaxis_label, yaxis_label : optional axis names; the default axis names
            are kept when falsy.
        width, height : accepted for interface compatibility; not used here.
        """
        color = self.__default_options__.get('palette', None) if color is None else color
        # Resolved as in the original implementation, but currently unused here.
        width = self.__default_options__.get('width', None) if width is None else width

        gg = ggplot(dataframe, aes(x, y)) + geom_point(color=color, alpha=0.6) + ggtitle(title)
        if xaxis_label:
            gg += scale_x_continuous(name=xaxis_label)
        if yaxis_label:
            # Fixed: the y axis was previously labelled with xaxis_label.
            gg += scale_y_continuous(name=yaxis_label)

        return gg
Example #6
0
    def histogram(self,
                  dataframe,
                  bins=100,
                  width=None,
                  height=None,
                  palette=None,
                  title='Histogram',
                  values=None,
                  groups=None,
                  legend=True):
        """Build a ggplot histogram of ``values`` with fill and outline colour taken from ``groups``.

        When ``palette`` is ``None`` the ``palette`` entry of
        ``__default_options__`` is used. ``width``, ``height`` and ``legend``
        are part of the signature but are not used here.
        """
        if palette is None:
            palette = self.__default_options__.get('palette', None)

        figure = ggplot(dataframe, aes(x=values, fill=groups, color=groups))
        figure = figure + geom_histogram(alpha=0.6, breaks=bins, position="fill")
        figure = figure + self._palette(palette)
        figure = figure + ggtitle(title)
        figure = figure + scale_y_continuous(name="Count (%s)" % values)
        return figure
Example #7
0
def _plot_and_save_local_ancestry(df, kmer, image_filename, num_chromosomes, id_vars, x_axis, y_scale):
    """Melt ``df`` to long form, plot estimated ancestry per chromosome and save the image.

    Columns named ``test_<i>`` / ``true_<i>`` (``i`` from 1 to ``num_chromosomes``)
    are replaced by numeric y positions so each test/true pair is drawn as two
    adjacent rows; ``y_scale`` controls the offsets. ``kmer`` is not used.
    The plot is written to ``image_filename``; nothing is returned.
    """
    print('saving plot as: {}'.format(image_filename))
    chromosome_col = 'chromosome'

    long_df = pd.melt(df, id_vars=id_vars, var_name=chromosome_col, value_name='estimated_ancestry')

    # y position for every column label, in test_1, true_1, test_2, true_2, ... order
    y_positions = {
        '{}_{}'.format(kind, index): 2 * index - weight * y_scale
        for index in range(1, num_chromosomes + 1)
        for kind, weight in (('test', 2), ('true', 1))
    }

    for label, position in y_positions.items():
        long_df.replace(label, position, inplace=True)

    mapping = ggplot.aes(x=x_axis, y=chromosome_col, color='estimated_ancestry')
    plot = ggplot.ggplot(mapping, data=long_df)
    plot = plot + ggplot.geom_point()
    plot = plot + ggplot.scale_y_continuous(labels=list(y_positions.keys()),
                                            breaks=list(y_positions.values()))
    plot = plot + ggplot.scale_color_manual(values=['#FF0000', '#0000FF', '#73008C'])
    # TODO: the margins should depend on scale
    plot = plot + ggplot.theme(plot_margin={'top': 0.7, 'bottom': 0.3})

    plot.save(image_filename)
Example #8
0
    def scatter(self,
                dataframe,
                x=None,
                y=None,
                width=None,
                height=None,
                color=None,
                title='Scatter',
                xaxis_label=None,
                yaxis_label=None,
                label=None):
        """Return a ggplot scatter plot of column ``y`` against column ``x``.

        Parameters
        ----------
        dataframe : data handed to ``ggplot``.
        x, y : column names mapped to the horizontal and vertical axes.
        color : point colour; falls back to the ``palette`` entry of
            ``__default_options__`` when ``None``.
        title : plot title.
        xaxis_label, yaxis_label : optional axis names; the default axis names
            are kept when falsy.
        width, height, label : accepted for interface compatibility; not used
            here.
        """
        color = self.__default_options__.get('palette',
                                             None) if color is None else color
        # Resolved as in the original implementation, but currently unused here.
        width = self.__default_options__.get('width',
                                             None) if width is None else width

        gg = ggplot(dataframe, aes(x, y)) + geom_point(
            color=color, alpha=0.6) + ggtitle(title)
        if xaxis_label:
            gg += scale_x_continuous(name=xaxis_label)
        if yaxis_label:
            # Fixed: the y axis was previously labelled with xaxis_label.
            gg += scale_y_continuous(name=yaxis_label)

        return gg
Example #9
0
# Finish the plot held in `p` (created before this excerpt) and save it as the
# histogram image: add a title, one facet per "feature" value in a 3-column
# grid with free scales, and blank axis labels.
p += ggtitle("sarimax coefficient magnitude distribution")
p += facet_wrap("feature", ncol=3, scales="free")
p += labs(x=" ", y=" ")

# visuals: start from the gray theme and override font size and family
# (done through the theme's private _rcParams mapping)
t = theme_gray()
t._rcParams['font.size'] = 10
t._rcParams['font.family'] = 'monospace'

p += t
p.save("arima_1/" + "histogram.png")

# boxplot: 'value' per 'variable' from master_df, y axis limited to [-25, 25],
# one facet per "feature" value in a 3-column grid
p = ggplot(aes(x='variable', y='value'), data=master_df)
p += geom_boxplot()
p += scale_y_continuous(limits=(-25, 25))
p += ggtitle("sarimax coefficient magnitudes")
p += facet_wrap("feature", ncol=3)
p += labs(x=" ", y=" ")

# visuals: same theme overrides as for the histogram above
t = theme_gray()
t._rcParams['font.size'] = 10
t._rcParams['font.family'] = 'monospace'

p += t
p.save("arima_1/" + "boxplot.png")

for feature in [
        "home_goal", "away_goal", "home_yellow", "away_yellow", "home_red",
        "away_red"

# Generate new dataframe with analyses performed per window
if options.graphics == True:
    # print(...) with one argument behaves the same on Python 2 and Python 3,
    # whereas the bare print statement is a syntax error on Python 3.
    print("Analysing by "+ str(windowsize) +"sliding windows and generating plots")

    # Sorted unique window identifiers, computed once and shared by all columns.
    unique_windows = sorted(set(vcfdf['window']))
    windowed_df = pd.DataFrame({'window': unique_windows,
                                'MaxMinor': windowMax(unique_windows),
                                'Pi': windowPi(unique_windows)})

    # x-axis label shared by all three plots
    position_label = "Genome Position (bp; windowsize="+ str(windowsize) +")"

    # Minor variant frequency along the genome
    p_MaxMinor = (gg.ggplot(gg.aes('window', 'MaxMinor'), data=windowed_df)
                  + gg.geom_point()
                  + gg.theme_bw()
                  + gg.labs(x=position_label, y="Minor Variant Frequency (%)")
                  + gg.ggtitle(vcfoutput + "\n Valid Minor Variant Sites :" + str(len(minorvar))))

    # Nucleotide diversity (Pi) along the genome; the y axis starts at 0 and
    # ends slightly above the largest Pi value
    p_pi = (gg.ggplot(gg.aes('window', 'Pi'), data=windowed_df)
            + gg.geom_point()
            + gg.theme_bw()
            + gg.labs(x=position_label, y="Mean nucleotide diversity (" + u"\u03c0" +")")
            + gg.scale_y_continuous(expand=(0,0), limits=(0, windowed_df['Pi'].max(axis=0)+0.001))
            + gg.ggtitle(vcfoutput + "\n Genome-wide Mean Nucleotide Diversity (" +u"\u03c0"+ ") :" +str(round(gw_Pi,6))))

    # Faceted plot of both measures (y-axis labels are still not sorted)
    windowed_df_melt = pd.melt(windowed_df, id_vars=['window'])
    p_combi = gg.ggplot(gg.aes('window', 'value', colour='variable'), data=windowed_df_melt)
    p_combi = (p_combi
               + gg.geom_point(colour='variable')
               + gg.facet_grid('variable', scales='free_y')
               + gg.theme_bw()
               + gg.labs(x=position_label))

    # Write the graphs to .png files next to the input
    p_combi.save(vcfinput + ".MinorVar_combo.png")
    p_MaxMinor.save(vcfinput + ".MinorVar.png")
    p_pi.save(vcfinput + ".Pi-diversity.png")


Example #11
0
def new_plot_ancestry_with_correct_results(test, true, y_scale=0.5, image_filename=None):
    """Plot estimated (``test``) ancestry next to reference (``true``) ancestry by position.

    Every column of ``test`` other than the fixed metadata columns is treated
    as an ancestry column and must also exist in ``true``; otherwise a
    ``KeyError`` is raised. The plot is saved to ``image_filename`` when one
    is given and shown otherwise. Nothing is returned.
    """
    # Only 'POS' and the ancestry columns are wanted.
    metadata_cols = ['POS', 'ID', 'REF', 'ALT', 'QUAL', 'FILTER', 'INFO', 'FORMAT']
    ancestry_cols = [name for name in test.columns if name not in metadata_cols]

    merged = pd.DataFrame(test['POS'])
    for name in ancestry_cols:
        if name not in true:
            raise KeyError('true ancestry dataframe is missing ancestry for id: {}'.format(name))
        merged[name + '_test'] = test[name]
        merged[name + '_true'] = true[name]

    # Wide -> long. A frame such as
    #
    #     POS  sample1_test  sample1_true  sample2_test  sample2_true
    #     111      pop1          pop1          pop2          pop1
    #     124      pop1          pop1          pop2          pop1
    #
    # becomes
    #
    #     POS   chromosome     ancestry
    #     111   sample1_test   pop1
    #     124   sample1_test   pop1
    #     111   sample1_true   pop1
    #     124   sample1_true   pop1
    #     111   sample2_test   pop2
    #     124   sample2_test   pop2
    #     111   sample2_true   pop1
    #     124   sample2_true   pop1
    melted = pd.melt(merged, id_vars=['POS'], var_name='chromosome', value_name='ancestry')

    # Numeric y position for each <sample>_test / <sample>_true label, in
    # sample order with the test row listed before the true row.
    y_positions = {
        name + suffix: 2 * index - weight * y_scale
        for index, name in enumerate(ancestry_cols)
        for suffix, weight in (('_test', 2), ('_true', 1))
    }

    # Swap the labels in the long frame for their numeric positions, so the
    # 'chromosome' column ends up holding numbers instead of names.
    for label, position in y_positions.items():
        melted.replace(label, position, inplace=True)

    mapping = ggplot.aes(x='POS', y='chromosome', color='ancestry')
    plot = ggplot.ggplot(mapping, data=melted)
    plot = plot + ggplot.geom_point()
    plot = plot + ggplot.scale_y_continuous(labels=list(y_positions.keys()),
                                            breaks=list(y_positions.values()))
    plot = plot + ggplot.scale_color_manual(values=['#FF0000', '#0000FF', '#73008C'])
    # TODO: the margins should depend on scale
    plot = plot + ggplot.theme(plot_margin={'top': 0.7, 'bottom': 0.3})

    if image_filename is None:
        plot.show()
        return
    plot.save(image_filename)
def _fresh_output_path(file_name):
    """Return the path of ``file_name`` inside ``parameters.OUTPUT_PATH``, removing any existing file there."""
    path = os.path.join(parameters.OUTPUT_PATH, file_name)
    # An older plot with the same name is deleted before the new one is saved.
    if os.path.isfile(path):
        os.remove(path)
    return path


def _save_firms_line_plot(data, column, title, y_label, file_name, breaks,
                          labels, width, height):
    """Draw ``column`` against ``time`` as a ggplot line chart and save it as ``file_name``."""
    plot = (gg.ggplot(data, gg.aes('time', column))
            + gg.geom_line()
            + gg.scale_y_continuous(breaks=11)
            + gg.scale_x_discrete(breaks=breaks, labels=labels)
            + gg.ggtitle(title)
            + gg.xlab('Years')
            + gg.ylab(y_label)
            + gg.theme_bw())
    gg.ggsave(plot,
              _fresh_output_path(file_name),
              width=width,
              height=height,
              units="in")


def _save_firms_comparison_plot(data, columns, title, y_label, file_name,
                                ticks, tick_labels, width, height, dpi):
    """Draw ``columns`` together on one pandas/matplotlib line chart and save it as ``file_name``."""
    axes = data[list(columns)].plot(title=title)
    axes.set_xlabel('Years')
    axes.set_ylabel(y_label)
    axes.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    axes.set_xticks(ticks)
    axes.set_xticklabels(tick_labels)
    # set_axis_bgcolor no longer exists in current Matplotlib; prefer
    # set_facecolor and fall back for installations that lack it.
    if hasattr(axes, 'set_facecolor'):
        axes.set_facecolor('w')
    else:
        axes.set_axis_bgcolor('w')
    fig = axes.get_figure()
    fig.set_size_inches(width, height)
    fig.savefig(_fresh_output_path(file_name), dpi=dpi)


def firms_dynamics_plot(decision):
    """Read the firms time series for ``decision`` and save one PNG per indicator.

    The input is the comma-separated file
    ``temp_general_firms_pop_<pop_redutor>_decision_<decision>_time_<final_Time>.txt``
    in ``parameters.OUTPUT_PATH``. When ``parameters.time_to_cut_plots`` is
    positive, rows whose ``time`` is below it are dropped. Six ggplot line
    charts and two pandas/matplotlib comparison charts are written to
    ``parameters.OUTPUT_PATH``, each replacing any existing file of the same
    name. Nothing is returned.
    """
    data = pd.read_csv(os.path.join(
        parameters.OUTPUT_PATH,
        "temp_general_firms_pop_%s_decision_%s_time_%s.txt" %
        (parameters.pop_redutor, decision, parameters.final_Time)),
                       sep=",",
                       header=None,
                       decimal=",").astype(float)
    # the file has no header row, so the columns are named here
    data.columns = [
        'time', 'total_firms', 'average_output', 'average_age', 'average_size',
        'new_firms', 'exit_firms', 'max_size', 'total_effort', 'average_effort'
    ]

    # optionally drop the initial rows before time_to_cut_plots
    if parameters.time_to_cut_plots > 0:
        data = data.loc[(
            data['time']).astype(int) >= parameters.time_to_cut_plots, :]

    # value embedded in every output file name
    title_pop_val = float(parameters.pop_redutor) * 100
    name_suffix = '%s_%s_%s.png' % (decision, title_pop_val,
                                    parameters.final_Time)

    # x-axis ticks every 12 time steps plus one final tick, labelled time // 12
    list_of_years_division = list(
        range(int(data['time'].min()), int(data['time'].max()),
              12)) + [data['time'].max() + 1]
    list_of_years = [int(i / 12) for i in list_of_years_division]

    # graph parameter variables
    dpi_var_plot = 700
    width_var_plot = 15
    height_var_plot = 10

    # (column, title, y label, file name stem) for each ggplot line chart
    line_plots = [
        ('total_firms', 'Total firms', 'Total of Firms',
         'temp_general_total_firms'),
        ('average_output', 'Average of output', 'Units',
         'temp_general_average_output'),
        ('average_age', 'Average of age of firms', 'Age of Firms',
         'temp_general_average_age'),
        ('average_size', 'Average of size of firms', 'Units',
         'temp_general_average_size'),
        ('new_firms', 'Number of new firms', 'Units',
         'temp_general_number_of_new_firms'),
        ('exit_firms', 'Number of firms out', 'Units',
         'temp_general_number_of_firms_out'),
    ]
    for column, title, y_label, stem in line_plots:
        _save_firms_line_plot(data, column, title, y_label,
                              '%s_%s' % (stem, name_suffix),
                              list_of_years_division, list_of_years,
                              width_var_plot, height_var_plot)

    # Average and total effort.
    # NOTE(review): the title says "maximum" but the plotted column is
    # total_effort -- confirm which one is intended.
    _save_firms_comparison_plot(
        data, ['average_effort', 'total_effort'],
        'Average and maximum effort of employees', 'Values units of effort',
        'temp_average_and_maximum_effort_of_firms_out_%s' % name_suffix,
        list_of_years_division, list_of_years, width_var_plot,
        height_var_plot, dpi_var_plot)

    # Average and maximum size of firms
    _save_firms_comparison_plot(
        data, ['average_size', 'max_size'], 'Average and maximum size firms',
        'Number of employees',
        'temp_average_size_and_maximum_of_firms_out_%s' % name_suffix,
        list_of_years_division, list_of_years, width_var_plot,
        height_var_plot, dpi_var_plot)
Example #13
0
def plot_vol(dates, x, cp, my_domain):
    """Return a ggplot of weekly message volume with change points and segment means.

    ``dates`` and ``x`` are the parallel date and volume series. Each entry of
    ``cp`` is indexed as ``entry[0]`` (1-based position of the change point)
    and ``entry[2]`` (value drawn both as the change-point marker and as the
    mean line of the segment ending there). ``my_domain`` goes into the title.
    """
    # -------------------- Prepare for Plotting -------------------------- #
    # One row per observation: [date, volume, 'Volume']; the constant third
    # column is the legend label.
    volume_labels = ['Volume'] * x.shape[0]
    volume_rows = np.transpose(np.array([dates, x, volume_labels]))
    df_vol = pd.DataFrame(volume_rows, columns=['Date', 'Volume', 'Data'])

    xmin_list = []      # left end of each mean segment (date ordinal)
    xmax_list = []      # right end of each mean segment (date ordinal)
    yint_list = []      # mean value of each segment
    cp_date_list = []   # date of each change point
    cp_value_list = []  # value plotted at each change point

    segment_start = 0  # index where the current segment begins
    for change_point in cp:
        cp_idx = change_point[0] - 1  # -1 because positions are 1-indexed
        # ordinals are used so the values match ggplot's date axis
        xmin_list.append(dates[segment_start].toordinal())
        xmax_list.append(dates[cp_idx].toordinal())
        yint_list.append(change_point[2])
        cp_date_list.append(dates[cp_idx])
        cp_value_list.append(change_point[2])
        segment_start = cp_idx + 1  # the next segment starts right after

    # One extra dummy row puts 'Volume Mean' into the legend. It is added to
    # copies only, so the lists passed to the geoms below keep their length.
    cp_lbl = ['Change Point'] * len(yint_list)
    cp_rows = np.transpose(np.array([cp_date_list + [dates[0]],
                                     yint_list + [x[0]],
                                     cp_lbl + ['Volume Mean']]))
    df_cp = pd.DataFrame(cp_rows, columns=['Date', 'Volume', 'Data'])

    df_plot = pd.concat((df_vol, df_cp), axis=0)

    lowest = np.min(x)
    highest = np.max(x)
    margin = 0.10 * (highest - lowest)

    p = ggplot.ggplot(aes(x='Date', y='Volume', color='Data'), data=df_plot)
    p = p + ggplot.geom_line(color='blue', size=2)
    p = p + ggplot.geom_point(x=xmax_list, y=cp_value_list, color='black',
                              shape='D', size=50)
    p = p + ggplot.geom_hline(xmin=xmin_list,
                              xmax=xmax_list,
                              yintercept=yint_list, color="red", size=3)
    p = p + ggplot.scale_x_date(labels=date_format("%Y-%m-%d"), breaks="1 week")
    p = p + ggplot.scale_colour_manual(values=["black", "blue", "red"])
    p = p + ggplot.scale_y_continuous(labels='comma')
    p = p + ggplot.ylim(low=lowest - margin / 4.0, high=highest + margin)
    p = p + ggplot.xlab("Week (Marked on Mondays)")
    p = p + ggplot.ylab("Message Vol")
    p = p + ggplot.ggtitle("%s\nMessage Volume by Week" % my_domain)
    p = p + ggplot.theme_seaborn()

    return p
Example #14
0
def main():
    """Command-line entry point: centre tracks, draw displacement plots, print statistics.

    For every input file the track is read (trying the MTrackJ reader, then
    the MTrack2 reader, then the manual-track reader), centred, and written to
    ``<file>.centered``. Unless ``--no-plots`` is given, a displacement plot is
    saved as ``<file>.<imagetype>``. Per-object statistics for all files are
    printed to stdout as CSV, and the per-file summary is written to the
    ``--summary`` path when one is given.
    """
    parser = argparse.ArgumentParser(description="Draws displacement plots.",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--limits', type=int, help="Maximum extent of the axes")
    parser.add_argument('--no-plots', action='store_true', help="Don't save plots")
    parser.add_argument('--summary', help='Save summary stats by file')
    parser.add_argument('--imagetype', '-i', default='png', help="Extension to use for plots")
    parser.add_argument('--pixels-per-micron', '--pixels', '-p', default=1.51, type=float,
                        help="Pixels per µm (length scale of tracked images)")
    parser.add_argument('--minutes-per-frame', '--minutes', '-m', default=10, type=float,
                        help="Minutes between each frame of the tracked images")
    parser.add_argument('--plot-titles', type=argparse.FileType('r'),
                        help="CSV file with filename and title columns")
    parser.add_argument('--style', action='append', default=[],
                        choices=['theme-bw', 'no-terminal-dot'],
                        help='Change style options for the plot.')
    parser.add_argument('--tick-breaks', '--ticks', '-t', nargs=3, type=int,
                        metavar=('start', 'end', 'step'),
                        help="Beginning and end tick breaks on displacement plots")
    parser.add_argument('--plot-text', type=int, default=8,
                        help='Plot text size (pt)')
    parser.add_argument('--plot-height', type=float, default=1.81,
                        help='Plot height (in)')
    parser.add_argument('--plot-width', type=float, default=2.5,
                        help='Plot width (in)')
    parser.add_argument('infile', nargs='+', help="File(s) to process.")
    args = parser.parse_args()

    # Each selected style name becomes a flag in the dict given to the plot.
    style = {argument: True for argument in args.style}

    plot_titles = pd.read_csv(args.plot_titles, index_col="filename") if args.plot_titles else None

    def stats_for(group):
        """Compute statistics for one file's tracks using the command-line scales."""
        return stats(group, length_scale=args.pixels_per_micron,
                     time_scale=args.minutes_per_frame)

    all_dfs = []
    for filename in args.infile:
        # Try the readers in turn; an error from the last one propagates.
        try:
            df = read_mtrackj_mdf(filename)
        except ValueError:
            try:
                df = read_mtrack2(filename)
            except Exception:
                df = read_manual_track(filename)
        centered = center(df)
        centered.to_csv(filename + '.centered')
        if not args.no_plots:
            g = displacement_plot(centered, limits=args.limits, style=style)
            g += gg.theme(axis_text=gg.element_text(size=args.plot_text))
            g += gg.labs(x='px', y='px')
            if args.tick_breaks:
                g += gg.scale_x_continuous(breaks=range(*args.tick_breaks))
                g += gg.scale_y_continuous(breaks=range(*args.tick_breaks))
            if plot_titles is not None and filename in plot_titles.index:
                # .loc is the label-based lookup; DataFrame.ix was removed from pandas.
                g += gg.labs(title=plot_titles.loc[filename, 'title'])
            g.save('{}.{}'.format(filename, args.imagetype),
                   width=args.plot_width, height=args.plot_height)
        centered['filename'] = filename
        all_dfs.append(centered)
    mega_df = pd.concat(all_dfs, ignore_index=True)
    obj_stats = (mega_df.groupby('filename', sort=False)
                        .apply(stats_for)
                        .reset_index())
    summary_by_file = obj_stats.groupby('filename').apply(summary)
    if args.summary:
        summary_by_file.to_csv(args.summary, index=False)
    print("# Produced by {} at {}".format(' '.join(sys.argv), time.ctime()))
    print("# {} pixels per micron, {} minutes per frame".
          format(args.pixels_per_micron, args.minutes_per_frame))
    print("# distance units are microns; velocity units are microns/hour")
    obj_stats.to_csv(sys.stdout, index=False)