Exemple #1
0
	def bar_chart(self, conn, column1, column2, table_chosen, title):
		"""Print a bar chart built from two columns of a table.

		``dfile.double_selector`` is called with ``conn``, ``table_chosen``
		and the two column names; whatever it returns is handed to ggplot as
		the plot data. ``column1`` is mapped to the x axis, ``column2`` to the
		bar weight, and ``title`` becomes the plot title.

		Nothing is returned; the finished plot object is passed to ``print``.
		"""
		# A bar graph only ever needs two columns: category and weight.
		frame = dfile.double_selector(
			conn=conn,
			table=table_chosen,
			col1=column1,
			col2=column2,
		)

		chart = ggplot(aes(x=column1, weight=column2), data=frame)
		chart = chart + geom_bar()
		chart = chart + labs(title=title)
		print(chart)
def plot_matches(df_in,
                 date,
                 filename_out,
                 x_var='date_time',
                 y_var="shorthand_search_vol"):
    """
    Draw ``y_var`` against ``x_var`` as one line per match and save the plot.

    The caller is expected to pass a frame that has already been filtered
    and a ``date`` string for the day being plotted.

    Note that ``df_in`` is modified in place: a ``date_time`` column is
    written onto it, built from ``date``, a space, and the string form of
    the ``time`` column, then parsed with ``pd.to_datetime`` (values that
    cannot be parsed are coerced rather than raising).

    The figure is written to ``filename_out`` at 16 x 8; nothing is returned.
    """
    # timestamp column used by the default x axis
    stamp_text = date + " " + df_in['time'].astype(str)
    df_in['date_time'] = stamp_text
    df_in['date_time'] = pd.to_datetime(df_in['date_time'],
                                        errors="coerce",
                                        infer_datetime_format=True)

    # base plot: lines are grouped and coloured by match
    plot = ggplot(aes(x=x_var, y=y_var, group="match_id", color="match_id"),
                  data=df_in)

    # gray theme with a small monospace font
    styled_theme = theme_gray()
    for rc_key, rc_value in (('font.size', 8), ('font.family', 'monospace')):
        styled_theme._rcParams[rc_key] = rc_value

    # geometry, axis labels, time-of-day ticks, then the theme
    layers = (
        geom_line(size=2),
        labs(x="time (gmt)", y="search volume (scaled to 100)"),
        scale_x_date(labels=date_format("%H:%M:%S"), date_breaks="30 minutes"),
        styled_theme,
    )
    for layer in layers:
        plot += layer

    plot.save(filename_out, width=16, height=8)
Exemple #3
0
    def two_var_intr_effects(self, target, vars, nval=100, plot=True):
        """ Loads first level interactions.

        Calls the R function ``twovarint`` through rpy2 for ``target``
        against every entry of ``vars``. If no ``null.models`` object exists
        in the R session yet, null models are generated first (n=10).

        Args:
          target - Variable identifier (column name or number) specifying the
                   target variable
          vars - List of variable identifiers (column names or numbers) specifying
                 other selected variables. Must not contain target
          nval - Number of evaluation points used for calculation.
          plot - Determines whether or not to plot results.
        Returns: Pandas dataframe of interaction effects with the columns
                 'interact_str', 'exp_null_int' and 'std_null_int', indexed by
                 the variable identifiers as passed to R (numeric identifiers
                 are therefore shifted by one).
        """
        # Check if null.models have already been generated
        check_str = """
                function(){
                  if(exists("null.models")){
                    return(T)
                  } else {
                    return(F)
                  }
                }
                """
        if not robjects.r(check_str)()[0]:
            self.logger.info(
                'Null models not generated, generating null models '
                '(n=10)')
            self._generate_interaction_null_models(10, quiet=False)

        int_str = """
              function(target, vars, nval){
                interactions <- twovarint(tvar=target, vars=vars, null.models, 
                                          nval=nval, plot=F)
              }
              """
        # Names are passed through untouched; numeric identifiers get +1
        # (presumably converting 0-based positions to R's 1-based ones).
        # isinstance is used instead of an exact type comparison so that str
        # subclasses are treated as names rather than hitting `+ 1`.
        target = target if isinstance(target, str) else target + 1
        vars = [var if isinstance(var, str) else var + 1 for var in vars]
        r_interact = robjects.r(int_str)(target,
                                         robjects.Vector(np.array(vars)), nval)
        # The R result is read positionally: strength, null mean, null std.
        interact = pd.DataFrame(
            {
                'interact_str': list(r_interact[0]),
                'exp_null_int': list(r_interact[1]),
                'std_null_int': list(r_interact[2])
            },
            index=vars)

        if plot:
            # Long format so the observed and expected-null values can be
            # drawn as differently filled bars per variable.
            int_effects = interact.reset_index().rename(
                columns={'index': 'vars'})
            int_effects_m = pd.melt(
                int_effects,
                id_vars='vars',
                value_vars=['interact_str', 'exp_null_int'])
            p = gg.ggplot(gg.aes(x='vars', fill='variable', weight='value'),
                          data=int_effects_m) \
                + gg.geom_bar() \
                + gg.labs(
                    title='Two-var interaction effects - {}'.format(target))
            print(p)
        return interact
Exemple #4
0
 def plotAverageLatency(self):
     """Show a bar chart of the average latency of each entry in self.data.

     Entries are numbered from 1 in the order they appear in ``self.data``;
     each bar's height is the value returned by that entry's
     ``averageLatency()``. The chart is displayed with ``show()`` and
     nothing is returned.
     """
     latencies = []
     for entry in self.data:
         latencies.append(entry.averageLatency())

     frame = pandas.DataFrame({
         "device": range(1, len(latencies) + 1),
         "average": latencies,
     })

     figure = ggplot.ggplot(ggplot.aes(x="device", weight="average"), frame)
     figure = figure + ggplot.labs(title="Average Latency Per Device")
     figure = figure + ggplot.ylab("Average Latency (ms)")
     figure = figure + ggplot.xlab("Device Number")
     figure = figure + ggplot.geom_bar(stat="identity")
     figure.show()
	def plotAverageLatency(self):
		"""Show a bar chart of the average latency of each entry in self.data.

		Entries are numbered from 1 in the order they appear in ``self.data``;
		each bar's height is the value returned by that entry's
		``averageLatency()``. The chart is displayed with ``show()`` and
		nothing is returned.
		"""
		latencies = []
		for entry in self.data:
			latencies.append(entry.averageLatency())

		frame = pandas.DataFrame({
			"device": range(1, len(latencies) + 1),
			"average": latencies,
		})

		figure = ggplot.ggplot(ggplot.aes(x="device", weight="average"), frame)
		figure = figure + ggplot.labs(title="Average Latency Per Device")
		figure = figure + ggplot.ylab("Average Latency (ms)")
		figure = figure + ggplot.xlab("Device Number")
		figure = figure + ggplot.geom_bar(stat="identity")
		figure.show()
Exemple #6
0
def plot_timeline(scenes):
    """Print a character-versus-scene timeline as a scatter plot.

    ``scenes`` must provide the columns 'scene', 'character_code' and a
    categorical 'character' column. Scenes go on the y axis and character
    codes on the x axis: per the original author's note, the Python ggplot
    port only lets the x ticks be labelled by character this way, because
    scale_x_continuous and scale_y_continuous behave slightly differently.
    """
    categories = scenes['character'].cat.categories
    tick_labels = categories.values.tolist()
    tick_positions = range(len(categories))

    timeline = gg.ggplot(gg.aes(y='scene', x='character_code'), data=scenes)
    timeline = timeline + gg.geom_point()
    timeline = timeline + gg.labs(x='Character', y='Scene')
    # One tick per category, labelled with the category name.
    timeline = timeline + gg.scale_x_continuous(labels=tick_labels,
                                                breaks=tick_positions)
    timeline = timeline + gg.theme(
        axis_text_x=gg.element_text(angle=30, hjust=1, size=10))
    print(timeline)
Exemple #7
0
    def boxplot(self, conn, column, table_chosen, title):
        """Print a box plot of one column of a table, plus timing output.

        ``dfile.single_selector`` is called with ``conn``, ``table_chosen``
        and ``column``; its result is used as the plot data with ``column``
        on the x axis and ``title`` as the plot title.

        Before the plot itself, the current time and the difference between
        it and ``a`` are printed. ``a`` is not defined in this method.
        NOTE(review): presumably a module-level start timestamp; confirm it
        is set before this method runs.
        """
        frame = dfile.single_selector(conn=conn,
                                      table=table_chosen,
                                      column=column)

        chart = ggplot(aes(x=column), data=frame)
        chart = chart + geom_boxplot()
        chart = chart + theme_gray()
        chart = chart + labs(title=title)

        finished_at = datetime.datetime.now()
        print(finished_at)
        print(finished_at - a)
        print(chart)
Exemple #8
0
    def line_chart(self, conn, column1, column2, table_chosen, title):
        """Print a line chart of two columns of a table, plus timing output.

        ``dfile.double_selector`` is called with ``conn``, ``table_chosen``
        and both column names; its result is used as the plot data with
        ``column1`` on the x axis, ``column2`` on the y axis and ``title`` as
        the plot title.

        Before the plot itself, the current time and the difference between
        it and ``a`` are printed. ``a`` is not defined in this method.
        NOTE(review): presumably a module-level start timestamp; confirm it
        is set before this method runs.
        """
        frame = dfile.double_selector(conn=conn,
                                      table=table_chosen,
                                      col1=column1,
                                      col2=column2)

        chart = ggplot(aes(y=column2, x=column1), data=frame)
        chart = chart + geom_line()
        chart = chart + theme_gray()
        chart = chart + labs(title=title)

        finished_at = datetime.datetime.now()
        print(finished_at)
        print(finished_at - a)
        print(chart)
Exemple #9
0
def plot_predictions(date_times, actual_values, predictions, match_id,
                     feature_set_in, filename):
    """
    Plot actual and predicted search volume over time and save the figure.

    Two series share the same timestamps: one labelled
    ``"actual" + match_id`` holding ``actual_values`` and one labelled
    ``"predictedby_" + str(feature_set_in) + match_id`` holding
    ``predictions``. They are stacked into one frame and drawn as two
    coloured lines.

    Args:
        date_times: Timestamps for both series; parsed with
            ``pd.to_datetime`` (unparseable values are coerced).
        actual_values: Observed values, one per timestamp.
        predictions: Predicted values, one per timestamp; materialised with
            ``list()``.
        match_id: String appended to both series labels.
        feature_set_in: Identifier of the feature set; converted with
            ``str()`` for the predicted series label.
        filename: Path the figure is saved to (16 x 8).

    Returns:
        None.
    """
    # Both series use the same x values, so parse the timestamps only once.
    parsed_times = pd.to_datetime(date_times,
                                  errors="coerce",
                                  infer_datetime_format=True)

    actual_df = pd.DataFrame()
    actual_df['date_time'] = parsed_times
    actual_df['search_vol'] = actual_values
    actual_df['match_id'] = "actual" + match_id

    predict_df = pd.DataFrame()
    predict_df['date_time'] = parsed_times
    predict_df['search_vol'] = list(predictions)
    predict_df['match_id'] = "predictedby_" + str(feature_set_in) + match_id

    # Long format: one row per (timestamp, series).
    plotting_df = pd.concat([actual_df, predict_df], axis=0, ignore_index=True)

    # build layers for plot
    p = ggplot(aes(x='date_time',
                   y='search_vol',
                   group="match_id",
                   color="match_id"),
               data=plotting_df)
    p += geom_line(size=2)

    # informative
    p += labs(x="time (gmt)", y="search volume (scaled to 100)")
    p += scale_x_date(labels=date_format("%H:%M:%S"), date_breaks="30 minutes")

    # visual
    t = theme_gray()
    t._rcParams['font.size'] = 8
    t._rcParams['font.family'] = 'monospace'
    p += t

    # done
    p.save(filename, width=16, height=8)
Exemple #10
0
def graph1(score_data):
    """ Average score as time goes on;
        Creates and returns graph 1, a line graph.

    Args:
        score_data: Sequence of rows. The first row holds the column names,
            the remaining rows hold the values. The date column is the one
            whose position is returned by ``find_time_stamp`` and its values
            are parsed with the format "%m/%d/%Y".

    Returns:
        The ggplot object: per-date mean score for every int64 column, and
        in a second facet the per-date mean over those per-question means.
    """

    date_column = score_data[0][find_time_stamp(score_data)]

    data = DataFrame(score_data[1:], columns=score_data[0])

    # Get all columns that are numerical questions so we know what to graph
    num_questions = data.select_dtypes(include=['int64']).columns.values

    # Melt data so that each question is in a separate row
    new_data = pd.melt(data,
                       id_vars=date_column,
                       value_vars=num_questions,
                       var_name="Question",
                       value_name="Score")

    # Convert date string into an actual date type
    new_data[date_column] = pd.to_datetime(new_data[date_column],
                                           format="%m/%d/%Y")

    # Group all rows with same date and question, and then take the average.
    new_data = new_data.groupby([date_column, 'Question']).mean().reset_index()
    # Facet label text is kept exactly as it was (it appears on the plot).
    new_data['All'] = "Indiviual Questions"

    # Average the per-question means per date. 'Score' is selected
    # explicitly: the frame also holds the string columns 'Question' and
    # 'All', and a blanket .mean() over those raises TypeError on pandas >= 2
    # (older versions silently dropped them, giving the same result as here).
    new_data2 = new_data.groupby(date_column)['Score'].mean().reset_index()
    new_data2['Question'] = "All Questions"
    new_data2['All'] = "Average of All Questions"

    new_data = pd.concat([new_data, new_data2])

    # The x axis is continuous, so the dates are passed as integers.
    new_data[date_column] = new_data[date_column].astype('int64')

    # Create time graph with separate lines for each question
    ret = ggplot.ggplot(ggplot.aes(x=date_column, y="Score", colour="Question"), new_data) +\
        ggplot.geom_point() +\
        ggplot.geom_line() +\
        ggplot.facet_grid("All") +\
        ggplot.scale_x_continuous(labels=[""], breaks=0) +\
        ggplot.labs(x="Time", y="Average Question Score") +\
        ggplot.ggtitle("Question Scores Over Time")
    return ret
Exemple #11
0
    def area_chart(self, conn, column1, column2, table_chosen, title):
        """Print an area chart for a table, prompting for the y range.

        ``dfile.double_selector`` is called with ``conn``, ``table_chosen``
        and both column names. The user is then asked on standard input for
        the minimum and maximum values to plot; both answers are converted
        with ``float``. Only ``column2`` is mapped (to the x axis); the two
        entered numbers are passed as ``ymin`` and ``ymax``.

        Before the plot itself, the current time and the difference between
        it and ``a`` are printed. ``a`` is not defined in this method.
        NOTE(review): presumably a module-level start timestamp; confirm it
        is set before this method runs.
        """
        frame = dfile.double_selector(conn=conn,
                                      table=table_chosen,
                                      col1=column1,
                                      col2=column2)

        lower = float(input("Enter the minimum value that should be plotted:  "))
        upper = float(input("Enter the maximum value that should be plotted:  "))

        chart = ggplot(aes(x=column2, ymin=lower, ymax=upper), data=frame)
        chart = chart + geom_area()
        chart = chart + theme_gray()
        chart = chart + labs(title=title)

        finished_at = datetime.datetime.now()
        print(finished_at)
        print(finished_at - a)
        print(chart)
Exemple #12
0
    }
    df = pd.DataFrame.from_dict(values_dict, orient='index')
    df = df.transpose()
    df = pd.melt(df)
    df['feature'] = feature
    dfs_to_concat.append(df)

# Stack every frame collected in dfs_to_concat into one long frame. The
# frames appended above are melted (columns 'variable' and 'value') and
# carry a 'feature' column.
master_df = pd.concat(dfs_to_concat)

# histogram: distribution of 'value', filled and outlined per 'variable',
# with one panel per 'feature'
p = ggplot(aes(x='value', fill='variable', color='variable'), data=master_df)
p += geom_histogram(bins=25, alpha=0.5)
# x axis restricted to [-25, 25]
p += scale_x_continuous(limits=(-25, 25))
p += ggtitle("sarimax coefficient magnitude distribution")
p += facet_wrap("feature", ncol=3, scales="free")
# single-space labels instead of the default axis titles
p += labs(x=" ", y=" ")

# visuals: gray theme with a 10pt monospace font
t = theme_gray()
t._rcParams['font.size'] = 10
t._rcParams['font.family'] = 'monospace'

p += t
p.save("arima_1/" + "histogram.png")

# boxplot: 'value' per 'variable', one panel per 'feature'; `p` is rebound,
# so the histogram object is no longer reachable from here on.
# NOTE(review): no theme or save call for this plot is visible in this part
# of the file -- confirm it is saved further down.
p = ggplot(aes(x='variable', y='value'), data=master_df)
p += geom_boxplot()
p += scale_y_continuous(limits=(-25, 25))
p += ggtitle("sarimax coefficient magnitudes")
p += facet_wrap("feature", ncol=3)
Exemple #13
0
	def point_chart(self, conn, column1, column2, table_chosen, title):
		"""Print a scatter plot of two columns of a table.

		``dfile.double_selector`` is called with ``conn``, ``table_chosen``
		and both column names; its result is used as the plot data with
		``column1`` on the x axis, ``column2`` on the y axis and ``title`` as
		the plot title. Nothing is returned; the plot object is passed to
		``print``.
		"""
		frame = dfile.double_selector(
			conn=conn,
			table=table_chosen,
			col1=column1,
			col2=column2,
		)

		chart = ggplot(aes(x=column1, y=column2), data=frame)
		chart = chart + geom_point()
		chart = chart + theme_gray()
		chart = chart + labs(title=title)
		print(chart)
Exemple #14
0
	def hist_chart(self, conn, column, table_chosen, title):
		"""Print a histogram of one column of a table.

		``dfile.single_selector`` is called with ``conn``, ``table_chosen``
		and ``column``; its result is used as the plot data with ``column``
		on the x axis and ``title`` as the plot title. Nothing is returned;
		the plot object is passed to ``print``.
		"""
		frame = dfile.single_selector(
			conn=conn,
			table=table_chosen,
			column=column,
		)

		chart = ggplot(aes(x=column), data=frame)
		chart = chart + geom_histogram()
		chart = chart + theme_gray()
		chart = chart + labs(title=title)
		print(chart)
                                             (vcfdf['TestBias']=='Pass') &
                                             (vcfdf['CHROM']==reference) ]['Pi']))
    return testwindows



# Generate new dataframe with analyses performed per window
# Everything below runs only when the graphics option compares equal to True.
# NOTE(review): the print statement on the next line is Python 2 syntax, and
# its message has no space between the window size and "sliding".
if options.graphics == True:
	print "Analysing by "+ str(windowsize) +"sliding windows and generating plots"
	# One row per distinct value of vcfdf['window'], in sorted order.
	# windowMax and windowPi each receive that same sorted list; they are
	# defined elsewhere in the file.
	windowed_df = pd.DataFrame({'window':sorted(list(set(vcfdf['window']))),
        	                   'MaxMinor':windowMax(sorted(list(set(vcfdf['window'])))),
                	           'Pi':windowPi(sorted(list(set(vcfdf['window']))))})


	# Scatter plot of MaxMinor per window; the title combines vcfoutput with
	# the number of entries in minorvar.
	p_MaxMinor = gg.ggplot(gg.aes('window', 'MaxMinor'),data=windowed_df) +gg.geom_point() +gg.theme_bw() +gg.labs(x="Genome Position (bp; windowsize="+ str(windowsize) +")", y="Minor Variant Frequency (%)") +gg.ggtitle(vcfoutput + "\n Valid Minor Variant Sites :" + str(len(minorvar))) 


	# Scatter plot of nucleotide diversity (Pi) along the genome. The y axis
	# runs from 0 to the largest Pi plus 0.001; the title reports gw_Pi
	# rounded to 6 places.
	# NOTE(review): p_pi is built but not saved in the lines visible here.
	p_pi =gg.ggplot(gg.aes('window', 'Pi'),data=windowed_df) +gg.geom_point() +gg.theme_bw() +gg.labs(x="Genome Position (bp; windowsize="+ str(windowsize) +")", y="Mean nucleotide diversity (" + u"\u03c0" +")") +gg.scale_y_continuous(expand=(0,0),limits=(0, windowed_df['Pi'].max(axis=0)+0.001)) +gg.ggtitle(vcfoutput + "\n Genome-wide Mean Nucleotide Diversity (" +u"\u03c0"+ ") :" +str(round(gw_Pi,6))) 


	# Faceted plot of both measures: melt to long format ('variable',
	# 'value') and draw one panel per variable with free y scales.
	# (Original author's note: y axis labels are not sorted yet.)
	windowed_df_melt = pd.melt(windowed_df, id_vars=['window'])
	p_combi = gg.ggplot(gg.aes('window', 'value',colour='variable'),data=windowed_df_melt)
	p_combi = p_combi + gg.geom_point(colour='variable') + gg.facet_grid('variable',scales='free_y')+gg.theme_bw() +gg.labs(x="Genome Position (bp; windowsize="+ str(windowsize) +")")

	# Write the combined and the MaxMinor plots to .png files whose names
	# start with vcfinput.
	p_combi.save(vcfinput + ".MinorVar_combo.png")
	p_MaxMinor.save(vcfinput + ".MinorVar.png")
Exemple #16
0
	def density_chart(self, conn, column, table_chosen, title):
		"""Print a density plot of one column of a table.

		``dfile.single_selector`` is called with ``conn``, ``table_chosen``
		and ``column``; its result is used as the plot data with ``column``
		on the x axis and ``title`` as the plot title. Nothing is returned;
		the plot object is passed to ``print``.
		"""
		frame = dfile.single_selector(
			conn=conn,
			table=table_chosen,
			column=column,
		)

		chart = ggplot(aes(x=column), data=frame)
		chart = chart + geom_density()
		chart = chart + theme_gray()
		chart = chart + labs(title=title)
		print(chart)
Exemple #17
0
def _read_track_file(filename):
    """Load one tracking file, trying each supported reader in turn.

    ``read_mtrackj_mdf`` is tried first. If it raises ``ValueError``,
    ``read_mtrack2`` is tried; if that fails with any ``Exception``,
    ``read_manual_track`` is used and its errors propagate to the caller.
    """
    try:
        return read_mtrackj_mdf(filename)
    except ValueError:
        pass
    try:
        return read_mtrack2(filename)
    except Exception:
        # Deliberately broad: any failure of the second reader means "try
        # the manual-track format instead".
        return read_manual_track(filename)


def main():
    """Command-line entry point for drawing displacement plots.

    For every input file the tracks are read, centered and written next to
    the input as ``<file>.centered``. Unless ``--no-plots`` is given, a
    displacement plot is saved as ``<file>.<imagetype>``. Afterwards
    per-object statistics for all files are written to standard output as
    CSV, preceded by three ``#`` header lines; with ``--summary`` a per-file
    summary is additionally written to the given path.
    """
    parser = argparse.ArgumentParser(description="Draws displacement plots.",
                                     formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--limits', type=int, help="Maximum extent of the axes")
    parser.add_argument('--no-plots', action='store_true', help="Don't save plots")
    parser.add_argument('--summary', help='Save summary stats by file')
    parser.add_argument('--imagetype', '-i', default='png', help="Extension to use for plots")
    parser.add_argument('--pixels-per-micron', '--pixels', '-p', default=1.51, type=float,
                        help="Pixels per µm (length scale of tracked images)")
    parser.add_argument('--minutes-per-frame', '--minutes', '-m', default=10, type=float,
                        help="Minutes between each frame of the tracked images")
    parser.add_argument('--plot-titles', type=argparse.FileType('r'),
                        help="CSV file with filename and title columns")
    parser.add_argument('--style', action='append', default=[],
                        choices=['theme-bw', 'no-terminal-dot'],
                        help='Change style options for the plot.')
    parser.add_argument('--tick-breaks', '--ticks', '-t', nargs=3, type=int,
                        metavar=('start', 'end', 'step'),
                        help="Beginning and end tick breaks on displacement plots")
    parser.add_argument('--plot-text', type=int, default=8,
                        help='Plot text size (pt)')
    parser.add_argument('--plot-height', type=float, default=1.81,
                        help='Plot height (in)')
    parser.add_argument('--plot-width', type=float, default=2.5,
                        help='Plot width (in)')
    parser.add_argument('infile', nargs='+', help="File(s) to process.")
    args = parser.parse_args()

    # Each selected style option becomes a True flag in the style mapping.
    style = {argument: True for argument in args.style}

    plot_titles = pd.read_csv(args.plot_titles, index_col="filename") if args.plot_titles else None

    all_dfs = []
    for filename in args.infile:
        df = _read_track_file(filename)
        centered = center(df)
        centered.to_csv(filename + '.centered')
        if not args.no_plots:
            g = displacement_plot(centered, limits=args.limits, style=style)
            g += gg.theme(axis_text=gg.element_text(size=args.plot_text))
            g += gg.labs(x='px', y='px')
            if args.tick_breaks:
                g += gg.scale_x_continuous(breaks=range(*args.tick_breaks))
                g += gg.scale_y_continuous(breaks=range(*args.tick_breaks))
            if plot_titles is not None and filename in plot_titles.index:
                # Label-based lookup; `.ix` no longer exists in pandas >= 1.0.
                g += gg.labs(title=plot_titles.loc[filename, 'title'])
            g.save('{}.{}'.format(filename, args.imagetype),
                   width=args.plot_width, height=args.plot_height)
        # Tag the rows so the files can be told apart after concatenation.
        centered['filename'] = filename
        all_dfs.append(centered)
    mega_df = pd.concat(all_dfs, ignore_index=True)

    def stats_for(group):
        """Apply stats() to one file's rows using the command-line scales."""
        return stats(group, length_scale=args.pixels_per_micron,
                     time_scale=args.minutes_per_frame)

    # sort=False keeps the files in the order they were given.
    obj_stats = (mega_df.groupby('filename', sort=False)
                        .apply(stats_for)
                        .reset_index())
    summary_by_file = obj_stats.groupby('filename').apply(summary)
    if args.summary:
        summary_by_file.to_csv(args.summary, index=False)
    print("# Produced by {} at {}".format(' '.join(sys.argv), time.ctime()))
    print("# {} pixels per micron, {} minutes per frame".
          format(args.pixels_per_micron, args.minutes_per_frame))
    print("# distance units are microns; velocity units are microns/hour")
    obj_stats.to_csv(sys.stdout, index=False)