Python geom_line 예제들, plotnine.geom_line Python 예제들

예제 #1

0

파일 보기

    def plot_replicate_groups(self):
        """Overlay the current-vs-time traces of both experiments in one plot.

        Tags ``self.data1df`` and ``self.data2df`` in place with an
        ``'Experiment'`` column (``'1'`` and ``'2'`` respectively), prints
        both frames and the plot, and returns the plot.

        Returns:
            The plotnine ``ggplot`` object holding one ``geom_line`` layer per
            experiment, coloured by the ``'Channel'`` column.
        """
        from plotnine import ggplot, aes, ylab, xlab, geom_line

        df1 = self.data1df
        df2 = self.data2df

        # DataFrame.insert raises ValueError when the column already exists,
        # so only tag each frame the first time this method is called.
        for frame, label in ((df1, '1'), (df2, '2')):
            if 'Experiment' not in frame.columns:
                frame.insert(0, 'Experiment', label)

        print(df1)
        print(df2)

        trace = aes('Time', 'Current', color='Channel')
        plot = (ggplot() + ylab(u'Current (μA)') + xlab('Time (seconds)') +
                geom_line(mapping=trace, data=df1) +
                geom_line(mapping=trace, data=df2))

        print(plot)
        return plot

예제 #2

0

파일 보기

def plot_predict(forecast):
    """Plot observations against the forecast and save the chart as a PNG.

    The chart shows column ``y`` as blue points and a blue line, ``yhat`` as
    a red line, and the ``yhat_lower``/``yhat_upper`` band as a ribbon, all
    against the datetime column ``ds``.

    Args:
        forecast: DataFrame providing the columns ``ds``, ``y``, ``yhat``,
            ``yhat_lower`` and ``yhat_upper``.

    Returns:
        The assembled ggplot object. As a side effect the chart is written to
        ``png/predict_pressure_chart.png`` next to this source file.
    """
    p = (ggplot(data=forecast, mapping=aes(x='ds', y='y')) +
         geom_point(colour='blue', alpha=0.3, na_rm=True) +
         geom_line(colour='blue', na_rm=True) + geom_line(
             data=forecast, mapping=aes(x='ds', y='yhat'), colour='red') +
         geom_ribbon(data=forecast,
                     mapping=aes(ymin='yhat_lower', ymax='yhat_upper'),
                     fill='blue',
                     alpha=0.1) +
         scale_x_datetime(breaks='1 days', date_labels='%y-%m-%d %H:%M') +
         xlab('Time') + ylab('Pressure') + theme_bw() +
         theme(axis_text_x=element_text(
             angle=45, hjust=1, face='bold', color='black'),
               axis_text_y=element_text(face='bold', colour='black')))

    out_dir = os.path.join(os.path.abspath(os.path.dirname(__file__)), 'png')
    # Make sure the target directory exists so saving does not fail on a
    # fresh checkout where 'png/' has not been created yet.
    os.makedirs(out_dir, exist_ok=True)

    p.save(filename='predict_pressure_chart.png',
           path=out_dir,
           width=8,
           height=6,
           units='in',
           dpi=326,
           verbose=False)
    return p

예제 #3

0

파일 보기

def plot_individual_returns(
        df_in: pd.DataFrame,
        max_episode: int,
        return_column: str = 'episode_return',
        colour_var: Optional[str] = None,
        yintercept: Optional[float] = None,
        sweep_vars: Optional[Sequence[str]] = None) -> gg.ggplot:
    """Plot individual learning curves: one curve per sweep setting.

    Args:
      df_in: source data; it is copied, so the caller's frame is not modified.
      max_episode: upper limit of the x-axis (the axis starts at 0).
      return_column: name of the column plotted on the y-axis.
      colour_var: optional column mapped to the line colour.
      yintercept: optional y value of a dashed horizontal reference line.
      sweep_vars: columns handed to the grouping and faceting helpers.

    Returns:
      Whatever `facet_sweep_plot` returns for the assembled plot.
    """
    df = df_in.copy()
    df['unique_group'] = _make_unique_group_col(df, sweep_vars)
    p = (gg.ggplot(df) +
         gg.aes(x='episode', y=return_column, group='unique_group') +
         gg.coord_cartesian(xlim=(0, max_episode)))
    if colour_var:
        p += gg.geom_line(gg.aes(colour=colour_var), size=1.1, alpha=0.75)
        # With at most five distinct values, treat the variable as discrete
        # and use the fixed five-colour palette.
        if len(df[colour_var].unique()) <= 5:
            df[colour_var] = df[colour_var].astype('category')
            p += gg.scale_colour_manual(values=FIVE_COLOURS)
    else:
        p += gg.geom_line(size=1.1, alpha=0.75, colour='#313695')
    # Compare against None explicitly: a reference line at 0.0 is a valid
    # request, but 0.0 is falsy and would be skipped by a truthiness test.
    if yintercept is not None:
        p += gg.geom_hline(yintercept=yintercept,
                           alpha=0.5,
                           size=2,
                           linetype='dashed')
    return facet_sweep_plot(p, sweep_vars, tall_plot=True)

예제 #4

0

파일 보기

def plot_train_test(ags):
    """Plot test-time compute against train-time compute, one line per elo.

    Solid lines show the observed `test_flops`, dashed lines the fitted
    `test_flops_hat`; both axes are log10-scaled and the fitted relation is
    annotated on the figure.
    """
    frontiers, model = data.train_test_model(data.train_test(ags))

    # One label row per elo: the first entry after sorting by train_flops.
    ordered = frontiers.sort_values('train_flops')
    labs = ordered.groupby('elo').first().reset_index()

    desc = f'log₁₀(test) = {model.params[1]:.1f} · log₁₀(train) + {model.params[2]:.1g} · elo + {model.params[0]:.0f}'

    fig = pn.ggplot(
        frontiers,
        pn.aes(x='train_flops', y='test_flops', color='elo', group='elo'))
    fig = fig + pn.geom_line(size=.5, show_legend=False)
    fig = fig + pn.geom_line(pn.aes(y='test_flops_hat'),
                             size=.25,
                             show_legend=False,
                             linetype='dashed')
    fig = fig + pn.geom_text(pn.aes(label='elo.astype(int)'),
                             labs,
                             show_legend=False,
                             size=6,
                             nudge_y=+.2)
    fig = fig + pn.scale_color_cmap(limits=(-1500, 0))
    fig = fig + pn.scale_x_continuous(trans='log10')
    fig = fig + pn.scale_y_continuous(trans='log10')
    fig = fig + pn.annotate(
        'text', 1.5e13, 5e9, label=desc, ha='left', size=6, family='serif')
    fig = fig + pn.labs(x='Train-time compute (FLOPS-seconds)',
                        y='Test-time compute (FLOPS-seconds)')
    return fig + plot.IEEE()

예제 #5

0

파일 보기

def _load_hist(entry, smooth=100, loss_padd=None):
    """Load one loss history; return ``(name, DataFrame)`` or ``(name, None)``.

    Kept at module level (rather than nested in `multiplot`) so that it can
    be pickled and shipped to worker processes.
    """
    name, file = entry
    try:
        hist = np.loadtxt(file)
    except OSError:
        warn = "{} could not be loaded with np.loadtxt({})."
        warnings.warn(warn.format(name, file), UserWarning)
        return name, None
    is_fine = np.isfinite(hist)
    if not is_fine.any():
        return name, None
    # Keep the original iteration index of every finite entry.
    iters = np.where(is_fine)[0]
    hist = hist[is_fine]
    lb = hist.min()
    if loss_padd is not None and lb < 0:
        # Shift the curve so that its minimum equals loss_padd.
        hist += loss_padd - lb
        lb = loss_padd
    ldf = pd.DataFrame({
        "loss": hist,
        "iteration": iters,
        "model": [name] * len(hist)
    })
    if smooth is not False:
        if lb > 0:
            # Strictly positive losses are smoothed in log space.
            ldf["sloss"] = np.exp(
                gaussian_filter1d(np.log(hist), sigma=smooth))
        else:
            ldf["sloss"] = gaussian_filter1d(hist, sigma=smooth)
    return name, ldf


def multiplot(files, smooth=100, alpha=0.6, loss_padd=None):
    """Plot the loss histories of several models on a log-scaled y-axis.

    Args:
        files: mapping ``{model name: path}``. A list/tuple of paths or a
            single path is also accepted; the path string is then used as
            the model name.
        smooth: sigma of the Gaussian filter used for the smoothed curve, or
            ``False`` to skip smoothing.
        alpha: opacity of the plotted lines.
        loss_padd: when not ``None`` and a history has a negative minimum,
            the history is shifted so that its minimum equals this value.

    Returns:
        ``(plot, df)``: the plotnine figure and the combined DataFrame with
        columns ``loss``, ``iteration``, ``model`` (and ``sloss`` when
        smoothing is enabled).
    """
    from functools import partial

    if not isinstance(files, dict):
        if isinstance(files, (list, tuple)):
            files = {str(f): f for f in files}
        else:
            files = {str(files): files}

    tasks = list(files.items())
    loader = partial(_load_hist, smooth=smooth, loss_padd=loss_padd)
    frames = []
    with mp.Pool() as pool:
        for _name, ldf in tqdm(pool.imap(loader, tasks),
                               total=len(tasks),
                               desc="models"):
            if ldf is not None:
                frames.append(ldf)
    # Concatenate once at the end; DataFrame.append is gone in pandas 2.x
    # and re-copied the accumulated frame on every call.
    df = pd.concat(frames) if frames else pd.DataFrame()

    pl = (pn.ggplot(df, pn.aes("iteration", "loss", color="model")) +
          pn.geom_line(alpha=alpha) + pn.scale_y_log10() + pn.theme_minimal())
    if smooth is not False:
        pl += pn.geom_line(pn.aes(y="sloss"), size=1, alpha=alpha)
    return pl, df

예제 #6

0

파일 보기

    def __call__(self, graph, *args, **kwargs):
        """Draw this object's data as a line on ``graph``.

        The data is exported into two vectors and then handed to whichever
        backend ``graph`` turns out to be: a HOC graph object, a plotly
        figure or the plotly module, a plotnine ggplot or the plotnine
        module, or any object exposing ``plot`` (e.g. pyplot / a matplotlib
        axis) or ``line`` (e.g. bokeh). Extra positional and keyword
        arguments are forwarded to the backend call (the HOC branch forwards
        positional arguments only).

        Raises:
            Exception: if ``graph`` is a matplotlib figure or any other
                unsupported type.
        """
        yvec = h.Vector()
        xvec = h.Vector()
        self._data.to_vector(yvec, xvec)

        def _plotly_trace(fig=None):
            # Imported lazily so plotly is only needed when actually used.
            import plotly.graph_objects as go

            if fig is None:
                fig = go.Figure()
            kwargs.setdefault("mode", "lines")
            return fig.add_trace(go.Scatter(*args, x=xvec, y=yvec, **kwargs))

        def _ggplot_layer():
            # Imported lazily so plotnine/pandas are only needed when used.
            import plotnine as p9
            import pandas as pd

            frame = pd.DataFrame({"x": xvec, "y": yvec})
            return p9.geom_line(*args,
                                data=frame,
                                mapping=p9.aes(x="x", y="y"),
                                **kwargs)

        if isinstance(graph, hoc.HocObject):
            return yvec.line(graph, xvec, *args)

        # Backends are recognised by name so that none of them has to be
        # imported just to perform the check.
        type_name = str(type(graph))
        if type_name == "<class 'plotly.graph_objs._figure.Figure'>":
            return _plotly_trace(graph)
        if type_name == "<class 'plotnine.ggplot.ggplot'>":
            return graph + _ggplot_layer()

        graph_repr = str(graph)
        if graph_repr.startswith("<module 'plotly' from "):
            return _plotly_trace()
        if graph_repr.startswith("<module 'plotnine' from "):
            return _ggplot_layer()

        if hasattr(graph, "plot"):
            # works with e.g. pyplot or a matplotlib axis
            return graph.plot(xvec, yvec, *args, **kwargs)
        if hasattr(graph, "line"):
            # works with e.g. bokeh
            return graph.line(xvec, yvec, *args, **kwargs)
        if type_name == "<class 'matplotlib.figure.Figure'>":
            raise Exception(
                "plot to a matplotlib axis not a matplotlib figure")
        raise Exception("Unable to plot to graphs of type {}".format(
            type(graph)))

예제 #7

0

파일 보기

def plot_optimal_model_size(ags):
    """Plot the selected model size against train-time compute per boardsize.

    For every boardsize and ten log-spaced compute budgets, the parameter
    count (``width**2 * depth``) of the highest-elo run within budget is
    recorded. A log-log OLS fit over all boardsizes is drawn as a dashed line
    and its equation is annotated on the figure.

    Args:
        ags: DataFrame with columns ``boardsize``, ``elo``, ``width``,
            ``depth`` and ``train_flops``.

    Returns:
        The assembled plotnine figure.
    """
    from statsmodels.formula import api as smf

    results = {}
    for b, g in ags.groupby('boardsize'):
        ordered = g.sort_values('elo').copy()
        ordered['params'] = g.width**2 * g.depth

        left = np.log10(g.train_flops.min())
        right = np.log10(g.train_flops.max())
        # Skip the first grid point; take the highest-elo run in each budget.
        for f in np.linspace(left, right, 11)[1:]:
            subset = ordered[ordered.train_flops <= 10**f]
            results[b, 10**f] = subset.params.iloc[-1]
    df = pd.Series(results).reset_index()
    df.columns = ['boardsize', 'approx_flops', 'params']

    model = smf.ols('np.log10(params) ~ np.log10(approx_flops) + 1', df).fit()
    # The formula API returns a label-indexed Series; read the coefficients
    # by position with .iloc instead of the deprecated integer fallback.
    intercept = model.params.iloc[0]
    slope = model.params.iloc[1]

    left, right = np.log10(df.approx_flops.min()), np.log10(
        df.approx_flops.max())
    preds = pd.DataFrame({'approx_flops': 10**np.linspace(left, right, 21)})
    preds['params'] = 10**model.predict(preds)

    # Last (largest-budget) row per boardsize: used as-is for the end-point
    # markers and, with an adjusted y position, for the text labels.
    points = df.sort_values('approx_flops').groupby(
        'boardsize').last().reset_index()
    labs = points.copy()
    labs['params'] = labs.apply(
        lambda r: df[df.approx_flops <= r.approx_flops].params.max(), axis=1)

    desc = f'log₁₀(params) = {slope:.2f} · log₁₀(compute) − {-intercept:.1f}'

    return (
        pn.ggplot(df, pn.aes(x='approx_flops', y='params')) +
        pn.geom_line(pn.aes(color='factor(boardsize)', group='boardsize'),
                     show_legend=False) +
        pn.geom_line(data=preds, linetype='dashed', size=.25) +
        pn.geom_point(pn.aes(color='factor(boardsize)', group='boardsize'),
                      data=points,
                      size=.5,
                      show_legend=False) +
        pn.geom_text(pn.aes(
            color='factor(boardsize)', group='boardsize', label='boardsize'),
                     data=labs,
                     nudge_y=+.5,
                     show_legend=False,
                     size=6) +
        pn.annotate(
            'text',
            1e9, 2e7, label=desc, ha='left', size=6, family='serif') +
        pn.scale_x_continuous(trans='log10') +
        pn.scale_y_continuous(trans='log10') + pn.scale_color_hue(l=.4) +
        pn.labs(x='Train-time compute (FLOPS-seconds)',
                y='Optimal model size (params)') + plot.IEEE())

예제 #8

0

파일 보기

파일: graphics.py 프로젝트: Nico011/proyecto-trigo2

def ranges_graphics(target, signatures_long, ranges, hydro_state, year,
                    algorithm):
    """Plot the signatures with the selected wavelength ranges shaded.

    Args:
        target: target name, used in the title and the output file name.
        signatures_long: long-format DataFrame with columns ``wavelength``,
            ``value`` and ``variable``. Its ``wavelength`` column is
            converted to numeric in place.
        ranges: sequence of ranges whose first two items are the lower and
            upper wavelength bound. When ``None`` nothing is plotted.
        hydro_state: label used in the title, subtitle and file name.
        year: label used in the title and file name.
        algorithm: short algorithm key (``"boruta"``, ``"lasso"``,
            ``"kbestcorr"``, ``"kbestmi"`` or ``"ga"``); unknown keys give an
            empty display name.

    Returns:
        None. The plot is printed and saved under ``PLOT_DIR``.
    """
    if ranges is None:
        return

    display_names = {
        "boruta": "Boruta",
        "lasso": "LASSO",
        "kbestcorr": "SelectKBest (correlation)",
        "kbestmi": "SelectKBest (mutual information)",
        "ga": "Genetic Algorithm",
    }
    alg = display_names.get(algorithm, '')

    signatures_long["wavelength"] = pandas.to_numeric(
        signatures_long["wavelength"])

    y_max = signatures_long["value"].max()

    graph_signatures = ggplot(signatures_long) \
        + theme(legend_position = "none") \
        + aes(x = "wavelength", y = "value", color = "variable") \
        + labs(
            x = "Wavelength (nm)",
            y = "Reflectance (%)",
            title = f"Ranges in signature: {target} {hydro_state} {alg}, {year}.",
            subtitle = f"{alg}, {hydro_state} set."
            )

    # Shade every range first so the signature lines are drawn on top.
    for bounds in ranges:
        graph_signatures = graph_signatures + geom_rect(
            aes(xmin=bounds[0], xmax=bounds[1], ymin=0.0, ymax=y_max),
            fill="steelblue",
            alpha=0.1,
            color=None)
    graph_signatures = graph_signatures + geom_line()

    print(graph_signatures)
    graph_signatures.save(filename=os.path.join(
        PLOT_DIR, f"{hydro_state}-{year}-{algorithm}-ranges-{target}"))

    return

예제 #9

0

파일 보기

def plot_convergence(pile):
    """Plot the sample mean and its lower/upper bounds against sample size.

    Sample sizes run from 100 up to a tenth of ``len(pile)`` in steps of
    ``utils.bills_per_pound``; each size contributes one row produced by
    ``get_sample_dist``.
    """
    stops = range(100, int(len(pile) / 10), utils.bills_per_pound)
    dist_stats = pd.DataFrame([get_sample_dist(pile, n) for n in stops])

    def bound_line(column):
        # Dotted orange line for one of the two bounds.
        return pn.geom_line(pn.aes(x='size', y=column),
                            color='#FF5500',
                            linetype='dotted')

    chart = pn.ggplot(dist_stats) + pn.geom_line(pn.aes(x='size', y='mean'))
    chart = chart + bound_line('lower')
    chart = chart + bound_line('upper')
    chart = chart + pn.scale_x_continuous(breaks=stops)
    return chart + pn.theme(axis_text_x=pn.element_text(angle=270, hjust=1))

예제 #10

0

파일 보기

파일: graph_utils.py 프로젝트: holdenlee/Online_Sampling

def plot_results(results):
    """Print two per-agent charts: cumulative regret and time, both vs. ``t``.

    Only ``results[0]`` is used; it is turned into a DataFrame that must
    provide the columns ``agent_id``, ``t``, ``cum_regret`` and ``time``.
    """
    frame = pd.DataFrame(results[0])
    # Cast to categorical so each agent gets its own discrete colour, see
    # https://stackoverflow.com/questions/39092067/pandas-dataframe-convert-column-type-to-string-or-categorical
    frame['agent_id'] = frame.agent_id.astype('category')

    for y_column in ('cum_regret', 'time'):
        figure = (gg.ggplot(frame) +
                  gg.aes('t', y_column, color='agent_id', group='agent_id') +
                  gg.geom_point() + gg.geom_line())
        print(figure)

예제 #11

0

파일 보기

def estimate_cutoffs_plot(output_file,
                          df_plt,
                          df_cell_estimate_cutoff,
                          df_fit=None,
                          scale_x_log10=False,
                          save_plot=True):
    """Plot UMI counts by sorted cell barcodes.

    Args:
        output_file: output path without extension; ``.png`` is appended.
        df_plt: DataFrame with columns ``barcode`` and ``umi_counts``. It is
            not modified.
        df_cell_estimate_cutoff: DataFrame with columns ``n_cells`` and
            ``method``; one vertical line is drawn per row.
        df_fit: optional DataFrame with columns ``x`` and ``y`` holding a
            fitted curve that is drawn in yellow.
        scale_x_log10: use a log10 x-axis when true.
        save_plot: write the figure to disk when true.

    Returns:
        The plotnine figure.
    """
    lowest = df_plt['umi_counts'].min()
    if lowest <= 0:
        # The y-axis is log-scaled, so shift the counts until the smallest
        # one equals 1. assign() returns a new frame, leaving the caller's
        # data untouched.
        df_plt = df_plt.assign(umi_counts=df_plt['umi_counts'] + (1 - lowest))
    gplt = plt9.ggplot()
    gplt = gplt + plt9.theme_bw()
    if len(df_plt) <= 50000:
        gplt = gplt + plt9.geom_point(mapping=plt9.aes(x='barcode',
                                                       y='umi_counts'),
                                      data=df_plt,
                                      alpha=0.05,
                                      size=0.1)
    else:
        # Larger inputs are drawn as a single line instead of points.
        gplt = gplt + plt9.geom_line(mapping=plt9.aes(x='barcode',
                                                      y='umi_counts'),
                                     data=df_plt,
                                     alpha=0.25,
                                     size=0.75,
                                     color='black')
    gplt = gplt + plt9.geom_vline(mapping=plt9.aes(xintercept='n_cells',
                                                   color='method'),
                                  data=df_cell_estimate_cutoff,
                                  alpha=0.75,
                                  linetype='dashdot')
    gplt = gplt + plt9.scale_color_brewer(palette='Dark2', type='qual')
    if scale_x_log10:
        gplt = gplt + plt9.scale_x_continuous(
            trans='log10', labels=comma_labels, minor_breaks=0)
    else:
        gplt = gplt + plt9.scale_x_continuous(labels=comma_labels,
                                              minor_breaks=0)
    gplt = gplt + plt9.scale_y_continuous(
        trans='log10', labels=comma_labels, minor_breaks=0)
    gplt = gplt + plt9.labs(title='',
                            y='UMI counts',
                            x='Barcode index, sorted by UMI count',
                            color='Cutoff')
    # Add the fit of the droplet utils model. Test against None: the truth
    # value of a DataFrame is ambiguous and raises ValueError.
    if df_fit is not None:
        gplt = gplt + plt9.geom_line(mapping=plt9.aes(x='x', y='y'),
                                     data=df_fit,
                                     alpha=1,
                                     color='yellow')
    if save_plot:
        gplt.save('{}.png'.format(output_file), dpi=300, width=5, height=4)
    return gplt

예제 #12

0

파일 보기

    def plot_means(self):
        """Plot the mean of both experiments against time.

        Reads ``self.t_test_df`` (columns ``Time``, ``Mean 1``, ``Mean 2``),
        prints the figure and returns it.
        """
        from plotnine import ggplot, aes, ylab, xlab, geom_line

        frame = self.t_test_df
        layers = [
            ylab(u'Average Current (μA)'),
            xlab('Time (seconds)'),
            geom_line(),
            geom_line(aes('Time', 'Mean 2')),
        ]

        plot = ggplot(frame, aes('Time', 'Mean 1'))
        for layer in layers:
            plot = plot + layer

        print(plot)
        return plot

예제 #13

0

파일 보기

    def plot_anova(self):
        """Plot the F-value (default colour) and the P-value (red) over time.

        Reads ``self.anova_df`` (columns ``Time``, ``F-Value``, ``P-Value``),
        prints the figure and returns it.
        """
        from plotnine import ggplot, aes, ylab, xlab, geom_line

        frame = self.anova_df
        layers = [
            ylab('F-Value'),
            xlab('Time (seconds)'),
            geom_line(),
            geom_line(aes('Time', 'P-Value'), color='red'),
        ]

        plot = ggplot(frame, aes('Time', 'F-Value'))
        for layer in layers:
            plot = plot + layer

        print(plot)
        return plot

예제 #14

0

파일 보기

    def plot_standard_deviations(self):
        """Plot the standard deviation of both experiments against time.

        Reads ``self.t_test_df`` (columns ``Time``, ``Standard Deviation 1``,
        ``Standard Deviation 2``), prints the figure and returns it.
        """
        from plotnine import ggplot, aes, ylab, xlab, geom_line

        frame = self.t_test_df
        layers = [
            ylab('Standard Deviation'),
            xlab('Time (seconds)'),
            geom_line(),
            geom_line(aes('Time', 'Standard Deviation 2')),
        ]

        plot = ggplot(frame, aes('Time', 'Standard Deviation 1'))
        for layer in layers:
            plot = plot + layer

        print(plot)
        return plot

예제 #15

0

파일 보기

    def plot_t_test(self):
        """Plot the p-value over time with the significance column in red.

        Reads ``self.t_test_df`` (columns ``Time``, ``P Value``,
        ``Significance``), prints the figure and returns it.
        """
        from plotnine import ggplot, aes, ylab, xlab, geom_line, scale_y_continuous

        frame = self.t_test_df
        # 21 evenly spaced y-axis breaks between 0 and 5e-7.
        y_breaks = np.linspace(0, 0.0000005, 21)
        layers = [
            ylab('P Value'),
            xlab('Time (seconds)'),
            geom_line(),
            scale_y_continuous(breaks=y_breaks),
            geom_line(aes('Time', 'Significance'), color='red'),
        ]

        plot = ggplot(frame, aes('Time', 'P Value'))
        for layer in layers:
            plot = plot + layer

        print(plot)
        return plot

예제 #16

0

파일 보기

    def plot_time_curve_with_threshold(self):
        """Show the bacteria and actin counts over time in ``out_plot2``.

        ``self.aggregated`` is melted to long format (one row per hour and
        object type), plotted as points plus lines, stored in
        ``self.time_curve_fig`` and displayed in the output widget.
        """
        long_counts = self.aggregated.melt(
            id_vars='hour',
            value_vars=['number_bacteria', 'number_actin'],
            value_name='counts',
            var_name='Object')

        palette = self.create_color_list()

        figure = ggplot(long_counts, aes("hour", "counts", color="Object"))
        figure = figure + geom_point() + geom_line()
        figure = figure + labels.xlab("Time [hours]")
        figure = figure + labels.ylab("Average number of objects/nuclei")
        figure = figure + pn.scale_colour_manual(
            values=palette,
            labels=list(self.sel_channel_time.value),
            name="")
        figure = figure + pn.labs(colour="")

        # One x-axis tick per distinct hour present in the results.
        hours = np.sort(self.result.hour.unique())
        figure = figure + pn.scale_x_continuous(
            breaks=hours, labels=list(hours.astype(str)))

        self.time_curve_fig = figure

        self.out_plot2.clear_output()
        with self.out_plot2:
            display(figure)

예제 #17

0

파일 보기

    def plot_time_curve_by_channel(self, b=None):
        """Callback to plot the time curve of number of bacteria/nuclei for
        each selected channel. Called by plot_time_curve_button.

        Args:
            b: unused; present so the method can be used as a button
                callback.

        The figure is stored in ``self.time_curve_fig`` and displayed in
        ``self.out_plot2``. When no channel is selected, a hint is printed
        and nothing is plotted.
        """
        if self.aggregated is None:
            self.data_aggregation()

        selected = self.sel_channel_time.value
        if len(selected) == 0:
            print("Select at least one channel")
            return

        subset = self.aggregated[self.aggregated.channel.isin(
            selected)].copy(deep=True)
        # Replace the column outright: assigning through .loc[:, "channel"]
        # writes into the existing column and keeps its old dtype, which
        # would drop the ordered categorical (and with it the channel order).
        subset["channel"] = subset.channel.astype(
            pd.CategoricalDtype(selected, ordered=True))

        colors = self.create_color_list()
        # One x-axis tick per distinct hour present in the results.
        hours = np.sort(self.result.hour.unique())

        myfig = (
            ggplot(subset, aes("hour", "normalized", color="channel")) +
            geom_point() + geom_line() + labels.xlab("Time [hours]") +
            labels.ylab("Average number of bacteria/nuclei") +
            pn.scale_colour_manual(
                values=colors,
                labels=list(selected),
                name="") + pn.labs(colour="") + pn.scale_x_continuous(
                    breaks=hours,
                    labels=list(hours.astype(str))))

        self.time_curve_fig = myfig

        self.out_plot2.clear_output()
        with self.out_plot2:
            display(myfig)

예제 #18

0

파일 보기

def plot_seismograms(device_id):
    """Print one seismogram per axis (x, y, z) for the given device.

    Each plot draws the axis values against ``sample_dt`` as a blue line and
    marks the earthquake time with a crimson vertical line. The earthquake
    record ``eq`` and the sample table ``records_df`` are read from the
    enclosing module scope.

    Args:
        device_id: value matched against the ``device_id`` column of
            ``records_df``.
    """
    # Get earthquake date as datetime.datetime object
    eq_dt = AwsDataClient._get_dt_from_str(eq['date_utc'])
    print(eq_dt)

    # The device filter does not depend on the axis, so apply it only once.
    device_records = records_df[records_df['device_id'] == device_id]

    plots = [
        pn.ggplot(device_records, pn.aes('sample_dt', axis)) +
        pn.geom_line(color='blue') +
        pn.scales.scale_x_datetime(date_breaks='1 minute',
                                   date_labels='%H:%M:%S') +
        pn.geoms.geom_vline(xintercept=eq_dt, color='crimson') +
        pn.labels.ggtitle('device {}, axis {}'.format(device_id, axis))
        for axis in ('x', 'y', 'z')
    ]

    # Now output the plots
    for p in plots:
        print(p)

예제 #19

0

파일 보기

def plot_fitting(x, y, resonance_frequency, parameter):
    """Plot the measured phase response together with its fitted curve.

    The fit is evaluated with ``fit_function`` at the given frequencies; the
    measurement is drawn as points and the fit as a red line.

    Note:
        The ``name`` attributes of ``x`` and ``y`` are overwritten.

    Args:
        x (`float array`):                    X coordinates (frequency in kHz)
        y (`float array`):                    Y coordinates (phase in radians)
        resonance_frequency (`float array`):  Resonance frequency given by the fit of x and y
        parameter (`float array`):            Remaining fit parameters; the first three entries are
                                              passed to the fit function

    Returns:
        p (`ggplot object`):                  The assembled ggplot object
    """
    fitted = fit_function(x, resonance_frequency, parameter[0], parameter[1],
                          parameter[2])

    # The series names become the column names and thereby the axis labels.
    fitted.name = 'Phase fit'
    x.name = 'Frequency (kHz)'
    y.name = 'Phase (rad)'
    data = concat([x, y, fitted], axis=1)
    columns = list(data.columns)
    freq_col, phase_col, fit_col = columns[0], columns[1], columns[2]

    look = theme(figure_size=(15, 7),
                 strip_background=element_rect(fill='white'),
                 axis_line_x=element_line(color='black'),
                 axis_line_y=element_line(color='black'),
                 legend_key=element_rect(fill='white', color='white'))

    # Plot data
    p = ggplot(aes(x=freq_col, y=phase_col), data=data)
    p = p + geom_point()
    p = p + geom_line(aes(x=freq_col, y=fit_col), color='red', size=0.5)
    p = p + theme_seaborn(style='ticks', context='talk', font_scale=0.75)
    p = p + look
    return p

예제 #20

0

파일 보기

def plot_mass(calculated_cell_mass, plot_every_nth_point):
    """Plot the calculated cell mass and its rolling mean over time.

    Note:
        The first column of the passed data frame is renamed to
        ``'Time (h)'`` in place.

    Args:
        calculated_cell_mass (`pandas data frame`):  Data frame whose first three columns are time,
                                                     calculated cell mass and rolling-mean cell mass
        plot_every_nth_point (`int`):                Step between plotted rows; 1 plots every row

    Returns:
        p (`ggplot object`):                         The assembled ggplot object

    """
    names = list(calculated_cell_mass.columns)
    names[0] = 'Time (h)'
    calculated_cell_mass.columns = names
    time_col, mass_col, mean_col = names[0], names[1], names[2]

    # Thin the data out to every n-th row before plotting.
    thinned = calculated_cell_mass.iloc[::plot_every_nth_point, :]

    # Plot data: raw mass as faint points, rolling mean as a red line.
    p = ggplot(aes(x=time_col, y=mass_col), data=thinned)
    p = p + geom_point(alpha=0.1)
    p = p + geom_line(aes(y=mean_col), color='red')
    p = p + theme_bw()
    return p

예제 #21

0

파일 보기

파일: propagating-signal.py 프로젝트: ramcdougal/634-parallel-exercises

def plot_it(t, concentration):
    """Build a line layer for one concentration profile.

    Args:
        t: time stamp; stored as the label ``"t=<t>"`` in column ``t``.
        concentration: sequence of concentration values, one per position.

    Returns:
        A ``geom_line`` layer carrying its own data (columns ``x``, ``t``,
        ``concentration``). It defines no mapping itself, so the aesthetics
        must come from the plot it is added to.
    """
    data = pd.DataFrame({
        # Index the positions by the data actually passed in, rather than by
        # a name ``y`` that is neither a parameter nor a local.
        "x": range(len(concentration)),
        "t": f"t={t}",
        "concentration": concentration
    })
    return p9.geom_line(data=data, size=1)

예제 #22

0

파일 보기

파일: plots.py 프로젝트: Robinqiuau/asxtrade

def plot_key_stock_indicators(df, stock):
    """Plot key indicators of a stock over time, one facet per indicator.

    Args:
        df: DataFrame indexed by fetch date with (at least) the columns
            ``pe``, ``eps``, ``annual_dividend_yield``, ``volume`` and
            ``last_price``. It is not modified.
        stock: not used by this function.

    Returns:
        Whatever ``plot_as_inline_html_data`` returns for the figure.
    """
    assert isinstance(df, pd.DataFrame)
    assert all(column in df.columns
               for column in ('eps', 'pe', 'annual_dividend_yield'))

    # Work on a copy: rescaling 'volume' in the caller's frame would compound
    # the scaling every time the same frame is plotted again.
    df = df.copy()
    df['volume'] = df['last_price'] * df[
        'volume'] / 1000000  # again, express as $(M)
    df['fetch_date'] = df.index
    plot_df = pd.melt(df,
                      id_vars='fetch_date',
                      value_vars=[
                          'pe', 'eps', 'annual_dividend_yield', 'volume',
                          'last_price'
                      ],
                      var_name='indicator',
                      value_name='value')
    plot_df['value'] = pd.to_numeric(plot_df['value'])
    plot_df['fetch_date'] = pd.to_datetime(plot_df['fetch_date'])

    plot = (
        p9.ggplot(plot_df, p9.aes('fetch_date', 'value', color='indicator')) +
        p9.geom_line(size=1.5, show_legend=False) +
        p9.facet_wrap('~ indicator', nrow=6, ncol=1, scales='free_y') +
        p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                 figure_size=(8, 7)) + p9.xlab("") + p9.ylab(""))
    return plot_as_inline_html_data(plot)

예제 #23

0

파일 보기

파일: metrices.py 프로젝트: fan-ADN/ml-shared

def plot_ROC(label_list, pred_list, names=None, **args):
    """
    Plot several ROC curves in one figure.
    :param label_list: sequence of true-label lists, e.g. [(y1, y2, ...), (y1, y2, ...)];
            each entry corresponds to the entry of pred_list at the same position
    :param pred_list: sequence of predicted-probability lists, same length as label_list
    :param names: model names, either None or a sequence of the same length. When None,
            two label sets are named ('train', 'test'), three are named
            ('train', 'valid', 'test'), and any other count gets running numbers.
    :param args: keyword arguments forwarded to sklearn.metrics.roc_curve
    :return: plotnine object
    """
    if names is None:
        default_names = {2: ('train', 'test'), 3: ('train', 'valid', 'test')}
        n_sets = len(label_list)
        names = default_names[n_sets] if n_sets in default_names else list(
            range(n_sets))

    curves = [
        roc_curve(labels, preds, **args)
        for labels, preds in zip(label_list, pred_list)
    ]
    # Transpose the per-model (fpr, tpr, thresholds) triples; thresholds are
    # not needed for the plot.
    fpr_parts, tpr_parts, _ = zip(*curves)
    fpr = list(chain.from_iterable(fpr_parts))
    tpr = list(chain.from_iterable(tpr_parts))
    # Repeat every model name once per point on its curve.
    models = [
        name for name, curve in zip(names, curves)
        for _ in range(len(curve[0]))
    ]
    d_roc = pd.DataFrame({'fpr': fpr, 'tpr': tpr, 'model': models})

    figure = ggplot(d_roc, aes(x='fpr', y='tpr', group='model', color='model'))
    # Dotted grey diagonal as the chance-level reference.
    figure = figure + geom_segment(
        x=0, y=0, xend=1, yend=1, linetype=':', color='grey')
    figure = figure + geom_line()
    figure = figure + scale_color_discrete(breaks=names)
    figure = figure + labs(x='false positive rate', y='true positive rate')
    figure = figure + coord_equal(ratio=1, xlim=[0, 1], ylim=[0, 1])
    return figure + theme_classic() + theme(figure_size=(4, 4))

예제 #24

0

파일 보기

파일: plot.py 프로젝트: zuzannna/ts_tutorial

def customized_algorithm_plot(experiment_name='finite_simple_sanity',
                              data_path=_DEFAULT_DATA_PATH):
    """Plot the mean instantaneous regret of every agent per timestep.

    Args:
      experiment_name: string = name of experiment config.
      data_path: string = where to look for the files.

    Returns:
      p: ggplot plot
    """
    raw = load_data(experiment_name, data_path)

    # Average the instantaneous regret per (timestep, agent) pair.
    grouped = raw.groupby(['t', 'agent'])
    plt_df = grouped.agg({'instant_regret': np.mean}).reset_index()
    plt_df['agent_new_name'] = plt_df.agent.apply(rename_agent)

    custom_labels = ['Laplace TS', 'Langevin TS', 'TS', 'bootstrap TS']
    custom_colors = ["#E41A1C", "#377EB8", "#4DAF4A", "#984EA3"]
    colour_scale = gg.scale_color_manual(name='agent',
                                         labels=custom_labels,
                                         values=custom_colors)

    p = gg.ggplot(plt_df)
    p = p + gg.aes('t', 'instant_regret', colour='agent_new_name')
    p = p + gg.geom_line(size=1.25, alpha=0.75)
    p = p + gg.xlab('time period (t)')
    p = p + gg.ylab('per-period regret')
    p = p + colour_scale
    return p

예제 #25

0

파일 보기

파일: plots.py 프로젝트: mappin/asxtrade

def plot_series(
        df,
        x=None,
        y=None,
        tick_text_size=6,
        line_size=1.5,
        y_axis_label="Point score",
        x_axis_label="",
        color="stock",
        use_smooth_line=False
):
    """Plot column ``y`` against column ``x`` as a line or smoothed line.

    Args:
        df: non-empty data to plot.
        x: name of the x column (required despite the ``None`` default).
        y: name of the y column (required despite the ``None`` default).
        tick_text_size: positive int font size of the axis tick labels.
        line_size: positive line width.
        y_axis_label: y-axis title.
        x_axis_label: x-axis title.
        color: column mapped to colour; a falsy value disables the mapping.
        use_smooth_line: draw ``geom_smooth`` instead of ``geom_line``.

    Returns:
        Whatever ``plot_as_inline_html_data`` returns for the figure.
    """
    assert len(df) > 0
    assert len(x) > 0 and len(y) > 0
    assert line_size > 0.0
    assert isinstance(tick_text_size, int) and tick_text_size > 0
    assert y_axis_label is not None
    assert x_axis_label is not None

    # Only map colour when a colour column was requested.
    aesthetics = {'x': x, 'y': y, **({'color': color} if color else {})}

    look = p9.theme(
        axis_text_x=p9.element_text(angle=30, size=tick_text_size),
        axis_text_y=p9.element_text(size=tick_text_size),
        legend_position="none",
    )
    plot = (p9.ggplot(df, p9.aes(**aesthetics)) +
            p9.labs(x=x_axis_label, y=y_axis_label) + look)

    line_geom = p9.geom_smooth if use_smooth_line else p9.geom_line
    plot += line_geom(size=line_size)
    return plot_as_inline_html_data(plot)

예제 #26

0

파일 보기

파일: simulate_mtg_rounds.py 프로젝트: brschneedecker/mtg-rules-change-medium

def go_to_time_plot3(large_go_to_time_probs_new: list,
                     large_go_to_time_probs_old: list,
                     average_minutes_per_game_values: list):
    """ Plot go-to-time probability, old vs. new rules, no blowouts, 300 matches/round

    The figure is saved to 'figures/go_to_time_300_matches_prob_plot.png';
    nothing is returned.
    """
    minutes = average_minutes_per_game_values
    n_points = len(minutes)

    # Long format: the 'New' rows come first, followed by the 'Old' rows.
    large_time_prob_data = pd.DataFrame({
        'Average minutes per game':
        np.concatenate([minutes, minutes]),
        'P(Go to time)':
        np.concatenate(
            [large_go_to_time_probs_new, large_go_to_time_probs_old]),
        'Rules':
        np.repeat(['New', 'Old'], n_points),
    })

    mapping = plt.aes(x='Average minutes per game',
                      y='P(Go to time)',
                      color='Rules')
    figure = plt.ggplot(large_time_prob_data, mapping)
    figure = figure + plt.geom_line() + plt.geom_point()
    figure = figure + plt.ylim([0, 1]) + plt.theme_classic()
    figure.save(filename='figures/go_to_time_300_matches_prob_plot.png')

예제 #27

0

파일 보기

파일: main.py 프로젝트: unhoang/when_the_bootstrap_breaks

def plot_ci_eval(df):
    """Plot the bootstrap, z-test and t-test columns against sample size.

    The three columns are melted into long format so each method becomes one
    coloured line; the x-axis is log10-scaled and the y-axis fixed to [0, 1].
    """
    methods = ['bootstrap', 'ztest', 'ttest']
    molten = pd.melt(df, id_vars=['sample_size'], value_vars=methods)

    figure = ggplot(molten,
                    aes(x='sample_size', y='value', color='variable'))
    figure = figure + geom_line()
    figure = figure + scale_x_log10()
    return figure + ylim(0, 1)

예제 #28

0

파일 보기

파일: analysis.py 프로젝트: stjordanis/bsuite

def _base_scaling(plt_df: pd.DataFrame,
                  sweep_vars: Optional[Sequence[str]] = None,
                  with_baseline: bool = True) -> gg.ggplot:
    """Build the shared episode-vs-size scatter used by the deep sea scaling plots.

    Args:
      plt_df: data to plot; the code reads its 'size', 'episode', 'finished'
        and 'solved' columns.
      sweep_vars: forwarded unchanged to `_make_baseline`.
      with_baseline: when True, a dashed black baseline line is overlaid.

    Returns:
      The assembled plot.
    """
    point_kwargs = dict(size=3, alpha=0.75)

    p = (gg.ggplot(plt_df) + gg.aes(x='size', y='episode'))

    # The shape aesthetic is only needed when some rows are unfinished.
    if np.all(plt_df.finished):
        p += gg.geom_point(gg.aes(colour='solved'), **point_kwargs)
    else:
        p += gg.geom_point(gg.aes(shape='finished', colour='solved'),
                           **point_kwargs)
        p += gg.scale_shape_manual(values=['x', 'o'])

    # A single colour (blue) when everything is solved, otherwise [red, blue].
    if np.all(plt_df.solved):
        palette = ['#313695']
    else:
        palette = ['#d73027', '#313695']
    p += gg.scale_colour_manual(values=palette)

    if not with_baseline:
        return p

    baseline_df = _make_baseline(plt_df, sweep_vars)
    p += gg.geom_line(data=baseline_df,
                      colour='black',
                      linetype='dashed',
                      alpha=0.4,
                      size=1.5)
    return p

예제 #29

0

파일 보기

파일: plots.py 프로젝트: mappin/asxtrade

def plot_fundamentals(df, stock) -> str:
    """Plot each fundamental indicator over time as its own facet.

    The frame's index is used as the fetch date. Traded volume is converted
    to a dollar value in millions (last_price * volume / 1e6) and market cap
    and number of shares are divided by one million before plotting. All
    indicators are melted to long format and drawn one per row with
    independent y scales.

    :param df: DataFrame that must contain every column listed in
        ``columns_to_report``
    :param stock: not used by this function; kept for interface compatibility
    :return: the result of ``plot_as_inline_html_data`` for the built plot
    :raises AssertionError: if ``df`` is not a DataFrame or a required
        column is missing
    """
    assert isinstance(df, pd.DataFrame)
    columns_to_report = ["pe", "eps", "annual_dividend_yield", "volume",
                         "last_price", "change_in_percent_cumulative",
                         "change_price", "market_cap", "number_of_shares"]
    colnames = df.columns
    for column in columns_to_report:
        assert column in colnames

    # Work on a copy: the unit conversions below must not leak into the
    # caller's frame, otherwise a second call on the same data would
    # rescale the already-rescaled columns.
    df = df.copy()
    df["volume"] = df["last_price"] * df["volume"] / 1000000  # express as $(M)
    df["market_cap"] /= 1000 * 1000
    df["number_of_shares"] /= 1000 * 1000
    df["fetch_date"] = df.index

    plot_df = pd.melt(
        df,
        id_vars="fetch_date",
        value_vars=columns_to_report,
        var_name="indicator",
        value_name="value",
    )
    plot_df["value"] = pd.to_numeric(plot_df["value"])
    plot_df["fetch_date"] = pd.to_datetime(plot_df["fetch_date"])

    plot = (
        p9.ggplot(plot_df, p9.aes("fetch_date", "value", color="indicator"))
        + p9.geom_line(size=1.5, show_legend=False)
        # one facet row per indicator, each with its own y scale
        + p9.facet_wrap("~ indicator", nrow=len(columns_to_report), ncol=1, scales="free_y")
        + p9.theme(axis_text_x=p9.element_text(angle=30, size=7),
                   axis_text_y=p9.element_text(size=7),
                   figure_size=(8, len(columns_to_report)))
        + p9.xlab("")
        + p9.ylab("")
    )
    return plot_as_inline_html_data(plot)

예제 #30

0

파일 보기

파일: simulate_mtg_rounds.py 프로젝트: brschneedecker/mtg-rules-change-medium

def go_to_time_plot2(go_to_time_probs_new: list, go_to_time_probs_old: list,
                     go_to_time_blowout_probs_new: list,
                     go_to_time_blowout_probs_old: list,
                     average_minutes_per_game_values: list):
    """Save a chart of go-to-time probability for four rule/blowout scenarios.

    Scenario (per the original description): new vs. old rules, with and
    without blowouts, 85 matches per round.

    The four probability series share the same x values and are stacked in
    the order: new/no blowouts, old/no blowouts, new/blowouts, old/blowouts.
    A 'Rules' column labels each row so every series gets its own colour.

    :param go_to_time_probs_new: probabilities, new rules, no blowouts
    :param go_to_time_probs_old: probabilities, old rules, no blowouts
    :param go_to_time_blowout_probs_new: probabilities, new rules, blowouts
    :param go_to_time_blowout_probs_old: probabilities, old rules, blowouts
    :param average_minutes_per_game_values: x values common to all series
    :return: None; the figure is written to
        'figures/go_to_time_prob_with_blowouts_plot.png'
    """
    n_points = len(average_minutes_per_game_values)
    scenario_labels = ('New, no blowouts', 'Old, no blowouts',
                       'New, blowouts', 'Old, blowouts')

    # Long-format columns: the x values repeated once per scenario
    minutes_column = np.concatenate([average_minutes_per_game_values] * 4)
    probability_column = np.concatenate([
        go_to_time_probs_new, go_to_time_probs_old,
        go_to_time_blowout_probs_new, go_to_time_blowout_probs_old
    ])
    rules_column = np.concatenate(
        [np.repeat(label, n_points) for label in scenario_labels])

    time_prob_blowout_data = pd.DataFrame({
        'Average minutes per game': minutes_column,
        'P(Go to time)': probability_column,
        'Rules': rules_column,
    })

    mapping = plt.aes(x='Average minutes per game', y='P(Go to time)',
                      color='Rules')
    chart = (plt.ggplot(time_prob_blowout_data, mapping)
             + plt.geom_line()
             + plt.geom_point()
             + plt.ylim([0, 1])
             + plt.theme_classic())
    chart.save(filename='figures/go_to_time_prob_with_blowouts_plot.png')

예제 #31

0

파일 보기

파일: test_geom_path_line_step.py 프로젝트: jwhendy/plotnine

def test_line():
    """Draw the same data with path, line and step geoms on reversed x values."""
    # Reverse x so that drawing order matters: geom_path plots points in the
    # order given, while geom_line and geom_step sort by x before plotting.
    reversed_df = df.copy()
    reversed_df['x'] = df['x'].values[::-1]

    # The three layers are offset vertically (y, y+2, y+4) so they don't overlap.
    path_layer = geom_path(aes(y='y'), size=4)
    line_layer = geom_line(aes(y='y+2'), color='blue', size=4)
    step_layer = geom_step(aes(y='y+4'), color='red', size=4)

    p = ggplot(reversed_df, aes('x')) + path_layer + line_layer + step_layer

    assert p == 'path_line_step'

예제 #32

0

파일 보기

파일: jmlr.py 프로젝트: Pinafore/qb

def yoy_growth():
    """
    Plot cumulative dataset growth by year and save it as a PDF.

    Reads the mapped QANTA question file, keeps questions whose 'page' is
    not None, and for every year from the first year present up to 2017
    (or the last year present, if earlier) computes:

    * 'Total Questions'  - number of questions up to and including the year
    * 'Distinct Answers' - number of distinct pages up to and including the year

    Both series are drawn as lines with points and written to
    'question_answer_counts.pdf' under ``output_path``.
    """
    with open('data/external/datasets/qanta.mapped.2018.04.18.json') as f:
        year_pages = defaultdict(set)
        year_questions = Counter()
        for q in json.load(f)['questions']:
            if q['page'] is not None:
                year_pages[q['year']].add(q['page'])
                year_questions[q['year']] += 1

    start_year = min(year_pages)
    # 2017 is the latest year with a full year's worth of data; including
    # the partial 2018 data would not be accurate
    end_year = min(2017, max(year_pages))

    # Carry running totals forward year by year instead of re-summing every
    # earlier year for each target year.
    year_rows = []
    pages_so_far = set()
    questions_so_far = 0
    for year in range(start_year, end_year + 1):
        pages_so_far |= year_pages.get(year, set())
        questions_so_far += year_questions[year]
        year_rows.append({'year': year, 'value': len(pages_so_far), 'Quantity': 'Distinct Answers'})
        year_rows.append({'year': year, 'Quantity': 'Total Questions', 'value': questions_so_far})

    year_df = pd.DataFrame(year_rows)
    # Ordered categorical so 'Total Questions' precedes 'Distinct Answers'
    count_cat = CategoricalDtype(categories=['Total Questions', 'Distinct Answers'], ordered=True)
    year_df['Quantity'] = year_df['Quantity'].astype(count_cat)
    eprint(year_df[year_df.Quantity == 'Total Questions'])
    p = (
        ggplot(year_df)
        + aes(x='year', y='value', color='Quantity')
        + geom_line() + geom_point()
        + xlab('Year')
        + ylab('Count up to Year (inclusive)')
        + theme_fs()
        # tick every second year
        + scale_x_continuous(breaks=list(range(start_year, end_year + 1, 2)))
    )
    p.save(path.join(output_path, 'question_answer_counts.pdf'))

예제 #33

0

파일 보기

파일: LogisticReal.py 프로젝트: denniscwylie/maclearn

def accPlot(accsByNFeats):
    """Print a plot of accuracy against number of features, one line per set.

    ``accsByNFeats`` is indexed first by set name and then by feature count
    ``p``; each ``accsByNFeats[s][p]`` becomes one row with columns 'p',
    'acc' and 'set'. Accuracy is cast to float and plotted on a log10 x axis.
    Nothing is returned; the plot is printed.
    """
    def frame_for_set(set_name):
        # One single-row frame per feature count, stacked for this set
        accs = accsByNFeats[set_name]
        rows = [DataFrame({"p" : n_feats,
                           "acc" : accs[n_feats],
                           "set" : set_name},
                          index = [str(n_feats)])
                for n_feats in accs]
        return pd.concat(rows, axis = 0)

    ggd = pd.concat([frame_for_set(set_name) for set_name in accsByNFeats])
    ggd['acc'] = ggd['acc'].astype(float)

    ggo = (gg.ggplot(ggd, gg.aes(x='p', y='acc', color='set'))
           + gg.geom_line(alpha=0.5)
           + gg.geom_point()
           + gg.theme_bw()
           + gg.scale_x_log10(breaks=[10, 100, 1000, 10000])
           + gg.scale_color_manual(values=['darkgray', 'black',
                                           'red', 'dodgerblue'])
           + gg.ylab('Accuracy (5-fold CV)'))
    print(ggo)

예제 #34

0

파일 보기

파일: analyze_nir_intensity.py 프로젝트: danforthcenter/plantcv

def analyze_nir_intensity(gray_img, mask, bins=256, histplot=False):
    """Compute the intensity histogram of the masked region of a grayscale (NIR) image.

    The histogram counts are stored as the 'nir_frequencies' observation with the
    rounded lower bin edges as labels. Optionally a line chart of the percentage of
    pixels per bin is created.

    Inputs:
    gray_img     = 8- or 16-bit grayscale image data
    mask         = Binary mask made from selected contours
    bins         = number of classes to divide spectrum into
    histplot     = if True plots histogram of intensity values

    Returns:
    analysis_images = list holding the histogram figure (empty unless histplot is True)

    :param gray_img: numpy array
    :param mask: numpy array
    :param bins: int
    :param histplot: bool
    :return analysis_images: list
    """

    params.device += 1
    debug_prefix = str(params.device)

    # Threshold the mask, scale it by 1/255 and multiply it into the image
    scaled_mask = binary_threshold(mask, 0, 255, 'light') / 255
    masked = np.multiply(gray_img, scaled_mask)

    # Upper histogram limit depends on the bit depth of the input
    maxval = 65536 if gray_img.dtype == 'uint16' else 256

    # Three-channel copy of the grayscale image, used for the debug image below
    rgbimg = cv2.cvtColor(gray_img, cv2.COLOR_GRAY2BGR)

    # The range starts at 1, so zero-valued pixels are not counted
    hist_nir, hist_bins = np.histogram(masked, bins, (1, maxval))

    # Lower edge of every bin, rounded, and the counts as plain floats
    bin_edge_labels = [float(round(edge, 2)) for edge in hist_bins[:-1]]
    frequency_values = [float(count) for count in hist_nir]

    # Counts expressed as a percentage of the non-zero mask pixels
    pixels = cv2.countNonZero(scaled_mask)
    hist_percent = (hist_nir / float(pixels)) * 100

    # Masked image for debugging; no pseudocolored image is produced here
    masked1 = cv2.bitwise_and(rgbimg, rgbimg, mask=mask)
    if params.debug == "print":
        print_image(masked1, os.path.join(params.debug_outdir, debug_prefix + "_masked_nir_plant.jpg"))
    elif params.debug == "plot":
        plot_image(masked1)

    analysis_images = []

    if histplot is True:
        dataset = pd.DataFrame({'Grayscale pixel intensity': np.arange(0, bins),
                                'Proportion of pixels (%)': hist_percent})
        x_breaks = list(range(0, bins, 25))
        fig_hist = (ggplot(data=dataset,
                           mapping=aes(x='Grayscale pixel intensity',
                                       y='Proportion of pixels (%)'))
                    + geom_line(color='red')
                    + scale_x_continuous(breaks=x_breaks))

        analysis_images.append(fig_hist)
        if params.debug == "print":
            fig_hist.save(os.path.join(params.debug_outdir, debug_prefix + '_nir_hist.png'))
        elif params.debug == "plot":
            print(fig_hist)

    outputs.add_observation(variable='nir_frequencies', trait='near-infrared frequencies',
                            method='plantcv.plantcv.analyze_nir_intensity', scale='frequency', datatype=list,
                            value=frequency_values, label=bin_edge_labels)

    # Store images
    outputs.images.append(analysis_images)

    return analysis_images

예제 #35

0

파일 보기

파일: analyze_color.py 프로젝트: danforthcenter/plantcv

def analyze_color(rgb_img, mask, hist_plot_type=None):
    """Analyze the color properties of an image object

    Histograms of the blue, green, red, lightness, green-magenta, blue-yellow,
    hue, saturation and value channels are computed for the masked pixels and
    stored as observations, together with the circular mean, circular standard
    deviation and median of the non-zero hue values (in degrees).

    Inputs:
    rgb_img          = RGB image data
    mask             = Binary mask made from selected contours
    hist_plot_type   = 'None', 'all', 'rgb','lab' or 'hsv'

    Returns:
    analysis_image   = histogram output

    :param rgb_img: numpy.ndarray
    :param mask: numpy.ndarray
    :param hist_plot_type: str
    :return analysis_images: list
    """

    params.device += 1

    if len(np.shape(rgb_img)) < 3:
        fatal_error("rgb_img must be an RGB image")

    # Mask the input image
    masked = cv2.bitwise_and(rgb_img, rgb_img, mask=mask)
    # Extract the blue, green, and red channels
    b, g, r = cv2.split(masked)
    # Convert the BGR image to LAB
    lab = cv2.cvtColor(masked, cv2.COLOR_BGR2LAB)
    # Extract the lightness, green-magenta, and blue-yellow channels
    l, m, y = cv2.split(lab)
    # Convert the BGR image to HSV
    hsv = cv2.cvtColor(masked, cv2.COLOR_BGR2HSV)
    # Extract the hue, saturation, and value channels
    h, s, v = cv2.split(hsv)

    # Color channel dictionary
    channels = {"b": b, "g": g, "r": r, "l": l, "m": m, "y": y, "h": h, "s": s, "v": v}

    # Per plot type: the dataset columns to draw and the line colors.
    # Colors are listed in the alphabetical order of the column names.
    plot_specs = {
        "RGB": (['blue', 'green', 'red'],
                ['blue', 'green', 'red']),
        "LAB": (['lightness', 'green-magenta', 'blue-yellow'],
                ['yellow', 'magenta', 'dimgray']),
        "HSV": (['hue', 'saturation', 'value'],
                ['blueviolet', 'cyan', 'orange']),
        "ALL": (['blue', 'green', 'red', 'lightness', 'green-magenta',
                 'blue-yellow', 'hue', 'saturation', 'value'],
                ['blue', 'yellow', 'green', 'magenta', 'blueviolet',
                 'dimgray', 'red', 'cyan', 'orange']),
    }

    if hist_plot_type is not None and hist_plot_type.upper() not in plot_specs:
        fatal_error("The histogram plot type was " + str(hist_plot_type) +
                    ', but can only be one of the following: None, "all", "rgb", "lab", or "hsv"!')

    # Channel key, label and plotting color
    channel_styles = (
        ("b", "blue", "blue"),
        ("g", "green", "forestgreen"),
        ("r", "red", "red"),
        ("l", "lightness", "dimgray"),
        ("m", "green-magenta", "magenta"),
        ("y", "blue-yellow", "yellow"),
        ("h", "hue", "blueviolet"),
        ("s", "saturation", "cyan"),
        ("v", "value", "orange"),
    )
    # Store histograms, plotting colors, and plotting labels
    # NOTE(review): OpenCV documents the upper histogram bound as exclusive, so the
    # [0, 255] range may leave pixels with value 255 uncounted - confirm the intent.
    histograms = {
        key: {"label": label, "graph_color": graph_color,
              "hist": [float(count[0])
                       for count in cv2.calcHist([channels[key]], [0], mask, [256], [0, 255])]}
        for key, label, graph_color in channel_styles
    }

    # Bin labels for 8-bit data
    binval = np.arange(0, 256)

    analysis_images = []
    # Create a dataframe of bin labels and histogram data (one column per channel label)
    columns = {'bins': binval}
    for key, label, _ in channel_styles:
        columns[label] = histograms[key]["hist"]
    dataset = pd.DataFrame(columns)

    # Make the histogram figure using plotnine
    if hist_plot_type is not None:
        value_vars, line_colors = plot_specs[hist_plot_type.upper()]
        melted = pd.melt(dataset, id_vars=['bins'], value_vars=value_vars,
                         var_name='Color Channel', value_name='Pixels')
        hist_fig = (ggplot(melted, aes(x='bins', y='Pixels', color='Color Channel'))
                    + geom_line()
                    + scale_x_continuous(breaks=list(range(0, 256, 25)))
                    + scale_color_manual(line_colors)
                    )
        analysis_images.append(hist_fig)

    # Hue values of zero are red but are also the value for pixels where hue is undefined
    # The hue value of a pixel will be undefined when the color values are saturated
    # Therefore, hue values of zero are excluded from the calculations below
    nonzero_hue = h[np.where(h > 0)]

    # Calculate the median hue value
    # The median is rescaled from the encoded 0-179 range to the 0-359 degree range
    hue_median = np.median(nonzero_hue) * 2

    # Calculate the circular mean and standard deviation of the encoded hue values
    # The mean and standard-deviation are rescaled from the encoded 0-179 range to the 0-359 degree range
    hue_circular_mean = stats.circmean(nonzero_hue, high=179, low=0) * 2
    hue_circular_std = stats.circstd(nonzero_hue, high=179, low=0) * 2

    # Plot or print the histogram
    if hist_plot_type is not None:
        if params.debug == 'print':
            hist_fig.save(os.path.join(params.debug_outdir, str(params.device) + '_analyze_color_hist.png'))
        elif params.debug == 'plot':
            print(hist_fig)

    # Store into global measurements
    # RGB signal values are in an unsigned 8-bit scale of 0-255
    rgb_values = list(range(0, 256))
    # Hue values are in a 0-359 degree scale, every 2 degrees at the midpoint of the interval
    hue_values = [i * 2 + 1 for i in range(0, 180)]
    # Percentage values on a 0-100 scale (lightness, saturation, and value)
    percent_values = [round((i / 255) * 100, 2) for i in range(0, 256)]
    # Diverging values on a -128 to 127 scale (green-magenta and blue-yellow)
    diverging_values = list(range(-128, 128))

    # Signal-value labels paired with each channel's frequency histogram
    # NOTE(review): the hue histogram has 256 bins but only 180 hue labels - confirm.
    signal_scales = (
        ("b", rgb_values), ("g", rgb_values), ("r", rgb_values),
        ("l", percent_values), ("m", diverging_values), ("y", diverging_values),
        ("h", hue_values), ("s", percent_values), ("v", percent_values),
    )
    for key, signal_values in signal_scales:
        channel_label = histograms[key]["label"]
        outputs.add_observation(variable=channel_label + '_frequencies',
                                trait=channel_label + ' frequencies',
                                method='plantcv.plantcv.analyze_color', scale='frequency', datatype=list,
                                value=histograms[key]["hist"], label=signal_values)

    outputs.add_observation(variable='hue_circular_mean', trait='hue circular mean',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_circular_mean, label='degrees')
    # Record the circular standard deviation itself (not the median) under this variable
    outputs.add_observation(variable='hue_circular_std', trait='hue circular standard deviation',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_circular_std, label='degrees')
    outputs.add_observation(variable='hue_median', trait='hue median',
                            method='plantcv.plantcv.analyze_color', scale='degrees', datatype=float,
                            value=hue_median, label='degrees')

    # Store images
    outputs.images.append(analysis_images)

    return analysis_images

예제 #36

0

파일 보기

파일: logo.py 프로젝트: jwhendy/plotnine

# Script fragment that builds the plot `p` and saves it twice: once as
# 'logo.pdf' and once, without the text label layer, as 'logo-small.pdf'.
# `df`, `n`, `bcolor`, `bcolor_darker` and `bcolor_lighter` are defined
# outside this excerpt.

# Colours handed to the colour and fill gradient scales below: three
# literal RGB tuples followed by bcolor (twice) and bcolor_darker (twice).
gradient = (
    (0.99, 0.88, 0.87),
    (0.98, 0.62, 0.71),
    (0.86, 0.20, 0.59),
    bcolor, bcolor,
    bcolor_darker, bcolor_darker)

# Three slices of df, one per geom: every 9th element of [0, n//3), the full
# middle range [n//3, 2*n//3), and every 12th element from 2*n//3 onwards.
# NOTE(review): presumably df is a DataFrame and n its row count - confirm.
df1 = df[:n//3:9]
df2 = df[n//3:2*n//3]
df3 = df[2*n//3::12]

# Layers in order: text label, points (df1), line (df2), bars (df3).
# All legends are switched off and theme_void is applied.
p = (ggplot(aes('x', 'y', color='y', fill='y'))
     + annotate(geom='label', x=0.295, y=0.495, label='pl  tnine',
                label_size=1.5, label_padding=.1, size=24,
                fill=bcolor_lighter, color=bcolor)
     + geom_point(df1, size=8, stroke=0, show_legend=False)
     + geom_line(df2, size=2, color=bcolor_darker, show_legend=False)
     + geom_bar(df3, aes('x+.06'), stat='identity', size=0, show_legend=False)

     + scale_color_gradientn(colors=gradient)
     + scale_fill_gradientn(colors=gradient)
     + theme_void()
     + theme(figure_size=(3.6, 3.6)))

p.save('logo.pdf', pad_inches=-0.04)

# Remove the project name
# (rebuild the layers container without its first entry, the annotate label)
p.layers = p.layers.__class__(p.layers[1:])
p.save('logo-small.pdf', pad_inches=-0.04)

예제 #37

0

파일 보기

파일: fluor_fvfm.py 프로젝트: danforthcenter/plantcv

def fluor_fvfm(fdark, fmin, fmax, mask, bins=256):
    """Analyze PSII camera images.

    Computes Fv = Fmax - Fmin and Fv/Fm = Fv / Fmax over the masked pixels, builds a
    histogram of the non-zero Fv/Fm values and records the histogram, its peak bin,
    the Fv/Fm median and an Fdark quality-control flag as observations.

    Inputs:
    fdark       = grayscale fdark image
    fmin        = grayscale fmin image
    fmax        = grayscale fmax image
    mask        = mask of plant (binary, single channel)
    bins        = number of bins (1 to 256 for 8-bit; 1 to 65,536 for 16-bit; default is 256)
    Returns:
    analysis_images = list of images (fv image and fvfm histogram image)
    :param fdark: numpy.ndarray
    :param fmin: numpy.ndarray
    :param fmax: numpy.ndarray
    :param mask: numpy.ndarray
    :param bins: int
    :return analysis_images: list
    """

    # Auto-increment the device counter
    params.device += 1

    # All three inputs must be single-channel (two-dimensional) images
    if any(len(np.shape(img)) != 2 for img in (fdark, fmin, fmax)):
        fatal_error("The fdark, fmin, and fmax images must be grayscale images.")

    # Fdark quality control: passes only if no masked pixel exceeds 2000
    fdark_mask = cv2.bitwise_and(fdark, fdark, mask=mask)
    qc_fdark = not (np.amax(fdark_mask) > 2000)

    # Restrict Fmin and Fmax to the masked region
    fmin_mask = cv2.bitwise_and(fmin, fmin, mask=mask)
    fmax_mask = cv2.bitwise_and(fmax, fmax, mask=mask)

    # Fv = Fmax - Fmin. With unsigned integers a negative difference rolls over to a
    # nonsensical value, so Fv is forced to zero wherever Fmin exceeds Fmax.
    fv = np.subtract(fmax_mask, fmin_mask)
    fv[fmax_mask < fmin_mask] = 0
    analysis_images = [fv]

    # Fv/Fm is only computed where Fmax is positive; where Fmax is zero Fv is zero too.
    # Both operands are converted to float64 so the division is done in floating point.
    fvfm = fv.astype(np.float64)
    fmax_flt = fmax_mask.astype(np.float64)
    fmax_positive = np.where(fmax_mask > 0)
    fvfm[fmax_positive] /= fmax_flt[fmax_positive]

    # Median of the non-zero Fv/Fm values
    fvfm_median = np.median(fvfm[np.where(fvfm > 0)])

    # Histogram of the non-zero Fv/Fm values over [0, 1]
    fvfm_hist, fvfm_bins = np.histogram(fvfm[np.where(fvfm > 0)], bins, range=(0, 1))
    # np.histogram returns bins + 1 edges; shift each lower edge by half the bin width
    # to get one midpoint (x value) per frequency (y value)
    midpoints = fvfm_bins[:-1] + 0.5 * np.diff(fvfm_bins)

    # Midpoint of the most populated bin
    max_bin = midpoints[np.argmax(fvfm_hist)]

    # Histogram figure (plotnine); the label position is fixed at x=.15, y=205
    dataset = pd.DataFrame({'Plant Pixels': fvfm_hist, 'Fv/Fm': midpoints})
    peak_label = geom_label(label='Peak Bin Value: ' + str(max_bin),
                            x=.15, y=205, size=8, color='green')
    fvfm_hist_fig = (ggplot(data=dataset, mapping=aes(x='Fv/Fm', y='Plant Pixels'))
                     + geom_line(color='green', show_legend=True)
                     + peak_label)
    analysis_images.append(fvfm_hist_fig)

    # Debug output: the masked inputs, the Fv image and the histogram figure
    if params.debug == 'print':
        debug_prefix = str(params.device)
        for suffix, image in (('_fmin_mask.png', fmin_mask),
                              ('_fmax_mask.png', fmax_mask),
                              ('_fv_convert.png', fv)):
            print_image(image, os.path.join(params.debug_outdir, debug_prefix + suffix))
        fvfm_hist_fig.save(os.path.join(params.debug_outdir, debug_prefix + '_fv_hist.png'))
    elif params.debug == 'plot':
        for image in (fmin_mask, fmax_mask, fv):
            plot_image(image, cmap='gray')
        print(fvfm_hist_fig)

    method_name = 'plantcv.plantcv.fluor_fvfm'
    outputs.add_observation(variable='fvfm_hist', trait='Fv/Fm frequencies',
                            method=method_name, scale='none', datatype=list,
                            value=fvfm_hist.tolist(), label=np.around(midpoints, decimals=len(str(bins))).tolist())
    outputs.add_observation(variable='fvfm_hist_peak', trait='peak Fv/Fm value',
                            method=method_name, scale='none', datatype=float,
                            value=float(max_bin), label='none')
    outputs.add_observation(variable='fvfm_median', trait='Fv/Fm median',
                            method=method_name, scale='none', datatype=float,
                            value=float(np.around(fvfm_median, decimals=4)), label='none')
    outputs.add_observation(variable='fdark_passed_qc', trait='Fdark passed QC',
                            method=method_name, scale='none', datatype=bool,
                            value=qc_fdark, label='none')

    # Store images
    outputs.images.append(analysis_images)

    return analysis_images

예제 #38

0

파일 보기

파일: test_geom_path_line_step.py 프로젝트: jwhendy/plotnine

def test_no_missing_values():
    """Draw the ``y2`` column of ``df_missing`` as a size-2 line.

    The resulting plot is compared with the name 'no_missing_values'
    (presumably resolved to a baseline image by the test suite's
    equality hook -- confirm in the test configuration).
    """
    thick_line = geom_line(aes(y='y2'), size=2)
    plot = ggplot(df_missing, aes(x='x')) + thick_line

    assert plot == 'no_missing_values'

예제 #39

0

파일 보기

파일: test_geom_path_line_step.py 프로젝트: jwhendy/plotnine

def test_missing_values():
    """Draw the ``y1`` column of ``df_missing`` as a size-2 line.

    The comparison with the name 'missing_values' must emit a
    ``UserWarning``; the plot is only evaluated inside the
    ``pytest.warns`` context.
    """
    thick_line = geom_line(aes(y='y1'), size=2)
    plot = ggplot(df_missing, aes(x='x')) + thick_line

    with pytest.warns(UserWarning):
        assert plot == 'missing_values'

예제 #40

0

파일 보기

파일: figures.py 프로젝트: Pinafore/qb

    def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
        """Build the accuracy vs. fraction-of-question-revealed plot.

        Args:
            expo: When true, build the faceted plot (one facet per
                ``Guessing_Model``, coloured by ``Dataset``) and overlay the
                human gameplay curves when they are available. When false,
                return a single moving-average plot of ``self.char_plot_df``
                coloured by ``Guessing_Model``.
            no_models: Only used with ``expo``. Plot the human gameplay
                points alone instead of ``self.char_plot_df``.
            columns: Only used with ``expo``. Lay the facets out in a single
                column instead of a single row.

        Returns:
            The assembled plot object.

        Raises:
            ValueError: ``expo`` and ``no_models`` are both set but no human
                gameplay data was loaded (file missing or ``self.no_humans``).
        """
        human_data_path = 'data/external/all_human_gameplay.json'

        def cumulative_accuracy_frame(correct_positions, wrong_positions, dataset, name):
            # Sort every answer by its position, then divide the running count
            # of correct answers by the total number of answers.
            positions = np.array(correct_positions + wrong_positions)
            outcomes = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
            order = np.argsort(positions)
            sorted_outcomes = outcomes[order]
            frame = pd.DataFrame({
                'correct': sorted_outcomes.cumsum() / sorted_outcomes.shape[0],
                'char_percent': positions[order],
            })
            frame['Dataset'] = dataset
            # The leading space is part of the label and is kept as-is.
            frame['Guessing_Model'] = f' {name}'
            return frame

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]

        if not expo:
            df = self.char_plot_df
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                # Previously referenced an unbound local ``df`` in this branch.
                df.to_json(self.save_df)
            return (
                ggplot(df)
                + aes(x='char_percent', y='correct', color='Guessing_Model')
                + stat_smooth(method='mavg', se=False, method_args={'window': 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        # Human gameplay is optional: it needs the data file and must not be
        # disabled through ``self.no_humans``.
        human_df = None
        if os.path.exists(human_data_path) and not self.no_humans:
            with open(human_data_path) as f:
                all_gameplay = json.load(f)

            frames = []
            for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                gameplay = all_gameplay[event]

                # The 'live' event contributes no control (regular test) curve.
                if event != 'live':
                    frames.append(cumulative_accuracy_frame(
                        gameplay['control_correct_positions'],
                        gameplay['control_wrong_positions'],
                        'Regular Test', name,
                    ))

                frames.append(cumulative_accuracy_frame(
                    gameplay['adv_correct_positions'],
                    gameplay['adv_wrong_positions'],
                    'IR Adversarial', name,
                ))

                if gameplay['advneural_correct_positions']:
                    frames.append(cumulative_accuracy_frame(
                        gameplay['advneural_correct_positions'],
                        gameplay['advneural_wrong_positions'],
                        'RNN Adversarial', name,
                    ))

            human_df = pd.concat(frames)
            human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
            dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
            human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                raise ValueError(
                    f'no_models=True needs human gameplay data, but {human_data_path!r} '
                    'is missing or no_humans is set'
                )
            p = ggplot(human_df) + geom_point(shape='.')
            chart = None
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

            if human_df is not None:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

            if self.mvg_avg_char:
                chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
            else:
                chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        p = (
            p + facet_conf
            + aes(x='char_percent', y='correct', color='Dataset')
        )
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, .5, 1])
            + coord_cartesian(ylim=limits)
            + xlab('Percent of Question Revealed')
            + ylab('Accuracy')
            + theme(
                #legend_position='top', legend_box_margin=0, legend_title=element_blank(),
                strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
            )
            + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
        )
        if self.title != '':
            p += ggtitle(self.title)

        return p

예제 #41

0

파일 보기

파일: KnnReal.py 프로젝트: denniscwylie/maclearn

# accsByNFeats[s][n] holds the value returned by fitKnnWithNFeat(n, s)
# for every s in xnorms and every n in nFeatures.
accsByNFeats = OrderedDict(
    (s, OrderedDict((n, fitKnnWithNFeat(n, s)) for n in nFeatures))
    for s in xnorms
)

# Flatten the nested mapping into one row per (set, p) pair, indexed by
# "<set>_<p>".
plotData = pd.concat(
    [
        DataFrame({"set": s, "p": p, "acc": acc},
                  index=[s + "_" + str(p)])
        for s, accsForSet in accsByNFeats.items()
        for p, acc in accsForSet.items()
    ],
    axis=0,
)
plotData['acc'] = plotData['acc'].astype(float)

# Plot acc against p on a log-scaled x axis, one coloured line per set.
plt.close()
ggo = (
    gg.ggplot(plotData, gg.aes(x='p', y='acc', color='set'))
    + gg.geom_line()
    + gg.scale_x_log10()
    + gg.theme_bw()
)
print(ggo)

# plotData.to_csv("KnnRealAccuracyByNFeat.tsv",
#                 sep = "\t",
#                 index = False,
#                 header = True)


## -----------------------------------------------------------------
## use PCA feature extraction
## -----------------------------------------------------------------
feKnnFitter = pipeline.Pipeline([
    ('featextr', pcaextractor.PcaExtractor(k=3)),

예제 #42

0

파일 보기

파일: 2.mutation-classifier.py 프로젝트: cognoma/machine-learning

            'feature_set': [model],
            'auc': metrics['auroc'].round(3)
        }))
        roc_df = metrics['roc_df']
        roc_output = roc_output.append(pd.DataFrame({
            'false_positive_rate': roc_df.fpr,
            'true_positive_rate': roc_df.tpr,
            'partition': partition,
            'feature_set': model
        }))

# ROC curves from roc_output: false positive rate on x, true positive rate
# on y, colour per feature_set and line type per partition. The expression
# is left bare (not assigned); presumably a notebook cell displays it.
# theme_cognoma is not defined in the visible code.
(gg.ggplot(roc_output, gg.aes(x='false_positive_rate',
                              y='true_positive_rate',
                              color='feature_set',
                              linetype='partition'))
 + gg.geom_line(size=1.1, alpha=0.7)
 + gg.labs(x='false positive rate', y='true positive rate')
 + theme_cognoma()
)


# ### AUROC

# In[20]:

# Pivot auc_output into a table of 'auc' values with one row per
# feature_set and one column per partition.
pd.pivot_table(auc_output,
               values='auc',
               index='feature_set',
               columns='partition')

예제 #43

0

파일 보기

파일: jmlr.py 프로젝트: Pinafore/qb

def syntactic_diversity_plots():
    """Render the three syntactic-diversity figures and save them as PDFs.

    Reads ``data/external/syntactic_diversity_table.json``, derives the
    ``parse_ratio`` column (``unique_parses / parses``) and writes
    ``syn_div_plot.pdf``, ``n_unique_parses.pdf`` and ``parse_ratio.pdf``
    under ``output_path``.
    """
    with open('data/external/syntactic_diversity_table.json') as f:
        parse_df = pd.DataFrame(json.load(f))
    parse_df['parse_ratio'] = parse_df['unique_parses'] / parse_df['parses']

    # Long format: both metrics end up in a single 'y' column so they can be
    # drawn as separate facets of one figure.
    melt_df = pd.melt(
        parse_df,
        id_vars=['dataset', 'depth', 'overlap', 'parses'],
        value_vars=['parse_ratio', 'unique_parses'],
        var_name='metric',
        value_name='y',
    )

    facet_titles = {
        'parse_ratio': 'Average Unique Parses per Instance',
        'unique_parses': 'Count of Unique Parses',
    }

    def label_facet(name):
        # Unknown metric names map to None.
        return facet_titles.get(name)

    def label_y(ys):
        # Tick labels whose text ends in '000.0' are shortened to '<n>K'.
        texts = [str(y) for y in ys]
        return [t[:-5] + 'K' if t.endswith('000.0') else t for t in texts]

    def depth_axis():
        # A fresh x scale (depths 1..10) for each figure.
        return scale_x_continuous(
            breaks=list(range(1, 11)),
            minor_breaks=list(range(1, 11)),
            limits=[1, 10],
        )

    def pdf_path(filename):
        return path.join(output_path, filename)

    faceted_plot = (
        ggplot(melt_df)
        + aes(x='depth', y='y', color='dataset')
        + facet_wrap('metric', scales='free_y', nrow=2, labeller=label_facet)
        + geom_line()
        + geom_point()
        + xlab('Parse Truncation Depth')
        + ylab('')
        + scale_color_discrete(name='Dataset')
        + scale_y_continuous(labels=label_y)
        + depth_axis()
        + theme_fs()
    )
    faceted_plot.save(pdf_path('syn_div_plot.pdf'))

    count_plot = (
        ggplot(parse_df)
        + aes(x='depth', y='unique_parses', color='dataset')
        + geom_line()
        + geom_point()
        + xlab('Parse Truncation Depth')
        + ylab('Count of Unique Parses')
        + scale_color_discrete(name='Dataset')
        + depth_axis()
        + theme_fs()
    )
    count_plot.save(pdf_path('n_unique_parses.pdf'))

    ratio_plot = (
        ggplot(parse_df)
        + aes(x='depth', y='parse_ratio', color='dataset')
        + geom_line()
        + geom_point()
        + xlab('Parse Truncation Depth')
        + ylab('Average Unique Parses per Instance')
        + scale_color_discrete(name='Dataset')
        + depth_axis()
        + scale_y_continuous(limits=[0, 1])
        + theme_fs()
    )
    ratio_plot.save(pdf_path('parse_ratio.pdf'))

예제 #44

0

파일 보기

파일: plots.py 프로젝트: ChickenProp/chickenprop.github.com

def main():
    """Build each figure and hand it to ``save_both`` with an output name.

    Every figure is a ``my_plot`` (or raw ``gg.ggplot``) object extended with
    titles, axis limits and line/guide layers. The data generators
    (``Pf_Ob_Ol``, ``Pq_Ob_σ``, ...) and the helpers ``concat_map``,
    ``my_plot``, ``titles``, ``limits``, ``labs``, ``mathrm``, ``dollars``
    and ``save_both`` are not defined in this function.
    """
    mpl.rc('mathtext', fontset='cm')

    # Silence warnings whose messages match these patterns (removed rows,
    # image size and output filename notices).
    warnings.filterwarnings('ignore',
                            r'(geom|position)_\w+ ?: Removed \d+ rows')
    warnings.filterwarnings('ignore', r'Saving .+ x .+ in image')
    warnings.filterwarnings('ignore', r'Filename: .+\.png')

    df = concat_map(Pf_Ob_Ol, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_f')
              + titles('P_f(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pf_Ob_Ol')

    df = concat_map(Pf_Ob_σ, 'P_f', np.linspace(0.1, 1, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_f')
              + titles('P_f(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pf_Ob_σ')

    df = concat_map(Pq_Ob_Ol, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'O_l', 'P_q')
              + titles('P_q(O_b, O_l)')
              + limits((1, 10))
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              + gg.geom_line()
              , 'Pq_Ob_Ol')

    df = concat_map(Pq_Ob_σ, 'P_q', np.linspace(-0.9, 0, 10))
    save_both(my_plot(df, 'O_b', 'σ', 'P_q')
              + titles('P_q(O_b, σ)')
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Pq_Ob_σ')

    df = concat_map(Opr_Ob_Ol, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'O_l', 'Opr')
              + titles("O'(O_b, O_l)")
              + limits((1, 10), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'Opr_Ob_Ol')

    df = concat_map(Opr_Ob_σ, 'Opr', np.linspace(1, 5, 9))
    save_both(my_plot(df, 'O_b', 'σ', 'Opr')
              + titles("O'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'Opr_Ob_σ')

    # Single curve: Pf computed from Opr by Opr_Pf, with a dashed guide at
    # y = C (C is defined outside this function).
    df = (pd.DataFrame({'Opr': np.linspace(1, 21, 101)})
            .assign(Pf=lambda x: Opr_Pf(x.Opr)))
    save_both(my_plot(df, 'Opr', 'Pf')
              + titles("P_f(O')")
              + labs("O'", 'P_f')
              + limits((1, 20), (0, 1),
                       xbreaks=np.linspace(2, 20, 10),
                       ybreaks=np.linspace(0, 1, 11))
              + gg.geom_line()
              + gg.geom_hline(yintercept=C, linetype='dashed', color='grey')
              , 'Pf_Opr')

    df = concat_map(σpr_Ob_σ, 'σpr', np.linspace(0, 5, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'σpr')
              + titles("σ'(O_b, σ)")
              + limits((1, 10), (0, 5))
              + gg.geom_line()
              , 'σpr_Ob_σ')

    # Single curve: Pq computed from σpr by σpr_Pq.
    df = (pd.DataFrame({'σpr': np.linspace(0, 21, 106)})
            .assign(Pq=lambda x: σpr_Pq(x.σpr)))
    save_both(my_plot(df, 'σpr', 'Pq')
              + titles("P_q(σ')")
              + labs("σ'", 'P_q')
              + limits((0, 20), (-1, 0),
                       xbreaks=np.linspace(0, 20, 11),
                       ybreaks=np.linspace(-1, 0, 11))
              + gg.geom_line()
              , 'Pq_σpr')

    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_free')

    # NOTE(review): this σ figure reuses the O_l generator liab_Ob_Ol_free
    # and the O_l limits -- confirm whether a σ-specific generator was meant.
    df = concat_map(liab_Ob_Ol_free, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Free bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_free')

    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'O_l', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, O_l)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              + gg.geom_abline(slope=1, intercept=0,
                               linetype='dashed', color='grey')
              , 'liab_Ob_Ol_qual')

    # NOTE(review): same pattern as above -- the σ figure is built from
    # liab_Ob_Ol_qual; confirm this is intended.
    df = concat_map(liab_Ob_Ol_qual, 'liab', np.linspace(0, 10, 11))
    save_both(my_plot(df, 'O_b', 'σ', 'liab', clab='-R_{bl}')
              + titles("-R_{bl}(O_b, σ)", "S_b = 1, C_b = 0, C_l = 0.02",
                       mathrm('Qualifying bet', dollars=False))
              + limits((1,20), (1, 10))
              + gg.geom_line()
              , 'liab_Ob_σ_qual')

    # Combine one P_f curve (0.6) and one P_q curve (-0.3), tagging each row
    # with its profit label, and keep a single row per O_b value.
    df_Pf = Pf_Ob_σ(0.6).assign(profit=dollars('P_f'))
    df_Pq = Pq_Ob_σ(-0.3).assign(profit=dollars('P_q'))
    df = pd.concat((df_Pf, df_Pq), ignore_index=True)
    df.drop_duplicates('O_b', inplace=True)

    # O_b where the P_f curve has σ == 0, and σ where the P_q curve has
    # O_b == 1; both are looked up at index label 0.
    Opr = df_Pf.query('σ==0').O_b[0]
    σpr = df_Pq.query('O_b==1').σ[0]

    labels = pd.DataFrame({
        'x': [Opr+0.1, 1, 9.8], 'y': [4.8, σpr, σpr + 0.3],
        'label': ["$O'$", "$σ'$", mathrm('More profit')]
    })
    lab_aes = gg.aes('x', 'y', label='label')

    save_both(
        gg.ggplot(df, gg.aes(x='O_b', y='σ'))
        + gg.geom_area(gg.aes(fill='profit'), alpha=0.3)
        + gg.geom_vline(xintercept=Opr, linetype='dashed')
        + gg.geom_hline(yintercept=σpr, linetype='dashed')

        # text alignment can't be specified in an aes, so each label row gets
        # its own geom_text. ``.loc`` slices are label-based and inclusive,
        # selecting rows 0, 1 and 2 respectively (``.ix`` was removed from
        # pandas).
        + gg.geom_text(lab_aes, data=labels.loc[:0], ha='left', va='top')
        + gg.geom_text(lab_aes, data=labels.loc[1:1], ha='left', va='bottom')
        + gg.geom_text(lab_aes, data=labels.loc[2:], ha='right', va='bottom')

        + gg.scale_fill_discrete(name=mathrm('Bet type'),
                                 labels=[mathrm('Free'), mathrm('Qualifying')])
        + limits((1, 10), (0, 5))
        + gg.ggtitle('%s "%s" %s' % (mathrm('Shape of the'),
                                     mathrm('more profitable'),
                                     mathrm('space')))
        + labs('O_b', 'σ')
        , 'Px_shapes')