Example #1
 def test_rlm(self):
     p = (self.p + stat_smooth(
         method='rlm',
         formula='y ~ np.sin(x)',
         fill='red',
     ))
     assert p == 'rlm_formula'
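A minimal standalone sketch of what this fixture exercises: a robust linear model
(method='rlm', backed by statsmodels) fitted against the formula 'y ~ np.sin(x)'.
The synthetic data and names below are illustrative assumptions, not part of the
test suite.

import numpy as np
import pandas as pd
from plotnine import ggplot, aes, geom_point, stat_smooth

# noisy sine data to give the robust fit something to work with
df = pd.DataFrame({'x': np.linspace(0, 10, 200)})
df['y'] = np.sin(df['x']) + np.random.normal(scale=0.2, size=len(df))

p = (ggplot(df, aes('x', 'y'))
     + geom_point()
     + stat_smooth(method='rlm', formula='y ~ np.sin(x)', fill='red'))
print(p)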
Example #2
def method_plot(df, baseline_rul, baseline_mse, method):
    plotnine.options.figure_size = (15, 8)

    jan = df[df['method'] == method].copy()

    jan['percent_broken'] = jan['percent_broken'].round().astype(int)
    jan['percent_fail_runs'] = jan['percent_fail_runs'].round().astype(int)

    # Note: assigning a tuple to plotnine.ylim rebinds the module attribute and has no
    # effect on the plot; limits would be set by adding plotnine.ylim(2, 10) or
    # plotnine.coord_cartesian(ylim=(2, 10)) to the chain below.
    plotnine.ylim = (2, 10)
    gg = (plotnine.ggplot(
        jan, plotnine.aes(x='percent_broken', y='log_score', color='method')) +
          plotnine.facet_wrap('task', 2, 4) +
          plotnine.stat_boxplot(plotnine.aes(y='log_value', x=60),
                                data=baseline_rul,
                                width=80,
                                color='#14639e',
                                show_legend=False) +
          plotnine.geom_jitter(width=2.5, show_legend=False) +
          plotnine.stat_smooth(method='gls', show_legend=False) +
          plotnine.xlab('Grade of Degradation in %') +
          plotnine.ylab('Logarithmic RUL-Score') +
          plotnine.theme_classic(base_size=20))
    gg.save('%s_log_rul.pdf' % method)

    # As above, this assignment does not set the axis limits of the plot below.
    plotnine.ylim = (90, 10)
    gg = (plotnine.ggplot(
        jan, plotnine.aes(x='percent_broken', y='mse', color='method')) +
          plotnine.facet_wrap('task', 2, 4) +
          plotnine.stat_boxplot(plotnine.aes(y='value', x=60),
                                data=baseline_mse,
                                width=80,
                                color='#14639e',
                                show_legend=False) +
          plotnine.geom_jitter(width=2.5, show_legend=False) +
          plotnine.stat_smooth(method='gls', show_legend=False) +
          plotnine.xlab('Grade of Degradation in %') + plotnine.ylab('RMSE') +
          plotnine.theme_classic(base_size=20))
    gg.save('%s_rmse.pdf' % method)
Example #3
 def plot(self,
          plotDat,
          tag=None,
          log=True,
          by='cell_type',
          data_set=None,
          title=None,
          alpha=.4):
     pDat = plotDat.copy()
     gcorr = pearsonr(pDat.measured, pDat.prediction)[0]
     corrs = pDat.groupby(
         pDat[by]).apply(lambda x: pearsonr(x.measured, x.prediction)[0])
     pDat['corr'] = corrs[pDat[by]].values
     by_str = '{}_pearson'.format(by)
     pDat[by_str] = pDat.apply(
         lambda x: '{} {:.2f}'.format(x[by], corrs[x[by]]), axis=1)
     if data_set:
         pDat = pDat.loc[pDat['dataset_name'] == data_set]
     pl = (pn.ggplot(pn.aes('measured', 'prediction', color=by_str), pDat) +
           pn.geom_point(alpha=alpha) + pn.stat_smooth(mapping=pn.aes(
               'measured', 'prediction', color=by_str),
                                                       method='lm',
                                                       geom='line',
                                                       alpha=0.5,
                                                       se=False,
                                                       inherit_aes=False))
     if len(pDat['sample'].unique()) < 10:
         pl = pl + pn.aes(shape='sample')
     else:
         pl = pl + pn.aes(shape='dataset_name')
     if log is True:
         pl = pl + pn.scale_x_log10() + pn.scale_y_log10()
     if title is not None:
         pl = pl + pn.ggtitle(title)
     elif tag is not None:
         pl = pl + pn.ggtitle('{} pearson={}'.format(tag, gcorr))
     else:
         pl = pl + pn.ggtitle('pearson={}'.format(gcorr))
     return pl
Example #4
from plotnine import (ggplot, aes, geom_point, facet_wrap, stat_smooth,
                      theme_xkcd)
from plotnine.data import mtcars

kwargs = dict(width=6, height=4)

p1 = (ggplot(mtcars, aes('wt', 'mpg')) + geom_point())
p1.save('readme-image-1.png', **kwargs)

p2 = p1 + aes(color='factor(gear)')
p2.save('readme-image-2.png', **kwargs)

p3 = p2 + stat_smooth(method='lm')
p3.save('readme-image-3.png', **kwargs)

p4 = p3 + facet_wrap('~gear')
p4.save('readme-image-4.png', **kwargs)

p5 = p4 + theme_xkcd()
p5.save('readme-image-5.png', **kwargs)
Example #5
    def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]
        if expo:
            if os.path.exists('data/external/all_human_gameplay.json') and not self.no_humans:
                with open('data/external/all_human_gameplay.json') as f:
                    all_gameplay = json.load(f)
                    frames = []
                    for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
                        if self.merge_humans:
                            name = 'Human'
                        gameplay = all_gameplay[event]
                        if event != 'live':
                            control_correct_positions = gameplay['control_correct_positions']
                            control_wrong_positions = gameplay['control_wrong_positions']
                            control_positions = control_correct_positions + control_wrong_positions
                            control_positions = np.array(control_positions)
                            control_result = np.array(len(control_correct_positions) * [1] + len(control_wrong_positions) * [0])
                            argsort_control = np.argsort(control_positions)
                            control_x = control_positions[argsort_control]
                            control_sorted_result = control_result[argsort_control]
                            control_y = control_sorted_result.cumsum() / control_sorted_result.shape[0]
                            control_df = pd.DataFrame({'correct': control_y, 'char_percent': control_x})
                            control_df['Dataset'] = 'Regular Test'
                            control_df['Guessing_Model'] = f' {name}'
                            frames.append(control_df)

                        adv_correct_positions = gameplay['adv_correct_positions']
                        adv_wrong_positions = gameplay['adv_wrong_positions']
                        adv_positions = adv_correct_positions + adv_wrong_positions
                        adv_positions = np.array(adv_positions)
                        adv_result = np.array(len(adv_correct_positions) * [1] + len(adv_wrong_positions) * [0])
                        argsort_adv = np.argsort(adv_positions)
                        adv_x = adv_positions[argsort_adv]
                        adv_sorted_result = adv_result[argsort_adv]
                        adv_y = adv_sorted_result.cumsum() / adv_sorted_result.shape[0]
                        adv_df = pd.DataFrame({'correct': adv_y, 'char_percent': adv_x})
                        adv_df['Dataset'] = 'IR Adversarial'
                        adv_df['Guessing_Model'] = f' {name}'
                        frames.append(adv_df)

                        if len(gameplay['advneural_correct_positions']) > 0:
                            adv_correct_positions = gameplay['advneural_correct_positions']
                            adv_wrong_positions = gameplay['advneural_wrong_positions']
                            adv_positions = adv_correct_positions + adv_wrong_positions
                            adv_positions = np.array(adv_positions)
                            adv_result = np.array(len(adv_correct_positions) * [1] + len(adv_wrong_positions) * [0])
                            argsort_adv = np.argsort(adv_positions)
                            adv_x = adv_positions[argsort_adv]
                            adv_sorted_result = adv_result[argsort_adv]
                            adv_y = adv_sorted_result.cumsum() / adv_sorted_result.shape[0]
                            adv_df = pd.DataFrame({'correct': adv_y, 'char_percent': adv_x})
                            adv_df['Dataset'] = 'RNN Adversarial'
                            adv_df['Guessing_Model'] = f' {name}'
                            frames.append(adv_df)

                    human_df = pd.concat(frames)
                    human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
                    human_dtype = CategoricalDtype(human_vals, ordered=True)
                    human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
                    dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
                    human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

            if no_models:
                p = ggplot(human_df) + geom_point(shape='.')
            else:
                df = self.char_plot_df
                if 1 not in self.rounds:
                    df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
                if 2 not in self.rounds:
                    df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                    df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
                p = ggplot(df)
                if self.save_df is not None:
                    eprint(f'Saving df to: {self.save_df}')
                    df.to_json(self.save_df)

                if os.path.exists('data/external/all_human_gameplay.json') and not self.no_humans:
                    eprint('Loading human data')
                    p = p + geom_line(data=human_df)

            if columns:
                facet_conf = facet_wrap('Guessing_Model', ncol=1)
            else:
                facet_conf = facet_wrap('Guessing_Model', nrow=1)

            if not no_models:
                if self.mvg_avg_char:
                    chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
                else:
                    chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)
            else:
                chart = None

            p = (
                p + facet_conf
                + aes(x='char_percent', y='correct', color='Dataset')
            )
            if chart is not None:
                p += chart
            p = (
                p
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + scale_x_continuous(breaks=[0, .5, 1])
                + coord_cartesian(ylim=limits)
                + xlab('Percent of Question Revealed')
                + ylab('Accuracy')
                + theme(
                    #legend_position='top', legend_box_margin=0, legend_title=element_blank(),
                    strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
                )
                + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
            )
            if self.title != '':
                p += ggtitle(self.title)

            return p
        else:
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                self.char_plot_df.to_json(self.save_df)
            return (
                ggplot(self.char_plot_df)
                + aes(x='char_percent', y='correct', color='Guessing_Model')
                + stat_smooth(method='mavg', se=False, method_args={'window': 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )
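The plot_char_percent_vs_accuracy_smooth variants on this page all build their human
accuracy curves with the same idiom: sort the answer positions, then divide the running
count of correct answers by the total number of questions. A toy sketch with made-up
numbers:

import numpy as np

correct_positions = [0.2, 0.5]   # char_percent at which a question was answered correctly
wrong_positions = [0.4, 0.9]     # char_percent at which a question was answered wrongly
positions = np.array(correct_positions + wrong_positions)
result = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
order = np.argsort(positions)
x = positions[order]                          # [0.2, 0.4, 0.5, 0.9]
y = result[order].cumsum() / result.shape[0]  # [0.25, 0.25, 0.5, 0.5]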
Example #6
from plotnine.data import mpg
from plotnine import ggplot, aes, facet_grid, labs, geom_point, stat_smooth

print(ggplot(mpg)
      + facet_grid(facets="year~class")
      + aes(x="displ", y="hwy")
      + labs(
          x="Engine Size",
          y="Miles per Gallon",
          title="Miles per Gallon for Each Year and Vehicle Class")
      + geom_point()
      + stat_smooth(method='lm'))
Example #7
    def plot_char_percent_vs_accuracy_smooth(self,
                                             expo=False,
                                             no_models=False,
                                             columns=False):
        if expo:
            if os.path.exists('data/external/all_human_gameplay.json'
                              ) and not self.no_humans:
                with open('data/external/all_human_gameplay.json') as f:
                    all_gameplay = json.load(f)
                    frames = []
                    for event, name in [('parents', 'Dilettante'),
                                        ('maryland', 'Expert'),
                                        ('live', 'National')]:
                        if self.merge_humans:
                            name = 'Human'
                        gameplay = all_gameplay[event]
                        if event != 'live':
                            control_correct_positions = gameplay[
                                'control_correct_positions']
                            control_wrong_positions = gameplay[
                                'control_wrong_positions']
                            control_positions = control_correct_positions + control_wrong_positions
                            control_positions = np.array(control_positions)
                            control_result = np.array(
                                len(control_correct_positions) * [1] +
                                len(control_wrong_positions) * [0])
                            argsort_control = np.argsort(control_positions)
                            control_x = control_positions[argsort_control]
                            control_sorted_result = control_result[
                                argsort_control]
                            control_y = control_sorted_result.cumsum(
                            ) / control_sorted_result.shape[0]
                            control_df = pd.DataFrame({
                                'correct': control_y,
                                'char_percent': control_x
                            })
                            control_df['Dataset'] = 'Regular Test'
                            control_df['Guessing_Model'] = f' {name}'
                            frames.append(control_df)

                        adv_correct_positions = gameplay[
                            'adv_correct_positions']
                        adv_wrong_positions = gameplay['adv_wrong_positions']
                        adv_positions = adv_correct_positions + adv_wrong_positions
                        adv_positions = np.array(adv_positions)
                        adv_result = np.array(
                            len(adv_correct_positions) * [1] +
                            len(adv_wrong_positions) * [0])
                        argsort_adv = np.argsort(adv_positions)
                        adv_x = adv_positions[argsort_adv]
                        adv_sorted_result = adv_result[argsort_adv]
                        adv_y = adv_sorted_result.cumsum(
                        ) / adv_sorted_result.shape[0]
                        adv_df = pd.DataFrame({
                            'correct': adv_y,
                            'char_percent': adv_x
                        })
                        adv_df['Dataset'] = 'Round 1 - IR Interface'
                        adv_df['Guessing_Model'] = f' {name}'
                        frames.append(adv_df)

                        if len(gameplay['advneural_correct_positions']) > 0:
                            adv_correct_positions = gameplay[
                                'advneural_correct_positions']
                            adv_wrong_positions = gameplay[
                                'advneural_wrong_positions']
                            adv_positions = adv_correct_positions + adv_wrong_positions
                            adv_positions = np.array(adv_positions)
                            adv_result = np.array(
                                len(adv_correct_positions) * [1] +
                                len(adv_wrong_positions) * [0])
                            argsort_adv = np.argsort(adv_positions)
                            adv_x = adv_positions[argsort_adv]
                            adv_sorted_result = adv_result[argsort_adv]
                            adv_y = adv_sorted_result.cumsum(
                            ) / adv_sorted_result.shape[0]
                            adv_df = pd.DataFrame({
                                'correct': adv_y,
                                'char_percent': adv_x
                            })
                            adv_df['Dataset'] = 'Round 2 - NN Interface'
                            adv_df['Guessing_Model'] = f' {name}'
                            frames.append(adv_df)

                    human_df = pd.concat(frames)
            if no_models:
                p = ggplot(human_df) + geom_line()
            else:
                df = self.char_plot_df
                if 1 not in self.rounds:
                    df = df[df['Dataset'] != 'Round 1 - IR Interface']
                if 2 not in self.rounds:
                    df = df[df['Dataset'] != 'Round 2 - IR Interface']
                    df = df[df['Dataset'] != 'Round 2 - NN Interface']
                p = ggplot(df)

                if os.path.exists('data/external/all_human_gameplay.json'
                                  ) and not self.no_humans:
                    eprint('Loading human data')
                    p = p + geom_line(data=human_df)

            if columns:
                facet_conf = facet_wrap('Guessing_Model', ncol=1)
            else:
                facet_conf = facet_wrap('Guessing_Model', nrow=1)

            if not no_models:
                if self.mvg_avg_char:
                    chart = stat_smooth(method='mavg',
                                        se=False,
                                        method_args={'window': 400})
                else:
                    chart = stat_summary_bin(fun_data=mean_no_se,
                                             bins=20,
                                             shape='.')
            else:
                chart = None

            p = (p + facet_conf +
                 aes(x='char_percent', y='correct', color='Dataset'))
            if chart is not None:
                p += chart
            p = (
                p + scale_y_continuous(breaks=np.linspace(0, 1, 11)) +
                scale_x_continuous(breaks=[0, .5, 1]) +
                xlab('Percent of Question Revealed') + ylab('Accuracy') +
                theme(
                    #legend_position='top', legend_box_margin=0, legend_title=element_blank(),
                    strip_text_x=element_text(margin={
                        't': 6,
                        'b': 6,
                        'l': 1,
                        'r': 5
                    })) +
                scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF'],
                                   name='Questions'))
            if self.title != '':
                p += ggtitle(self.title)

            return p
        else:
            return (
                ggplot(self.char_plot_df) +
                aes(x='char_percent', y='correct', color='Guessing_Model') +
                stat_smooth(
                    method='mavg', se=False, method_args={'window': 500}) +
                scale_y_continuous(breaks=np.linspace(0, 1, 21)))
Example #8
    def plot_char_percent_vs_accuracy_smooth(
        self, expo=False, no_models=False, columns=False
    ):
        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f"Setting limits to: {limits}")
        else:
            limits = [0, 1]
        if expo:
            if (
                os.path.exists("data/external/all_human_gameplay.json")
                and not self.no_humans
            ):
                with open("data/external/all_human_gameplay.json") as f:
                    all_gameplay = json.load(f)
                    frames = []
                    for event, name in [
                        ("parents", "Intermediate"),
                        ("maryland", "Expert"),
                        ("live", "National"),
                    ]:
                        if self.merge_humans:
                            name = "Human"
                        gameplay = all_gameplay[event]
                        if event != "live":
                            control_correct_positions = gameplay[
                                "control_correct_positions"
                            ]
                            control_wrong_positions = gameplay[
                                "control_wrong_positions"
                            ]
                            control_positions = (
                                control_correct_positions + control_wrong_positions
                            )
                            control_positions = np.array(control_positions)
                            control_result = np.array(
                                len(control_correct_positions) * [1]
                                + len(control_wrong_positions) * [0]
                            )
                            argsort_control = np.argsort(control_positions)
                            control_x = control_positions[argsort_control]
                            control_sorted_result = control_result[argsort_control]
                            control_y = (
                                control_sorted_result.cumsum()
                                / control_sorted_result.shape[0]
                            )
                            control_df = pd.DataFrame(
                                {"correct": control_y, "char_percent": control_x}
                            )
                            control_df["Dataset"] = "Regular Test"
                            control_df["Guessing_Model"] = f" {name}"
                            frames.append(control_df)

                        adv_correct_positions = gameplay["adv_correct_positions"]
                        adv_wrong_positions = gameplay["adv_wrong_positions"]
                        adv_positions = adv_correct_positions + adv_wrong_positions
                        adv_positions = np.array(adv_positions)
                        adv_result = np.array(
                            len(adv_correct_positions) * [1]
                            + len(adv_wrong_positions) * [0]
                        )
                        argsort_adv = np.argsort(adv_positions)
                        adv_x = adv_positions[argsort_adv]
                        adv_sorted_result = adv_result[argsort_adv]
                        adv_y = adv_sorted_result.cumsum() / adv_sorted_result.shape[0]
                        adv_df = pd.DataFrame({"correct": adv_y, "char_percent": adv_x})
                        adv_df["Dataset"] = "IR Adversarial"
                        adv_df["Guessing_Model"] = f" {name}"
                        frames.append(adv_df)

                        if len(gameplay["advneural_correct_positions"]) > 0:
                            adv_correct_positions = gameplay[
                                "advneural_correct_positions"
                            ]
                            adv_wrong_positions = gameplay["advneural_wrong_positions"]
                            adv_positions = adv_correct_positions + adv_wrong_positions
                            adv_positions = np.array(adv_positions)
                            adv_result = np.array(
                                len(adv_correct_positions) * [1]
                                + len(adv_wrong_positions) * [0]
                            )
                            argsort_adv = np.argsort(adv_positions)
                            adv_x = adv_positions[argsort_adv]
                            adv_sorted_result = adv_result[argsort_adv]
                            adv_y = (
                                adv_sorted_result.cumsum() / adv_sorted_result.shape[0]
                            )
                            adv_df = pd.DataFrame(
                                {"correct": adv_y, "char_percent": adv_x}
                            )
                            adv_df["Dataset"] = "RNN Adversarial"
                            adv_df["Guessing_Model"] = f" {name}"
                            frames.append(adv_df)

                    human_df = pd.concat(frames)
                    human_vals = sort_humans(list(human_df["Guessing_Model"].unique()))
                    human_dtype = CategoricalDtype(human_vals, ordered=True)
                    human_df["Guessing_Model"] = human_df["Guessing_Model"].astype(
                        human_dtype
                    )
                    dataset_dtype = CategoricalDtype(
                        ["Regular Test", "IR Adversarial", "RNN Adversarial"],
                        ordered=True,
                    )
                    human_df["Dataset"] = human_df["Dataset"].astype(dataset_dtype)

            if no_models:
                p = ggplot(human_df) + geom_point(shape=".")
            else:
                df = self.char_plot_df
                if 1 not in self.rounds:
                    df = df[df["Dataset"] != "Round 1 - IR Adversarial"]
                if 2 not in self.rounds:
                    df = df[df["Dataset"] != "Round 2 - IR Adversarial"]
                    df = df[df["Dataset"] != "Round 2 - RNN Adversarial"]
                p = ggplot(df)
                if self.save_df is not None:
                    eprint(f"Saving df to: {self.save_df}")
                    df.to_json(self.save_df)

                if (
                    os.path.exists("data/external/all_human_gameplay.json")
                    and not self.no_humans
                ):
                    eprint("Loading human data")
                    p = p + geom_line(data=human_df)

            if columns:
                facet_conf = facet_wrap("Guessing_Model", ncol=1)
            else:
                facet_conf = facet_wrap("Guessing_Model", nrow=1)

            if not no_models:
                if self.mvg_avg_char:
                    chart = stat_smooth(
                        method="mavg", se=False, method_args={"window": 400}
                    )
                else:
                    chart = stat_summary_bin(
                        fun_data=mean_no_se,
                        bins=20,
                        shape=".",
                        linetype="None",
                        size=0.5,
                    )
            else:
                chart = None

            p = p + facet_conf + aes(x="char_percent", y="correct", color="Dataset")
            if chart is not None:
                p += chart
            p = (
                p
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + scale_x_continuous(breaks=[0, 0.5, 1])
                + coord_cartesian(ylim=limits)
                + xlab("Percent of Question Revealed")
                + ylab("Accuracy")
                + theme(
                    # legend_position='top', legend_box_margin=0, legend_title=element_blank(),
                    strip_text_x=element_text(margin={"t": 6, "b": 6, "l": 1, "r": 5})
                )
                + scale_color_manual(
                    values=["#FF3333", "#66CC00", "#3333FF", "#FFFF33"],
                    name="Questions",
                )
            )
            if self.title != "":
                p += ggtitle(self.title)

            return p
        else:
            if self.save_df is not None:
                eprint(f"Saving df to: {self.save_df}")
                self.char_plot_df.to_json(self.save_df)
            return (
                ggplot(self.char_plot_df)
                + aes(x="char_percent", y="correct", color="Guessing_Model")
                + stat_smooth(method="mavg", se=False, method_args={"window": 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )
Example #9
def mixed_linear_plots(df, x_axis, x_label):
    plotnine.options.figure_size = (8, 10)

    md = smf.mixedlm('log_score ~ percent_broken + percent_fail_runs',
                     df,
                     groups=df.index.values)
    mdf_rul = md.fit()

    print('#' * 18 + 'Log RUL' + '#' * 18)
    print(mdf_rul.summary())

    md = smf.mixedlm('mse ~ percent_broken + percent_fail_runs',
                     df,
                     groups=df.index.values)
    mdf_mse = md.fit()

    print('#' * 18 + 'RMSE' + '#' * 18)
    print(mdf_mse.summary())

    df['percent_broken'] = df['percent_broken'].round().astype(int)
    df['percent_fail_runs'] = df['percent_fail_runs'].round().astype(int)

    gg = (plotnine.ggplot(
        df, plotnine.aes(x=x_axis, y='log_score', color='method')) +
          plotnine.geom_jitter(width=2.5, show_legend=False) +
          plotnine.geom_abline(
              plotnine.aes(intercept=mdf_rul.params['Intercept'],
                           slope=mdf_rul.params[x_axis])) +
          plotnine.stat_smooth(method='gls', show_legend=False) +
          plotnine.xlab(x_label) + plotnine.ylab('Logarithmic RUL-Score') +
          plotnine.scale_color_discrete(name='Method', labels=['DAAN', 'JAN'])
          + plotnine.theme_classic(base_size=20))
    gg.save('%s_log_rul_by_method.pdf' % x_axis)

    gg = (plotnine.ggplot(
        df, plotnine.aes(x=x_axis, y='log_score', color='task')) +
          plotnine.geom_jitter(width=2.5, show_legend=False) +
          plotnine.geom_abline(
              plotnine.aes(intercept=mdf_rul.params['Intercept'],
                           slope=mdf_rul.params[x_axis])) +
          plotnine.stat_smooth(method='gls', show_legend=False) +
          plotnine.xlab(x_label) + plotnine.ylab('Logarithmic RUL-Score') +
          plotnine.scale_color_discrete(
              name='Task',
              labels=['4→3', '4→2', '1→3', '1→2', '3→4', '3→1', '2→4', '2→1'
                      ]) + plotnine.theme_classic(base_size=20))
    gg.save('%s_log_rul_by_task.pdf' % x_axis)

    gg = (
        plotnine.ggplot(df, plotnine.aes(x=x_axis, y='mse', color='method')) +
        plotnine.geom_jitter(width=2.5) + plotnine.geom_abline(
            plotnine.aes(intercept=mdf_mse.params['Intercept'],
                         slope=mdf_mse.params[x_axis])) +
        plotnine.stat_smooth(method='gls') + plotnine.ylab('RMSE') +
        plotnine.xlab(x_label) +
        plotnine.scale_color_discrete(name='Method', labels=['DAAN', 'JAN']) +
        plotnine.theme_classic(base_size=20))
    gg.save('%s_mse_by_method.pdf' % x_axis)

    gg = (plotnine.ggplot(df, plotnine.aes(x=x_axis, y='mse', color='task')) +
          plotnine.geom_jitter(width=2.5) + plotnine.geom_abline(
              plotnine.aes(intercept=mdf_mse.params['Intercept'],
                           slope=mdf_mse.params[x_axis])) +
          plotnine.stat_smooth(method='gls') + plotnine.ylab('RMSE') +
          plotnine.scale_color_discrete(
              name='Task',
              labels=['4→3', '4→2', '1→3', '1→2', '3→4', '3→1', '2→4', '2→1'
                      ]) + plotnine.theme_classic(base_size=20))
    gg.save('%s_mse_by_task.pdf' % x_axis)
Example #10
 def test_gls(self):
     p = (self.p + stat_smooth(
         method='gls', formula='y ~ np.sin(x)', fill='red', se=True))
     assert p == 'gls_formula'
Example #11
import numpy as np 
import pandas as pd 
import statsmodels.api as sm 
import statsmodels.formula.api as smf 
from itertools import combinations 
import plotnine as p

# read data
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
def read_data(file): 
	return pd.read_stata("https://raw.github.com/scunning1975/mixtape/master/" + file)

np.random.seed(12282020)

dat = pd.DataFrame({'x': np.random.normal(50, 25, 1000)})
dat.loc[dat.x<0, 'x'] = 0
dat = dat[dat.x<100]
dat['D'] = 0
dat.loc[dat.x>50, 'D'] = 1
dat['y1'] = 25 + 0*dat.D + 1.5 * dat.x + np.random.normal(0, 20, dat.shape[0])
dat['y2'] = 25 + 40*dat.D + 1.5 * dat.x + np.random.normal(0, 20, dat.shape[0])
print('"Counterfactual Potential Outcomes')


(p.ggplot(dat, p.aes(x='x', y='y1', color='factor(D)'))
 + p.geom_point(alpha=0.5)
 + p.geom_vline(xintercept=50, colour="grey")
 + p.stat_smooth(method="lm", se=False)
 + p.labs(x="Test score (X)", y="Potential Outcome (Y1)"))

Example #12
#
#  (C) Copyright 2021  Pavel Tisnovsky
#
#  All rights reserved. This program and the accompanying materials
#  are made available under the terms of the Eclipse Public License v1.0
#  which accompanies this distribution, and is available at
#  http://www.eclipse.org/legal/epl-v10.html
#
#  Contributors:
#      Pavel Tisnovsky
#

from plotnine.data import mpg
from plotnine import ggplot, aes, facet_grid, labs, geom_point, stat_smooth

print(
    ggplot(mpg) + facet_grid(facets="year~class") + aes(x="displ", y="hwy") +
    labs(
        x="Engine Size",
        y="Miles per Gallon",
        title="Miles per Gallon for Each Year and Vehicle Class",
    ) + geom_point() + stat_smooth(method="lm"))
Example #13
#Topic ----Plot Nine- Bar Plot
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#pip install plotnine --user
from plotnine import *

#https://datacarpentry.org/python-ecology-lesson/07-visualization-ggplot-python/index.html


from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap
from plotnine.data import mtcars
mtcars
(ggplot(mtcars, aes('wt', 'mpg', color='factor(gear)')) + geom_point() + stat_smooth(method='lm') + facet_wrap('~gear'))

ggplot(mtcars, aes('wt', 'hp', color='factor(cyl)')) + geom_point(aes(size='mpg')) + labs(title='MT cars', subtitle ='wt vs hp', x='weight', y='horsepower') + geom_text(aes(label='name'))


#%%%
%matplotlib inline
import plotnine as p9
from plotnine.data import mtcars
from adjustText import adjust_text
#https://github.com/Phlya/adjustText/wiki
p9.ggplot(mtcars, aes('wt', 'hp', color='factor(cyl)')) + p9.geom_point(aes(size='mpg')) + p9.labs(title='MT cars', subtitle ='wt vs hp', x='weight', y='horsepower') + p9.geom_text(aes(label='name'), size=11, nudge_y=2)
p9.geom_text?
plt.ioff()# and plt.ion()
plt.close()
%matplotlib
Example #14
    def plot_char_percent_vs_accuracy_smooth(self, expo=False):
        if expo:
            p = (ggplot(self.char_plot_df) +
                 facet_wrap('Guessing_Model', nrow=1) +
                 aes(x='char_percent', y='correct', color='Dataset') +
                 stat_smooth(
                     method='mavg', se=False, method_args={'window': 200}) +
                 scale_y_continuous(breaks=np.linspace(0, 1, 11)) +
                 scale_x_continuous(breaks=[0, .5, 1]) +
                 xlab('Percent of Question Revealed') + ylab('Accuracy') +
                 theme(legend_position='top'))
            if os.path.exists('data/external/human_gameplay.json'):
                with open('data/external/human_gameplay.json') as f:
                    gameplay = json.load(f)
                    control_correct_positions = gameplay[
                        'control_correct_positions']
                    control_wrong_positions = gameplay[
                        'control_wrong_positions']
                    control_positions = control_correct_positions + control_wrong_positions
                    control_positions = np.array(control_positions)
                    control_result = np.array(
                        len(control_correct_positions) * [1] +
                        len(control_wrong_positions) * [0])
                    argsort_control = np.argsort(control_positions)
                    control_x = control_positions[argsort_control]
                    control_sorted_result = control_result[argsort_control]
                    control_y = control_sorted_result.cumsum(
                    ) / control_sorted_result.shape[0]
                    control_df = pd.DataFrame({
                        'correct': control_y,
                        'char_percent': control_x
                    })
                    control_df['Dataset'] = 'Test Questions'
                    control_df['Guessing_Model'] = ' Human'

                    adv_correct_positions = gameplay['adv_correct_positions']
                    adv_wrong_positions = gameplay['adv_wrong_positions']
                    adv_positions = adv_correct_positions + adv_wrong_positions
                    adv_positions = np.array(adv_positions)
                    adv_result = np.array(
                        len(adv_correct_positions) * [1] +
                        len(adv_wrong_positions) * [0])
                    argsort_adv = np.argsort(adv_positions)
                    adv_x = adv_positions[argsort_adv]
                    adv_sorted_result = adv_result[argsort_adv]
                    adv_y = adv_sorted_result.cumsum(
                    ) / adv_sorted_result.shape[0]
                    adv_df = pd.DataFrame({
                        'correct': adv_y,
                        'char_percent': adv_x
                    })
                    adv_df['Dataset'] = 'Challenge Questions'
                    adv_df['Guessing_Model'] = ' Human'

                    human_df = pd.concat([control_df, adv_df])
                    p = p + (geom_line(data=human_df))

            return p
        else:
            return (
                ggplot(self.char_plot_df) +
                aes(x='char_percent', y='correct', color='Guessing_Model') +
                stat_smooth(
                    method='mavg', se=False, method_args={'window': 500}) +
                scale_y_continuous(breaks=np.linspace(0, 1, 21)))
Example #15
import numpy as np
import pandas as pd
import plotnine as p

# read data
import ssl
ssl._create_default_https_context = ssl._create_unverified_context


def read_data(file):
    return pd.read_stata(
        "https://raw.github.com/scunning1975/mixtape/master/" + file)


np.random.seed(12282020)

dat = pd.DataFrame({'x': np.random.normal(50, 25, 1000)})
dat.loc[dat.x < 0, 'x'] = 0
dat = dat[dat.x < 100]
dat['D'] = 0
dat.loc[dat.x > 50, 'D'] = 1
dat['y1'] = 25 + 0 * dat.D + 1.5 * dat.x + np.random.normal(
    0, 20, dat.shape[0])
dat['y2'] = 25 + 40 * dat.D + 1.5 * dat.x + np.random.normal(
    0, 20, dat.shape[0])
print('"Counterfactual Potential Outcomes')

print('"Counterfactual Potential Outcomes after Treatment')
(p.ggplot(dat, p.aes(x='x', y='y2', color='factor(D)'))
 + p.geom_point(alpha=0.5)
 + p.geom_vline(xintercept=50, colour="grey")
 + p.stat_smooth(method="lm", se=False)
 + p.labs(x="Test score (X)", y="Potential Outcome (Y)"))
Example #16
from plotnine import (ggplot, aes, geom_point, facet_wrap,
                      stat_smooth, theme_xkcd)
from plotnine.data import mtcars

kwargs = dict(width=6, height=4)

p1 = (ggplot(mtcars, aes('wt', 'mpg'))
      + geom_point())
p1.save('readme-image-1.png', **kwargs)

p2 = p1 + aes(color='factor(gear)')
p2.save('readme-image-2.png', **kwargs)

p3 = p2 + stat_smooth(method='lm')
p3.save('readme-image-3.png', **kwargs)

p4 = p3 + facet_wrap('~gear')
p4.save('readme-image-4.png', **kwargs)

p5 = p4 + theme_xkcd()
p5.save('readme-image-5.png', **kwargs)
Example #17
# Assumes a `beer` DataFrame with 'barrels', 'crude.rate' and 'year' columns is already loaded.
from sklearn.linear_model import LinearRegression
from plotnine import (ggplot, aes, geom_point, geom_text, stat_smooth, labs,
                      annotate, positions)

barrels = beer.loc[:, 'barrels'].values.reshape(-1, 1)
crude_rate = beer.loc[:, 'crude.rate'].values.reshape(-1, 1) 

## Linear model of barrels vs crude
lm = LinearRegression()
model = lm.fit(barrels, crude_rate)
pred = lm.predict(barrels)
r2 = model.score(barrels, crude_rate)

## Plot barrels and crude rate
g = (ggplot(beer, aes(x = 'barrels', y = 'crude.rate')) + 
 geom_point(color = 'black') + 
 geom_text(aes(label = 'year'),
           position = positions.position_nudge(0,1)) + 
 stat_smooth(aes(x = 'barrels', y = 'crude.rate'), color = 'blue',
             method = 'lm',
             se = False) + 
 labs(title = "Crude Rate versus Beer Production", 
 x = "Ten Thousands of Barrels", 
 y = "Deaths per Million") + 
 annotate("text", x = 18250, 
          y = 65, 
          label = "R2:" + str(round(r2, 3))))
g.save("figures/allBeer_crudeRate_lm_p9.png")

g2 = (ggplot(beer, aes(x = 'year', y = 'barrels')) + 
 geom_point() +
 labs(title = "National Beer Production 2008-2015",
      x = "Ten Thousands of Barrels",
      y = "Year"))
g2.save("figures/allBeer_years_p9.png")
Example #18
def gene_expression_dynamics(
    adata,
    selected_fate,
    gene_name_list,
    traj_threshold=0.1,
    source="transition_map",
    invert_PseudoTime=False,
    mask=None,
    compute_new=True,
    gene_exp_percentile=99,
    n_neighbors=8,
    plot_raw_data=False,
    stat_smooth_method="loess",
):
    """
    Plot gene trend along the inferred dynamic trajectory.

    The results should be pre-computed from :func:`cospar.tl.progenitor` or
    :func:`cospar.tl.iterative_differentiation`

    Using the states that belong to the trajectory, it computes the pseudo time
    for these states and shows expression dynamics of selected genes along
    this pseudo time.

    Specifically, we first construct a KNN graph, compute the spectral embedding,
    and take the first component as the pseudo time. To create the dynamics for a
    selected gene, we re-weight the expression of this gene at each cell by its
    probability of belonging to the trajectory, rescale the expression at a selected
    percentile value, and finally fit a curve to the data points.

    Parameters
    ----------
    adata: :class:`~anndata.AnnData` object
        Assume to contain transition maps at adata.uns.
    selected_fate: `str`, or `list`
        Targeted cluster of the trajectory, consistent with adata.obs['state_info'].
        When it is a list, the listed clusters are combined into a single fate cluster.
    gene_name_list: `list`
        List of genes to plot on the dynamic trajectory.
    traj_threshold: `float`, optional (default: 0.1), range: (0,1)
        Relative threshold used to threshold the inferred dynamic trajectory when selecting states.
    invert_PseudoTime: `bool`, optional (default: False)
        If true, invert the pseudotime: 1 - pseudotime. This is useful when the direction
        of the pseudo time does not agree with intuition.
    mask: `np.array`, optional (default: None)
        A boolean array for further selecting cell states.
    compute_new: `bool`, optional (default: True)
        If true, compute everything from scratch (otherwise the previously saved pseudotime is reused).
    gene_exp_percentile: `int`, optional (default: 99)
        Plot gene expression below this percentile.
    n_neighbors: `int`, optional (default: 8)
        Number of nearest neighbors for constructing KNN graph.
    plot_raw_data: `bool`, optional (default: False)
        Plot the raw gene expression values of each cell along the pseudotime.
    stat_smooth_method: `str`, optional (default: 'loess')
        Smooth method used in the ggplot. Current available choices are:
        'auto' (Use loess if (n<1000), glm otherwise),
        'lm' (Linear Model),
        'wls' (Weighted Least Squares),
        'rlm' (Robust Linear Model),
        'glm' (Generalized linear Model),
        'gls' (Generalized Least Squares),
        'lowess' (Locally Weighted Regression (simple)),
        'loess' (Locally Weighted Regression),
        'mavg' (Moving Average),
        'gpr' (Gaussian Process Regressor)}.
    """

    if mask is None:
        final_mask = np.ones(adata.shape[0]).astype(bool)
    else:
        if mask.shape[0] == adata.shape[0]:
            final_mask = mask
        else:
            logg.error(
                "mask must be a boolean array with the same size as adata.shape[0]."
            )
            return None

    hf.check_available_map(adata)
    fig_width = settings.fig_width
    fig_height = settings.fig_height
    point_size = settings.fig_point_size

    if len(adata.uns["available_map"]) == 0:
        logg.error(f"There is no transition map available yet")

    else:

        if type(selected_fate) == str:
            selected_fate = [selected_fate]

        (
            mega_cluster_list,
            valid_fate_list,
            fate_array_flat,
            sel_index_list,
        ) = hf.analyze_selected_fates(adata.obs["state_info"], selected_fate)
        if len(mega_cluster_list) == 0:
            logg.error("No cells selected. Computation aborted!")
            return adata
        else:
            fate_name = mega_cluster_list[0]
            target_idx = sel_index_list[0]

            x_emb = adata.obsm["X_emb"][:, 0]
            y_emb = adata.obsm["X_emb"][:, 1]
            data_des = adata.uns["data_des"][-1]
            data_path = settings.data_path
            figure_path = settings.figure_path
            file_name = os.path.join(
                data_path, f"{data_des}_fate_trajectory_pseudoTime_{fate_name}.npy"
            )

            traj_name = f"diff_trajectory_{source}_{fate_name}"
            if traj_name not in adata.obs.keys():
                logg.error(
                    f"The target fate trajectory for {fate_name} with {source} have not been inferred yet.\n"
                    "Please infer the trajectory with first with cs.tl.progenitor, \n"
                    "or cs.tl.iterative_differentiation."
                )

            else:
                prob_0 = np.array(adata.obs[traj_name])

                sel_cell_idx = (prob_0 > traj_threshold * np.max(prob_0)) & final_mask
                if np.sum(sel_cell_idx) == 0:
                    raise ValueError("No cells selected!")

                sel_cell_id = np.nonzero(sel_cell_idx)[0]

                if os.path.exists(file_name) and (not compute_new):
                    logg.info("Load pre-computed pseudotime")
                    PseudoTime = np.load(file_name)
                else:

                    from sklearn.manifold import SpectralEmbedding

                    data_matrix = adata.obsm["X_pca"][sel_cell_idx]
                    method = SpectralEmbedding(n_components=1, n_neighbors=n_neighbors)
                    PseudoTime = method.fit_transform(data_matrix)
                    np.save(file_name, PseudoTime)
                    # logg.info("Run time:",time.time()-t)

                PseudoTime = PseudoTime - np.min(PseudoTime)
                PseudoTime = (PseudoTime / np.max(PseudoTime)).flatten()

                ## re-order the pseudoTime such that the target fate has the pseudo time 1.
                if invert_PseudoTime:
                    # target_fate_id=np.nonzero(target_idx)[0]
                    # convert_fate_id=hf.converting_id_from_fullSpace_to_subSpace(target_fate_id,sel_cell_id)[0]
                    # if np.mean(PseudoTime[convert_fate_id])<0.5: PseudoTime=1-PseudoTime
                    PseudoTime = 1 - PseudoTime

                # pdb.set_trace()
                if (
                    np.sum((PseudoTime > 0.25) & (PseudoTime < 0.75)) == 0
                ):  # the cell states do not form a continuum; plot raw data instead
                    logg.error(
                        "The selected cell states do not form a connected graph. Cannot form a continuum of pseudoTime. Only plot the raw data"
                    )
                    plot_raw_data = True

                ## plot the pseudotime ordering
                fig = plt.figure(figsize=(fig_width * 2, fig_height))
                ax = plt.subplot(1, 2, 1)
                pl_util.customized_embedding(
                    x_emb,
                    y_emb,
                    sel_cell_idx,
                    ax=ax,
                    title="Selected cells",
                    point_size=point_size,
                )
                ax1 = plt.subplot(1, 2, 2)
                pl_util.customized_embedding(
                    x_emb[sel_cell_idx],
                    y_emb[sel_cell_idx],
                    PseudoTime,
                    ax=ax1,
                    title="Pseudo Time",
                    point_size=point_size,
                )
                # customized_embedding(x_emb[final_id],y_emb[final_id],PseudoTime,ax=ax1,title='Pseudo time')
                Clb = fig.colorbar(
                    plt.cm.ScalarMappable(cmap=plt.cm.Reds), ax=ax1, label="Pseudo time"
                )
                fig.savefig(
                    os.path.join(
                        figure_path,
                        f"{data_des}_fate_trajectory_pseudoTime_{fate_name}.{settings.file_format_figs}",
                    )
                )

                temp_dict = {"PseudoTime": PseudoTime}
                for gene_name in gene_name_list:
                    yy_max = np.percentile(
                        adata.obs_vector(gene_name), gene_exp_percentile
                    )  # global background
                    yy = np.array(adata.obs_vector(gene_name)[sel_cell_idx])
                    rescaled_yy = (
                        yy * prob_0[sel_cell_idx] / yy_max
                    )  # rescaled by global background
                    temp_dict[gene_name] = rescaled_yy

                from plotnine import (
                    aes,
                    geom_point,
                    ggplot,
                    labs,
                    stat_smooth,
                    theme_classic,
                )

                data2 = pd.DataFrame(temp_dict)
                data2_melt = pd.melt(
                    data2, id_vars=["PseudoTime"], value_vars=gene_name_list
                )
                gplot = (
                    ggplot(
                        data=data2_melt,
                        mapping=aes(x="PseudoTime", y="value", color="variable"),
                    )
                    + (
                        geom_point()
                        if plot_raw_data
                        else stat_smooth(method=stat_smooth_method)
                    )
                    + theme_classic()
                    + labs(
                        x="Pseudo time",
                        y="Normalized gene expression",
                        color="Gene name",
                    )
                )

                gplot.save(
                    os.path.join(
                        figure_path,
                        f"{data_des}_fate_trajectory_pseutoTime_gene_expression_{fate_name}.{settings.file_format_figs}",
                    ),
                    width=fig_width,
                    height=fig_height,
                    verbose=False,
                )
                gplot.draw()
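A minimal sketch of how the stat_smooth_method options listed in the docstring map onto
plotnine's stat_smooth. The toy data below is an assumption for illustration; 'loess'
additionally needs the scikit-misc package, and 'mavg' expects a window size via
method_args, as in the other examples on this page.

import numpy as np
import pandas as pd
from plotnine import ggplot, aes, geom_point, stat_smooth

rng = np.random.default_rng(0)
demo = pd.DataFrame({'PseudoTime': np.linspace(0, 1, 200)})
demo['value'] = np.sin(3 * demo['PseudoTime']) + rng.normal(0, 0.1, len(demo))

base = ggplot(demo, aes('PseudoTime', 'value')) + geom_point(alpha=0.3)
p_loess = base + stat_smooth(method='loess')  # local regression, the default above
p_mavg = base + stat_smooth(method='mavg', se=False, method_args={'window': 20})  # moving average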
Example #19
 def test_lm_weights(self):
     p = (self.p + aes(weight='x.abs()') + stat_smooth(
         method='lm', formula='y ~ np.sin(x)', fill='red', se=True))
     assert p == 'lm_formula_weights'
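A standalone sketch of the weighted fit this test checks: the weight aesthetic is picked
up by the statsmodels-backed 'lm' smoother, so points with larger weight pull the fit
more strongly. The synthetic data is an illustrative assumption, not the test fixture.

import numpy as np
import pandas as pd
from plotnine import ggplot, aes, geom_point, stat_smooth

df = pd.DataFrame({'x': np.linspace(-5, 5, 200)})
df['y'] = np.sin(df['x']) + np.random.normal(scale=0.3, size=len(df))

p = (ggplot(df, aes('x', 'y'))
     + aes(weight='x.abs()')      # points far from zero get more weight in the fit
     + geom_point()
     + stat_smooth(method='lm', formula='y ~ np.sin(x)', fill='red', se=True))
print(p)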
Example #20
# Assumes the `lmb_data` DataFrame is already loaded.
import numpy as np
import pandas as pd
import plotnine as p

lmb_data['demvoteshare_c'] = lmb_data['demvoteshare'] - 0.5
# drop missing values
lmb_data = lmb_data[~pd.isnull(lmb_data.demvoteshare_c)]
lmb_data['demvoteshare_sq'] = lmb_data['demvoteshare_c']**2

#aggregating the data
lmb_data = lmb_data[lmb_data.demvoteshare.between(.45, .55)]
categories = lmb_data.lagdemvoteshare
lmb_data['lagdemvoteshare_100'] = pd.cut(lmb_data.lagdemvoteshare, 100)

agg_lmb_data = lmb_data.groupby('lagdemvoteshare_100')['score'].mean().reset_index()
lmb_data['gg_group'] = [1 if x>.5 else 0 for x in lmb_data.lagdemvoteshare]
agg_lmb_data['lagdemvoteshare'] = np.arange(0.01, 1.01, .01)

# plotting
p.ggplot(lmb_data, p.aes('lagdemvoteshare', 'score')) +\
    p.geom_point(p.aes(x = 'lagdemvoteshare', y = 'score'), data = agg_lmb_data) +\
    p.stat_smooth(p.aes('lagdemvoteshare', 'score', group = 'gg_group'),
                  data=lmb_data, method = "lm",
                  formula = 'y ~ x + I(x**2)') +\
    p.xlim(0,1) + p.ylim(0,100) +\
    p.geom_vline(xintercept = 0.5)

p.ggplot(lmb_data, p.aes('lagdemvoteshare', 'score')) +\
    p.geom_point(p.aes(x = 'lagdemvoteshare', y = 'score'), data = agg_lmb_data) +\
    p.stat_smooth(p.aes('lagdemvoteshare', 'score', group = 'gg_group'),
                  data=lmb_data, method = "lowess") +\
    p.xlim(0,1) + p.ylim(0,100) +\
    p.geom_vline(xintercept = 0.5)

p.ggplot(lmb_data, p.aes('lagdemvoteshare', 'score')) +\
    p.geom_point(p.aes(x = 'lagdemvoteshare', y = 'score'), data = agg_lmb_data) +\
    p.stat_smooth(p.aes('lagdemvoteshare', 'score', group = 'gg_group'),
                  data=lmb_data, method = "lm") +\
    p.xlim(0,1) + p.ylim(0,100) +\
    p.geom_vline(xintercept = 0.5)
Example #21
# (snippet truncated above; assumes `repeatedKnnResults` is a list of dicts with
#  keys 'p', 'k', 'resubAccuracy' and 'testAccuracy')
knnResultsSimplified = DataFrame([(x['p'],
                                   x['k'],
                                   x['resubAccuracy'],
                                   x['testAccuracy'])
                                  for x in repeatedKnnResults],
                                 columns = ['p',
                                            'k',
                                            'resubAccuracy',
                                            'testAccuracy'])

ggdata = pd.concat(
    [DataFrame({'p' : knnResultsSimplified.p,
                'k' : knnResultsSimplified.k.apply(int),
                'type' : 'resub',
                'Accuracy' : knnResultsSimplified.resubAccuracy}),
     DataFrame({'p' : knnResultsSimplified.p,
                'k' : knnResultsSimplified.k.apply(int),
                'type' : 'test',
                'Accuracy' : knnResultsSimplified.testAccuracy})],
    axis = 0
)

plt.close()
ggo = gg.ggplot(ggdata, gg.aes(x='p', y='Accuracy',
                               color='type', group='type', linetype='type'))
ggo += gg.facet_wrap('~ k')
ggo += gg.scale_x_log10()
ggo += gg.geom_point(alpha=0.6)
ggo += gg.stat_smooth()
ggo += gg.theme_bw()
print(ggo)
Example #22
#
#  (C) Copyright 2021  Pavel Tisnovsky
#
#  All rights reserved. This program and the accompanying materials
#  are made available under the terms of the Eclipse Public License v1.0
#  which accompanies this distribution, and is available at
#  http://www.eclipse.org/legal/epl-v10.html
#
#  Contributors:
#      Pavel Tisnovsky
#

from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap
from plotnine.data import mtcars

print(
    ggplot(mtcars, aes("wt", "mpg", color="factor(gear)")) + geom_point() +
    stat_smooth(method="lm") + facet_wrap("~gear"))
Example #23
    'set' +
    pd.Series(['1'] * anscombe.shape[0] + ['2'] * anscombe.shape[0] +
              ['3'] * anscombe.shape[0] + ['4'] * anscombe.shape[0]).values
})
anscombe.head()

plt.close()
ggo = gg.ggplot(anscombe, gg.aes(x='x', y='y')) +\
      gg.facet_wrap('~ set') +\
      gg.geom_point() +\
      gg.theme_bw()
print(ggo)
# ggo.save('anscombe_points.pdf', format='pdf', height=5, width=5)

plt.close()
ggo += gg.stat_smooth(method='lm')
print(ggo)
## ggo.save('anscombe_lm.pdf', format='pdf', height=5, width=5)

## seaborn's lmplot function often useful in same situations
## one would want stat_smooth in R with ggplot2
plt.close()
sns.lmplot(data=anscombe, x='x', y='y', col='set')

plt.close()
sns.lmplot(data=anscombe, x='x', y='y', col='set', robust=True, ci=None)

plt.close()
sns.lmplot(data=anscombe, x='x', y='y', col='set', lowess=True)

## -----------------------------------------------------------------
Example #24
# -*- coding: utf-8 -*-
"""
Created on Wed Oct 14 08:12:35 2020

@author: Ashish
Using plotnine library for plotting ggplot2 style graphics
"""
from plotnine import ggplot, geom_point, aes, stat_smooth, facet_wrap
from plotnine.data import mtcars

plt = (ggplot(mtcars, aes('wt', 'mpg', color='factor(gear)')) + geom_point() +
       stat_smooth(method='lm') + facet_wrap('~gear'))

# show the plot
print(plt)
Example #25
#
#  (C) Copyright 2021  Pavel Tisnovsky
#
#  All rights reserved. This program and the accompanying materials
#  are made available under the terms of the Eclipse Public License v1.0
#  which accompanies this distribution, and is available at
#  http://www.eclipse.org/legal/epl-v10.html
#
#  Contributors:
#      Pavel Tisnovsky
#

from plotnine import ggplot, geom_point, aes, stat_smooth
from plotnine.data import mtcars

g = (ggplot(mtcars, aes("wt", "mpg", color="factor(gear)")) + geom_point() +
     stat_smooth(method="lm"))

g.save("10.png")
Example #26
    def plot_char_percent_vs_accuracy_smooth(self,
                                             expo=False,
                                             no_models=False,
                                             columns=False):
        if expo:
            if os.path.exists('data/external/all_human_gameplay.json'):
                with open('data/external/all_human_gameplay.json') as f:
                    all_gameplay = json.load(f)
                    frames = []
                    for event, name in [('parents', 'Dilettante'),
                                        ('maryland', 'Expert'),
                                        ('live', 'National')]:
                        gameplay = all_gameplay[event]
                        if event != 'live':
                            control_correct_positions = gameplay[
                                'control_correct_positions']
                            control_wrong_positions = gameplay[
                                'control_wrong_positions']
                            control_positions = control_correct_positions + control_wrong_positions
                            control_positions = np.array(control_positions)
                            control_result = np.array(
                                len(control_correct_positions) * [1] +
                                len(control_wrong_positions) * [0])
                            argsort_control = np.argsort(control_positions)
                            control_x = control_positions[argsort_control]
                            control_sorted_result = control_result[
                                argsort_control]
                            control_y = control_sorted_result.cumsum(
                            ) / control_sorted_result.shape[0]
                            control_df = pd.DataFrame({
                                'correct': control_y,
                                'char_percent': control_x
                            })
                            control_df['Dataset'] = 'Test Questions'
                            control_df['Guessing_Model'] = f' {name}'
                            frames.append(control_df)

                        adv_correct_positions = gameplay[
                            'adv_correct_positions']
                        adv_wrong_positions = gameplay['adv_wrong_positions']
                        adv_positions = adv_correct_positions + adv_wrong_positions
                        adv_positions = np.array(adv_positions)
                        adv_result = np.array(
                            len(adv_correct_positions) * [1] +
                            len(adv_wrong_positions) * [0])
                        argsort_adv = np.argsort(adv_positions)
                        adv_x = adv_positions[argsort_adv]
                        adv_sorted_result = adv_result[argsort_adv]
                        adv_y = adv_sorted_result.cumsum(
                        ) / adv_sorted_result.shape[0]
                        adv_df = pd.DataFrame({
                            'correct': adv_y,
                            'char_percent': adv_x
                        })
                        adv_df['Dataset'] = 'Challenge Questions'
                        adv_df['Guessing_Model'] = f' {name}'
                        frames.append(adv_df)

                    human_df = pd.concat(frames)
            if no_models:
                p = ggplot(human_df) + geom_line()
            else:
                p = ggplot(self.char_plot_df)
                if os.path.exists('data/external/all_human_gameplay.json'):
                    p = p + geom_line(data=human_df)

            if columns:
                facet_conf = facet_wrap('Guessing_Model', ncol=1)
            else:
                facet_conf = facet_wrap('Guessing_Model', nrow=1)
            p = (p + facet_conf +
                 aes(x='char_percent', y='correct', color='Dataset') +
                 stat_smooth(
                     method='mavg', se=False, method_args={'window': 400}) +
                 scale_y_continuous(breaks=np.linspace(0, 1, 11)) +
                 scale_x_continuous(breaks=[0, .5, 1]) +
                 xlab('Percent of Question Revealed') + ylab('Accuracy') +
                 theme(legend_position='top',
                       legend_box_margin=0,
                       legend_title=element_blank(),
                       strip_text_x=element_text(margin={
                           't': 6,
                           'b': 6,
                           'l': 1,
                           'r': 5
                       })))
            return p
        else:
            return (
                ggplot(self.char_plot_df) +
                aes(x='char_percent', y='correct', color='Guessing_Model') +
                stat_smooth(
                    method='mavg', se=False, method_args={'window': 500}) +
                scale_y_continuous(breaks=np.linspace(0, 1, 21)))