コード例 #1
0
ファイル: figures.py プロジェクト: Pinafore/publications
def round_2_plot():
    """Render the 'Round 2 Attacks and Models' figure and save it as a PDF.

    The round-2 data file is downloaded first when it is not already present
    on disk, then checked with ``verify_checksum`` before being loaded. The
    plot shows ``correct`` against ``char_percent``, colored by ``Dataset``
    and faceted (single row) by ``Guessing_Model``.
    """
    data_missing = not os.path.exists(round_2_df_path)
    if data_missing:
        eprint(f'Downloading {round_2_df_url} to {round_2_df_path}')
        urlretrieve(round_2_df_url, round_2_df_path)
    verify_checksum(round_2_df_checksum, round_2_df_path)
    frame = pd.read_json(round_2_df_path)

    binned_summary = stat_summary_bin(
        fun_data=mean_no_se,
        bins=20,
        shape='.',
        linetype='None',
        size=0.5,
    )
    strip_margin = {'t': 6, 'b': 6, 'l': 1, 'r': 5}
    palette = ['#FF3333', '#66CC00', '#3333FF', '#FFFF33']

    # Components are added strictly in this order, one at a time.
    components = [
        aes(x='char_percent', y='correct', color='Dataset'),
        facet_wrap('Guessing_Model', nrow=1),
        binned_summary,
        scale_y_continuous(breaks=np.linspace(0, 1, 6)),
        scale_x_continuous(breaks=[0, .5, 1]),
        coord_cartesian(ylim=[0, 0.7]),
        ggtitle('Round 2 Attacks and Models'),
        xlab('Percent of Question Revealed'),
        ylab('Accuracy'),
        theme(strip_text_x=element_text(margin=strip_margin)),
        scale_color_manual(values=palette, name='Questions'),
    ]

    figure = ggplot(frame)
    for component in components:
        figure = figure + component

    figure.save('2019_tacl_trick/auto_fig/round_2_json.pdf',
                width=7.0, height=1.7)
コード例 #2
0
def test_discrete_x():
    """Summarise ``y`` over the ``xd`` column as bars (mean, min, max).

    The resulting plot is compared with the ``'discrete_x'`` reference; how
    ``==`` resolves that name is defined by the test harness, not here.
    """
    bar_summary = stat_summary_bin(
        geom='bar',
        fun_y=np.mean,
        fun_ymin=np.min,
        fun_ymax=np.max,
    )
    plot = ggplot(df, aes('xd', 'y')) + bar_summary

    assert plot == 'discrete_x'
コード例 #3
0
def test_continuous_x():
    """Summarise ``y`` over the ``xc`` column in 5 bins as bars.

    Each bar uses the mean with min/max as its range. The resulting plot is
    compared with the ``'continuous_x'`` reference; how ``==`` resolves that
    name is defined by the test harness, not here.
    """
    bar_summary = stat_summary_bin(
        geom='bar',
        bins=5,
        fun_y=np.mean,
        fun_ymin=np.min,
        fun_ymax=np.max,
    )
    plot = ggplot(df, aes('xc', 'y')) + bar_summary

    assert plot == 'continuous_x'
コード例 #4
0
    def plot_char_percent_vs_accuracy_smooth(
        self, expo=False, no_models=False, columns=False
    ):
        """Build the plot of ``correct`` against ``char_percent``.

        Args:
            expo: When true, build the faceted figure (one facet per
                ``Guessing_Model``, colored by ``Dataset``) and overlay the
                human gameplay curves when they can be loaded. When false,
                return a single moving-average plot of ``self.char_plot_df``
                colored by ``Guessing_Model``.
            no_models: Only consulted when ``expo`` is true. Plot the human
                gameplay points alone, without ``self.char_plot_df``.
            columns: Only consulted when ``expo`` is true. Lay the facets out
                in one column instead of one row.

        Returns:
            The assembled ggplot object.

        Raises:
            ValueError: If ``no_models`` is true but the human gameplay file
                is missing or ``self.no_humans`` is set, so there is nothing
                to plot.
        """

        def _cumulative_frame(correct_positions, wrong_positions, dataset, name):
            """Return one curve as a frame for the given dataset and player.

            Answers are sorted by position; ``correct`` is the running count
            of correct answers divided by the total number of answers.
            """
            positions = np.array(correct_positions + wrong_positions)
            results = np.array(
                len(correct_positions) * [1] + len(wrong_positions) * [0]
            )
            order = np.argsort(positions)
            sorted_results = results[order]
            frame = pd.DataFrame(
                {
                    "correct": sorted_results.cumsum() / sorted_results.shape[0],
                    "char_percent": positions[order],
                }
            )
            frame["Dataset"] = dataset
            frame["Guessing_Model"] = f" {name}"
            return frame

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f"Setting limits to: {limits}")
        else:
            limits = [0, 1]

        if not expo:
            if self.save_df is not None:
                eprint(f"Saving df to: {self.save_df}")
                # No local `df` exists on this path; the frame being plotted
                # here is self.char_plot_df, so that is what gets saved.
                self.char_plot_df.to_json(self.save_df)
            return (
                ggplot(self.char_plot_df)
                + aes(x="char_percent", y="correct", color="Guessing_Model")
                + stat_smooth(method="mavg", se=False, method_args={"window": 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        human_path = "data/external/all_human_gameplay.json"
        # Stays None when the file is absent or humans are disabled.
        human_df = None
        if os.path.exists(human_path) and not self.no_humans:
            with open(human_path) as f:
                all_gameplay = json.load(f)

            frames = []
            for event, name in [
                ("parents", "Intermediate"),
                ("maryland", "Expert"),
                ("live", "National"),
            ]:
                if self.merge_humans:
                    name = "Human"
                gameplay = all_gameplay[event]

                # The "live" event contributes no control curve.
                if event != "live":
                    frames.append(
                        _cumulative_frame(
                            gameplay["control_correct_positions"],
                            gameplay["control_wrong_positions"],
                            "Regular Test",
                            name,
                        )
                    )

                frames.append(
                    _cumulative_frame(
                        gameplay["adv_correct_positions"],
                        gameplay["adv_wrong_positions"],
                        "IR Adversarial",
                        name,
                    )
                )

                if len(gameplay["advneural_correct_positions"]) > 0:
                    frames.append(
                        _cumulative_frame(
                            gameplay["advneural_correct_positions"],
                            gameplay["advneural_wrong_positions"],
                            "RNN Adversarial",
                            name,
                        )
                    )

            human_df = pd.concat(frames)
            # Ordered categoricals fix the facet order (as given by
            # sort_humans) and the dataset order.
            human_vals = sort_humans(list(human_df["Guessing_Model"].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df["Guessing_Model"] = human_df["Guessing_Model"].astype(
                human_dtype
            )
            dataset_dtype = CategoricalDtype(
                ["Regular Test", "IR Adversarial", "RNN Adversarial"],
                ordered=True,
            )
            human_df["Dataset"] = human_df["Dataset"].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                raise ValueError(
                    "no_models=True requires human gameplay data, but it is "
                    "missing or disabled (no_humans)"
                )
            p = ggplot(human_df) + geom_point(shape=".")
            chart = None
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df["Dataset"] != "Round 1 - IR Adversarial"]
            if 2 not in self.rounds:
                df = df[df["Dataset"] != "Round 2 - IR Adversarial"]
                df = df[df["Dataset"] != "Round 2 - RNN Adversarial"]
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f"Saving df to: {self.save_df}")
                df.to_json(self.save_df)

            if human_df is not None:
                eprint("Loading human data")
                p = p + geom_line(data=human_df)

            if self.mvg_avg_char:
                chart = stat_smooth(
                    method="mavg", se=False, method_args={"window": 400}
                )
            else:
                chart = stat_summary_bin(
                    fun_data=mean_no_se,
                    bins=20,
                    shape=".",
                    linetype="None",
                    size=0.5,
                )

        if columns:
            facet_conf = facet_wrap("Guessing_Model", ncol=1)
        else:
            facet_conf = facet_wrap("Guessing_Model", nrow=1)

        p = p + facet_conf + aes(x="char_percent", y="correct", color="Dataset")
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, 0.5, 1])
            + coord_cartesian(ylim=limits)
            + xlab("Percent of Question Revealed")
            + ylab("Accuracy")
            + theme(
                strip_text_x=element_text(margin={"t": 6, "b": 6, "l": 1, "r": 5})
            )
            + scale_color_manual(
                values=["#FF3333", "#66CC00", "#3333FF", "#FFFF33"],
                name="Questions",
            )
        )
        if self.title != "":
            p += ggtitle(self.title)

        return p
コード例 #5
0
ファイル: figures.py プロジェクト: jetstreamin/qb
    def plot_char_percent_vs_accuracy_smooth(self,
                                             expo=False,
                                             no_models=False,
                                             columns=False):
        """Build the plot of ``correct`` against ``char_percent``.

        Args:
            expo: When true, build the faceted figure (one facet per
                ``Guessing_Model``, colored by ``Dataset``) and overlay the
                human gameplay curves when they can be loaded. When false,
                return a single moving-average plot of ``self.char_plot_df``
                colored by ``Guessing_Model``.
            no_models: Only consulted when ``expo`` is true. Plot the human
                gameplay curves alone, without ``self.char_plot_df``.
            columns: Only consulted when ``expo`` is true. Lay the facets out
                in one column instead of one row.

        Returns:
            The assembled ggplot object.

        Raises:
            ValueError: If ``no_models`` is true but the human gameplay file
                is missing or ``self.no_humans`` is set, so there is nothing
                to plot.
        """

        def _cumulative_frame(correct_positions, wrong_positions, dataset,
                              name):
            """Return one curve as a frame for the given dataset and player.

            Answers are sorted by position; ``correct`` is the running count
            of correct answers divided by the total number of answers.
            """
            positions = np.array(correct_positions + wrong_positions)
            results = np.array(
                len(correct_positions) * [1] + len(wrong_positions) * [0])
            order = np.argsort(positions)
            sorted_results = results[order]
            frame = pd.DataFrame({
                'correct': sorted_results.cumsum() / sorted_results.shape[0],
                'char_percent': positions[order]
            })
            frame['Dataset'] = dataset
            frame['Guessing_Model'] = f' {name}'
            return frame

        if not expo:
            return (
                ggplot(self.char_plot_df) +
                aes(x='char_percent', y='correct', color='Guessing_Model') +
                stat_smooth(
                    method='mavg', se=False, method_args={'window': 500}) +
                scale_y_continuous(breaks=np.linspace(0, 1, 21)))

        human_path = 'data/external/all_human_gameplay.json'
        # Stays None when the file is absent or humans are disabled.
        human_df = None
        if os.path.exists(human_path) and not self.no_humans:
            with open(human_path) as f:
                all_gameplay = json.load(f)

            frames = []
            for event, name in [('parents', 'Dilettante'),
                                ('maryland', 'Expert'),
                                ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                gameplay = all_gameplay[event]

                # The 'live' event contributes no control curve.
                if event != 'live':
                    frames.append(
                        _cumulative_frame(
                            gameplay['control_correct_positions'],
                            gameplay['control_wrong_positions'],
                            'Regular Test', name))

                # Each adversarial curve is built from its own positions, so
                # the x values always line up with the results they describe
                # (never with the control positions of this or a previous
                # event).
                frames.append(
                    _cumulative_frame(gameplay['adv_correct_positions'],
                                      gameplay['adv_wrong_positions'],
                                      'Round 1 - IR Interface', name))

                if len(gameplay['advneural_correct_positions']) > 0:
                    frames.append(
                        _cumulative_frame(
                            gameplay['advneural_correct_positions'],
                            gameplay['advneural_wrong_positions'],
                            'Round 2 - NN Interface', name))

            human_df = pd.concat(frames)

        if no_models:
            if human_df is None:
                raise ValueError(
                    'no_models=True requires human gameplay data, but it is '
                    'missing or disabled (no_humans)')
            p = ggplot(human_df) + geom_line()
            chart = None
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Interface']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Interface']
                df = df[df['Dataset'] != 'Round 2 - NN Interface']
            p = ggplot(df)

            if human_df is not None:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

            if self.mvg_avg_char:
                chart = stat_smooth(method='mavg',
                                    se=False,
                                    method_args={'window': 400})
            else:
                chart = stat_summary_bin(fun_data=mean_no_se,
                                         bins=20,
                                         shape='.')

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        p = (p + facet_conf +
             aes(x='char_percent', y='correct', color='Dataset'))
        if chart is not None:
            p += chart
        p = (
            p + scale_y_continuous(breaks=np.linspace(0, 1, 11)) +
            scale_x_continuous(breaks=[0, .5, 1]) +
            xlab('Percent of Question Revealed') + ylab('Accuracy') +
            theme(
                strip_text_x=element_text(margin={
                    't': 6,
                    'b': 6,
                    'l': 1,
                    'r': 5
                })) +
            scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF'],
                               name='Questions'))
        if self.title != '':
            p += ggtitle(self.title)

        return p
コード例 #6
0
ファイル: figures.py プロジェクト: Pinafore/qb
    def plot_char_percent_vs_accuracy_smooth(self, expo=False, no_models=False, columns=False):
        """Build the plot of ``correct`` against ``char_percent``.

        Args:
            expo: When true, build the faceted figure (one facet per
                ``Guessing_Model``, colored by ``Dataset``) and overlay the
                human gameplay curves when they can be loaded. When false,
                return a single moving-average plot of ``self.char_plot_df``
                colored by ``Guessing_Model``.
            no_models: Only consulted when ``expo`` is true. Plot the human
                gameplay points alone, without ``self.char_plot_df``.
            columns: Only consulted when ``expo`` is true. Lay the facets out
                in one column instead of one row.

        Returns:
            The assembled ggplot object.

        Raises:
            ValueError: If ``no_models`` is true but the human gameplay file
                is missing or ``self.no_humans`` is set, so there is nothing
                to plot.
        """

        def _cumulative_frame(correct_positions, wrong_positions, dataset, name):
            """Return one curve as a frame for the given dataset and player.

            Answers are sorted by position; ``correct`` is the running count
            of correct answers divided by the total number of answers.
            """
            positions = np.array(correct_positions + wrong_positions)
            results = np.array(len(correct_positions) * [1] + len(wrong_positions) * [0])
            order = np.argsort(positions)
            sorted_results = results[order]
            frame = pd.DataFrame({
                'correct': sorted_results.cumsum() / sorted_results.shape[0],
                'char_percent': positions[order],
            })
            frame['Dataset'] = dataset
            frame['Guessing_Model'] = f' {name}'
            return frame

        if self.y_max is not None:
            limits = [0, float(self.y_max)]
            eprint(f'Setting limits to: {limits}')
        else:
            limits = [0, 1]

        if not expo:
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                # No local `df` exists on this path; the frame being plotted
                # here is self.char_plot_df, so that is what gets saved.
                self.char_plot_df.to_json(self.save_df)
            return (
                ggplot(self.char_plot_df)
                + aes(x='char_percent', y='correct', color='Guessing_Model')
                + stat_smooth(method='mavg', se=False, method_args={'window': 500})
                + scale_y_continuous(breaks=np.linspace(0, 1, 6))
                + coord_cartesian(ylim=limits)
            )

        human_path = 'data/external/all_human_gameplay.json'
        # Stays None when the file is absent or humans are disabled.
        human_df = None
        if os.path.exists(human_path) and not self.no_humans:
            with open(human_path) as f:
                all_gameplay = json.load(f)

            frames = []
            for event, name in [('parents', 'Intermediate'), ('maryland', 'Expert'), ('live', 'National')]:
                if self.merge_humans:
                    name = 'Human'
                gameplay = all_gameplay[event]

                # The 'live' event contributes no control curve.
                if event != 'live':
                    frames.append(_cumulative_frame(
                        gameplay['control_correct_positions'],
                        gameplay['control_wrong_positions'],
                        'Regular Test',
                        name,
                    ))

                frames.append(_cumulative_frame(
                    gameplay['adv_correct_positions'],
                    gameplay['adv_wrong_positions'],
                    'IR Adversarial',
                    name,
                ))

                if len(gameplay['advneural_correct_positions']) > 0:
                    frames.append(_cumulative_frame(
                        gameplay['advneural_correct_positions'],
                        gameplay['advneural_wrong_positions'],
                        'RNN Adversarial',
                        name,
                    ))

            human_df = pd.concat(frames)
            # Ordered categoricals fix the facet order (as given by
            # sort_humans) and the dataset order.
            human_vals = sort_humans(list(human_df['Guessing_Model'].unique()))
            human_dtype = CategoricalDtype(human_vals, ordered=True)
            human_df['Guessing_Model'] = human_df['Guessing_Model'].astype(human_dtype)
            dataset_dtype = CategoricalDtype(['Regular Test', 'IR Adversarial', 'RNN Adversarial'], ordered=True)
            human_df['Dataset'] = human_df['Dataset'].astype(dataset_dtype)

        if no_models:
            if human_df is None:
                raise ValueError(
                    'no_models=True requires human gameplay data, but it is '
                    'missing or disabled (no_humans)'
                )
            p = ggplot(human_df) + geom_point(shape='.')
            chart = None
        else:
            df = self.char_plot_df
            if 1 not in self.rounds:
                df = df[df['Dataset'] != 'Round 1 - IR Adversarial']
            if 2 not in self.rounds:
                df = df[df['Dataset'] != 'Round 2 - IR Adversarial']
                df = df[df['Dataset'] != 'Round 2 - RNN Adversarial']
            p = ggplot(df)
            if self.save_df is not None:
                eprint(f'Saving df to: {self.save_df}')
                df.to_json(self.save_df)

            if human_df is not None:
                eprint('Loading human data')
                p = p + geom_line(data=human_df)

            if self.mvg_avg_char:
                chart = stat_smooth(method='mavg', se=False, method_args={'window': 400})
            else:
                chart = stat_summary_bin(fun_data=mean_no_se, bins=20, shape='.', linetype='None', size=0.5)

        if columns:
            facet_conf = facet_wrap('Guessing_Model', ncol=1)
        else:
            facet_conf = facet_wrap('Guessing_Model', nrow=1)

        p = (
            p + facet_conf
            + aes(x='char_percent', y='correct', color='Dataset')
        )
        if chart is not None:
            p += chart
        p = (
            p
            + scale_y_continuous(breaks=np.linspace(0, 1, 6))
            + scale_x_continuous(breaks=[0, .5, 1])
            + coord_cartesian(ylim=limits)
            + xlab('Percent of Question Revealed')
            + ylab('Accuracy')
            + theme(
                strip_text_x=element_text(margin={'t': 6, 'b': 6, 'l': 1, 'r': 5})
            )
            + scale_color_manual(values=['#FF3333', '#66CC00', '#3333FF', '#FFFF33'], name='Questions')
        )
        if self.title != '':
            p += ggtitle(self.title)

        return p