Example #1
args = parser.parse_args()
if "templates.yaml" not in args.dataset_path:
    exit()

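# The dataset path is assumed to look like
# promptsource/templates/<dataset_name>[/<subset_name>]/templates.yaml,
# so splitting on "/" exposes the dataset name (and the optional subset name).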
path = args.dataset_path.split("/")

if path[2] in INCLUDED_USERS:
    print("Skipping showing templates for community dataset.")
else:
    dataset_name = path[2]
    subset_name = path[3] if len(path) == 5 else ""

    template_collection = TemplateCollection()

    dataset = get_dataset(dataset_name, subset_name)
    splits = list(dataset.keys())

    dataset_templates = template_collection.get_dataset(
        dataset_name, subset_name)
    template_list = dataset_templates.all_template_names

    width = 80
    print("DATASET ", args.dataset_path)

    # First show all the templates.
    for template_name in template_list:
        template = dataset_templates[template_name]
        print("TEMPLATE")
        print("NAME:", template_name)
        print("Is Original Task: ", template.metadata.original_task)
Example #2
    if dataset_key is not None:

        #
        # Check for subconfigurations (i.e. subsets)
        #
        configs = get_dataset_confs(dataset_key)
        conf_option = None
        if len(configs) > 0:
            conf_option = st.sidebar.selectbox("Subset",
                                               configs,
                                               index=0,
                                               format_func=lambda a: a.name)

        subset_name = str(conf_option.name) if conf_option else None
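        # Loading can fail for datasets that `datasets` cannot download automatically;
        # surface the manual-download instructions below instead of crashing.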
        try:
            dataset = get_dataset(dataset_key, subset_name)
        except OSError as e:
            st.error(
                f"Some datasets are not handled automatically by `datasets` and require users to download the "
                f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
                f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
                f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment "
                f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`"
                f"\n\nOriginal error:\n{str(e)}")
            st.stop()

        splits = list(dataset.keys())
        index = 0
        if "train" in splits:
            index = splits.index("train")
        split = st.sidebar.selectbox("Split",
                                     splits,
                                     key="split_select",
                                     index=index)
Example #3
def run_app():
    #
    # Loads session state
    #
    state = _get_state()

    def reset_template_state():
        state.template_name = None
        state.jinja = None
        state.reference = None

    #
    # Initial page setup
    #
    st.set_page_config(page_title="Promptsource", layout="wide")
    st.sidebar.markdown(
        "<center><a href='https://github.com/bigscience-workshop/promptsource'>💻Github - Promptsource\n\n</a></center>",
        unsafe_allow_html=True,
    )
    mode = st.sidebar.selectbox(
        label="Choose a mode",
        options=select_options,
        index=0,
        key="mode_select",
    )
    st.sidebar.title(f"{side_bar_title_prefix} 🌸 - {mode}")

    #
    # Adds pygments styles to the page.
    #
    st.markdown("<style>" +
                HtmlFormatter(style="friendly").get_style_defs(".highlight") +
                "</style>",
                unsafe_allow_html=True)

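    # Wrap width (in characters) for text rendered by the helpers below.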
    WIDTH = 140

    def show_jinja(t, width=WIDTH):
        def replace_linebreaks(t):
            """
            st.write does not handle double breaklines very well. When it encounters `\n\n`, it exit the curent <div> block.
            Explicitely replacing all `\n` with their html equivalent to bypass this issue.
            Also stripping the trailing `\n` first.
            """
            return t.strip("\n").replace("\n", "<br/>")

        wrap = textwrap.fill(t, width=width, replace_whitespace=False)
        out = highlight(wrap, DjangoLexer(), HtmlFormatter())
        out = replace_linebreaks(out)
        st.write(out, unsafe_allow_html=True)

    def show_text(t, width=WIDTH, with_markdown=False):
        wrap = [
            textwrap.fill(subt, width=width, replace_whitespace=False)
            for subt in t.split("\n")
        ]
        wrap = "\n".join(wrap)
        if with_markdown:
            st.write(wrap, unsafe_allow_html=True)
        else:
            st.text(wrap)

    if mode == "Helicopter view":
        st.title("High level metrics")
        st.write("This will take a minute to collect.")
        st.write(
            "If you want to contribute, please refer to the instructions in " +
            "[Contributing](https://github.com/bigscience-workshop/promptsource/blob/main/CONTRIBUTING.md)."
        )

        #
        # Loads template data
        #
        try:
            template_collection = TemplateCollection()
        except FileNotFoundError:
            st.error(
                "Unable to find the prompt folder!\n\n"
                "We expect the folder to be in the working directory. "
                "You might need to restart the app in the root directory of the repo."
            )
            st.stop()

        #
        # Global metrics
        #
        counts = template_collection.get_templates_count()
        nb_prompted_datasets = len(counts)
        st.write(f"## Number of *prompted datasets*: `{nb_prompted_datasets}`")
        nb_prompts = sum(counts.values())
        st.write(f"## Number of *prompts*: `{nb_prompts}`")

        #
        # Metrics per dataset/subset
        #
        # Download dataset infos (multiprocessing download)
        manager = Manager()
        all_infos = manager.dict()
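        # `template_collection.keys` yields (dataset_name, subset_name) pairs;
        # deduplicate the dataset names before farming out the info downloads.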
        all_datasets = list(set([t[0] for t in template_collection.keys]))

        pool = Pool(processes=multiprocessing.cpu_count())
        pool.map(functools.partial(get_infos, all_infos), all_datasets)
        pool.close()
        pool.join()

        results = []
        for (dataset_name, subset_name) in template_collection.keys:
            # Collect split sizes (train, validation and test)
            if dataset_name not in all_infos:
                infos = get_dataset_infos(dataset_name)
                all_infos[dataset_name] = infos
            else:
                infos = all_infos[dataset_name]
            if infos:
                if subset_name is None:
                    subset_infos = infos[list(infos.keys())[0]]
                else:
                    subset_infos = infos[subset_name]

                split_sizes = {
                    k: v.num_examples
                    for k, v in subset_infos.splits.items()
                }
            else:
                # Zaid/coqa_expanded and Zaid/quac_expanded don't have a dataset_infos.json,
                # so `infos` is an empty dict and `infos[list(infos.keys())[0]]` would raise an error.
                # For simplicity, leave `split_sizes` empty so the displayed split sizes are 0.
                split_sizes = {}

            # Collect template counts, original task counts and names
            dataset_templates = template_collection.get_dataset(
                dataset_name, subset_name)
            results.append({
                "Dataset name": dataset_name,
                "Subset name": "∅" if subset_name is None else subset_name,
                "Train size": split_sizes.get("train", 0),
                "Validation size": split_sizes.get("validation", 0),
                "Test size": split_sizes.get("test", 0),
                "Number of prompts": len(dataset_templates),
                "Number of original task prompts": sum(
                    bool(t.metadata.original_task)
                    for t in dataset_templates.templates.values()),
                "Prompt names": [t.name for t in dataset_templates.templates.values()],
            })
        results_df = pd.DataFrame(results)
        results_df.sort_values(["Number of prompts"],
                               inplace=True,
                               ascending=False)
        results_df.reset_index(drop=True, inplace=True)

        nb_training_instances = results_df["Train size"].sum()
        st.write(
            f"## Number of *training instances*: `{nb_training_instances}`")

        plot_df = results_df[[
            "Dataset name", "Subset name", "Train size", "Number of prompts"
        ]].copy()
        plot_df["Name"] = plot_df["Dataset name"] + " - " + plot_df["Subset name"]
        plot_df.sort_values(["Train size"], inplace=True, ascending=False)
        fig = px.bar(
            plot_df,
            x="Name",
            y="Train size",
            hover_data=["Dataset name", "Subset name", "Number of prompts"],
            log_y=True,
            title="Number of training instances per data(sub)set - y-axis is in logscale",
        )
        fig.update_xaxes(visible=False, showticklabels=False)
        st.plotly_chart(fig, use_container_width=True)
        st.write(
            f"- Top 3 training subsets account for `{100 * plot_df[:3]['Train size'].sum() / nb_training_instances:.2f}%` of the training instances."
        )
        biggest_training_subset = plot_df.iloc[0]
        st.write(
            f"- Biggest training subset is *{biggest_training_subset['Name']}* with `{biggest_training_subset['Train size']}` instances"
        )
        smallest_training_subset = plot_df[plot_df["Train size"] > 0].iloc[-1]
        st.write(
            f"- Smallest training subset is *{smallest_training_subset['Name']}* with `{smallest_training_subset['Train size']}` instances"
        )

        st.markdown("***")
        st.write("Details per dataset")
        st.table(results_df)

    else:
        # Combining the `Prompted dataset viewer` and `Sourcing` modes since the
        # backbone of the interface is the same.
        assert mode in ["Prompted dataset viewer", "Sourcing"], ValueError(
            f"`mode` ({mode}) should be in `[Helicopter view, Prompted dataset viewer, Sourcing]`"
        )

        #
        # Loads dataset information
        #

        dataset_list = list_datasets()
        ag_news_index = dataset_list.index("ag_news")

        #
        # Select a dataset - starts with ag_news
        #
        dataset_key = st.sidebar.selectbox(
            "Dataset",
            dataset_list,
            key="dataset_select",
            index=ag_news_index,
            help="Select the dataset to work on.",
        )

        #
        # If a particular dataset is selected, loads dataset and template information
        #
        if dataset_key is not None:

            #
            # Check for subconfigurations (i.e. subsets)
            #
            configs = get_dataset_confs(dataset_key)
            conf_option = None
            if len(configs) > 0:
                conf_option = st.sidebar.selectbox(
                    "Subset", configs, index=0, format_func=lambda a: a.name)

            subset_name = str(conf_option.name) if conf_option else None
            try:
                dataset = get_dataset(dataset_key, subset_name)
            except OSError as e:
                st.error(
                    f"Some datasets are not handled automatically by `datasets` and require users to download the "
                    f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
                    f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
                    f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment "
                    f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`"
                    f"\n\nOriginal error:\n{str(e)}")
                st.stop()

            splits = list(dataset.keys())
            index = 0
            if "train" in splits:
                index = splits.index("train")
            split = st.sidebar.selectbox("Split",
                                         splits,
                                         key="split_select",
                                         index=index)
            dataset = dataset[split]
            dataset = renameDatasetColumn(dataset)

            #
            # Loads template data
            #
            try:
                dataset_templates = DatasetTemplates(
                    dataset_key, conf_option.name if conf_option else None)
            except FileNotFoundError:
                st.error(
                    "Unable to find the prompt folder!\n\n"
                    "We expect the folder to be in the working directory. "
                    "You might need to restart the app in the root directory of the repo."
                )
                st.stop()

            template_list = dataset_templates.all_template_names
            num_templates = len(template_list)
            st.sidebar.write(
                "No of prompts created for " +
                f"`{dataset_key + (('/' + conf_option.name) if conf_option else '')}`"
                + f": **{str(num_templates)}**")

            if mode == "Prompted dataset viewer":
                if num_templates > 0:
                    template_name = st.sidebar.selectbox(
                        "Prompt name",
                        template_list,
                        key="template_select",
                        index=0,
                        help="Select the prompt to visualize.",
                    )

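                # Number of examples displayed per page in the viewer (see the display loop below).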
                step = 50
                example_index = st.sidebar.number_input(
                    f"Select the example index (Size = {len(dataset)})",
                    min_value=0,
                    max_value=len(dataset) - step,
                    value=0,
                    step=step,
                    key="example_index_number_input",
                    help="Offset = 50.",
                )
            else:  # mode = Sourcing
                st.sidebar.subheader("Select Example")
                example_index = st.sidebar.slider("Select the example index",
                                                  0,
                                                  len(dataset) - 1)

                example = dataset[example_index]
                example = removeHyphen(example)

                st.sidebar.write(example)

            st.sidebar.subheader("Dataset Schema")
            rendered_features = render_features(dataset.features)
            st.sidebar.write(rendered_features)

            #
            # Display dataset information
            #
            st.header("Dataset: " + dataset_key + " " +
                      (("/ " + conf_option.name) if conf_option else ""))

            # If we have a custom dataset, change the source link to the Hub.
            split_dataset_key = dataset_key.split("/")
            possible_user = split_dataset_key[0]
            if len(split_dataset_key) > 1 and possible_user in INCLUDED_USERS:
                source_link = "https://huggingface.co/datasets/%s/blob/main/%s.py" % (
                    dataset_key,
                    split_dataset_key[-1],
                )
            else:
                source_link = "https://github.com/huggingface/datasets/blob/master/datasets/%s/%s.py" % (
                    dataset_key,
                    dataset_key,
                )

            st.markdown("*Homepage*: " + dataset.info.homepage +
                        "\n\n*Dataset*: " + source_link)

            md = """
            %s
            """ % (dataset.info.description.replace("\\", "")
                   if dataset_key else "")
            st.markdown(md)

            #
            # Body of the app: display prompted examples in mode `Prompted dataset viewer`
            # or text boxes to create new prompts in mode `Sourcing`
            #
            if mode == "Prompted dataset viewer":
                #
                # Display template information
                #
                if num_templates > 0:
                    template = dataset_templates[template_name]
                    st.subheader("Prompt")
                    st.markdown("##### Name")
                    st.text(template.name)
                    st.markdown("##### Reference")
                    st.text(template.reference)
                    st.markdown("##### Original Task? ")
                    st.text(template.metadata.original_task)
                    st.markdown("##### Choices in template? ")
                    st.text(template.metadata.choices_in_prompt)
                    st.markdown("##### Metrics")
                    st.text(", ".join(template.metadata.metrics) if template.
                            metadata.metrics else None)
                    st.markdown("##### Answer Choices")
                    if template.get_answer_choices_expr() is not None:
                        show_jinja(template.get_answer_choices_expr())
                    else:
                        st.text(None)
                    st.markdown("##### Jinja template")
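                    # A prompt's Jinja string stores the input and target templates separated by "|||".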
                    splitted_template = template.jinja.split("|||")
                    st.markdown("###### Input template")
                    show_jinja(splitted_template[0].strip())
                    if len(splitted_template) > 1:
                        st.markdown("###### Target template")
                        show_jinja(splitted_template[1].strip())
                    st.markdown("***")

                #
                # Display a batch of `step` examples
                #
                for ex_idx in range(example_index, example_index + step):
                    if ex_idx >= len(dataset):
                        continue
                    example = dataset[ex_idx]
                    example = removeHyphen(example)
                    col1, _, col2 = st.beta_columns([12, 1, 12])
                    with col1:
                        st.write(example)
                    if num_templates > 0:
                        with col2:
                            prompt = template.apply(example,
                                                    highlight_variables=False)
                            if prompt == [""]:
                                st.write("∅∅∅ *Blank result*")
                            else:
                                st.write("Input")
                                show_text(prompt[0])
                                if len(prompt) > 1:
                                    st.write("Target")
                                    show_text(prompt[1])
                    st.markdown("***")
            else:  # mode = Sourcing
                st.markdown("## Prompt Creator")

                #
                # Create a new template or select an existing one
                #
                col1a, col1b, _, col2 = st.beta_columns([9, 9, 1, 6])

                # current_templates_key and state.templates_key are keys for the templates object
                current_templates_key = (dataset_key, conf_option.name
                                         if conf_option else None)

                # Resets state if there has been a change in templates_key
                if state.templates_key != current_templates_key:
                    state.templates_key = current_templates_key
                    reset_template_state()

                with col1a, st.form("new_template_form"):
                    new_template_name = st.text_input(
                        "Create a New Prompt",
                        key="new_template",
                        value="",
                        help="Enter name and hit enter to create a new prompt.",
                    )
                    new_template_submitted = st.form_submit_button("Create")
                    if new_template_submitted:
                        if new_template_name in dataset_templates.all_template_names:
                            st.error(
                                f"A prompt with the name {new_template_name} already exists "
                                f"for dataset {state.templates_key}.")
                        elif new_template_name == "":
                            st.error("Need to provide a prompt name.")
                        else:
                            template = Template(new_template_name, "", "")
                            dataset_templates.add_template(template)
                            reset_template_state()
                            state.template_name = new_template_name
                    else:
                        state.new_template_name = None

                with col1b, st.beta_expander("or Select Prompt",
                                             expanded=True):
                    template_list = dataset_templates.all_template_names
                    if state.template_name:
                        index = template_list.index(state.template_name)
                    else:
                        index = 0
                    state.template_name = st.selectbox(
                        "",
                        template_list,
                        key="template_select",
                        index=index,
                        help="Select the prompt to work on.")

                    if st.button("Delete Prompt", key="delete_prompt"):
                        dataset_templates.remove_template(state.template_name)
                        reset_template_state()

                variety_guideline = """
                :heavy_exclamation_mark::question:Creating a diverse set of prompts whose differences go beyond surface wording (i.e. marginally changing 2 or 3 words) is highly encouraged.
                Ultimately, the hope is that exposing the model to such diversity will have a non-trivial impact on the model's robustness to the prompt formulation.
                \r**To get varied prompts, you can try moving the cursor along these axes**:
                \n- **Interrogative vs affirmative form**: Ask a question about an attribute of the inputs or tell the model to decide something about the input.
                \n- **Task description localization**: Where is the task description blended with the inputs? At the beginning, in the middle, or at the end?
                \n- **Implicit situation or contextualization**: How explicit is the query? For instance, *Given this review, would you buy this product?* is an indirect way to ask whether the review is positive.
                """

                col1, _, _ = st.beta_columns([18, 1, 6])
                with col1:
                    if state.template_name is not None:
                        show_text(variety_guideline, with_markdown=True)

                #
                # Edit the created or selected template
                #
                col1, _, col2 = st.beta_columns([18, 1, 6])
                with col1:
                    if state.template_name is not None:
                        template = dataset_templates[state.template_name]
                        #
                        # If template is selected, displays template editor
                        #
                        with st.form("edit_template_form"):
                            updated_template_name = st.text_input(
                                "Name", value=template.name)
                            state.reference = st.text_input(
                                "Prompt Reference",
                                help=
                                "Short description of the prompt and/or paper reference for the prompt.",
                                value=template.reference,
                            )

                            # Metadata
                            state.metadata = template.metadata
                            state.metadata.original_task = st.checkbox(
                                "Original Task?",
                                value=template.metadata.original_task,
                                help=
                                "Prompt asks model to perform the original task designed for this dataset.",
                            )
                            state.metadata.choices_in_prompt = st.checkbox(
                                "Choices in Template?",
                                value=template.metadata.choices_in_prompt,
                                help=
                                "Prompt explicitly lists choices in the template for the output.",
                            )

                            # Metrics from here:
                            # https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py
                            metrics_choices = [
                                "BLEU",
                                "ROUGE",
                                "Squad",
                                "Trivia QA",
                                "Accuracy",
                                "Pearson Correlation",
                                "Spearman Correlation",
                                "MultiRC",
                                "AUC",
                                "COQA F1",
                                "Edit Distance",
                            ]
                            # Add mean reciprocal rank
                            metrics_choices.append("Mean Reciprocal Rank")
                            # Add generic other
                            metrics_choices.append("Other")
                            # Sort alphabetically
                            metrics_choices = sorted(metrics_choices)
                            state.metadata.metrics = st.multiselect(
                                "Metrics",
                                metrics_choices,
                                default=template.metadata.metrics,
                                help=
                                "Select all metrics that are commonly used (or should "
                                "be used if a new task) to evaluate this prompt.",
                            )

                            # Answer choices
                            if template.get_answer_choices_expr() is not None:
                                answer_choices = template.get_answer_choices_expr()
                            else:
                                answer_choices = ""
                            state.answer_choices = st.text_input(
                                "Answer Choices",
                                value=answer_choices,
                                help=
                                "A Jinja expression for computing answer choices. "
                                "Separate choices with a triple bar (|||).",
                            )

                            # Jinja
                            state.jinja = st.text_area("Template",
                                                       height=40,
                                                       value=template.jinja)

                            # Submit form
                            if st.form_submit_button("Save"):
                                if (updated_template_name
                                        in dataset_templates.all_template_names
                                        and updated_template_name !=
                                        state.template_name):
                                    st.error(
                                        f"A prompt with the name {updated_template_name} already exists "
                                        f"for dataset {state.templates_key}.")
                                elif updated_template_name == "":
                                    st.error("Need to provide a prompt name.")
                                else:
                                    # Parses state.answer_choices
                                    if state.answer_choices == "":
                                        updated_answer_choices = None
                                    else:
                                        updated_answer_choices = state.answer_choices

                                    dataset_templates.update_template(
                                        state.template_name,
                                        updated_template_name,
                                        state.jinja,
                                        state.reference,
                                        state.metadata,
                                        updated_answer_choices,
                                    )
                                    # Update the state as well
                                    state.template_name = updated_template_name
                #
                # Displays template output on current example if a template is selected
                # (in second column)
                #
                with col2:
                    if state.template_name is not None:
                        st.empty()
                        template = dataset_templates[state.template_name]
                        prompt = template.apply(example)
                        if prompt == [""]:
                            st.write("∅∅∅ *Blank result*")
                        else:
                            st.write("Input")
                            show_text(prompt[0], width=40)
                            if len(prompt) > 1:
                                st.write("Target")
                                show_text(prompt[1], width=40)

    #
    # Must sync state at end
    #
    state.sync()