Exemple #1
0
def download_data_and_write_config(task_name: str, task_data_path: str,
                                   task_config_path: str):
    """Convert a Hugging Face dataset for ``task_name`` and write its config.

    The conversion settings are looked up in ``HF_DATASETS_CONVERSION_DICT``
    under ``task_name``, handed to
    ``download_utils.convert_hf_dataset_to_examples``, and the resulting
    examples are passed to ``download_utils.write_examples_to_jsonls``.
    Finally a JSON config with the keys ``"task"``, ``"paths"`` and
    ``"name"`` is written to ``task_config_path``.

    Args:
        task_name: Key into ``HF_DATASETS_CONVERSION_DICT``; also stored as
            ``"name"`` in the config.
        task_data_path: Forwarded to ``write_examples_to_jsonls`` as the
            destination for the example files.
        task_config_path: Path the task config JSON is written to.

    Raises:
        KeyError: Presumably, when ``task_name`` has no entry or the entry
            has no ``"path"`` (assumes the lookups are plain dicts).
    """
    conversion_meta = HF_DATASETS_CONVERSION_DICT[task_name]

    # Only "path" is mandatory; every other setting is optional, and only
    # "phase_map" has a non-None fallback.
    conversion_kwargs = {
        "path": conversion_meta["path"],
        "name": conversion_meta.get("name"),
        "field_map": conversion_meta.get("field_map"),
        "label_map": conversion_meta.get("label_map"),
        "phase_map": conversion_meta.get("phase_map", DEFAULT_PHASE_MAP),
        "phase_list": conversion_meta.get("phase_list"),
    }
    examples = download_utils.convert_hf_dataset_to_examples(
        **conversion_kwargs)

    written_paths = download_utils.write_examples_to_jsonls(
        examples_dict=examples,
        task_data_path=task_data_path,
    )

    # The metadata may override the task identifier recorded under "task";
    # "name" always keeps the caller-supplied task_name.
    task_config = {
        "task": conversion_meta.get("jiant_task_name", task_name),
        "paths": written_paths,
        "name": task_name,
    }
    py_io.write_json(data=task_config, path=task_config_path)
Exemple #2
0
def download_data_and_write_config(task_name: str,
                                   task_data_path: str,
                                   task_config_path: str,
                                   n_fold: int = None,
                                   fold: int = None):
    """Build examples for ``task_name`` (optionally per fold) and write its config.

    Examples come from ``build_examples`` and are passed to
    ``download_utils.write_examples_to_jsonls``. A JSON config with the keys
    ``"task"``, ``"paths"`` and ``"name"`` is then written to
    ``task_config_path``; both the ``"task"`` and ``"name"`` values are run
    through ``build_cv_task_name`` with ``n_fold`` and ``fold``.

    Both write calls receive ``skip_if_exists=True`` (presumably leaving
    already-present files untouched -- confirm against the helpers).

    Args:
        task_name: Key into ``HF_DATASETS_CONVERSION_DICT`` and base for the
            config's ``"name"`` value.
        task_data_path: Forwarded to ``write_examples_to_jsonls`` as the
            destination for the example files.
        task_config_path: Path the task config JSON is written to.
        n_fold: Forwarded to ``build_examples`` and ``build_cv_task_name``;
            defaults to ``None``.
        fold: Forwarded to ``build_examples`` and ``build_cv_task_name``;
            defaults to ``None``.
    """
    fold_examples = build_examples(task_name, n_fold=n_fold, fold=fold)
    written_paths = download_utils.write_examples_to_jsonls(
        examples_dict=fold_examples,
        task_data_path=task_data_path,
        skip_if_exists=True,
    )

    # The conversion metadata may override the task identifier; fall back to
    # task_name when no "jiant_task_name" is configured.
    conversion_meta = HF_DATASETS_CONVERSION_DICT[task_name]
    base_jiant_name = conversion_meta.get("jiant_task_name", task_name)

    task_config = {
        "task": build_cv_task_name(base_jiant_name, n_fold, fold),
        "paths": written_paths,
        "name": build_cv_task_name(task_name, n_fold, fold),
    }
    py_io.write_json(
        data=task_config,
        path=task_config_path,
        skip_if_exists=True,
    )