コード例 #1
0
ファイル: preprocess_imdb.py プロジェクト: DAI-Lab/fibber
def download_and_preprocess_imdb():
    """Download the raw IMDB sentiment data and preprocess it.

    Delegates to ``download_raw_and_preprocess`` with
    ``preprocess_imdb_data`` as the conversion function, mapping the raw
    ``train`` and ``test`` paths to ``train.json`` and ``test.json``.
    """
    # One (raw input path, preprocessed output file) pair per split.
    split_pairs = [
        ("raw/aclImdb/train", "train.json"),
        ("raw/aclImdb/test", "test.json"),
    ]
    download_raw_and_preprocess(
        dataset_name="imdb",
        download_list=["imdb-raw"],
        preprocess_fn=preprocess_imdb_data,
        preprocess_input_output_list=split_pairs,
    )
コード例 #2
0
ファイル: preprocess_yelp.py プロジェクト: DAI-Lab/fibber
def download_and_preprocess_yelp():
    """Download the raw Yelp data and preprocess it.

    Delegates to ``download_raw_and_preprocess`` with
    ``preprocess_yelp_data`` as the conversion function, mapping the raw
    train/test CSV files to ``train.json`` and ``test.json``.
    """
    # One (raw input path, preprocessed output file) pair per split.
    train_pair = ("raw/yelp_review_polarity_csv/train.csv", "train.json")
    test_pair = ("raw/yelp_review_polarity_csv/test.csv", "test.json")

    download_raw_and_preprocess(
        dataset_name="yelp",
        download_list=["yelp-raw"],
        preprocess_fn=preprocess_yelp_data,
        preprocess_input_output_list=[train_pair, test_pair],
    )
コード例 #3
0
def download_and_preprocess_snli():
    """Download the raw SNLI data and preprocess it.

    Delegates to ``download_raw_and_preprocess`` with
    ``preprocess_snli_data`` as the conversion function, mapping the raw
    train/dev/test ``.jsonl`` files to ``train.json``, ``dev.json`` and
    ``test.json``.
    """
    options = {
        "dataset_name": "snli",
        "download_list": ["snli-raw"],
        "preprocess_fn": preprocess_snli_data,
        # One (raw input path, preprocessed output file) pair per split.
        "preprocess_input_output_list": [
            ("raw/snli_1.0/snli_1.0_train.jsonl", "train.json"),
            ("raw/snli_1.0/snli_1.0_dev.jsonl", "dev.json"),
            ("raw/snli_1.0/snli_1.0_test.jsonl", "test.json"),
        ],
    }
    download_raw_and_preprocess(**options)
コード例 #4
0
def download_and_preprocess_mnli():
    """Download the raw MNLI data and preprocess it.

    Delegates to ``download_raw_and_preprocess`` with
    ``preprocess_mnli_data`` as the conversion function, mapping the raw
    train, dev-matched and dev-mismatched ``.jsonl`` files to
    ``train.json``, ``dev_matched.json`` and ``dev_mismatched.json``.
    """
    # One (raw input path, preprocessed output file) pair per split.
    train_pair = ("raw/multinli_1.0/multinli_1.0_train.jsonl", "train.json")
    dev_matched_pair = (
        "raw/multinli_1.0/multinli_1.0_dev_matched.jsonl",
        "dev_matched.json",
    )
    dev_mismatched_pair = (
        "raw/multinli_1.0/multinli_1.0_dev_mismatched.jsonl",
        "dev_mismatched.json",
    )

    download_raw_and_preprocess(
        dataset_name="mnli",
        download_list=["mnli-raw"],
        preprocess_fn=preprocess_mnli_data,
        preprocess_input_output_list=[
            train_pair,
            dev_matched_pair,
            dev_mismatched_pair,
        ],
    )
コード例 #5
0
def download_and_preprocess_ag():
    """Download the raw AG's news data and preprocess it in two variants.

    Both variants use the same raw CSV files and the same output file names:

    * ``ag`` -- ``preprocess_ag_data`` called with its default options.
    * ``ag_no_title`` -- ``preprocess_ag_data`` called with
      ``include_title=False`` and ``include_author_media=False``.
    """
    raw_archives = ("ag-raw-train", "ag-raw-test")
    # One (raw input path, preprocessed output file) pair per split.
    split_pairs = (
        ("raw/train.csv", "train.json"),
        ("raw/test.csv", "test.json"),
    )

    def _preprocess_without_title(inp, out):
        return preprocess_ag_data(
            inp, out, include_title=False, include_author_media=False)

    variants = (
        ("ag", preprocess_ag_data),
        ("ag_no_title", _preprocess_without_title),
    )
    for variant_name, variant_fn in variants:
        # Build fresh lists for every call so no list object is shared
        # between the two invocations.
        download_raw_and_preprocess(
            dataset_name=variant_name,
            download_list=list(raw_archives),
            preprocess_fn=variant_fn,
            preprocess_input_output_list=list(split_pairs))