Beispiel #1
0
def test_fetch_data_orc(tmpdir, expected_df):
    """Write ``expected_df`` to an ORC file, read it back, and compare.

    The file is created under a fresh ``tmp_test_fs_reader`` directory
    inside ``tmpdir`` and then loaded through ``FileSystemReader``.
    """
    work_dir = tmpdir.mkdir("tmp_test_fs_reader")
    orc_path = str(work_dir.join("person.orc"))
    cudf.io.orc.to_orc(expected_df, orc_path)

    reader = FileSystemReader(
        {"type": "fs", "input_path": orc_path, "input_format": "orc"}
    )
    result = reader.fetch_data()

    assert result.equals(expected_df)
Beispiel #2
0
def test_fetch_data_parquet(tmpdir, expected_df):
    """Write ``expected_df`` to a Parquet file, read it back, and compare.

    The file is created under a fresh ``tmp_test_fs_reader`` directory
    inside ``tmpdir`` and then loaded through ``FileSystemReader``.
    """
    # Convert the tmpdir path object to a plain string so that both the
    # writer and the reader config receive a ``str`` path, as the sibling
    # ORC and JSON tests do, rather than relying on path-object support.
    fname = str(tmpdir.mkdir("tmp_test_fs_reader").join("person.parquet"))
    cudf.io.parquet.to_parquet(expected_df, fname)

    config = {"type": "fs", "input_path": fname, "input_format": "parquet"}

    reader = FileSystemReader(config)
    fetched_df = reader.fetch_data()

    assert fetched_df.equals(expected_df)
Beispiel #3
0
def test_fetch_data_orc(test_input_base_path, expected_df):
    """Read ``person.orc`` from the base input path and compare to ``expected_df``."""
    orc_path = f"{test_input_base_path}/person.orc"
    reader = FileSystemReader(
        {"type": "fs", "input_path": orc_path, "input_format": "orc"}
    )

    result = reader.fetch_data()

    assert result.equals(expected_df)
Beispiel #4
0
def test_fetch_data_parquet(test_input_base_path, expected_df):
    """Read ``person.parquet`` from the base input path and compare to ``expected_df``.

    The reader config requests the ``firstname``, ``lastname`` and
    ``gender`` columns.
    """
    parquet_path = f"{test_input_base_path}/person.parquet"
    reader_config = {
        "type": "fs",
        "input_path": parquet_path,
        "columns": ["firstname", "lastname", "gender"],
        "input_format": "parquet",
    }

    result = FileSystemReader(reader_config).fetch_data()

    assert result.equals(expected_df)
Beispiel #5
0
def test_fetch_data_json(tmpdir, expected_df):
    """Write ``expected_df`` as records-oriented JSON, read it back, and compare.

    The same ``orient="records"`` setting is used for writing the file and
    in the reader config.
    """
    work_dir = tmpdir.mkdir("tmp_test_fs_reader")
    json_path = str(work_dir.join("person.json"))
    cudf.io.json.to_json(expected_df, json_path, orient="records")

    reader_config = {
        "type": "fs",
        "input_path": json_path,
        "orient": "records",
        "input_format": "json",
    }
    result = FileSystemReader(reader_config).fetch_data()

    assert result.equals(expected_df)
Beispiel #6
0
def test_fetch_data_text(test_input_base_path, expected_df):
    """Read ``person.csv`` with ``input_format="text"`` and compare to ``expected_df``.

    The config describes a comma-delimited file with a header on row 0 and
    three string columns: ``firstname``, ``lastname`` and ``gender``.
    """
    csv_path = f"{test_input_base_path}/person.csv"
    reader = FileSystemReader(
        {
            "type": "fs",
            "input_path": csv_path,
            "names": ["firstname", "lastname", "gender"],
            "delimiter": ",",
            "usecols": ["firstname", "lastname", "gender"],
            "dtype": ["str", "str", "str"],
            "header": 0,
            "input_format": "text",
        }
    )

    result = reader.fetch_data()

    assert result.equals(expected_df)
Beispiel #7
0
def test_fetch_data_csv(tmpdir, expected_df):
    """Write ``expected_df`` to a CSV file, read it back, and compare.

    The file is written without the index under a fresh
    ``tmp_test_fs_reader`` directory inside ``tmpdir``. The reader config
    describes a comma-delimited file with a header on row 0 and three
    string columns: ``firstname``, ``lastname`` and ``gender``.
    """
    # Convert the tmpdir path object to a plain string so that both
    # ``to_csv`` and the reader config receive a ``str`` path, as the
    # sibling ORC and JSON tests do, rather than relying on path-object
    # support.
    fname = str(tmpdir.mkdir("tmp_test_fs_reader").join("person.csv"))
    expected_df.to_csv(fname, index=False)

    config = {
        "type": "fs",
        "input_path": fname,
        "names": ["firstname", "lastname", "gender"],
        "delimiter": ",",
        "usecols": ["firstname", "lastname", "gender"],
        "dtype": ["str", "str", "str"],
        "header": 0,
        "input_format": "csv",
    }
    reader = FileSystemReader(config)
    fetched_df = reader.fetch_data()

    assert fetched_df.equals(expected_df)
Beispiel #8
0
 def get_reader(self):
     """Return a new FileSystemReader built from ``self.config``."""
     reader = FileSystemReader(self.config)
     return reader
Beispiel #9
0
 def get_reader(self):
     """Create a FileSystemReader configured with ``self.config`` and return it."""
     reader_config = self.config
     return FileSystemReader(reader_config)