Beispiel #1
0
def test_pre_compute_with_projection_projects_on_data_frames():
    """Distinct two-column projection pre-computes to chunks with only those columns.

    ``pre_compute`` is called with ``comfortable_memory=10`` and the first
    chunk is pulled from the result via ``get_chunk()``.
    """
    expected_columns = {'sepal_length', 'sepal_width'}

    iris = CSV(example('iris.csv'))
    sym = symbol('s', iris.dshape)
    projection = sym[['sepal_length', 'sepal_width']].distinct()

    reader = pre_compute(projection, iris, comfortable_memory=10)
    chunk = reader.get_chunk()

    assert set(chunk.columns) == expected_columns
Beispiel #2
0
def test_pre_compute_calls_lean_projection():
    """Sorting by one column and selecting another keeps exactly those two columns.

    The first chunk of the pre-computed result (``get_chunk()``) must expose
    only ``sepal_length`` and ``species``.
    """
    expected_columns = {'sepal_length', 'species'}

    iris = CSV(example('iris.csv'))
    sym = symbol('s', iris.dshape)
    sorted_species = sym.sort('sepal_length').species

    reader = pre_compute(sorted_species, iris, comfortable_memory=10)
    chunk = reader.get_chunk()

    assert set(chunk.columns) == expected_columns
Beispiel #3
0
def test_pre_compute_calls_lean_projection():
    """The first element of the pre-computed result has only the needed columns.

    The expression sorts on ``sepal_length`` and selects ``species``; the
    symbol's datashape comes from ``discover(csv)``.
    """
    iris = CSV(example('iris.csv'))
    sym = symbol('s', discover(iris))
    expr = sym.sort('sepal_length').species

    computed = pre_compute(expr, iris, comfortable_memory=10)
    leading = first(computed)

    assert set(leading.columns) == {'sepal_length', 'species'}
Beispiel #4
0
def test_pre_compute_with_projection_projects_on_data_frames():
    """Distinct projection of two columns yields a first element with just those columns."""
    iris = CSV(example('iris.csv'))
    sym = symbol('s', discover(iris))
    expr = sym[['sepal_length', 'sepal_width']].distinct()

    computed = pre_compute(expr, iris, comfortable_memory=10)
    leading = first(computed)

    assert set(leading.columns) == {'sepal_length', 'sepal_width'}
Beispiel #5
0
def test_pre_compute_calls_lean_projection():
    """Sort-then-select pre-computes to data carrying only the two columns involved."""
    data = CSV(example('iris.csv'))
    table = symbol('s', discover(data))

    # Build the expression step by step: sort first, then pick the column.
    by_length = table.sort('sepal_length')
    species_only = by_length.species

    outcome = pre_compute(species_only, data, comfortable_memory=10)
    observed = set(first(outcome).columns)

    assert observed == {'sepal_length', 'species'}
Beispiel #6
0
def test_pre_compute_with_head_on_large_csv_yields_iterator():
    """``head()`` of a column pre-computes to an ``Iterator`` when ``comfortable_memory=10``."""
    iris = CSV(example('iris.csv'))
    sym = symbol('s', discover(iris))
    head_expr = sym.species.head()

    computed = pre_compute(head_expr, iris, comfortable_memory=10)

    assert isinstance(computed, Iterator)
Beispiel #7
0
def test_pre_compute_on_large_csv_gives_chunked_reader():
    """With ``comfortable_memory=10`` the result is a chunked frame or a pandas text reader."""
    iris = CSV(example('iris.csv'))
    sym = symbol('s', discover(iris))

    computed = pre_compute(sym.species, iris, comfortable_memory=10)

    # Either representation is accepted by this test.
    accepted = (chunks(pd.DataFrame), pd.io.parsers.TextFileReader)
    assert isinstance(computed, accepted)
Beispiel #8
0
def test_pre_compute_on_small_csv_gives_dataframe():
    """Without a ``comfortable_memory`` override the result is a ``Series`` or ``DataFrame``."""
    iris = CSV(example('iris.csv'))
    sym = symbol('s', discover(iris))

    computed = pre_compute(sym.species, iris)

    assert isinstance(computed, (Series, DataFrame))
Beispiel #9
0
def test_pre_compute_on_small_csv_gives_dataframe():
    """Without a ``comfortable_memory`` override the result is a ``DataFrame``.

    The symbol's datashape is taken from the CSV's own ``dshape`` attribute.
    """
    iris = CSV(example('iris.csv'))
    sym = symbol('s', iris.dshape)

    computed = pre_compute(sym.species, iris)

    assert isinstance(computed, DataFrame)
Beispiel #10
0
def test_pre_compute_with_head_on_large_csv_yields_iterator():
    """Pre-computing ``species.head()`` with ``comfortable_memory=10`` gives an ``Iterator``."""
    data = CSV(example("iris.csv"))
    table = symbol("s", discover(data))

    species_head = table.species.head()
    outcome = pre_compute(species_head, data, comfortable_memory=10)

    assert isinstance(outcome, Iterator)
Beispiel #11
0
def test_pre_compute_on_large_csv_gives_chunked_reader():
    """With ``comfortable_memory=10`` the result is a pandas ``TextFileReader``.

    The symbol's datashape is taken from the CSV's own ``dshape`` attribute.
    """
    iris = CSV(example('iris.csv'))
    sym = symbol('s', iris.dshape)

    computed = pre_compute(sym.species, iris, comfortable_memory=10)

    assert isinstance(computed, pandas.io.parsers.TextFileReader)
Beispiel #12
0
def test_pre_compute_on_large_csv_gives_chunked_reader():
    """A column expression under ``comfortable_memory=10`` pre-computes to chunked data."""
    data = CSV(example('iris.csv'))
    table = symbol('s', discover(data))
    species = table.species

    outcome = pre_compute(species, data, comfortable_memory=10)

    # Chunked DataFrames and pandas' text reader both satisfy the check.
    chunked_kinds = (chunks(pd.DataFrame), pd.io.parsers.TextFileReader)
    assert isinstance(outcome, chunked_kinds)
Beispiel #13
0
def test_pre_compute_on_small_csv_gives_dataframe():
    """Default ``pre_compute`` of a column expression returns in-memory pandas data."""
    data = CSV(example('iris.csv'))
    table = symbol('s', discover(data))
    species = table.species

    outcome = pre_compute(species, data)

    assert isinstance(outcome, (Series, DataFrame))
Beispiel #14
0
def test_pre_compute_calls_lean_projection():
    """Only the sort key and the selected column survive pre-computation."""
    source = CSV(example("iris.csv"))
    sym = symbol("s", discover(source))

    result = pre_compute(
        sym.sort("sepal_length").species,
        source,
        comfortable_memory=10,
    )

    head_columns = set(first(result).columns)
    assert head_columns == {"sepal_length", "species"}
Beispiel #15
0
def test_pre_compute_with_projection_projects_on_data_frames():
    """A distinct projection keeps just the projected columns in the first element."""
    source = CSV(example("iris.csv"))
    sym = symbol("s", discover(source))

    result = pre_compute(
        sym[["sepal_length", "sepal_width"]].distinct(),
        source,
        comfortable_memory=10,
    )

    head_columns = set(first(result).columns)
    assert head_columns == {"sepal_length", "sepal_width"}
Beispiel #16
0
def test_pre_compute_with_head_on_large_csv_yields_iterator():
    """``species.head()`` pre-computes to an ``Iterator`` under ``comfortable_memory=10``.

    The symbol's datashape is taken from the CSV's own ``dshape`` attribute.
    """
    iris = CSV(example('iris.csv'))
    sym = symbol('s', iris.dshape)
    head_expr = sym.species.head()

    computed = pre_compute(head_expr, iris, comfortable_memory=10)

    assert isinstance(computed, Iterator)
Beispiel #17
0
def test_pre_compute_on_large_csv_gives_dask_reader():
    """With ``comfortable_memory=10`` the result is a ``dask.dataframe.DataFrame``."""
    iris = CSV(example('iris.csv'))
    sym = symbol('s', discover(iris))

    computed = pre_compute(sym.species, iris, comfortable_memory=10)

    assert isinstance(computed, dask.dataframe.DataFrame)