Esempio n. 1
0
def test_csv_to_s3__using_multipart_upload():
    """Round-trip a one-cell frame through S3 with ``multipart=True``.

    The frame is written to a temporary CSV, sent to the target yielded by
    ``s3_bucket`` via ``into(..., multipart=True)``, read back into a
    DataFrame and compared with the original.
    """
    # One cell of 5 * 1024**2 asterisks.
    # NOTE(review): presumably sized to reach a multipart threshold -- confirm.
    payload = "*" * (5 * 1024 ** 2)
    expected = pd.DataFrame({'a': [payload]})
    with tmpfile('.csv') as csv_path, s3_bucket('.csv') as bucket:
        expected.to_csv(csv_path, index=False)
        uploaded = into(bucket, CSV(csv_path), multipart=True)
        roundtripped = into(pd.DataFrame, uploaded)
    tm.assert_frame_equal(expected, roundtripped)
Esempio n. 2
0
def test_csv_to_s3_into():
    """Round-trip ``tm.makeMixedDataFrame()``: local CSV -> S3 -> DataFrame.

    The frame read back from the S3 target must equal the one written.
    """
    expected = tm.makeMixedDataFrame()
    with tmpfile('.csv') as csv_path, s3_bucket('.csv') as bucket:
        expected.to_csv(csv_path, index=False)
        uploaded = into(bucket, CSV(csv_path))
        roundtripped = into(pd.DataFrame, uploaded)
    tm.assert_frame_equal(expected, roundtripped)
Esempio n. 3
0
def test_into_sqlite_with_different_sep():
    """Load a headerless, pipe-delimited CSV into a SQLite table via ``into``.

    The rows read back from the table must match the rows of the frame the
    CSV was written from.
    """
    records = [('Alice', 100), ('Bob', 200)]
    frame = pd.DataFrame(records, columns=['name', 'amount'])
    with tmpfile('.csv') as csv_path:
        # TODO: get the  header  argument to work in into(CSV, other)
        frame.to_csv(csv_path, sep='|', header=False, index=False)
        source = CSV(csv_path, delimiter='|', has_header=False)

        with tmpfile('.db') as db_path:
            uri = 'sqlite:///%s::df' % db_path
            # The table's datashape is taken from what is discovered on the CSV.
            table = resource(uri, dshape=discover(source))
            loaded = into(table, source)

            assert into(list, loaded) == into(list, frame)
Esempio n. 4
0
def test_pre_compute_on_multiple_datasets_is_selective():
    """Compute one column out of a ``CachedDataset`` holding two sources.

    The dataset wraps an in-memory DataFrame and the iris example CSV; the
    ``amount`` column computed through ``Data`` must print the same as the
    frame's own column.

    NOTE(review): the name suggests only the referenced dataset should be
    pre-computed; this test only checks the computed result -- confirm.
    """
    from odo import CSV
    from blaze import Data
    from blaze.cached import CachedDataset

    rows = [
        [1, 'Alice', 100],
        [2, 'Bob', -200],
        [3, 'Charlie', 300],
        [4, 'Denis', 400],
        [5, 'Edith', -500],
    ]
    frame = pd.DataFrame(rows, columns=['id', 'name', 'amount'])
    iris_csv = CSV(example('iris.csv'))
    cached = CachedDataset({'df': frame, 'iris': iris_csv})

    bound = Data(cached)
    computed = compute(bound.df.amount)
    # Compared via their string representations, not element-wise.
    assert str(computed) == str(frame.amount)
Esempio n. 5
0
import os

import pandas as pd
import datashape

from odo.backends.sql_csv import append_csv_to_sql_table, copy_command
from odo import resource, into, CSV, discover
from odo.utils import tmpfile, ignoring


def normalize(s):
    """Canonicalize text for loose comparison.

    Collapses every run of whitespace to a single space (leading and trailing
    whitespace is dropped), lowercases the result and removes underscores.
    """
    # split() with no arguments already ignores leading/trailing whitespace.
    collapsed = ' '.join(s.split())
    return collapsed.lower().replace('_', '')


# Module-level fixtures: a CSV descriptor, its datashape and a SQLite table.
fn = os.path.abspath('myfile.csv')
# On Windows (os.name == 'nt') the path goes through a unicode_escape
# round-trip, which doubles its backslashes.
# NOTE(review): presumably to match how the path appears in the generated
# SQL -- confirm.
if os.name == 'nt':
    escaped_fn = fn.encode('unicode_escape').decode()
else:
    escaped_fn = fn

csv = CSV(fn, has_header=True, delimiter=',')
ds = datashape.dshape('var * {name: string, amount: int}')
tbl = resource('sqlite:///:memory:::my_table', dshape=ds)


def test_postgres_load():
    """Check the COPY statement ``copy_command`` builds for 'postgresql'.

    Generated and expected SQL are both passed through ``normalize`` before
    being compared.
    """
    generated = copy_command('postgresql', tbl, csv)
    expected = """
    COPY my_table from '%s'
        (FORMAT csv,
         DELIMITER E',',
         NULL '',
         QUOTE '"',
         ESCAPE '\\',
         HEADER True,
         ENCODING 'utf-8');
    """ % escaped_fn
    assert normalize(generated) == normalize(expected)