예제 #1
0
def test_csv_with_trailing_commas():
    """Check discovered column names when the header ends with a comma.

    Two files are written: one whose header has a trailing space after the
    final comma and one whose header does not.
    """
    scenarios = [
        # note the trailing space in the header
        ('a,b,c, \n1, 2, 3, ', ['a', 'b', 'c', '']),
        # NO trailing space in the header
        ('a,b,c,\n1, 2, 3, ', ['a', 'b', 'c', 'Unnamed: 3']),
    ]
    for contents, expected_names in scenarios:
        with tmpfile('.csv') as fn:
            with open(fn, 'wt') as f:
                f.write(contents)
            csv = CSV(fn)
            assert expr_repr(data(fn))
            assert discover(csv).measure.names == expected_names
예제 #2
0
파일: test_csv.py 프로젝트: luizirber/odo
def test_csv():
    """A delimiter passed to ``CSV`` is reported by its ``dialect`` mapping."""
    schema = 'var * {name: string, amount: int}'
    with tmpfile('.csv') as path:
        target = CSV(path, dshape=schema, delimiter=',')
        assert target.dialect['delimiter'] == ','
예제 #3
0
파일: test_csv.py 프로젝트: user32000/odo
def test_header_mix_str_digits():
    """Discover a CSV whose header has a text name and an all-digit name."""
    ds = datashape.dshape('''var * {"On- or Off- Budget": ?string,
                                    "1990": ?string}''')
    text = 'On- or Off- Budget,1990\nOn Budget,-628\nOff budget,"5,962"\n'
    with filetext(text) as fn:
        source = CSV(fn, has_header=True)
        # The resulting frame is not inspected by this test.
        convert(pd.DataFrame, source)
        assert discover(source).measure == ds.measure
예제 #4
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_read_supports_read_csv_kwargs():
    """``csv_to_dataframe`` accepts ``usecols`` and yields only that column."""
    with filetext('Alice,1\nBob,2') as fn:
        schema = datashape.dshape('var * {name: string, amount: int}')
        source = CSV(fn)
        frame = csv_to_dataframe(source, dshape=schema, usecols=['name'])

        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice',), ('Bob',)]
예제 #5
0
def test_temp_ssh_files():
    """Copy a local CSV into ``Temp(SSH(CSV))`` on localhost and compare."""
    with filetext('name,balance\nAlice,100\nBob,200', extension='csv') as fn:
        local = CSV(fn)
        remote = into(Temp(SSH(CSV)), local, hostname='localhost')

        # Both resources must discover to the same datashape ...
        assert discover(local) == discover(remote)
        # ... and the copy must be a temporary resource.
        assert isinstance(remote, _Temp)
예제 #6
0
파일: test_csv.py 프로젝트: user32000/odo
def test_more_unicode_column_names():
    """A UTF-8 encoded header name becomes a unicode DataFrame column."""
    with filetext(b'foo\xc4\x87,a\n1,2\n3,4', extension='csv',
                  mode='wb') as fn:
        source = CSV(fn, has_header=True)
        df = into(pd.DataFrame, source)

    column_names = [b'foo\xc4\x87'.decode('utf8'), u'a']
    expected = pd.DataFrame([(1, 2), (3, 4)], columns=column_names)
    tm.assert_frame_equal(df, expected)
예제 #7
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_loads_in_datetimes_naively():
    """The ``when`` column discovers as ``?datetime`` and loads as ``M8[ns]``."""
    with filetext('name,when\nAlice,2014-01-01\nBob,2014-02-02') as fn:
        source = CSV(fn, has_header=True)
        expected = datashape.dshape('var * {name: ?string, when: ?datetime}')
        assert discover(source) == expected

        frame = convert(pd.DataFrame, source)
        when_dtype = frame.dtypes['when']
        assert when_dtype == 'M8[ns]'
예제 #8
0
파일: test_csv.py 프로젝트: luizirber/odo
def test_pandas_discover_on_gzipped_files():
    """``discover`` on a gzip-written CSV gives the expected datashape."""
    text = 'name,when\nAlice,2014-01-01\nBob,2014-02-02'
    with filetext(text, open=gzip.open, mode='wt',
                  extension='.csv.gz') as fn:
        source = CSV(fn, has_header=True)
        expected = datashape.dshape('var * {name: ?string, when: ?datetime}')
        assert discover(source) == expected
예제 #9
0
파일: test_csv.py 프로젝트: luizirber/odo
def test_unused_datetime_columns():
    """Read only ``val`` from a CSV whose dshape also has a datetime column."""
    ds = datashape.dshape('var * {val: string, when: datetime}')
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as fn:
        source = CSV(fn, has_header=True)
        values = csv_to_DataFrame(source, usecols=['val'], squeeze=True,
                                  dshape=ds)
        assert convert(list, values) == ['a', 'b']
예제 #10
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_read():
    """Load a two-column CSV into a DataFrame with names taken from the dshape."""
    with filetext('Alice,1\nBob,2') as fn:
        schema = datashape.dshape('var * {name: string, amount: int}')
        frame = csv_to_dataframe(CSV(fn), dshape=schema)

        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]
        column_names = list(frame.columns)
        assert column_names == ['name', 'amount']
예제 #11
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_read_supports_datetimes():
    """A column declared as ``date`` in the dshape loads with dtype ``M8[ns]``."""
    with filetext('Alice,2014-01-02\nBob,2014-01-03') as fn:
        schema = datashape.dshape('var * {name: string, when: date}')
        frame = csv_to_dataframe(CSV(fn), dshape=schema)

        assert isinstance(frame, pd.DataFrame)
        column_names = list(frame.columns)
        assert column_names == ['name', 'when']
        when_dtype = frame.dtypes['when']
        assert when_dtype == 'M8[ns]'
예제 #12
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_read_supports_missing_integers():
    """A ``?int32`` column containing a missing value loads with dtype ``f4``."""
    with filetext('Alice,1\nBob,') as fn:
        schema = datashape.dshape('var * {name: string, val: ?int32}')
        frame = csv_to_dataframe(CSV(fn), dshape=schema)

        assert isinstance(frame, pd.DataFrame)
        column_names = list(frame.columns)
        assert column_names == ['name', 'val']
        val_dtype = frame.dtypes['val']
        assert val_dtype == 'f4'
예제 #13
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_writes_header_by_default():
    """Appending to a ``CSV`` created without ``has_header`` writes the names."""
    with tmpfile('.csv') as fn:
        schema = datashape.dshape('var * {name: string, amount: int}')
        rows = [('Alice', 1), ('Bob', 2)]
        target = CSV(fn)
        append(target, rows, dshape=schema)

        with open(fn) as f:
            written = f.read()
        assert 'name' in written
예제 #14
0
파일: test_csv.py 프로젝트: user32000/odo
def test_header_disagrees_with_dshape():
    """Column names follow the file header unless a dshape is passed."""
    ds = datashape.dshape('var * {name: string, bal: int64}')
    with filetext('name,val\nAlice,100\nBob,200', extension='csv') as fn:
        csv = CSV(fn, header=True)
        assert convert(list, csv) == [('Alice', 100), ('Bob', 200)]

        # Without a dshape the names come from the file's header row.
        from_header = convert(pd.DataFrame, csv)
        assert list(from_header.columns) == ['name', 'val']

        # With an explicit dshape its field names are used instead.
        from_dshape = convert(pd.DataFrame, csv, dshape=ds)
        assert list(from_dshape.columns) == ['name', 'bal']
예제 #15
0
def test_table_resource():
    """``data(filename)`` wraps a ``CSV`` and computes to the appended rows."""
    with tmpfile('csv') as filename:
        schema = dshape('var * {a: int, b: int}')
        target = CSV(filename)
        append(target, [[1, 2], [10, 20]], dshape=schema)

        expr = data(filename)
        assert isinstance(expr.data, CSV)

        computed_rows = into(list, compute(expr))
        stored_rows = into(list, target)
        assert computed_rows == stored_rows
예제 #16
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_read_supports_gzip():
    """Load a gzip-written CSV into a DataFrame using an explicit dshape."""
    with filetext('Alice,1\nBob,2', open=gzip.open,
                  mode='wt', extension='.csv.gz') as fn:
        schema = datashape.dshape('var * {name: string, amount: int}')
        frame = csv_to_dataframe(CSV(fn), dshape=schema)

        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]
        column_names = list(frame.columns)
        assert column_names == ['name', 'amount']
예제 #17
0
def test_ssh_csv_to_s3_csv():
    """Move a temporary SSH CSV into an S3 bucket and compare datashapes."""
    # for some reason this can only be run in the same file as other ssh tests
    # and must be a Temp(SSH(CSV)) otherwise tests above this one fail
    aws_tests = pytest.importorskip('odo.backends.tests.test_aws')
    s3_bucket = aws_tests.s3_bucket

    with filetext('name,balance\nAlice,100\nBob,200', extension='csv') as fn:
        local = CSV(fn)
        remote = into(Temp(SSH(CSV)), local, hostname='localhost')
        with s3_bucket('.csv') as bucket:
            uploaded = into(bucket, remote)
            assert discover(uploaded) == discover(resource(bucket))
예제 #18
0
def dcsv():
    """Return a ``CSV`` for ``dummydata.csv`` in this module's directory.

    The resource is created with an explicit four-field dshape.
    """
    schema = """var * {
        Name: string,
        RegistrationDate: date,
        ZipCode: int64,
        Consts: float64
    }"""
    path = os.path.join(os.path.dirname(__file__), 'dummydata.csv')
    return CSV(path, dshape=schema)
예제 #19
0
파일: test_csv.py 프로젝트: user32000/odo
def test_header_with_quotes():
    """Discover ``encoding.csv`` (opened as latin1) and compare its dshape."""
    here = os.path.dirname(__file__)
    source = CSV(os.path.join(here, 'encoding.csv'), encoding='latin1')
    expected = dshape("""var * {
        D_PROC: ?string,
        NUM_SEQ: int64,
        COD_TIP_RELAC: ?float64,
        COMPL: ?string,
        COD_ASSUNTO: int64
    }
    """)
    discovered = discover(source)
    assert discovered == expected
예제 #20
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_write_gzip():
    """Append rows to a gzip-compressed CSV and check the decompressed text.

    The file is read back through ``gzip`` and must contain both the header
    field name and the first data row.
    """
    with tmpfile('.csv.gz') as fn:
        ds = datashape.dshape('var * {name: string, amount: int}')
        data = [('Alice', 1), ('Bob', 2)]
        csv = CSV(fn, has_header=True)
        append(csv, data, dshape=ds)

        # The context manager closes the handle even if reading fails, and
        # the assertions run only after the handle has been released.
        with gzip.open(fn) as f:
            # gzip.open defaults to binary mode, so decode before doing the
            # substring checks; comparing str against bytes raises on
            # Python 3.
            s = f.read().decode('utf-8')
        assert 'name' in s
        assert 'Alice,1' in s
예제 #21
0
파일: test_csv.py 프로젝트: zhangshoug/odo
def test_string_n_convert(string_dshape):
    """Convert a header-less CSV to a DataFrame with a date and a string field.

    ``string_dshape`` is interpolated into the dshape as the type of ``n``.
    """
    lines = ['2015-03-13,FOO THE BAR', '2014-01-29,BAZ THE QUUX']
    schema = 'var * {k: date, n: %s}' % string_dshape
    with tmpfile('.csv') as fn:
        with open(fn, 'w') as f:
            f.write('\n'.join(lines))
        source = CSV(fn, has_header=False)
        result = odo(source, pd.DataFrame, dshape=schema)
        assert list(result.columns) == list('kn')

    # Build the expected frame straight from the raw text lines.
    records = [tuple(line.split(',')) for line in lines]
    expected = pd.DataFrame(records, columns=list('kn'))
    expected['k'] = pd.to_datetime(expected.k)
    tm.assert_frame_equal(result, expected)
예제 #22
0
def test_convert_through_temporary_local_storage():
    """Round-trip a CSV and a DataFrame through temporary SSH resources."""
    with filetext('name,quantity\nAlice,100\nBob,200', extension='csv') as fn:
        local = CSV(fn)
        frame = into(pd.DataFrame, local)

        # CSV -> temporary remote CSV
        remote_csv = into(Temp(SSH(CSV)), local, hostname='localhost')
        assert into(list, local) == into(list, remote_csv)

        # DataFrame -> temporary remote CSV
        remote_from_frame = into(Temp(SSH(CSV)), frame, hostname='localhost')
        assert into(list, remote_from_frame) == into(list, frame)

        # DataFrame -> temporary remote JSON lines
        remote_json = into(Temp(SSH(JSONLines)), frame, hostname='localhost')
        json_array = into(np.ndarray, remote_json)
        frame_array = into(np.ndarray, frame)
        assert (json_array == frame_array).all()
예제 #23
0
파일: test_csv.py 프로젝트: user32000/odo
def test_csv_append():
    """Rows appended to a header-less CSV read back and appear in the file."""
    with tmpfile('.csv') as fn:
        target = CSV(fn, has_header=False)
        rows = [('Alice', 100), ('Bob', 200)]
        append(target, rows)

        read_back = list(convert(Iterator, target))
        assert read_back == rows

        with open(fn) as f:
            contents = f.read()
        for fragment in ('Alice', '100'):
            assert fragment in contents
예제 #24
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_write():
    """Appending twice writes the header row only once."""
    with tmpfile('.csv') as fn:
        schema = datashape.dshape('var * {name: string, amount: int}')
        rows = [('Alice', 1), ('Bob', 2)]
        target = CSV(fn, has_header=True)

        append(target, rows, dshape=schema)
        with open(fn) as f:
            first_pass = f.read()
        assert 'name' in first_pass

        # Doesn't write header twice
        append(target, rows, dshape=schema)
        with open(fn) as f:
            second_pass = f.read()
        assert second_pass.count('name') == 1
예제 #25
0
파일: test_csv.py 프로젝트: user32000/odo
def test_pandas_read_supports_whitespace_strings():
    """A whitespace-only third column discovers as ``?string`` named ``''``."""
    with filetext('a,b, \n1,2, \n2,3, \n', extension='csv') as fn:
        discovered = discover(CSV(fn))
        expected = datashape.dshape("var * {a: int64, b: int64, '': ?string}")
        assert discovered == expected
예제 #26
0
파일: test_csv.py 프로젝트: user32000/odo
def test_multibyte_encoding_dialect(multibyte_csv):
    """With ``sniff_nbytes=10`` the dialect delimiter is still a comma."""
    source = CSV(multibyte_csv, encoding='utf8', sniff_nbytes=10)
    delimiter = source.dialect['delimiter']
    assert delimiter == ','
예제 #27
0
파일: test_csv.py 프로젝트: user32000/odo
def test_multibyte_encoding_header(multibyte_csv):
    """With ``sniff_nbytes=3`` the ``has_header`` attribute is ``None``."""
    source = CSV(multibyte_csv, encoding='utf8', sniff_nbytes=3)
    # not enough data to infer header
    assert source.has_header is None
예제 #28
0
파일: test_csv.py 프로젝트: user32000/odo
def test_encoding_is_none():
    """Passing ``encoding=None`` results in an ``encoding`` of ``'utf-8'``."""
    with tmpfile('.csv') as fn:
        text = 'a,1\nb,2\nc,3'.encode('utf-8').decode('utf-8')
        with open(fn, 'w') as f:
            f.write(text)
        source = CSV(fn, encoding=None)
        assert source.encoding == 'utf-8'
예제 #29
0
파일: test_csv.py 프로젝트: user32000/odo
def test_has_header_on_tsv():
    """``has_header`` is truthy for a tab-separated file with a name row."""
    payload = b'a\tb\n1\t2\n3\t4'
    with tmpfile('.csv') as fn:
        with open(fn, 'wb') as f:
            f.write(payload)
        source = CSV(fn)
        assert source.has_header
예제 #30
0
파일: test_csv.py 프로젝트: user32000/odo
def test_csv_supports_sep():
    """The ``sep`` keyword is reported as the dialect's ``delimiter``."""
    dialect = CSV('foo.csv', sep=';').dialect
    assert dialect['delimiter'] == ';'