Python filetext 예제들, into.utils.filetext Python 예제들

예제 #1

0

파일 보기

def test_temp_ssh_files():
    """A local CSV converted into ``Temp(SSH(CSV))`` keeps its datashape.

    The converted object must also be an instance of ``_Temp``.
    """
    content = 'name,balance\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path:
        local = CSV(path)
        remote = into(Temp(SSH(CSV)), local, hostname='localhost')

        local_shape = discover(local)
        remote_shape = discover(remote)
        assert local_shape == remote_shape

        assert isinstance(remote, _Temp)

예제 #2

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_pandas_read_supports_read_csv_kwargs():
    """``csv_to_DataFrame`` accepts the ``usecols`` keyword.

    With ``usecols=['name']`` the frame is expected to convert to
    one-element tuples holding only the names.
    """
    with filetext('Alice,1\nBob,2') as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        frame = csv_to_DataFrame(CSV(path), dshape=schema, usecols=['name'])
        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice',), ('Bob',)]

예제 #3

0

파일 보기

def test_pandas_discover_on_gzipped_files():
    """``discover`` on a header CSV written through ``gzip.open``.

    The expected datashape has a string ``name`` and a datetime ``when``.
    """
    content = 'name,when\nAlice,2014-01-01\nBob,2014-02-02'
    with filetext(content, open=gzip.open, extension='.csv.gz') as path:
        source = CSV(path, has_header=True)
        expected = datashape.dshape('var * {name: string, when: datetime}')
        assert discover(source) == expected

예제 #4

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_pandas_read_supports_read_csv_kwargs():
    """Passing ``usecols=['name']`` to ``csv_to_DataFrame`` keeps one column.

    The resulting DataFrame is expected to convert to 1-tuples of names.
    """
    content = 'Alice,1\nBob,2'
    with filetext(content) as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        source = CSV(path)
        frame = csv_to_DataFrame(source, dshape=schema, usecols=['name'])
        assert isinstance(frame, pd.DataFrame)
        as_list = convert(list, frame)
        assert as_list == [('Alice',), ('Bob',)]

예제 #5

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_pandas_loads_in_datetimes_naively():
    """A header CSV with ISO dates is discovered as string/datetime.

    Converting the same CSV to a DataFrame must give the ``when`` column
    the ``M8[ns]`` dtype.
    """
    content = 'name,when\nAlice,2014-01-01\nBob,2014-02-02'
    with filetext(content) as path:
        source = CSV(path, has_header=True)
        expected = datashape.dshape('var * {name: string, when: datetime}')
        assert discover(source) == expected

        frame = convert(pd.DataFrame, source)
        when_dtype = frame.dtypes['when']
        assert when_dtype == 'M8[ns]'

예제 #6

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_pandas_read_supports_gzip():
    """``csv_to_DataFrame`` reads a ``.csv.gz`` file written via ``gzip.open``.

    Both the rows and the column names taken from the dshape are checked.
    """
    content = 'Alice,1\nBob,2'
    with filetext(content, open=gzip.open, extension='.csv.gz') as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        frame = csv_to_DataFrame(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]
        column_names = list(frame.columns)
        assert column_names == ['name', 'amount']

예제 #7

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_pandas_loads_in_datetimes_naively():
    """A header CSV with ISO dates is discovered with optional fields.

    The expected datashape uses ``?string`` and ``?datetime``; the
    DataFrame conversion must give ``when`` the ``M8[ns]`` dtype.
    """
    content = 'name,when\nAlice,2014-01-01\nBob,2014-02-02'
    with filetext(content) as path:
        source = CSV(path, has_header=True)
        expected = datashape.dshape(
            'var * {name: ?string, when: ?datetime}')
        assert discover(source) == expected

        frame = convert(pd.DataFrame, source)
        when_dtype = frame.dtypes['when']
        assert when_dtype == 'M8[ns]'

예제 #8

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_pandas_read_supports_datetimes():
    """A ``date`` field in the dshape is read as an ``M8[ns]`` column.

    Column names are expected to come from the supplied dshape.
    """
    with filetext('Alice,2014-01-02\nBob,2014-01-03') as path:
        schema = datashape.dshape('var * {name: string, when: date}')
        frame = csv_to_DataFrame(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        column_names = list(frame.columns)
        assert column_names == ['name', 'when']
        assert frame.dtypes['when'] == 'M8[ns]'

예제 #9

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_pandas_read_supports_missing_integers():
    """An ``?int32`` column with an empty cell is read with dtype ``f4``."""
    with filetext('Alice,1\nBob,') as path:
        schema = datashape.dshape('var * {name: string, val: ?int32}')
        frame = csv_to_DataFrame(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        column_names = list(frame.columns)
        assert column_names == ['name', 'val']
        val_dtype = frame.dtypes['val']
        assert val_dtype == 'f4'

예제 #10

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_unused_datetime_columns():
    """Reading only ``val`` works although the dshape has a datetime field.

    ``usecols=['val']`` with ``squeeze=True`` is expected to convert to
    the plain list of values.
    """
    schema = datashape.dshape('var * {val: string, when: datetime}')
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as path:
        source = CSV(path, has_header=True)
        squeezed = csv_to_DataFrame(source, usecols=['val'], squeeze=True,
                                    dshape=schema)
        assert convert(list, squeezed) == ['a', 'b']

예제 #11

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_pandas_read():
    """``csv_to_DataFrame`` reads a headerless CSV using the given dshape.

    Rows and column names of the resulting DataFrame are both checked.
    """
    with filetext('Alice,1\nBob,2') as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        frame = csv_to_DataFrame(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]
        column_names = list(frame.columns)
        assert column_names == ['name', 'amount']

예제 #12

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_header_disagrees_with_dshape():
    """Column names follow the dshape when one is passed explicitly.

    Without a dshape the DataFrame uses the file header (``val``); with
    the dshape it uses the dshape's field name (``bal``).
    """
    schema = datashape.dshape('var * {name: string, bal: int64}')
    content = 'name,val\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path:
        source = CSV(path, header=True)
        assert convert(list, source) == [('Alice', 100), ('Bob', 200)]

        header_columns = list(convert(pd.DataFrame, source).columns)
        assert header_columns == ['name', 'val']

        frame = convert(pd.DataFrame, source, dshape=schema)
        assert list(frame.columns) == ['name', 'bal']

예제 #13

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_pandas_read_supports_datetimes():
    """``csv_to_DataFrame`` turns a ``date`` dshape field into ``M8[ns]``."""
    content = 'Alice,2014-01-02\nBob,2014-01-03'
    with filetext(content) as path:
        schema = datashape.dshape('var * {name: string, when: date}')
        source = CSV(path)
        frame = csv_to_DataFrame(source, dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        assert list(frame.columns) == ['name', 'when']
        when_dtype = frame.dtypes['when']
        assert when_dtype == 'M8[ns]'

예제 #14

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_pandas_read_supports_missing_integers():
    """A missing value in an ``?int32`` column gives the column dtype ``f4``."""
    content = 'Alice,1\nBob,'
    with filetext(content) as path:
        schema = datashape.dshape('var * {name: string, val: ?int32}')
        source = CSV(path)
        frame = csv_to_DataFrame(source, dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        assert list(frame.columns) == ['name', 'val']
        assert frame.dtypes['val'] == 'f4'

예제 #15

0

파일 보기

파일: test_into.py 프로젝트: quasiben/odo

def test_into_double_string():
    """``into`` accepts filename strings as both target and source.

    The result must be a ``CSV`` and the target file must contain the
    source's data.
    """
    with filetext('alice,1\nbob,2', extension='.csv') as source:
        assert into(list, source) == [('alice', 1), ('bob', 2)]

        with tmpfile('.csv') as target:
            result = into(target, source)
            assert isinstance(result, CSV)
            with open(target) as handle:
                written = handle.read()
            assert 'alice' in written

예제 #16

0

파일 보기

파일: test_into.py 프로젝트: mrocklin/into

def test_into_double_string():
    """Copy one CSV path into another CSV path via ``into``.

    Checks the source rows, the type of the returned object and that the
    target file holds the copied text.
    """
    expected_rows = [('alice', 1), ('bob', 2)]
    with filetext('alice,1\nbob,2', extension='.csv') as source:
        assert into(list, source) == expected_rows

        with tmpfile('.csv') as target:
            copied = into(target, source)
            assert isinstance(copied, CSV)
            with open(target) as handle:
                text = handle.read()
            assert 'alice' in text

예제 #17

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_header_disagrees_with_dshape():
    """An explicit dshape overrides the header's column names.

    The header names the second column ``val`` while the dshape names it
    ``bal``; each conversion is checked for the names it should report.
    """
    schema = datashape.dshape('var * {name: string, bal: int64}')
    content = 'name,val\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path:
        source = CSV(path, header=True)
        rows = convert(list, source)
        assert rows == [('Alice', 100), ('Bob', 200)]

        from_header = convert(pd.DataFrame, source)
        assert list(from_header.columns) == ['name', 'val']

        from_dshape = convert(pd.DataFrame, source, dshape=schema)
        assert list(from_dshape.columns) == ['name', 'bal']

예제 #18

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_pandas_read_supports_gzip():
    """``csv_to_DataFrame`` reads a gzipped CSV written in ``'wt'`` mode.

    Rows and dshape-derived column names are both verified.
    """
    gzip_options = dict(open=gzip.open, mode='wt', extension='.csv.gz')
    with filetext('Alice,1\nBob,2', **gzip_options) as path:
        schema = datashape.dshape('var * {name: string, amount: int}')
        frame = csv_to_DataFrame(CSV(path), dshape=schema)
        assert isinstance(frame, pd.DataFrame)
        rows = convert(list, frame)
        assert rows == [('Alice', 1), ('Bob', 2)]
        assert list(frame.columns) == ['name', 'amount']

예제 #19

0

파일 보기

파일: test_ssh.py 프로젝트: mrocklin/into

def test_copy_remote_csv():
    """Copy a local CSV to an ``ssh://`` URI and back again.

    The SSH copy must be an ``SSH(CSV)`` with the same datashape, and the
    round-tripped file must hold the same rows as the original.
    """
    content = 'name,balance\nAlice,100\nBob,200'
    with tmpfile('csv') as target:
        with filetext(content, extension='csv') as path:
            local = resource(path)
            remote = into('ssh://localhost:foo.csv', local)
            assert isinstance(remote, SSH(CSV))
            assert discover(remote) == discover(local)

            # Round trip
            returned = into(target, remote)
            original_rows = into(list, local)
            returned_rows = into(list, returned)
            assert original_rows == returned_rows

예제 #20

0

파일 보기

def test_copy_remote_csv():
    """Push a local CSV to an ``ssh://`` location, then pull it back.

    Verifies the type and datashape of the remote copy and that the
    rows survive the round trip unchanged.
    """
    content = 'name,balance\nAlice,100\nBob,200'
    with tmpfile('csv') as target:
        with filetext(content, extension='csv') as path:
            local = resource(path)
            remote = into('ssh://localhost:foo.csv', local)
            assert isinstance(remote, SSH(CSV))

            remote_shape = discover(remote)
            local_shape = discover(local)
            assert remote_shape == local_shape

            # Round trip
            round_tripped = into(target, remote)
            assert into(list, local) == into(list, round_tripped)

예제 #21

0

파일 보기

def test_convert_through_temporary_local_storage():
    """Temporary SSH targets hold the same data as their sources.

    Covers a CSV and a DataFrame converted into ``Temp(SSH(CSV))`` and a
    DataFrame converted into ``Temp(SSH(JSONLines))``.
    """
    content = 'name,quantity\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path:
        local = CSV(path)
        frame = into(pd.DataFrame, local)

        remote_from_csv = into(Temp(SSH(CSV)), local, hostname='localhost')
        assert into(list, local) == into(list, remote_from_csv)

        remote_from_frame = into(Temp(SSH(CSV)), frame, hostname='localhost')
        assert into(list, remote_from_frame) == into(list, frame)

        remote_json = into(Temp(SSH(JSONLines)), frame, hostname='localhost')
        matches = into(np.ndarray, remote_json) == into(np.ndarray, frame)
        assert matches.all()

예제 #22

0

파일 보기

def test_drop():
    """``drop`` on an ``SSH(CSV)`` removes the file it points to.

    The target path must not exist before the upload, must exist after
    it, and must be gone again once ``drop`` has run.
    """
    content = 'name,balance\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path, tmpfile('csv') as target:
        remote = SSH(CSV)(target, hostname='localhost')
        assert not os.path.exists(target)

        # Upload the local file through a connection built from remote.auth.
        connection = sftp(**remote.auth)
        connection.put(path, target)
        assert os.path.exists(target)

        drop(remote)
        assert not os.path.exists(target)

예제 #23

0

파일 보기

파일: test_ssh.py 프로젝트: mrocklin/into

def test_drop():
    """``drop`` on an ``SSH(CSV)`` deletes the uploaded file.

    Existence of the target path is checked before the upload, after the
    upload, and after ``drop``.
    """
    content = 'name,balance\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path, tmpfile('csv') as target:
        # Constructed as in the original test; not used afterwards.
        local = CSV(path)
        remote = SSH(CSV)(target, hostname='localhost')
        assert not os.path.exists(target)

        # Upload the local file inside a managed sftp connection.
        with sftp(**remote.auth) as connection:
            connection.put(path, target)
        assert os.path.exists(target)

        drop(remote)
        assert not os.path.exists(target)

예제 #24

0

파일 보기

파일: test_ssh.py 프로젝트: mrocklin/into

def test_discover():
    """A file gives the same datashape as ``CSV`` and as ``SSH(CSV)``."""
    content = 'name,balance\nAlice,100\nBob,200'
    with filetext(content) as path:
        local_csv = CSV(path)
        remote_csv = SSH(CSV)(path, hostname='localhost')

        local_shape = discover(local_csv)
        remote_shape = discover(remote_csv)
        assert local_shape == remote_shape

예제 #25

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_discover_csv_without_columns():
    """Data values must not appear in the datashape of a headerless CSV."""
    content = 'Alice,100\nBob,200'
    with filetext(content, extension='csv') as path:
        shape_text = str(discover(CSV(path)))
        assert '100' not in shape_text

예제 #26

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_unicode_column_names():
    """Loading a CSV with a non-ASCII header into a DataFrame must not raise.

    There is no assertion; the test passes when the conversion completes.
    """
    content = 'foo\xc4\x87,a\n1,2\n3,4'
    with filetext(content, extension='csv') as path:
        source = CSV(path, has_header=True)
        frame = into(pd.DataFrame, source)

예제 #27

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_discover_csv_with_spaces_in_header():
    """Header fields padded with spaces are discovered without the padding."""
    content = ' name,  val\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path:
        source = CSV(path, has_header=True)
        field_names = discover(source).measure.names
        assert field_names == ['name', 'val']

예제 #28

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_unused_datetime_columns():
    """A datetime field in the dshape may be left out via ``usecols``.

    Reading only ``val`` with ``squeeze=True`` is expected to convert to
    the list of that column's values.
    """
    schema = datashape.dshape('var * {val: string, when: datetime}')
    with filetext("val,when\na,2000-01-01\nb,2000-02-02") as path:
        source = CSV(path, has_header=True)
        read_options = dict(usecols=['val'], squeeze=True, dshape=schema)
        values = convert(list, csv_to_DataFrame(source, **read_options))
        assert values == ['a', 'b']

예제 #29

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_csv_missing_values():
    """A ``name`` column containing ``NA`` is discovered as ``Option(string)``."""
    content = 'name,val\nAlice,100\nNA,200'
    with filetext(content, extension='csv') as path:
        measure = discover(CSV(path)).measure
        assert measure.dict['name'] == Option(string)

예제 #30

0

파일 보기

파일: test_into.py 프로젝트: mrocklin/into

def test_into_string_on_right():
    """``into`` with a list target reads rows from a CSV filename string."""
    content = 'alice,1\nbob,2'
    with filetext(content, extension='.csv') as source:
        rows = into([], source)
        assert rows == [('alice', 1), ('bob', 2)]

예제 #31

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_empty_dataframe():
    """A CSV holding only a header line still converts to a DataFrame."""
    with filetext('name,val', extension='csv') as path:
        header_only = CSV(path, has_header=True)
        frame = convert(pd.DataFrame, header_only)
        assert isinstance(frame, pd.DataFrame)

예제 #32

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_header_argument_set_with_or_without_header():
    """``into(list, path)`` returns the data rows with or without a header.

    The same two rows are expected from a file that starts with a header
    line and from one that does not.
    """
    with_header = 'name,val\nAlice,100\nBob,200'
    with filetext(with_header, extension='csv') as path:
        rows = into(list, path)
        assert rows == [('Alice', 100), ('Bob', 200)]

    without_header = 'Alice,100\nBob,200'
    with filetext(without_header, extension='csv') as path:
        rows = into(list, path)
        assert rows == [('Alice', 100), ('Bob', 200)]

예제 #33

0

파일 보기

파일: test_text.py 프로젝트: quasiben/odo

def test_drop():
    """``drop`` on a ``TextFile`` removes the underlying file from disk."""
    with filetext('hello\nworld') as path:
        text_file = TextFile(path)
        exists_before = os.path.exists(path)
        assert exists_before
        drop(text_file)
        exists_after = os.path.exists(path)
        assert not exists_after

예제 #34

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_discover_csv_files_without_header():
    """With ``has_header=False`` the first line is data, not column names."""
    content = 'Alice,2014-01-01\nBob,2014-02-02'
    with filetext(content) as path:
        source = CSV(path, has_header=False)
        frame = convert(pd.DataFrame, source)
        assert len(frame) == 2
        column_names = list(frame.columns)
        assert 'Alice' not in column_names

예제 #35

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_pandas_discover_on_gzipped_files():
    """``discover`` handles a ``.csv.gz`` file written via ``gzip.open``.

    The header CSV is expected to yield string and datetime fields.
    """
    gzip_options = dict(open=gzip.open, extension='.csv.gz')
    content = 'name,when\nAlice,2014-01-01\nBob,2014-02-02'
    with filetext(content, **gzip_options) as path:
        source = CSV(path, has_header=True)
        expected = datashape.dshape('var * {name: string, when: datetime}')
        discovered = discover(source)
        assert discovered == expected

예제 #36

0

파일 보기

파일: test_ssh.py 프로젝트: mrocklin/into

def test_discover_from_resource():
    """An ``ssh://localhost:`` resource has the same datashape as the file."""
    content = 'name,balance\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path:
        local = CSV(path)
        uri = 'ssh://localhost:' + path
        remote = resource(uri)

        assert discover(local) == discover(remote)

예제 #37

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_header_argument_set_with_or_without_header():
    """The data rows are the same whether or not the file has a header.

    Both file contents are loaded with ``into(list, path)`` and compared
    against the same expected rows.
    """
    contents = (
        'name,val\nAlice,100\nBob,200',
        'Alice,100\nBob,200',
    )
    for content in contents:
        with filetext(content, extension='csv') as path:
            assert into(list, path) == [('Alice', 100), ('Bob', 200)]

예제 #38

0

파일 보기

파일: test_into.py 프로젝트: quasiben/odo

def test_into_string_on_right():
    """A CSV filename string can be the source of ``into`` with a list target."""
    expected_rows = [('alice', 1), ('bob', 2)]
    with filetext('alice,1\nbob,2', extension='.csv') as source:
        loaded = into([], source)
        assert loaded == expected_rows

예제 #39

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_discover_csv_with_spaces_in_header():
    """Leading spaces in header fields are absent from the discovered names."""
    with filetext(' name,  val\nAlice,100\nBob,200', extension='csv') as path:
        discovered = discover(CSV(path, has_header=True))
        names = discovered.measure.names
        assert names == ['name', 'val']

예제 #40

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_csv_into_list():
    """``into(list, path)`` yields the data rows of a CSV with a header."""
    content = 'name,val\nAlice,100\nBob,200'
    with filetext(content, extension='csv') as path:
        rows = into(list, path)
        assert rows == [('Alice', 100), ('Bob', 200)]

예제 #41

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_raise_errors_quickly_on_into_chunks_dataframe():
    """``CSV_to_chunks_of_dataframes`` raises when data contradicts the dshape.

    The ``val`` column is declared ``int`` but the file contains ``foo``.
    """
    content = 'name,val\nAlice,100\nBob,foo'
    with filetext(content, extension='csv') as path:
        schema = datashape.dshape('var * {name: string, val: int}')
        source = CSV(path, header=True)

        def build_chunks():
            return CSV_to_chunks_of_dataframes(source, dshape=schema)

        assert raises(Exception, build_chunks)

예제 #42

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_discover_csv_yields_string_on_totally_empty_columns():
    """A column with no values at all is discovered as ``string``."""
    expected = dshape('var * {a: int64, b: string, c: int64}')
    content = 'a,b,c\n1,,3\n4,,6\n7,,9'
    with filetext(content) as path:
        source = CSV(path, has_header=True)
        discovered = discover(source)
        assert discovered == expected

예제 #43

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_empty_dataframe():
    """Converting a header-only CSV still produces a ``pd.DataFrame``."""
    content = 'name,val'
    with filetext(content, extension='csv') as path:
        source = CSV(path, has_header=True)
        result = convert(pd.DataFrame, source)
        assert isinstance(result, pd.DataFrame)

예제 #44

0

파일 보기

def test_discover():
    """``discover`` agrees between ``CSV`` and ``SSH(CSV)`` on one file."""
    with filetext('name,balance\nAlice,100\nBob,200') as path:
        on_disk = CSV(path)
        over_ssh = SSH(CSV)(path, hostname='localhost')

        shape_on_disk = discover(on_disk)
        shape_over_ssh = discover(over_ssh)
        assert shape_on_disk == shape_over_ssh

예제 #45

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_csv_separator_header():
    """A pipe-delimited CSV with a header converts to its integer rows."""
    content = 'a|b|c\n1|2|3\n4|5|6'
    with filetext(content, extension='csv') as path:
        source = CSV(path, delimiter='|', has_header=True)
        rows = convert(list, source)
        assert rows == [(1, 2, 3), (4, 5, 6)]

예제 #46

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_discover_csv_files_without_header():
    """A headerless CSV keeps both lines as rows of the DataFrame.

    ``'Alice'`` must not end up among the column names.
    """
    with filetext('Alice,2014-01-01\nBob,2014-02-02') as path:
        frame = convert(pd.DataFrame, CSV(path, has_header=False))
        row_count = len(frame)
        assert row_count == 2
        assert 'Alice' not in list(frame.columns)

예제 #47

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_raise_errors_quickly_on_into_chunks_dataframe():
    """A non-integer value in an ``int`` column makes chunked reading raise.

    ``raises`` is given a callable that invokes
    ``CSV_to_chunks_of_dataframes`` with the conflicting dshape.
    """
    content = 'name,val\nAlice,100\nBob,foo'
    with filetext(content, extension='csv') as path:
        schema = datashape.dshape('var * {name: string, val: int}')
        source = CSV(path, header=True)

        def read_in_chunks():
            return CSV_to_chunks_of_dataframes(source, dshape=schema)

        assert raises(Exception, read_in_chunks)

예제 #48

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_discover_csv_yields_string_on_totally_empty_columns():
    """A column with no values at all is discovered as ``?string``."""
    expected = dshape('var * {a: int64, b: ?string, c: int64}')
    content = 'a,b,c\n1,,3\n4,,6\n7,,9'
    with filetext(content) as path:
        source = CSV(path, has_header=True)
        discovered = discover(source)
        assert discovered == expected

예제 #49

0

파일 보기

파일: test_csv.py 프로젝트: debugger22/into

def test_csv_into_list():
    """Loading a header CSV path into a list gives only the data rows."""
    expected_rows = [('Alice', 100), ('Bob', 200)]
    with filetext('name,val\nAlice,100\nBob,200', extension='csv') as path:
        loaded = into(list, path)
        assert loaded == expected_rows

예제 #50

0

파일 보기

def test_discover_from_resource():
    """``resource`` on an ``ssh://localhost:`` URI matches the local CSV.

    Both objects refer to the same temporary file and must report equal
    datashapes.
    """
    with filetext('name,balance\nAlice,100\nBob,200', extension='csv') as path:
        local = CSV(path)
        remote = resource('ssh://localhost:' + path)

        local_shape = discover(local)
        remote_shape = discover(remote)
        assert local_shape == remote_shape

예제 #51

0

파일 보기

파일: test_text.py 프로젝트: quasiben/odo

def test_convert():
    """A ``TextFile`` converts to a list of its lines, newlines kept."""
    with filetext('Hello\nWorld') as path:
        lines = convert(list, TextFile(path))
        assert lines == ['Hello\n', 'World']

예제 #52

0

파일 보기

파일: test_csv.py 프로젝트: jreback/into

def test_discover_csv_without_columns():
    """The datashape of a headerless CSV must not contain the value ``100``."""
    with filetext('Alice,100\nBob,200', extension='csv') as path:
        source = CSV(path)
        discovered = discover(source)
        rendered = str(discovered)
        assert '100' not in rendered