Beispiel #1
0
def test_csv_to_compressed_csv():
    """Copy a plain CSV into a ``.csv.gz`` target and compare the rows read back."""
    with tmpfile(".csv") as plain_path:
        with open(plain_path, "w") as handle:
            handle.write("a,1\nb,2\nc,3")
        with tmpfile(".csv.gz") as gz_path:
            compressed = odo(plain_path, gz_path)
            rows_from_gz = odo(compressed, list)
            rows_from_plain = odo(plain_path, list)
            assert rows_from_gz == rows_from_plain
Beispiel #2
0
def test_csv_to_compressed_csv():
    """Rows read from the gzip copy must equal rows read from the source CSV."""
    with tmpfile('.csv') as src:
        with open(src, 'w') as out:
            out.write('a,1\nb,2\nc,3')
        with tmpfile('.csv.gz') as dst:
            gz_csv = odo(src, dst)
            assert odo(gz_csv, list) == odo(src, list)
Beispiel #3
0
def test_resource_gzip():
    """``resource`` resolves gzip-suffixed JSON paths to JSON / JSONLines objects.

    Covers a bare path as well as the explicit ``json://`` and
    ``jsonlines://`` protocol prefixes.
    """
    json_types = (JSON, JSONLines)

    with tmpfile('json.gz') as fn:
        assert isinstance(resource(fn), json_types)
        assert isinstance(resource('json://' + fn), json_types)
        assert isinstance(resource('jsonlines://' + fn), json_types)

    # Bind the new temporary name: previously this block silently reused the
    # ``json.gz`` path left over from the block above, so the
    # ``jsonlines.gz`` extension was never actually exercised.
    with tmpfile('jsonlines.gz') as fn:
        assert isinstance(resource('jsonlines://' + fn), json_types)
Beispiel #4
0
def test_pytables_to_csv():
    """Write a 2x2 zero array to a PyTables file and convert it to CSV."""
    ndim = 2
    with tmpfile('.h5') as fn:
        # Use the PEP 8 PyTables names (open_file / create_array) rather than
        # the legacy camelCase aliases, and make sure the handle is closed
        # even if array creation fails.
        h5file = tb.open_file(fn, mode='w', title="Test Array")
        try:
            h5file.create_array(
                '/', "test", np.zeros((ndim, ndim), dtype=float)
            )
        finally:
            h5file.close()
        with tmpfile('csv') as csv:
            t = odo('pytables://%s::/test' % fn, csv)
            assert odo(t, list) == [(0.0, 0.0), (0.0, 0.0)]
Beispiel #5
0
def test_copy_one_table_to_a_foreign_engine():
    """Copy a table from one SQLite file into another via a select."""
    rows = [(1, 1), (2, 4), (3, 9)]
    shape = dshape("var * {x: int, y: int}")
    with tmpfile("db") as src_path:
        with tmpfile("db") as dst_path:
            src_uri = "sqlite:///%s::points" % src_path
            dst_uri = "sqlite:///%s::points" % dst_path
            source = into(src_uri, rows, dshape=shape)
            target = into(dst_uri, sa.select([source]), dshape=shape)

            # Both tables, and the original data, must hold the same rows.
            assert into(set, source) == into(set, target)
            assert into(set, rows) == into(set, target)
Beispiel #6
0
def test_resource_on_file():
    """``resource`` on a SQLite file URI returns a ``sa.Table``.

    The table name is given once as a separate argument and once with the
    ``::`` separator inside the URI.
    """
    with tmpfile('.db') as db_path:
        engine_uri = 'sqlite:///' + db_path
        table = resource(engine_uri, 'foo', dshape='var * {x: int, y: int}')
        assert isinstance(table, sa.Table)

    with tmpfile('.db') as db_path:
        engine_uri = 'sqlite:///' + db_path
        table_uri = engine_uri + '::' + 'foo'
        table = resource(table_uri, dshape='var * {x: int, y: int}')
        assert isinstance(table, sa.Table)
Beispiel #7
0
def test_resource_on_file():
    """Both ways of naming a table on a SQLite file yield a ``sa.Table``."""
    # Table name passed as a separate positional argument.
    with tmpfile(".db") as path:
        base = "sqlite:///" + path
        positional = resource(base, "foo", dshape="var * {x: int, y: int}")
        assert isinstance(positional, sa.Table)

    # Table name embedded in the URI after '::'.
    with tmpfile(".db") as path:
        base = "sqlite:///" + path
        embedded = resource(base + "::" + "foo", dshape="var * {x: int, y: int}")
        assert isinstance(embedded, sa.Table)
Beispiel #8
0
def test_into_sqlite_with_header_and_different_sep():
    """Load a ``|``-delimited CSV written from a DataFrame into SQLite."""
    frame = pd.DataFrame(
        [('Alice', 100), ('Bob', 200)],
        columns=['name', 'amount'],
    )
    with tmpfile('.csv') as csv_path:
        piped_csv = into(csv_path, frame, delimiter='|')

        with tmpfile('.db') as db_path:
            table = resource(
                'sqlite:///%s::df' % db_path,
                dshape=discover(piped_csv),
            )
            loaded = into(table, piped_csv)

            assert into(list, loaded) == into(list, frame)
Beispiel #9
0
def test_send_parameterized_query_to_csv():
    """A filtered select written to CSV yields the same rows as the query."""
    with tmpfile('db') as db_path:
        with filetext('a,b\n1,2\n3,4', extension='csv') as csv_path:
            table = odo(csv_path, 'sqlite:///%s::mytable' % db_path)
        with tmpfile('.csv') as out_path:
            query = table.select(table.c.a == 1)
            written = odo(query, out_path)
            rows_from_query = sorted(odo(query, list))
            rows_from_csv = sorted(odo(written, list))
            assert rows_from_query == rows_from_csv
Beispiel #10
0
def test_into_sqlite():
    """Append a headerless CSV to a SQLite table via ``append_csv_to_sql_table``."""
    rows = [('Alice', 100), ('Bob', 200)]
    shape = datashape.dshape('var * {name: string, amount: int}')

    with tmpfile('.db') as db_file:
        with tmpfile('.csv') as csv_file:
            source = into(csv_file, rows, dshape=shape, has_header=False)
            table = resource('sqlite:///%s::mytable' % db_file, dshape=shape)
            # NotImplementedError from the append is tolerated; in that case
            # the assertion below is skipped as well.
            with ignoring(NotImplementedError):
                append_csv_to_sql_table(table, source)
                assert into(list, table) == rows
Beispiel #11
0
def test_different_encoding_to_csv():
    """Exporting the table to CSV with ``encoding='latin1'`` raises ValueError."""
    with tmpfile('db') as db_path:
        with filetext('a,b\n1,2\n3,4', extension='csv') as csv_path:
            table_uri = 'sqlite:///%s::mytable' % db_path
            table = odo(csv_path, table_uri, encoding='latin1')
            with tmpfile('.csv') as out_path:
                with pytest.raises(ValueError):
                    odo(table, out_path, encoding='latin1')
Beispiel #12
0
def test_sqlite_to_csv(sep, header):
    """Export a SQLite table to CSV with the given delimiter and header flag.

    The written file is parsed by hand (skipping the first line when
    ``header`` is truthy) and compared against what ``odo`` reads back.
    """
    with tmpfile("db") as db_path:
        with filetext("a,b\n1,2\n3,4", extension="csv") as csv_path:
            table = odo(csv_path, "sqlite:///%s::mytable" % db_path)

        with tmpfile(".csv") as out_path:
            odo(table, out_path, header=header, delimiter=sep)
            with open(out_path, "rt") as handle:
                raw_lines = handle.readlines()
            # ``header`` doubles as the number of leading lines to drop.
            expected = [
                tuple(int(cell) for cell in line.split(sep))
                for line in raw_lines[header:]
            ]
            parsed = odo(
                out_path,
                list,
                delimiter=sep,
                has_header=header,
                dshape=discover(table),
            )
            assert parsed == expected
Beispiel #13
0
def test_missing_to_csv():
    """Records with missing keys are written to CSV with empty cells."""
    records = [dict(a=1, b=2), dict(a=2, c=4)]
    with tmpfile('.json') as json_path:
        json_target = odo(records, JSON(json_path))

        with tmpfile('.csv') as csv_path:
            written = odo(json_target, csv_path)
            with open(written.path, 'rt') as handle:
                result = handle.read()

    expected = 'a,b,c\n1,2.0,\n2,,4.0\n'
    assert result == expected
Beispiel #14
0
def test_into_sqlite_with_different_sep():
    """Load a headerless ``|``-delimited CSV into a SQLite table."""
    frame = pd.DataFrame(
        [('Alice', 100), ('Bob', 200)],
        columns=['name', 'amount'],
    )
    with tmpfile('.csv') as csv_path:
        # TODO: get the  header  argument to work in into(CSV, other)
        frame.to_csv(csv_path, sep='|', header=False, index=False)
        piped_csv = CSV(csv_path, delimiter='|', has_header=False)

        with tmpfile('.db') as db_path:
            table = resource(
                'sqlite:///%s::df' % db_path,
                dshape=discover(piped_csv),
            )
            loaded = into(table, piped_csv)

            assert into(list, loaded) == into(list, frame)
Beispiel #15
0
def test_resource_shape():
    """``resource`` creates an HDF5 dataset whose shape follows the dshape.

    A ``var`` leading dimension is expected to produce a length-0 axis.
    """
    cases = [
        ('10 * int', (10,)),
        ('10 * 10 * int', (10, 10)),
        ('var * 10 * int', (0, 10)),
    ]
    for ds, expected_shape in cases:
        with tmpfile('.hdf5') as fn:
            r = resource(fn + '::/data', dshape=ds)
            # Close the underlying file even when the assertion fails so the
            # open handle does not outlive the temporary file.
            try:
                assert r.shape == expected_shape
            finally:
                r.file.close()
Beispiel #16
0
def test_tuples_to_json():
    """Appending tuples with a record dshape writes keyed JSON for both targets."""
    ds = dshape('var * {a: int, b: int}')
    # Same scenario for the JSON and the JSONLines container, in that order.
    for container in (JSON, JSONLines):
        with tmpfile('json') as path:
            target = container(path)

            append(target, [(1, 2), (10, 20)], dshape=ds)
            with open(path) as handle:
                assert '"a": 1' in handle.read()
Beispiel #17
0
def test_compound_primary_key_with_single_reference():
    """Reference one column of a compound primary key from another table.

    ``orders.product_no`` points at ``products.product_no`` while
    ``products`` is keyed on ``(product_no, product_sku)``.
    """
    with tmpfile('db') as db_path:
        products_uri = 'sqlite:///%s::products' % db_path
        orders_uri = 'sqlite:///%s::orders' % db_path
        products_shape = """
                                var * {
                                    product_no: int32,
                                    product_sku: string,
                                    name: ?string,
                                    price: ?float64
                                }
                            """
        products = resource(
            products_uri,
            dshape=products_shape,
            primary_key=['product_no', 'product_sku'],
        )
        # TODO: should this fail everywhere? e.g., this fails in postgres, but
        # not in sqlite because postgres doesn't allow partial foreign keys
        # might be best to let the backend handle this
        orders_shape = dshape("""var * {
                          order_id: int32,
                          product_no: map[int32, T],
                          quantity: ?int32
                        }""")
        orders = resource(
            orders_uri,
            dshape=orders_shape,
            foreign_keys={'product_no': products.c.product_no},
            primary_key=['order_id'],
        )
        discovered = discover(orders)
        expected = dshape(
            """var * {
                order_id: int32,
                product_no: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}],
                quantity: ?int32
            }
            """
        )
        assert discovered == expected
Beispiel #18
0
def test_string_dshape_doc_example():
    """Store a zero array in SQLite using a string dshape; every row is (0, 0)."""
    zeros = np.zeros((10, 2))
    with tmpfile('.db') as db_path:
        table_uri = 'sqlite:///%s::x' % db_path
        table = odo(zeros, table_uri, dshape='var * {a: float64, b: float64}')
        rows = table.select().execute().fetchall()
        assert all(row == (0, 0) for row in rows)
Beispiel #19
0
def tbfile():
    """Yield the path of a temporary PyTables file holding a table built from ``x``.

    The table is created at the root under the node name ``'title'`` and
    both the table and the file are closed before the path is yielded.
    """
    with tmpfile('.h5') as path:
        h5 = tb.open_file(path, mode='w')
        table = h5.create_table('/', 'title',  x)
        table.close()
        h5.close()
        yield path
Beispiel #20
0
def test_discover_foreign_keys():
    """``discover`` on a table with a foreign key returns the ``map`` dshape it was built from."""
    with tmpfile('db') as db_path:
        products_uri = 'sqlite:///%s::products' % db_path
        orders_uri = 'sqlite:///%s::orders' % db_path
        products_shape = """
                                var * {
                                    product_no: int32,
                                    name: ?string,
                                    price: ?float64
                                }
                            """
        products = resource(
            products_uri,
            dshape=products_shape,
            primary_key=['product_no'],
        )
        expected = dshape("""var * {
                          order_id: int32,
                          product_no: map[int32, {
                            product_no: int32,
                            name: ?string,
                            price: ?float64
                          }],
                          quantity: ?int32
                        }""")
        orders = resource(
            orders_uri,
            dshape=expected,
            foreign_keys={'product_no': products.c.product_no},
        )
        assert discover(orders) == expected
Beispiel #21
0
def test_compound_primary_key_with_fkey():
    """Reference both columns of a compound primary key from another table."""
    with tmpfile('db') as db_path:
        products_uri = 'sqlite:///%s::products' % db_path
        orders_uri = 'sqlite:///%s::orders' % db_path
        products_shape = """
                                var * {
                                    product_no: int32,
                                    product_sku: string,
                                    name: ?string,
                                    price: ?float64
                                }
                            """
        products = resource(
            products_uri,
            dshape=products_shape,
            primary_key=['product_no', 'product_sku'],
        )
        orders_shape = dshape("""var * {
                          order_id: int32,
                          product_no: map[int32, T],
                          product_sku: map[int32, U],
                          quantity: ?int32
                        }""")
        fkeys = dict(
            product_no=products.c.product_no,
            product_sku=products.c.product_sku,
        )
        orders = resource(
            orders_uri,
            dshape=orders_shape,
            primary_key=['order_id'],
            foreign_keys=fkeys,
        )
        discovered = discover(orders)
        expected = dshape(
            """var * {
                order_id: int32,
                product_no: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}],
                product_sku: map[int32, {product_no: int32, product_sku: string, name: ?string, price: ?float64}],
                quantity: ?int32
            }
            """
        )
        assert discovered == expected
Beispiel #22
0
def test_na_value(sql, csv):
    """Exporting with ``na_value='NA'`` writes ``NA`` for the missing cell."""
    table = odo(null_data, sql)
    with tmpfile('.csv') as out_path:
        written = odo(table, out_path, na_value='NA')
        with open(written.path, 'rt') as handle:
            contents = handle.read()
    assert contents == 'a,b\n1,NA\n10,20\n100,200\n'
Beispiel #23
0
def test_sql_to_csv(sql, csv, tmpdir):
    """Round-trip CSV -> SQL -> CSV, checking both the rows and the column names."""
    target, bind = sql
    table = odo(csv, target, bind=bind)
    with tmpfile('.csv', dir=tmpdir) as out_path:
        exported = odo(table, out_path, bind=bind)
        assert odo(exported, list) == data
        exported_names = discover(exported).measure.names
        table_names = discover(table).measure.names
        assert exported_names == table_names
Beispiel #24
0
def test_foreign_keys_auto_construct():
    """A ``map[int32, T]`` column is discovered with the referenced table's record type."""
    with tmpfile('db') as db_path:
        products_uri = 'sqlite:///%s::products' % db_path
        orders_uri = 'sqlite:///%s::orders' % db_path
        products_shape = """
                                var * {
                                    product_no: int32,
                                    name: ?string,
                                    price: ?float64
                                }
                            """
        products = resource(
            products_uri,
            dshape=products_shape,
            primary_key=['product_no'],
        )
        orders_shape = dshape("""var * {
                          order_id: int32,
                          product_no: map[int32, T],
                          quantity: ?int32
                        }""")
        orders = resource(
            orders_uri,
            dshape=orders_shape,
            foreign_keys={'product_no': products.c.product_no},
            primary_key=['order_id'],
        )
        discovered = discover(orders)
        expected = dshape("""
            var * {
                order_id: int32,
                product_no: map[int32, {
                                    product_no: int32,
                                    name: ?string,
                                    price: ?float64
                                }],
                quantity: ?int32
            }
        """)
        assert discovered == expected
Beispiel #25
0
def test_transaction():
    """Writes made inside one connection's transaction stay isolated until commit."""
    data = [(1,), (2,), (3,)]

    # Everything that touches the database has to run while the temporary
    # file context is still active; previously the whole body ran after the
    # ``with`` block had exited.
    # NOTE(review): assumes ``tmpfile`` cleans up the path on exit -- confirm.
    with tmpfile('.db') as fn:
        rsc = resource('sqlite:///%s::table' % fn, dshape='var * {a: int}')

        # The connections are context managers, so both are closed even when
        # an assertion fails.
        with rsc.bind.connect() as conn_1, rsc.bind.connect() as conn_2:
            trans_1 = conn_1.begin()
            conn_2.begin()

            odo(data, rsc, bind=conn_1)

            # inside the transaction the write should be there
            assert odo(rsc, list, bind=conn_1) == data

            # outside of a transaction or in a different transaction the
            # write is not there
            assert odo(rsc, list) == odo(rsc, list, bind=conn_2) == []

            trans_1.commit()

            # now the data should appear outside the transaction
            assert odo(rsc, list) == odo(rsc, list, bind=conn_2) == data
Beispiel #26
0
def test_foreign_keys_as_compound_primary_key():
    """A link table whose compound primary key consists of two foreign keys."""
    with tmpfile('db') as db_path:
        suppliers_uri = 'sqlite:///%s::suppliers' % db_path
        parts_uri = 'sqlite:///%s::parts' % db_path
        suppart_uri = 'sqlite:///%s::suppart' % db_path

        suppliers = resource(
            suppliers_uri,
            dshape='var * {id: int64, name: string}',
            primary_key=['id'],
        )
        parts = resource(
            parts_uri,
            dshape='var * {id: int64, name: string, region: string}',
            primary_key=['id'],
        )
        link_keys = dict(supp_id=suppliers.c.id, part_id=parts.c.id)
        suppart = resource(
            suppart_uri,
            dshape='var * {supp_id: map[int64, T], part_id: map[int64, U]}',
            foreign_keys=link_keys,
            primary_key=['supp_id', 'part_id'],
        )
        expected = dshape("""
            var * {
                supp_id: map[int64, {id: int64, name: string}],
                part_id: map[int64, {id: int64, name: string, region: string}]
            }
        """)
        assert discover(suppart) == expected
Beispiel #27
0
def test_csv_with_header():
    """Load a CSV with an explicit header into SQLite; check schema and rows."""
    with tmpfile('db') as db_path:
        with filetext('a,b\n1,2\n3,4', extension='csv') as csv_path:
            table_uri = 'sqlite:///%s::mytable' % db_path
            table = into(table_uri, csv_path, has_header=True)
            assert discover(table) == dshape('var * {a: int64, b: int64}')
            assert into(set, table) == {(1, 2), (3, 4)}
Beispiel #28
0
def test_discover_with_dotted_names():
    """Column names containing dots are kept as-is by ``discover``."""
    with tmpfile('.csv') as csv_path:
        with open(csv_path, 'w') as handle:
            handle.write('a.b,c.d\n1,2\n3,4')
        discovered = discover(resource(csv_path))
    expected = datashape.dshape('var * {"a.b": int64, "c.d": int64}')
    assert discovered == expected
    assert discovered.measure.names == [u'a.b', u'c.d']
Beispiel #29
0
def test_fixed_shape():
    """An ``hdfstore://`` resource for ``df`` reports a list shape and discovers ``len(df)`` rows."""
    with tmpfile('.hdf5') as fn:
        df.to_hdf(fn, 'foo')
        r = resource('hdfstore://'+fn+'::/foo')
        # Close the parent store even when an assertion fails so the open
        # handle does not outlive the temporary file.
        try:
            assert isinstance(r.shape, list)
            assert discover(r).shape == (len(df),)
        finally:
            r.parent.close()
Beispiel #30
0
def test_sql_select_to_csv(sql, csv, tmpdir):
    """Export a single-column select to CSV using an explicit bind."""
    target, bind = sql
    table = odo(csv, target, bind=bind)
    only_a = sa.select([table.c.a])
    with tmpfile('.csv', dir=tmpdir) as out_path:
        exported = odo(only_a, out_path, bind=bind)
        expected = [(first,) for first, _ in data]
        assert odo(exported, list) == expected
Beispiel #31
0
def complex_csv():
    """Yield a ``CSV`` (with header) over a temporary copy of ``dummydata.csv``."""
    here = os.path.dirname(__file__)
    source = os.path.join(here, 'dummydata.csv')
    with tmpfile('.csv') as copy_path:
        shutil.copy(source, copy_path)
        yield CSV(copy_path, has_header=True)
Beispiel #32
0
def test_load_from_jsonlines(ctx):
    """Load ``df`` into ``ctx`` through a JSON-lines file and compare rows as sets."""
    with tmpfile('.json') as path:
        lines_file = odo(df, 'jsonlines://%s' % path)
        loaded = odo(lines_file, ctx, name='r')
        # Each row is compared as a set, so the order of values within a row
        # is ignored.
        actual = [set(row) for row in odo(loaded, list)]
        expected = [set(row) for row in odo(df, list)]
        assert actual == expected
Beispiel #33
0
def test_encoding_is_none():
    """``CSV(..., encoding=None)`` reports ``'utf-8'`` as its encoding."""
    with tmpfile('.csv') as path:
        text = 'a,1\nb,2\nc,3'.encode('utf-8').decode('utf-8')
        with open(path, 'w') as handle:
            handle.write(text)
        csv_file = CSV(path, encoding=None)
        assert csv_file.encoding == 'utf-8'
Beispiel #34
0
def csv():
    """Yield a ``CSV`` over a temporary file populated by ``create_csv(data, ...)``."""
    with tmpfile('.csv') as path:
        create_csv(data, path)
        wrapped = CSV(path)
        yield wrapped
Beispiel #35
0
def fcsv():
    """Yield a ``CSV`` with columns ``a`` and ``b`` over ``data_floats``."""
    with tmpfile('.csv') as path:
        create_csv(data_floats, path)
        column_names = ['a', 'b']
        yield CSV(path, columns=column_names)
Beispiel #36
0
def test_csv_infer_header():
    """Without ``has_header``, the first CSV line is still used for column names."""
    with tmpfile('db') as db_path:
        with filetext('a,b\n1,2\n3,4', extension='csv') as csv_path:
            table_uri = 'sqlite:///%s::mytable' % db_path
            table = odo(csv_path, table_uri)
            assert discover(table) == dshape('var * {a: int64, b: int64}')
            assert odo(table, set) == {(1, 2), (3, 4)}
Beispiel #37
0
def csv():
    """Yield the result of writing ``data`` to a temporary headerless CSV."""
    with tmpfile('csv') as path:
        written = odo(data, path, dshape=ds, has_header=False)
        yield written
Beispiel #38
0
def csv(tmpdir):
    """Yield a ``CSV`` over ``data`` written as UTF-8 bytes inside ``tmpdir``."""
    text_rows = (','.join(str(cell) for cell in row) for row in data)
    payload = '\n'.join(text_rows).encode('utf8')
    with tmpfile('.csv', dir=tmpdir) as path:
        with open(path, 'wb') as handle:
            handle.write(payload)
        yield CSV(path)
Beispiel #39
0
def tmpcsv():
    """Yield the path of a temporary CSV holding ``df`` without its index."""
    with tmpfile('.csv') as path:
        with open(path, mode='w') as handle:
            df.to_csv(handle, index=False)
        yield path
Beispiel #40
0
def test_ftp_to_local_txt():
    """Copying ``ftp_url`` to a local text file leaves a file on disk."""
    with tmpfile('.txt') as local_path:
        downloaded = odo(ftp_url, local_path)
        absolute = os.path.abspath(downloaded.path)
        assert os.path.exists(absolute)
Beispiel #41
0
def test_failed_url():
    """Try to copy ``http://foo.com/myfile.csv`` into a local CSV file.

    NOTE(review): there is no assertion here; the expected outcome is
    presumably declared by a decorator outside this view -- confirm.
    """
    with tmpfile('.csv') as local_path:
        failed_url = "http://foo.com/myfile.csv"
        odo(failed_url, local_path)
Beispiel #42
0
def test_sql_select_to_csv(sql, csv):
    """Export a single-column select to CSV and compare it with ``data``."""
    table = odo(csv, sql)
    only_a = sa.select([table.c.a])
    with tmpfile('.csv') as out_path:
        exported = odo(only_a, out_path)
        expected = [(first,) for first, _ in data]
        assert odo(exported, list) == expected
Beispiel #43
0
def test_has_header_on_tsv():
    """A tab-separated file with a textual first row is reported as having a header."""
    payload = b'a\tb\n1\t2\n3\t4'
    with tmpfile('.csv') as path:
        with open(path, 'wb') as handle:
            handle.write(payload)
        tsv = CSV(path)
        assert tsv.has_header
Beispiel #44
0
def test_url_to_local_csv():
    """Copying ``iris_url`` to a local CSV leaves a file on disk."""
    with tmpfile('.csv') as local_path:
        downloaded = odo(iris_url, local_path)
        absolute = os.path.abspath(downloaded.path)
        assert os.path.exists(absolute)
Beispiel #45
0
def test_decimal_conversion():
    """Summing a ``decimal[11, 2]`` column converts to the matching ``Decimal``."""
    data = [(1.0,), (2.0,)]
    with tmpfile('.db') as db_path:
        table = odo(
            data,
            'sqlite:///%s::x' % db_path,
            dshape='var * {x: decimal[11, 2]}',
        )
        total_query = sa.select([sa.func.sum(table.c.x)])
        result = odo(total_query, Decimal)
    expected = sum(Decimal(value) for (value,) in data)
    assert result == expected
Beispiel #46
0
def test_s3_to_local_csv():
    """Copying ``tips_uri`` to a local CSV leaves a file on disk."""
    with tmpfile('.csv') as local_path:
        downloaded = into(local_path, tips_uri)
        absolute = os.path.abspath(downloaded.path)
        assert os.path.exists(absolute)
Beispiel #47
0
def complex_csv(tmpdir):
    """Yield a ``CSV`` over a mode-0o777 copy of ``dummydata.csv`` placed in ``tmpdir``."""
    here = os.path.dirname(__file__)
    source = os.path.join(here, 'dummydata.csv')
    with tmpfile('.csv', dir=tmpdir) as copy_path:
        shutil.copy(source, copy_path)
        os.chmod(copy_path, 0o777)
        yield CSV(copy_path, has_header=True)
Beispiel #48
0
def test_resource_to_engine():
    """A bare SQLite file URI resolves to an engine with the ``sqlite`` dialect."""
    with tmpfile('.db') as db_path:
        engine = resource('sqlite:///' + db_path)
        assert isinstance(engine, sa.engine.Engine)
        assert engine.dialect.name == 'sqlite'
Beispiel #49
0
def test_append_empty_iterator_returns_table():
    """Appending an empty iterator returns the very same table object."""
    with tmpfile('.db') as db_path:
        table = resource('sqlite:///%s::x' % db_path, dshape='var * {a: int32}')
        returned = odo(iter([]), table)
        assert returned is table
Beispiel #50
0
def csv():
    """Yield the result of ``into`` writing ``data`` to a temporary headerless CSV."""
    with tmpfile('csv') as path:
        yield into(path, data, dshape=ds, has_header=False)
Beispiel #51
0
def cities(sc):
    """Yield the ``(name, city)`` rows of ``cities_df`` as ``Row`` objects built via ``sc``."""
    with tmpfile('.txt') as path:
        cities_df.to_csv(path, header=False, index=False)
        lines = sc.textFile(path)
        fields = lines.map(lambda text: text.split(','))
        yield fields.map(lambda pair: Row(name=pair[0], city=pair[1]))
Beispiel #52
0
def quoted_sql():
    """Yield a SQLite table resource named ``foo bar`` (note the space).

    An ``OperationalError`` raised while the resource is created or in use
    is turned into a pytest skip.
    """
    with tmpfile('.db') as db_path:
        table_uri = 'sqlite:///%s::foo bar' % db_path
        try:
            yield resource(table_uri, dshape=ds)
        except sa.exc.OperationalError as err:
            pytest.skip(str(err))
Beispiel #53
0
def encoding_csv(tmpdir):
    """Yield a ``CSV`` over ``encoding.csv`` re-encoded as latin1 inside ``tmpdir``."""
    here = os.path.dirname(__file__)
    source = os.path.join(here, 'encoding.csv')
    with tmpfile('.csv', dir=tmpdir) as copy_path:
        with open(copy_path, 'wb') as dst, open(source, 'r') as src:
            text = src.read()
            dst.write(text.encode('latin1'))
        yield CSV(copy_path)
Beispiel #54
0
def test_quoted_name(csv, quoted_sql):
    """Rows loaded into the ``quoted_sql`` table match the CSV they came from."""
    with tmpfile('csv') as path:
        source = odo(data, path, dshape=ds, has_header=True)
        table = odo(source, quoted_sql)
        csv_rows = odo(source, list)
        assert sorted(odo(table, list)) == sorted(csv_rows)
Beispiel #55
0
def json_file(data):
    """Yield the path of a temporary ``.json`` file containing ``data``.

    Values are serialised with ``json_dumps`` as the ``default`` hook.
    """
    with tmpfile('.json') as path:
        with open(path, 'w') as handle:
            json.dump(data, handle, default=json_dumps)

        yield path