Code Example #1
File: test_sql.py Project: wikiped/dask
def test_npartitions(db):
    data = read_sql_table('test', db, columns=list(df.columns), npartitions=2,
                          index_col='number')
    assert len(data.divisions) == 3
    assert (data.name.compute() == df.name).all()
    data = read_sql_table('test', db, columns=['name'], npartitions=6,
                          index_col="number")
    assert_eq(data, df[['name']])
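All of the examples on this page call read_sql_table against the same module-level test fixtures from dask's test_sql.py: a small sample DataFrame df indexed by "number" (with "name", "age" and "negish" columns) and a db fixture that loads it into a temporary SQLite database. The sketch below is illustrative only; the sample values, import paths and fixture body are assumptions, not the project's exact definitions.

# Illustrative sketch of the shared fixtures the examples assume.
# Sample values and the fixture body are assumptions, not copied from
# dask's test_sql.py.
import io

import numpy as np
import pandas as pd
import pytest

import dask.dataframe as dd
from dask.dataframe.io.sql import read_sql_table  # import path can vary by dask version
from dask.dataframe.utils import assert_eq
from dask.utils import tmpfile

# Small frame with a numeric "number" index plus "name", "age" and
# "negish" columns, which the assertions in the examples rely on.
df = pd.read_csv(
    io.StringIO(
        "number,name,age,negish\n"
        "0,Alice,33,-5\n"
        "1,Bob,40,-3\n"
        "2,Chris,22,3\n"
        "3,Dora,16,5\n"
        "4,Edith,53,0\n"
    ),
    index_col="number",
)


@pytest.fixture
def db():
    # Write df into a throwaway SQLite file and yield its URI; pandas
    # accepts a database URI string for `con` and creates the SQLAlchemy
    # engine itself.
    with tmpfile() as f:
        uri = "sqlite:///%s" % f
        df.to_sql("test", uri, index=True, if_exists="replace")
        yield uri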
Code Example #2
File: test_sql.py Project: mrocklin/dask
def test_division_or_partition(db):
    with pytest.raises(TypeError):
        read_sql_table('test', db, columns=['name'], index_col="number",
                       divisions=[0, 2, 4], npartitions=3)

    out = read_sql_table('test', db, index_col="number", bytes_per_chunk=100)
    m = out.map_partitions(lambda d: d.memory_usage(
        deep=True, index=True).sum()).compute()
    assert (50 < m).all() and (m < 200).all()
    assert_eq(out, df)
Code Example #3
File: test_sql.py Project: mrocklin/dask
def test_no_nameless_index(db):
    from sqlalchemy import sql
    index = (-sql.column('negish'))
    with pytest.raises(ValueError):
        read_sql_table('test', db, npartitions=2, index_col=index,
                       columns=['negish', 'age', index])

    index = sql.func.abs(sql.column('negish'))

    # function for the index, get all columns
    with pytest.raises(ValueError):
        read_sql_table('test', db, npartitions=2, index_col=index)
Code Example #4
File: test_sql.py Project: mrocklin/dask
def test_extra_connection_engine_keywords(capsys, db):
    data = read_sql_table('test', db, npartitions=2, index_col='number', engine_kwargs={'echo': False}
                          ).compute()
    # no captured message from the stdout with the echo=False parameter (this is the default)
    out, err = capsys.readouterr()
    assert "SELECT" not in out
    assert_eq(data, df)
    # with the echo=True sqlalchemy parameter, you should get all SQL queries in the stdout
    data = read_sql_table('test', db, npartitions=2, index_col='number', engine_kwargs={'echo': True}
                          ).compute()
    out, err = capsys.readouterr()
    assert "SELECT" in out
    assert_eq(data, df)
Code Example #5
def test_npartitions(db):
    data = read_sql_table('test',
                          db,
                          columns=list(df.columns),
                          npartitions=2,
                          index_col='number')
    assert len(data.divisions) == 3
    assert (data.name.compute() == df.name).all()
    data = read_sql_table('test',
                          db,
                          columns=['name'],
                          npartitions=6,
                          index_col="number")
    assert_eq(data, df[['name']])
Code Example #6
def test_division_or_partition(db):
    with pytest.raises(TypeError):
        read_sql_table('test',
                       db,
                       columns=['name'],
                       index_col="number",
                       divisions=[0, 2, 4],
                       npartitions=3)

    out = read_sql_table('test', db, index_col="number", bytes_per_chunk=100)
    m = out.map_partitions(
        lambda d: d.memory_usage(deep=True, index=True).sum()).compute()
    assert (50 < m).all() and (m < 200).all()
    assert_eq(out, df)
Code Example #7
def test_no_nameless_index(db):
    from sqlalchemy import sql

    index = -sql.column("negish")
    with pytest.raises(ValueError):
        read_sql_table(
            "test", db, npartitions=2, index_col=index, columns=["negish", "age", index]
        )

    index = sql.func.abs(sql.column("negish"))

    # function for the index, get all columns
    with pytest.raises(ValueError):
        read_sql_table("test", db, npartitions=2, index_col=index)
Code Example #8
File: test_sql.py Project: djhoese/dask
def test_to_sql_engine_kwargs(caplog):
    ddf = dd.from_pandas(df, 2)
    with tmp_db_uri() as uri:
        ddf.to_sql("test", uri, engine_kwargs={"echo": False})
        logs = "\n".join(r.message for r in caplog.records)
        assert logs == ""
        assert_eq(df, read_sql_table("test", uri, "number"))

    with tmp_db_uri() as uri:
        ddf.to_sql("test", uri, engine_kwargs={"echo": True})
        logs = "\n".join(r.message for r in caplog.records)
        assert "CREATE" in logs
        assert "INSERT" in logs

        assert_eq(df, read_sql_table("test", uri, "number"))
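The to_sql examples (this one and the larger test_to_sql example further down) also rely on a tmp_db_uri helper that is not shown on this page. A hypothetical sketch, assuming it simply yields a URI for a throwaway SQLite database:

# Hypothetical sketch of the tmp_db_uri helper used by the to_sql tests;
# the real helper in dask's test_sql.py may be implemented differently.
import os
from contextlib import contextmanager
from tempfile import TemporaryDirectory


@contextmanager
def tmp_db_uri():
    with TemporaryDirectory() as tmpdir:
        yield "sqlite:///%s" % os.path.join(tmpdir, "test.db")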
Code Example #9
def test_no_nameless_index(db):
    from sqlalchemy import sql
    index = (-sql.column('negish'))
    with pytest.raises(ValueError):
        read_sql_table('test',
                       db,
                       npartitions=2,
                       index_col=index,
                       columns=['negish', 'age', index])

    index = sql.func.abs(sql.column('negish'))

    # function for the index, get all columns
    with pytest.raises(ValueError):
        read_sql_table('test', db, npartitions=2, index_col=index)
Code Example #10
def test_extra_connection_engine_keywords(capsys, db):
    data = read_sql_table(
        "test", db, npartitions=2, index_col="number", engine_kwargs={"echo": False}
    ).compute()
    # no captured message from the stdout with the echo=False parameter (this is the default)
    out, err = capsys.readouterr()
    assert "SELECT" not in out
    assert_eq(data, df)
    # with the echo=True sqlalchemy parameter, you should get all SQL queries in the stdout
    data = read_sql_table(
        "test", db, npartitions=2, index_col="number", engine_kwargs={"echo": True}
    ).compute()
    out, err = capsys.readouterr()
    assert "SELECT" in out
    assert_eq(data, df)
Code Example #11
File: test_sql.py Project: mrocklin/dask
def test_simple(db):
    # single chunk
    data = read_sql_table('test', db, npartitions=2, index_col='number'
                          ).compute()
    assert (data.name == df.name).all()
    assert data.index.name == 'number'
    assert_eq(data, df)
Code Example #12
def test_select_from_select(db):
    from sqlalchemy import sql

    s1 = sql.select([sql.column("number"),
                     sql.column("name")]).select_from(sql.table("test"))
    out = read_sql_table(s1, db, npartitions=2, index_col="number")
    assert_eq(out, df[["name"]])
Code Example #13
def test_meta(db):
    data = read_sql_table(
        "test", db, index_col="number", meta=dd.from_pandas(df, npartitions=1)
    ).compute()
    assert (data.name == df.name).all()
    assert data.index.name == "number"
    assert_eq(data, df)
Code Example #14
def test_divisions(db):
    data = read_sql_table(
        "test", db, columns=["name"], divisions=[0, 2, 4], index_col="number"
    )
    assert data.divisions == (0, 2, 4)
    assert data.index.max().compute() == 4
    assert_eq(data, df[["name"]][df.index <= 4])
Code Example #15
File: test_sql.py Project: caseyclements/dask
def test_empty_other_schema():
    from sqlalchemy import create_engine, MetaData, Table, Column, Integer, event, DDL
    # Database configurations.
    pg_host = 'localhost'
    pg_port = '5432'
    pg_user = '******'
    pg_pass = '******'
    pg_db = 'db'
    db_url = 'postgresql://%s:%s@%s:%s/%s' % (pg_user, pg_pass, pg_host, pg_port, pg_db)

    # Create an empty table in a different schema.
    table_name = 'empty_table'
    schema_name = 'other_schema'
    engine = create_engine(db_url)
    metadata = MetaData()
    table = Table(table_name, metadata,
                  Column('id', Integer, primary_key=True),
                  Column('col2', Integer), schema=schema_name)
    # Create the schema and the table.
    event.listen(metadata, 'before_create', DDL("CREATE SCHEMA IF NOT EXISTS %s" % schema_name))
    metadata.create_all(engine)

    # Read the empty table from the other schema.
    dask_df = read_sql_table(table.name, db_url, index_col='id', schema=table.schema, npartitions=1)

    # Validate that the retrieved table is empty.
    assert dask_df.index.name == 'id'
    assert dask_df.col2.dtype == np.dtype('int64')
    pd_dataframe = dask_df.compute()
    assert pd_dataframe.empty is True

    # Drop the schema and the table.
    engine.execute("DROP SCHEMA IF EXISTS %s CASCADE" % schema_name)
Code Example #16
def test_needs_rational(db):
    import datetime

    now = datetime.datetime.now()
    d = datetime.timedelta(seconds=1)
    df = pd.DataFrame(
        {
            "a": list("ghjkl"),
            "b": [now + i * d for i in range(5)],
            "c": [True, True, False, True, True],
        }
    )
    df = df.append(
        [
            {"a": "x", "b": now + d * 1000, "c": None},
            {"a": None, "b": now + d * 1001, "c": None},
        ]
    )
    with tmpfile() as f:
        uri = "sqlite:///%s" % f
        df.to_sql("test", uri, index=False, if_exists="replace")

        # one partition contains NULL
        data = read_sql_table("test", uri, npartitions=2, index_col="b")
        df2 = df.set_index("b")
        assert_eq(data, df2.astype({"c": bool}))  # bools are coerced

        # one partition contains NULL, but big enough head
        data = read_sql_table("test", uri, npartitions=2, index_col="b", head_rows=12)
        df2 = df.set_index("b")
        assert_eq(data, df2)

        # empty partitions
        data = read_sql_table("test", uri, npartitions=20, index_col="b")
        part = data.get_partition(12).compute()
        assert part.dtypes.tolist() == ["O", bool]
        assert part.empty
        df2 = df.set_index("b")
        assert_eq(data, df2.astype({"c": bool}))

        # explicit meta
        data = read_sql_table("test", uri, npartitions=2, index_col="b", meta=df2[:0])
        part = data.get_partition(1).compute()
        assert part.dtypes.tolist() == ["O", "O"]
        df2 = df.set_index("b")
        assert_eq(data, df2)
Code Example #17
def test_range(db):
    data = read_sql_table('test',
                          db,
                          npartitions=2,
                          index_col='number',
                          limits=[1, 4])
    assert data.index.min().compute() == 1
    assert data.index.max().compute() == 4
Code Example #18
File: test_sql.py Project: djhoese/dask
def test_limits(db):
    data = read_sql_table("test",
                          db,
                          npartitions=2,
                          index_col="number",
                          limits=[1, 4])
    assert data.index.min().compute() == 1
    assert data.index.max().compute() == 4
Code Example #19
def test_npartitions(db):
    data = read_sql_table("test",
                          db,
                          columns=list(df.columns),
                          npartitions=2,
                          index_col="number")
    assert len(data.divisions) == 3
    assert (data.name.compute() == df.name).all()
    data = read_sql_table("test",
                          db,
                          columns=["name"],
                          npartitions=6,
                          index_col="number")
    assert_eq(data, df[["name"]])
    data = read_sql_table(
        "test",
        db,
        columns=list(df.columns),
        bytes_per_chunk="2 GiB",
        index_col="number",
    )
    assert data.npartitions == 1
    assert (data.name.compute() == df.name).all()

    data_1 = read_sql_table(
        "test",
        db,
        columns=list(df.columns),
        bytes_per_chunk=2**30,
        index_col="number",
        head_rows=1,
    )
    assert data_1.npartitions == 1
    assert (data_1.name.compute() == df.name).all()

    data = read_sql_table(
        "test",
        db,
        columns=list(df.columns),
        bytes_per_chunk=250,
        index_col="number",
        head_rows=1,
    )
    assert data.npartitions == 2
Code Example #20
File: test_sql.py Project: mrocklin/dask
def test_with_func(db):
    from sqlalchemy import sql
    index = sql.func.abs(sql.column('negish')).label('abs')

    # function for the index, get all columns
    data = read_sql_table('test', db, npartitions=2, index_col=index)
    assert data.divisions[0] == 0
    part = data.get_partition(0).compute()
    assert (part.index == 0).all()

    # now an arith op for one column too; its name will be 'age'
    data = read_sql_table('test', db, npartitions=2, index_col=index,
                          columns=[index, -sql.column('age')])
    assert (data.age.compute() < 0).all()

    # a column that would have no name, give it a label
    index = (-sql.column('negish')).label('index')
    data = read_sql_table('test', db, npartitions=2, index_col=index,
                          columns=['negish', 'age'])
    d = data.compute()
    assert (-d.index == d['negish']).all()
Code Example #21
def test_read_sql(db):
    from sqlalchemy import sql

    s = sql.select([sql.column("number"), sql.column("name")]).select_from(
        sql.table("test")
    )
    out = read_sql(s, db, npartitions=2, index_col="number")
    assert_eq(out, df[["name"]])

    data = read_sql_table("test", db, npartitions=2, index_col="number").compute()
    assert (data.name == df.name).all()
    assert data.index.name == "number"
    assert_eq(data, df)
Code Example #22
File: test_sql.py Project: mrocklin/dask
def test_needs_rational(db):
    import datetime
    now = datetime.datetime.now()
    d = datetime.timedelta(seconds=1)
    df = pd.DataFrame({'a': list('ghjkl'), 'b': [now + i * d for i in range(5)],
                       'c': [True, True, False, True, True]})
    df = df.append([{'a': 'x', 'b': now + d * 1000, 'c': None},
                    {'a': None, 'b': now + d * 1001, 'c': None}])
    with tmpfile() as f:
        uri = 'sqlite:///%s' % f
        df.to_sql('test', uri, index=False, if_exists='replace')

        # one partition contains NULL
        data = read_sql_table('test', uri, npartitions=2, index_col='b')
        df2 = df.set_index('b')
        assert_eq(data, df2.astype({'c': bool}))  # bools are coerced

        # one partition contains NULL, but big enough head
        data = read_sql_table('test', uri, npartitions=2, index_col='b',
                              head_rows=12)
        df2 = df.set_index('b')
        assert_eq(data, df2)

        # empty partitions
        data = read_sql_table('test', uri, npartitions=20, index_col='b')
        part = data.get_partition(12).compute()
        assert part.dtypes.tolist() == ['O', bool]
        assert part.empty
        df2 = df.set_index('b')
        assert_eq(data, df2.astype({'c': bool}))

        # explicit meta
        data = read_sql_table('test', uri, npartitions=2, index_col='b',
                              meta=df2[:0])
        part = data.get_partition(1).compute()
        assert part.dtypes.tolist() == ['O', 'O']
        df2 = df.set_index('b')
        assert_eq(data, df2)
Code Example #23
File: test_sql.py Project: mrocklin/dask
def test_datetimes():
    import datetime
    now = datetime.datetime.now()
    d = datetime.timedelta(seconds=1)
    df = pd.DataFrame({'a': list('ghjkl'), 'b': [now + i * d
                                                 for i in range(2, -3, -1)]})
    with tmpfile() as f:
        uri = 'sqlite:///%s' % f
        df.to_sql('test', uri, index=False, if_exists='replace')
        data = read_sql_table('test', uri, npartitions=2, index_col='b')
        assert data.index.dtype.kind == "M"
        assert data.divisions[0] == df.b.min()
        df2 = df.set_index('b')
        assert_eq(data.map_partitions(lambda x: x.sort_index()),
                  df2.sort_index())
Code Example #24
def test_datetimes():
    import datetime

    now = datetime.datetime.now()
    d = datetime.timedelta(seconds=1)
    df = pd.DataFrame(
        {"a": list("ghjkl"), "b": [now + i * d for i in range(2, -3, -1)]}
    )
    with tmpfile() as f:
        uri = "sqlite:///%s" % f
        df.to_sql("test", uri, index=False, if_exists="replace")
        data = read_sql_table("test", uri, npartitions=2, index_col="b")
        assert data.index.dtype.kind == "M"
        assert data.divisions[0] == df.b.min()
        df2 = df.set_index("b")
        assert_eq(data.map_partitions(lambda x: x.sort_index()), df2.sort_index())
Code Example #25
File: test_sql.py Project: mrocklin/dask
def test_empty(db):
    from sqlalchemy import create_engine, MetaData, Table, Column, Integer
    with tmpfile() as f:
        uri = 'sqlite:///%s' % f
        metadata = MetaData()
        engine = create_engine(uri)
        table = Table('empty_table', metadata,
                      Column('id', Integer, primary_key=True),
                      Column('col2', Integer))
        metadata.create_all(engine)

        dask_df = read_sql_table(table.name, uri, index_col='id', npartitions=1)
        assert dask_df.index.name == 'id'
        assert dask_df.col2.dtype == np.dtype('int64')
        pd_dataframe = dask_df.compute()
        assert pd_dataframe.empty is True
Code Example #26
def test_empty_other_schema():
    from sqlalchemy import DDL, Column, Integer, MetaData, Table, create_engine, event

    # Database configurations.
    pg_host = "localhost"
    pg_port = "5432"
    pg_user = "******"
    pg_pass = "******"
    pg_db = "db"
    db_url = "postgresql://%s:%s@%s:%s/%s" % (pg_user, pg_pass, pg_host,
                                              pg_port, pg_db)

    # Create an empty table in a different schema.
    table_name = "empty_table"
    schema_name = "other_schema"
    engine = create_engine(db_url)
    metadata = MetaData()
    table = Table(
        table_name,
        metadata,
        Column("id", Integer, primary_key=True),
        Column("col2", Integer),
        schema=schema_name,
    )
    # Create the schema and the table.
    event.listen(metadata, "before_create",
                 DDL("CREATE SCHEMA IF NOT EXISTS %s" % schema_name))
    metadata.create_all(engine)

    # Read the empty table from the other schema.
    dask_df = read_sql_table(table.name,
                             db_url,
                             index_col="id",
                             schema=table.schema,
                             npartitions=1)

    # Validate that the retrieved table is empty.
    assert dask_df.index.name == "id"
    assert dask_df.col2.dtype == np.dtype("int64")
    pd_dataframe = dask_df.compute()
    assert pd_dataframe.empty is True

    # Drop the schema and the table.
    engine.execute("DROP SCHEMA IF EXISTS %s CASCADE" % schema_name)
Code Example #27
def test_empty(db):
    from sqlalchemy import Column, Integer, MetaData, Table, create_engine

    with tmpfile() as f:
        uri = "sqlite:///%s" % f
        metadata = MetaData()
        engine = create_engine(uri)
        table = Table(
            "empty_table",
            metadata,
            Column("id", Integer, primary_key=True),
            Column("col2", Integer),
        )
        metadata.create_all(engine)

        dask_df = read_sql_table(table.name, uri, index_col="id", npartitions=1)
        assert dask_df.index.name == "id"
        # The dtype of the empty result might no longer be as expected
        # assert dask_df.col2.dtype == np.dtype("int64")
        pd_dataframe = dask_df.compute()
        assert pd_dataframe.empty is True
Code Example #28
def test_no_meta_no_head_rows(db):
    with pytest.raises(ValueError):
        read_sql_table("test", db, index_col="number", head_rows=0, npartitions=1)
Code Example #29
File: test_sql.py Project: mrocklin/dask
def test_divisions(db):
    data = read_sql_table('test', db, columns=['name'], divisions=[0, 2, 4],
                          index_col="number")
    assert data.divisions == (0, 2, 4)
    assert data.index.max().compute() == 4
    assert_eq(data, df[['name']][df.index <= 4])
Code Example #30
File: test_sql.py Project: mrocklin/dask
def test_range(db):
    data = read_sql_table('test', db, npartitions=2, index_col='number',
                          limits=[1, 4])
    assert data.index.min().compute() == 1
    assert data.index.max().compute() == 4
Code Example #31
def test_to_sql(npartitions, parallel):
    df_by_age = df.set_index("age")
    df_appended = pd.concat([df, df,])

    ddf = dd.from_pandas(df, npartitions)
    ddf_by_age = ddf.set_index("age")

    # Simple round trip test: use existing "number" index_col
    with tmp_db_uri() as uri:
        ddf.to_sql("test", uri, parallel=parallel)
        result = read_sql_table("test", uri, "number")
        assert_eq(df, result)

    # Test writing no index, and reading back in with one of the other columns as index (`read_sql_table` requires
    # an index_col)
    with tmp_db_uri() as uri:
        ddf.to_sql("test", uri, parallel=parallel, index=False)

        result = read_sql_table("test", uri, "negish")
        assert_eq(df.set_index("negish"), result)

        result = read_sql_table("test", uri, "age")
        assert_eq(df_by_age, result)

    # Index by "age" instead
    with tmp_db_uri() as uri:
        ddf_by_age.to_sql("test", uri, parallel=parallel)
        result = read_sql_table("test", uri, "age")
        assert_eq(df_by_age, result)

    # Index column can't have "object" dtype if no partitions are provided
    with tmp_db_uri() as uri:
        ddf.set_index("name").to_sql("test", uri)
        with pytest.raises(
            TypeError,
            match='Provided index column is of type "object".  If divisions is not provided the index column type must be numeric or datetime.',  # noqa: E501
        ):
            read_sql_table("test", uri, "name")

    # Test various "if_exists" values
    with tmp_db_uri() as uri:
        ddf.to_sql("test", uri)

        # Writing a table that already exists fails
        with pytest.raises(ValueError, match="Table 'test' already exists"):
            ddf.to_sql("test", uri)

        ddf.to_sql("test", uri, parallel=parallel, if_exists="append")
        result = read_sql_table("test", uri, "number")

        assert_eq(df_appended, result)

        ddf_by_age.to_sql("test", uri, parallel=parallel, if_exists="replace")
        result = read_sql_table("test", uri, "age")
        assert_eq(df_by_age, result)

    # Verify number of partitions returned, when compute=False
    with tmp_db_uri() as uri:
        result = ddf.to_sql("test", uri, parallel=parallel, compute=False)

        # the first result is from the "meta" insert
        actual = len(result.compute())

        assert actual == npartitions
Code Example #32
def test_no_character_index_without_divisions(db):

    # attempt to read the sql table with a character index and no divisions
    with pytest.raises(TypeError):
        read_sql_table("test", db, npartitions=2, index_col="name", divisions=None)
Code Example #33
File: test_sql.py Project: mrocklin/dask
def test_select_from_select(db):
    from sqlalchemy import sql
    s1 = sql.select([sql.column('number'), sql.column('name')]
                    ).select_from(sql.table('test'))
    out = read_sql_table(s1, db, npartitions=2, index_col='number')
    assert_eq(out, df[['name']])
Code Example #34
File: test_sql.py Project: caseyclements/dask
def test_no_character_index_without_divisions(db):

    # attempt to read the sql table with a character index and no divisions
    with pytest.raises(TypeError):
        read_sql_table('test', db, npartitions=2, index_col='name', divisions=None)