Beispiel #1
0
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    """Compare partitioned ``pd.read_sql`` results against ``pandas.read_sql``.

    The body only runs when ``Engine.get()`` reports ``"Ray"``; on any other
    engine the function returns without checking anything.

    Both a SQL query string and a bare table name are read through ``pd``
    with the same partitioning arguments, and each result is compared to the
    frame pandas produces for the query.
    """
    if Engine.get() != "Ray":
        return

    table = "test_from_sql_distributed"
    conn = make_sql_connection("test_from_sql_distributed.db", table)
    query = "select * from {0}".format(table)

    expected = pandas.read_sql(query, conn)

    # Identical partitioning options are used for the query and table reads.
    partitioning = {
        "partition_column": "col1",
        "lower_bound": 0,
        "upper_bound": 6,
        "max_sessions": 2,
    }
    # Perform both reads before any comparison, query first and table second.
    results = [pd.read_sql(source, conn, **partitioning) for source in (query, table)]

    for actual in results:
        df_equals(actual, expected)
Beispiel #2
0
def test_from_sql_distributed():
    """Compare partitioned ``pd.read_sql`` results against ``pandas.read_sql``.

    The body only runs when the ``MODIN_ENGINE`` environment variable equals
    ``"Ray"``; otherwise the function returns without checking anything.

    A SQLite file is set up via ``setup_sql_file`` and read three ways: by
    pandas (the reference), and by ``pd.read_sql`` from both a query string
    and a bare table name with partitioning arguments.
    """
    if os.environ.get("MODIN_ENGINE", "") != "Ray":
        return

    filename = "test_from_sql_distributed.db"
    # Clear any file left behind by a previous run before creating a new one.
    teardown_sql_file(filename)
    table = "test_from_sql_distributed"
    db_uri = "sqlite:///" + filename

    # try/finally guarantees the database file is removed even when a read
    # raises or an assertion fails, so a failing run does not leak the file.
    try:
        setup_sql_file(db_uri, filename, table, True)
        query = "select * from {0}".format(table)

        pandas_df = pandas.read_sql(query, db_uri)
        modin_df_from_query = pd.read_sql(
            query,
            db_uri,
            partition_column="col1",
            lower_bound=0,
            upper_bound=6,
        )
        modin_df_from_table = pd.read_sql(
            table,
            db_uri,
            partition_column="col1",
            lower_bound=0,
            upper_bound=6,
        )

        assert modin_df_equals_pandas(modin_df_from_query, pandas_df)
        assert modin_df_equals_pandas(modin_df_from_table, pandas_df)
    finally:
        teardown_sql_file(filename)
Beispiel #3
0
def test_from_sql_defaults(make_sql_connection):  # noqa: F811
    """Check un-partitioned ``pd.read_sql`` against ``pandas.read_sql``.

    Each ``pd.read_sql`` call (query string, then table name) is expected to
    emit a ``UserWarning``; the resulting frames must equal the pandas frame
    obtained from the query.
    """
    table = "test_from_sql_distributed"
    conn = make_sql_connection("test_from_sql_distributed.db", table)
    query = "select * from {0}".format(table)

    expected = pandas.read_sql(query, conn)

    # Each read gets its own warns-context so both calls must warn.
    results = []
    for source in (query, table):
        with pytest.warns(UserWarning):
            results.append(pd.read_sql(source, conn))

    for actual in results:
        df_equals(actual, expected)
Beispiel #4
0
def test_from_sql_defaults():
    """Check un-partitioned ``pd.read_sql`` against ``pandas.read_sql``.

    A SQLite file is set up via ``setup_sql_file``. Each ``pd.read_sql`` call
    (query string, then table name) is expected to emit a ``UserWarning``,
    and both results must match the pandas frame obtained from the query.
    """
    filename = "test_from_sql_distributed.db"
    # Clear any file left behind by a previous run before creating a new one.
    teardown_sql_file(filename)
    table = "test_from_sql_distributed"
    db_uri = "sqlite:///" + filename

    # try/finally guarantees the database file is removed even when a read
    # raises, a warning is missing, or an assertion fails.
    try:
        setup_sql_file(db_uri, filename, table, True)
        query = "select * from {0}".format(table)

        pandas_df = pandas.read_sql(query, db_uri)
        with pytest.warns(UserWarning):
            modin_df_from_query = pd.read_sql(query, db_uri)
        with pytest.warns(UserWarning):
            modin_df_from_table = pd.read_sql(table, db_uri)

        assert modin_df_equals_pandas(modin_df_from_query, pandas_df)
        assert modin_df_equals_pandas(modin_df_from_table, pandas_df)
    finally:
        teardown_sql_file(filename)
Beispiel #5
0
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    """Partitioned ``pd.read_sql`` check, currently marked as expected-fail.

    On engines other than ``"Ray"`` the function returns immediately. On Ray
    it calls ``pytest.xfail`` before doing any work; the comparison code is
    kept below so the test can be re-enabled by removing that call.
    """
    if Engine.get() != "Ray":
        return

    # pytest.xfail() raises at this point, so the statements that follow are
    # intentionally not executed while the referenced issue is open.
    pytest.xfail("Distributed read_sql is broken, see GH#2194")

    table = "test_from_sql_distributed"
    conn = make_sql_connection("test_from_sql_distributed.db", table)
    query = "select * from {0}".format(table)

    expected = pandas.read_sql(query, conn)

    partitioning = {"partition_column": "col1", "lower_bound": 0, "upper_bound": 6}
    # Perform both reads before any comparison, query first and table second.
    results = [pd.read_sql(source, conn, **partitioning) for source in (query, table)]

    for actual in results:
        df_equals(actual, expected)
Beispiel #6
0
def test_from_sql_distributed(make_sql_connection):  # noqa: F811
    """Compare partitioned ``pd.read_sql`` results against ``pandas.read_sql``.

    The body only runs when the ``MODIN_ENGINE`` environment variable equals
    ``"Ray"``; otherwise the function returns without checking anything.

    A query string and a bare table name are both read through ``pd`` with
    the same partitioning arguments and compared with the pandas frame.
    """
    if os.environ.get("MODIN_ENGINE", "") != "Ray":
        return

    table = "test_from_sql_distributed"
    conn = make_sql_connection("test_from_sql_distributed.db", table)
    query = "select * from {0}".format(table)

    expected = pandas.read_sql(query, conn)

    partitioning = {"partition_column": "col1", "lower_bound": 0, "upper_bound": 6}
    # Perform both reads before any comparison, query first and table second.
    from_query, from_table = (
        pd.read_sql(query, conn, **partitioning),
        pd.read_sql(table, conn, **partitioning),
    )

    assert modin_df_equals_pandas(from_query, expected)
    assert modin_df_equals_pandas(from_table, expected)
Beispiel #7
0
if __name__ == "__main__":
    # Benchmark script: time a partitioned modin read_sql of one table.
    # Connection string and table name come from the environment; the
    # partition / CPU count comes from the <num> command-line argument.
    args = docopt(__doc__, version="1.0")
    partitions = int(args["<num>"])
    conn = os.environ["POSTGRES_URL"]
    table = os.environ["POSTGRES_TABLE"]

    # Ray is started explicitly, before modin is imported, with one CPU per
    # partition. (An alternative that also passed _plasma_directory="/tmp"
    # was previously tried here.)
    ray.init(num_cpus=partitions, object_store_memory=10**10)

    import modin.config as config
    import modin.experimental.pandas as pd

    config.NPartitions.put(partitions)

    date_columns = [
        "l_shipdate",
        "l_commitdate",
        "l_receiptdate",
    ]
    read_options = {
        "parse_dates": date_columns,
        "partition_column": "l_orderkey",
        "lower_bound": 0,
        "upper_bound": 60000000,
        "max_sessions": partitions,
    }

    with Timer() as timer:
        # The table name is passed rather than a query: the original author
        # notes a bug in modin's experimental read_sql when given a query.
        df = pd.read_sql(f"{table}", conn, **read_options)

    print(f"[Total] {timer.elapsed:.2f}s")

    print(df.head())