Example #1
def _update_engine(cls, _):
    # Re-resolve the factory whenever the configured backend or engine changes.
    if os.environ.get("MODIN_EXPERIMENTAL", "").title() == "True":
        factory_fmt, experimental = "Experimental{}On{}Factory", True
    else:
        factory_fmt, experimental = "{}On{}Factory", False
    factory_name = factory_fmt.format(
        partition_format.get(), execution_engine.get()
    )
    try:
        cls.__engine = getattr(factories, factory_name)
    except AttributeError:
        if not experimental:
            # missing factories are tolerated in experimental mode only
            if hasattr(factories, "Experimental" + factory_name):
                msg = (
                    "{0} on {1} is only accessible through the experimental API.\nRun "
                    "`import modin.experimental.pandas as pd` to use {0} on {1}."
                )
            else:
                msg = (
                    "Cannot find a factory for partition '{}' and execution engine '{}'. "
                    "Potential reason might be incorrect environment variable value for "
                    "MODIN_BACKEND or MODIN_ENGINE"
                )
            raise FactoryNotFoundError(
                msg.format(partition_format.get(), execution_engine.get())
            )
        cls.__engine = StubFactory.set_failing_name(factory_name)
    else:
        cls.__engine.prepare()
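
For reference, a minimal standalone sketch (not taken from the Modin source) of how the factory name above is assembled; the values below are assumptions standing in for whatever partition_format.get() and execution_engine.get() return:

# Hypothetical stand-ins for partition_format.get() / execution_engine.get().
backend, engine = "Pandas", "Ray"

factory_fmt = "{}On{}Factory"                        # non-experimental mode
print(factory_fmt.format(backend, engine))           # -> PandasOnRayFactory

experimental_fmt = "Experimental{}On{}Factory"       # MODIN_EXPERIMENTAL=True
print(experimental_fmt.format(backend, "Cloudray"))  # -> ExperimentalPandasOnCloudrayFactory
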
Example #2
def _read_sql(cls, **kwargs):
    if execution_engine.get() != "Ray":
        # These keyword arguments are only understood by the distributed (Ray)
        # implementation: warn if any was actually supplied, then drop them all
        # before delegating to the underlying IO class.
        for arg in ("partition_column", "lower_bound", "upper_bound", "max_sessions"):
            if arg in kwargs:
                if kwargs[arg] is not None:
                    warnings.warn(
                        "Distributed read_sql() was only implemented for Ray engine."
                    )
                del kwargs[arg]
    return cls.io_cls.read_sql(**kwargs)
Example #3
        def __update_engine(self, _):
            if execution_engine.get() in REMOTE_ENGINES:
                from modin.experimental.cloud import get_connection

                self.__swap_numpy(get_connection().modules["numpy"])
            else:
                self.__swap_numpy()
Example #4
    def update_class(_):
        if execution_engine.get() in REMOTE_ENGINES:
            from . import rpyc_proxy

            result.__real_cls__ = getattr(rpyc_proxy,
                                          rpyc_wrapper_name)(result)
        else:
            result.__real_cls__ = result
Example #5
    def update_class(_):
        if execution_engine.get() == "Cloudray":
            from . import rpyc_proxy

            result.__real_cls__ = getattr(rpyc_proxy,
                                          rpyc_wrapper_name)(result)
        else:
            result.__real_cls__ = result
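
Both update_class variants above pick the "real" class via a dynamic getattr lookup. A minimal standalone sketch of the same dispatch pattern, with no Modin or rpyc involved (the wrappers namespace and make_dataframe_wrapper name are made up for illustration):

import types

# Hypothetical stand-in for the rpyc_proxy module: wrapper factories are
# looked up by name at runtime, exactly like getattr(rpyc_proxy, ...) above.
wrappers = types.SimpleNamespace(
    make_dataframe_wrapper=lambda cls: type("Remote" + cls.__name__, (cls,), {})
)

class DataFrame:
    __real_cls__ = None

def update_class(engine, rpyc_wrapper_name="make_dataframe_wrapper"):
    # The "real" class is either the local class itself or a dynamically
    # looked-up remote wrapper around it, depending on the configured engine.
    if engine == "Cloudray":
        DataFrame.__real_cls__ = getattr(wrappers, rpyc_wrapper_name)(DataFrame)
    else:
        DataFrame.__real_cls__ = DataFrame

update_class("Cloudray")
print(DataFrame.__real_cls__)  # e.g. <class '__main__.RemoteDataFrame'>
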
Example #6
    pandas_df = pandas.read_table(Path(TEST_CSV_FILENAME))
    modin_df = pd.read_table(Path(TEST_CSV_FILENAME))

    df_equals(modin_df, pandas_df)


@pytest.mark.parametrize("usecols", [["a"], ["a", "b", "e"], [0, 1, 4]])
def test_from_csv_with_usecols(usecols):
    fname = "modin/pandas/test/data/test_usecols.csv"
    pandas_df = pandas.read_csv(fname, usecols=usecols)
    modin_df = pd.read_csv(fname, usecols=usecols)
    df_equals(modin_df, pandas_df)


@pytest.mark.skipif(execution_engine.get().lower() == "python",
                    reason="Using pandas implementation")
def test_from_csv_s3(make_csv_file):
    dataset_url = "s3://noaa-ghcn-pds/csv/1788.csv"
    pandas_df = pandas.read_csv(dataset_url)

    # This first load is to trigger all the import deprecation warnings
    modin_df = pd.read_csv(dataset_url)

    # This will warn if it defaults to pandas behavior, but it shouldn't
    with pytest.warns(None) as record:
        modin_df = pd.read_csv(dataset_url)

    assert not any("defaulting to pandas implementation" in str(err)
                   for err in record.list)