Beispiel #1
0
    def _update_factory(cls, _):
        """
        Select the factory matching the current Modin config and prepare it.

        The factory is looked up in ``factories`` under the name
        ``get_current_execution() + "Factory"``.

        Parameters
        ----------
        _ : object
            Unused; present for compatibility purposes only.
            Does not affect the result.

        Raises
        ------
        FactoryNotFoundError
            If the lookup fails and either the requested factory is
            ``ExperimentalOmnisciOnRayFactory`` or experimental mode is off.
        """
        name = get_current_execution() + "Factory"
        try:
            cls.__factory = getattr(factories, name)
        except AttributeError:
            # This particular combination gets its own dedicated hint.
            if name == "ExperimentalOmnisciOnRayFactory":
                raise FactoryNotFoundError(
                    "OmniSci storage format no longer needs Ray engine; "
                    "please specify MODIN_ENGINE='native'"
                )

            if IsExperimental.get():
                # Missing factories are tolerated in experimental mode only:
                # install the object returned by StubFactory instead of failing.
                cls.__factory = StubFactory.set_failing_name(name)
                return

            if hasattr(factories, "Experimental" + name):
                # The factory exists, but only under the experimental prefix.
                template = (
                    "{0} on {1} is only accessible through the experimental API.\nRun "
                    "`import modin.experimental.pandas as pd` to use {0} on {1}."
                )
            else:
                template = (
                    "Cannot find a factory for partition '{}' and execution engine '{}'. "
                    "Potential reason might be incorrect environment variable value for "
                    f"{StorageFormat.varname} or {Engine.varname}"
                )
            raise FactoryNotFoundError(
                template.format(StorageFormat.get(), Engine.get())
            )

        # Reached only when the lookup succeeded.
        cls.__factory.prepare()
Beispiel #2
0
def pytest_runtest_call(item):
    """
    Turn ``xfail_executions`` / ``skip_executions`` markers into real markers.

    For every ``<kind>_executions`` marker found on `item`, a ``pytest.mark.<kind>``
    marker is added whose condition is whether the current execution is listed
    in the marker's first positional argument.

    Parameters
    ----------
    item : pytest item
        The test item about to be called; it is modified in place via
        ``item.add_marker``.
    """
    custom_markers = ["xfail", "skip"]

    # dynamically adding custom markers to tests
    for custom_marker in custom_markers:
        for marker in item.iter_markers(name=f"{custom_marker}_executions"):
            executions = marker.args[0]
            if not isinstance(executions, list):
                # A single execution name is accepted as shorthand for a list.
                executions = [executions]

            current_execution = get_current_execution()

            # Pop from a copy: the same marker object can be yielded for
            # several items (e.g. parametrized cases) or on a re-run, so
            # mutating ``marker.kwargs`` would drop the custom reason for
            # every invocation after the first one.
            extra_kwargs = dict(marker.kwargs)
            reason = extra_kwargs.pop("reason", "")

            item.add_marker(
                getattr(pytest.mark, custom_marker)(
                    condition=current_execution in executions,
                    reason=f"Execution {current_execution} does not pass this test. {reason}",
                    **extra_kwargs,
                )
            )
Beispiel #3
0
def test_copy(data):
    """
    Check default and shallow ``DataFrame.copy`` on a Modin frame.

    Parameters
    ----------
    data : object
        Input used to construct the frames under test.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)  # noqa F841

    # pandas_df is unused but there so there won't be confusing list comprehension
    # stuff in the pytest.mark.parametrize
    new_modin_df = modin_df.copy()

    # The copy must be a distinct object ...
    assert new_modin_df is not modin_df
    # ... whose partitions compare equal to the source's. This check is not
    # applied on the BaseOnPython execution.
    if get_current_execution() != "BaseOnPython":
        assert np.array_equal(
            new_modin_df._query_compiler._modin_frame._partitions,
            modin_df._query_compiler._modin_frame._partitions,
        )
    df_equals(new_modin_df, modin_df)

    # Shallow copy tests
    modin_df = pd.DataFrame(data)
    modin_df_cp = modin_df.copy(deep=False)

    # A write to the source is expected to be visible through the shallow copy.
    modin_df[modin_df.columns[0]] = 0
    df_equals(modin_df, modin_df_cp)
Beispiel #4
0
        ) for i in range(10)
    ]
    pandas_df = pd.concat(pandas_dfs)

    # Indexes get messed up when concatenating, so we reset both.
    pandas_df = pandas_df.reset_index(drop=True)
    modin_df = modin_df.reset_index(drop=True)

    df_equals(modin_df, pandas_df)


# Pickle file name shared at module level; its consumer is not visible in this
# excerpt (presumably a default-to-pandas ``to_pickle`` test — confirm).
test_default_to_pickle_filename = "test_default_to_pickle.pkl"


@pytest.mark.skipif(
    get_current_execution() != "ExperimentalPandasOnRay",
    reason=f"Execution {get_current_execution()} isn't supported.",
)
@pytest.mark.parametrize(
    "storage_options",
    [{
        "anon": False
    }, {
        "anon": True
    }, {
        "key": "123",
        "secret": "123"
    }, None],
)
def test_read_multiple_csv_s3_storage_opts(storage_options):
    path = "s3://modin-datasets/testing/multiple_csv/"
Beispiel #5
0
    md_df.index = index
    md_df.columns = columns

    pd_df = md_df._to_pandas()

    for axis in [0, 1]:
        assert md_df.axes[axis].equals(
            pd_df.axes[axis]), f"Indices at axis {axis} are different!"
        assert md_df.axes[axis].equal_levels(
            pd_df.axes[axis]
        ), f"Levels of indices at axis {axis} are different!"


@pytest.mark.skipif(
    get_current_execution() != "BaseOnPython",
    reason="This test make sense only on BaseOnPython execution.",
)
@pytest.mark.parametrize(
    "func, regex",
    [
        (lambda df: df.mean(level=0), r"DataFrame\.mean"),
        (lambda df: df + df, r"DataFrame\.add"),
        (lambda df: df.index, r"DataFrame\.get_axis\(0\)"),
        (
            lambda df: df.drop(columns="col1").squeeze().repeat(2),
            r"Series\.repeat",
        ),
        (lambda df: df.groupby("col1").prod(), r"GroupBy\.prod"),
        (lambda df: df.rolling(1).count(), r"Rolling\.count"),
    ],
Beispiel #6
0
def test_append(data):
    """
    Compare ``DataFrame.append`` between Modin and pandas for several inputs.

    Covered inputs: a dict (with both ``ignore_index`` values), the last row,
    the last row as a plain list, and a frame / list of frames (with both
    ``verify_integrity`` values).

    Parameters
    ----------
    data : object
        Input used to construct both the Modin and the pandas frame.
    """
    modin_df = pd.DataFrame(data)
    pandas_df = pandas.DataFrame(data)

    def compare(operation, adjust_modin=None):
        # Apply `operation` to the pandas frame first. If pandas raises, Modin
        # must raise the same exception type; otherwise the results must match.
        try:
            expected = operation(pandas_df)
        except Exception as pandas_error:
            with pytest.raises(type(pandas_error)):
                operation(modin_df)
            return
        actual = operation(modin_df)
        if adjust_modin is not None:
            actual = adjust_modin(actual)
        df_equals(actual, expected)

    def sort_list_append_columns(result):
        # Pandas has bug where sort=False is ignored
        # (https://github.com/pandas-dev/pandas/issues/35092), but Modin
        # now does the right thing, so for now manually sort to workaround
        # this. Once the Pandas bug is fixed and Modin upgrades to that
        # Pandas release, this sort will cause the test to fail, and the
        # body of this helper should be reduced to returning `result`.
        if get_current_execution() != "BaseOnPython":
            assert list(result.columns) == list(modin_df.columns) + [0]
            result = result[[0] + sorted(modin_df.columns)]
        return result

    # Appending a dict, with and without index reset.
    data_to_append = {"append_a": 2, "append_b": 1000}
    for ignore in (True, False):
        compare(
            lambda df, ignore=ignore: df.append(data_to_append, ignore_index=ignore)
        )

    # Appending the frame's own last row.
    compare(lambda df: df.append(df.iloc[-1]))

    # Appending the last row converted to a plain list.
    compare(
        lambda df: df.append(list(df.iloc[-1])),
        adjust_modin=sort_list_append_columns,
    )

    # Appending frames, with and without integrity verification.
    for verify_integrity in (True, False):
        compare(
            lambda df, verify=verify_integrity: df.append(
                [df, df], verify_integrity=verify
            )
        )
        compare(
            lambda df, verify=verify_integrity: df.append(
                df, verify_integrity=verify
            )
        )