Example #1
0
def test_engine_switch():
    """Check that the dispatcher follows changes of ``Engine`` and ``Backend``.

    Each setting is switched to ``"Test"`` in turn and the factory returned
    by ``EngineDispatcher.get_engine()`` is compared with the expected stub
    factory and its ``io_cls`` marker.  The setting is restored in a
    ``finally`` clause so that a failing assertion does not leak the
    ``"Test"`` value into tests that run afterwards.
    """
    Engine.put("Test")
    try:
        assert EngineDispatcher.get_engine() == PandasOnTestFactory
        assert EngineDispatcher.get_engine().io_cls == "Foo"
    finally:
        Engine.put("Python")  # revert engine to default

    Backend.put("Test")
    try:
        assert EngineDispatcher.get_engine() == TestOnPythonFactory
        assert EngineDispatcher.get_engine().io_cls == "Bar"
    finally:
        Backend.put("Pandas")  # revert backend to default
Example #2
0
def test_engine_switch():
    """Check that the dispatcher follows ``execution_engine`` / ``partition_format``.

    Each publisher is switched to ``"Test"`` in turn and the factory returned
    by ``EngineDispatcher.get_engine()`` is compared with the expected stub
    factory and its ``io_cls`` marker.  The previous value is restored in a
    ``finally`` clause so that a failing assertion does not leave the
    ``"Test"`` value in place for tests that run afterwards.
    """
    execution_engine.put("Test")
    try:
        assert EngineDispatcher.get_engine() == PandasOnTestFactory
        assert EngineDispatcher.get_engine().io_cls == "Foo"
    finally:
        execution_engine.put("Python")  # revert engine to default

    partition_format.put("Test")
    try:
        assert EngineDispatcher.get_engine() == TestOnPythonFactory
        assert EngineDispatcher.get_engine().io_cls == "Bar"
    finally:
        partition_format.put("Pandas")  # revert partition format to default
Example #3
0
def from_partitions(partitions, axis):
    """
    Build a DataFrame out of already materialized remote partitions.

    Parameters
    ----------
    partitions : list
        Either a list of Ray.ObjectRef/Dask.Future objects pointing to partitions
        (which one depends on the engine in use), or a list of 2-tuples
        ``(ip, partition)`` where both items are Ray.ObjectRef/Dask.Future objects,
        the first one referring to the node ip address and the second one to the partition.
        For ``axis=None`` a 2D (nested) list of such items is expected.
    axis : None, 0 or 1
        Tells how the passed ``partitions`` have to be interpreted:

        * ``axis=0`` - ``partitions`` are row partitions
        * ``axis=1`` - ``partitions`` are column partitions
        * ``axis=None`` - ``partitions`` is a 2D list of partitions

    Returns
    -------
    DataFrame
        DataFrame instance built on top of the given remote partitions.

    Raises
    ------
    ValueError
        If ``axis`` is not one of ``None``, ``0`` or ``1``.

    Notes
    -----
    Whether the items are ``(ip, partition)`` tuples is decided by inspecting
    the very first item only; all other items are assumed to have the same form.
    """
    from modin.data_management.factories.dispatcher import EngineDispatcher

    frame_cls = EngineDispatcher.get_engine().io_cls.frame_cls
    mgr_cls = frame_cls._frame_mgr_cls
    part_cls = mgr_cls._partition_class

    def _wrap_plain(item):
        # The item is the partition reference itself.
        return part_cls(item)

    def _wrap_with_ip(item):
        # The item is an ``(ip, partition)`` pair.
        ip, partition = item
        return part_cls(partition, ip=ip)

    def _pick_wrapper(sample):
        # The first item determines the form of all the items.
        return _wrap_with_ip if isinstance(sample, tuple) else _wrap_plain

    # Modin Frame keeps its partitions in a 2D NumPy array, so whatever was passed
    # has to be laid out as a 2D grid of partition objects first.
    if axis is None:
        # 2D list -> 2D array of the same layout.
        wrap = _pick_wrapper(partitions[0][0])
        grid = [[wrap(item) for item in row] for row in partitions]
    elif axis == 0:
        # Row partitions -> every partition occupies its own row of the array.
        wrap = _pick_wrapper(partitions[0])
        grid = [[wrap(item)] for item in partitions]
    elif axis == 1:
        # Column partitions -> all partitions are placed side by side in a single row.
        wrap = _pick_wrapper(partitions[0])
        grid = [[wrap(item) for item in partitions]]
    else:
        raise ValueError(
            f"Got unacceptable value of axis {axis}. Possible values are {0}, {1} or {None}."
        )
    parts = np.array(grid)

    # Labels of both axes are collected from the partitions themselves.
    row_labels = mgr_cls.get_indices(0, parts, lambda df: df.axes[0])
    col_labels = mgr_cls.get_indices(1, parts, lambda df: df.axes[1])
    modin_frame = frame_cls(parts, row_labels, col_labels)
    return DataFrame(query_compiler=PandasQueryCompiler(modin_frame))
Example #4
0
def test_set_backends():
    """Check that ``set_backends("Bar", "Foo")`` makes the dispatcher return ``FooOnBarFactory``."""
    set_backends("Bar", "Foo")
    selected_factory = EngineDispatcher.get_engine()
    assert selected_factory == FooOnBarFactory
Example #5
0
def test_default_engine():
    """Check that the default factory derives from ``BaseFactory`` and has a truthy ``io_cls``."""
    default_factory = EngineDispatcher.get_engine()
    assert issubclass(default_factory, factories.BaseFactory)
    io_class = EngineDispatcher.get_engine().io_cls
    assert io_class