Ejemplo n.º 1
0
    def apply(self, func, **kwargs):
        """Append ``func`` to the pending calls and submit the whole queue.

        Note: The keyword arguments are stored next to ``func`` in the queue
            entry; how they are consumed is up to ``apply_list_of_funcs``.

        Args:
            func: The callable to apply to this partition's data.
            **kwargs: Keyword arguments recorded together with ``func``.

        Returns:
            A new `PandasOnCloudburstFramePartition` wrapping whatever the
            registered function returns when invoked with the extended queue
            and this partition's future.
        """
        global client
        # The module-level client is created on first use.
        if __execution_engine__ == "Cloudburst" and client is None:
            from modin.engines.cloudburst.utils import get_or_init_client
            client = get_or_init_client()

        # Build a new list so this partition's own queue is left untouched.
        newest_entry = [func, kwargs]
        pending_calls = self.call_queue + [newest_entry]

        # The first positional argument passed to the registered callable is
        # ignored (presumably the Cloudburst user-library handle -- confirm).
        run_queue = client.register(
            lambda _, queued, data: apply_list_of_funcs(queued, data),
            "apply_list_of_funcs",
        )
        result_future = run_queue(pending_calls, self.future)
        return PandasOnCloudburstFramePartition(result_future)
Ejemplo n.º 2
0
    def deploy(cls, func, num_return_vals, kwargs):
        """Run ``func`` through the Cloudburst client and split its result.

        Args:
            func: The callable to run. It is registered under its own
                ``__name__`` and invoked as ``func(**kwargs)``.
            num_return_vals: Number of indexed pieces to extract from the
                result of ``func``.
            kwargs: Dictionary of keyword arguments forwarded to ``func``.

        Returns:
            A list with ``num_return_vals`` entries; entry ``i`` is whatever
            the registered "unpack" function returns for index ``i`` of the
            result of ``func``.
        """
        global cloudburst
        # The module-level client is created on first use.
        if __execution_engine__ == "Cloudburst" and cloudburst is None:
            from modin.engines.cloudburst.utils import get_or_init_client
            cloudburst = get_or_init_client()

        # The first positional argument passed to a registered callable is
        # ignored (presumably the Cloudburst user-library handle -- confirm).
        f = cloudburst.register(lambda _, kwargs: func(**kwargs),
                                func.__name__)
        future_obj = f(kwargs)

        # Each piece is selected by a separately registered function that
        # indexes into the result of ``func``.
        unpack = cloudburst.register(lambda _, l, i: l[i], "unpack")
        return [unpack(future_obj, i) for i in range(num_return_vals)]
Ejemplo n.º 3
0
    def __init__(self, future, length=None, width=None, call_queue=None):
        """Wrap ``future`` in a partition object.

        Args:
            future: Handle to this partition's data. It is stored unchanged
                on ``self.future``.
            length: Value stored in the length cache (``None`` if unknown).
            width: Value stored in the width cache (``None`` if unknown).
            call_queue: List of pending calls; a fresh empty list is used
                when omitted, so instances never share a default queue.
        """
        # NOTE: a reference-type ``future`` is deliberately kept as a
        # reference and not dereferenced here. Fetching the object at
        # construction time would block on a read whose result this
        # constructor has no use for.
        self.future = future
        self.call_queue = [] if call_queue is None else call_queue
        self._length_cache = length
        self._width_cache = width
Ejemplo n.º 4
0
    def get(self):
        """Drain the pending calls, then return this partition's data.

        Returns:
            ``self.future`` itself when it is already a ``pandas.DataFrame``;
            the object fetched from the Cloudburst client by key when it is a
            ``CloudburstReference``; otherwise the result of calling
            ``self.future.get()``.
        """
        self.drain_call_queue()

        data = self.future
        # Already-materialized data needs no further work.
        if isinstance(data, pandas.DataFrame):
            return data

        if not isinstance(data, CloudburstReference):
            # blocking operation
            return data.get()

        # A reference is looked up by its key through the client.
        from modin.engines.cloudburst.utils import get_or_init_client
        return get_or_init_client().get_object(data.key)
Ejemplo n.º 5
0
    def deploy_axis_func(cls, axis, func, num_splits, kwargs,
                         maintain_partitioning, *partitions):
        """Run ``PandasFrameAxisPartition.deploy_axis_func`` via Cloudburst.

        All arguments are forwarded, in order, to
        ``PandasFrameAxisPartition.deploy_axis_func`` through a function
        registered with the Cloudburst client.

        Args:
            axis: Forwarded unchanged.
            func: Forwarded unchanged.
            num_splits: Forwarded unchanged; also the number of pieces
                extracted from the result.
            kwargs: Forwarded unchanged.
            maintain_partitioning: Forwarded unchanged.
            *partitions: Forwarded unchanged, after the arguments above.

        Returns:
            The result of the registered call itself when ``num_splits`` is
            1; otherwise a list of ``num_splits`` entries, each obtained by
            calling the registered "unpack" function for one index.
        """
        from modin.engines.cloudburst.utils import get_or_init_client
        cb_client = get_or_init_client()

        # The first positional argument passed to a registered callable is
        # ignored (presumably the Cloudburst user-library handle -- confirm).
        remote_axis_func = cb_client.register(
            lambda _, *call_args: PandasFrameAxisPartition.deploy_axis_func(
                *call_args),
            "deploy_axis_func",
        )
        whole_result = remote_axis_func(
            axis, func, num_splits, kwargs, maintain_partitioning, *partitions
        )

        # A single split is returned as-is, without registering "unpack".
        if num_splits == 1:
            return whole_result

        pick = cb_client.register(lambda _, seq, idx: seq[idx], "unpack")
        return [pick(whole_result, idx) for idx in range(num_splits)]
Ejemplo n.º 6
0
    def deploy_func_between_two_axis_partitions(cls, axis, func, num_splits,
                                                len_of_left, kwargs,
                                                *partitions):
        """Run the two-axis-partition deploy function via Cloudburst.

        All arguments are forwarded, in order, to
        ``PandasFrameAxisPartition.deploy_func_between_two_axis_partitions``
        through a function registered with the Cloudburst client.

        Args:
            axis: Forwarded unchanged.
            func: Forwarded unchanged.
            num_splits: Forwarded unchanged; also the number of pieces
                extracted from the result.
            len_of_left: Forwarded unchanged.
            kwargs: Forwarded unchanged.
            *partitions: Forwarded unchanged, after the arguments above.

        Returns:
            The result of the registered call itself when ``num_splits`` is
            1; otherwise a list of ``num_splits`` entries, each obtained by
            calling the registered "unpack" function for one index.
        """
        from modin.engines.cloudburst.utils import get_or_init_client
        cb_client = get_or_init_client()

        # The first positional argument passed to a registered callable is
        # ignored (presumably the Cloudburst user-library handle -- confirm).
        remote_call = cb_client.register(
            lambda _, *call_args: (
                PandasFrameAxisPartition
                .deploy_func_between_two_axis_partitions(*call_args)
            ),
            "deploy_between_two_axix_partitions",
        )
        whole_result = remote_call(
            axis, func, num_splits, len_of_left, kwargs, *partitions
        )

        # A single split is returned as-is, without registering "unpack".
        if num_splits == 1:
            return whole_result

        pick = cb_client.register(lambda _, seq, idx: seq[idx], "unpack")
        return [pick(whole_result, idx) for idx in range(num_splits)]
Ejemplo n.º 7
0
    def put(cls, obj):
        """Store ``obj`` through the Cloudburst client and wrap a reference.

        Args:
            obj: The object to store.

        Returns:
            An instance of ``cls`` built around a ``CloudburstReference``
            whose key is a freshly generated UUID4 string.
        """
        global client
        # The module-level client is created on first use.
        if __execution_engine__ == "Cloudburst" and client is None:
            from modin.engines.cloudburst.utils import get_or_init_client
            client = get_or_init_client()

        from uuid import uuid4

        # A random UUID serves as the storage key for the object.
        storage_key = str(uuid4())

        # TODO: confirm whether put_object itself returns a reference
        client.put_object(storage_key, obj)
        reference = CloudburstReference(storage_key, deserialize=True)
        return cls(reference)