Beispiel #1
0
    def from_pandas(cls, df, return_dims=False):
        """
        Split a pandas DataFrame into a 2D grid of partitions.

        The frame is cut into blocks of ``row_chunksize`` rows by
        ``col_chunksize`` columns, where the chunk sizes come from
        ``compute_chunksize(df, NPartitions.get())``. A copy of every block
        is handed to ``cls._partition_class.put`` and the results are
        collected row by row.

        Parameters
        ----------
        df : pandas.DataFrame
            The frame to split.
        return_dims : bool, default: False
            When True, also return the row count of every row block and the
            column count of every column block.

        Returns
        -------
        np.ndarray or tuple of (np.ndarray, list of int, list of int)
            ``np.array`` built from the nested list of ``put`` results and,
            if `return_dims` is True, the row lengths and column widths.

        Raises
        ------
        ImportError
            If the progress bar is enabled but ``tqdm`` cannot be imported.
        """
        num_splits = NPartitions.get()
        put_func = cls._partition_class.put
        row_chunksize, col_chunksize = compute_chunksize(df, num_splits)

        n_rows = len(df)
        n_cols = len(df.columns)
        # Start offsets of every block; the same ranges drive the progress
        # total, the partitioning loops and the returned dimensions, so the
        # three can never disagree.
        row_starts = range(0, n_rows, row_chunksize)
        col_starts = range(0, n_cols, col_chunksize)

        # Read the setting once: `pbar is not None` is the single source of
        # truth afterwards, even if the configuration changes mid-call.
        pbar = None
        if ProgressBar.get():
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                try:
                    from tqdm.autonotebook import tqdm as tqdm_notebook
                except ImportError as err:
                    raise ImportError(
                        "Please pip install tqdm to use the progress bar"
                    ) from err

            bar_format = (
                "{l_bar}{bar}{r_bar}"
                if os.environ.get("DEBUG_PROGRESS_BAR", "False") == "True" else
                "{desc}: {percentage:3.0f}%{bar} Elapsed time: {elapsed}, estimated remaining time: {remaining}"
            )
            pbar = tqdm_notebook(
                # Exact number of blocks that will be produced (one update
                # per block); kept at a minimum of 1 for an empty frame.
                total=max(1, len(row_starts) * len(col_starts)),
                desc="Distributing Dataframe",
                bar_format=bar_format,
            )

        try:
            parts = []
            for i in row_starts:
                row_parts = []
                for j in col_starts:
                    block = df.iloc[i:i + row_chunksize,
                                    j:j + col_chunksize].copy()
                    row_parts.append(put_func(block))
                    if pbar is not None:
                        pbar.update(1)
                parts.append(row_parts)
        finally:
            # Close the bar even when `put_func` raises.
            if pbar is not None:
                pbar.close()

        if not return_dims:
            return np.array(parts)

        # Every block is full-sized except possibly the last one, which
        # holds whatever remains.
        row_lengths = [min(row_chunksize, n_rows - i) for i in row_starts]
        col_widths = [min(col_chunksize, n_cols - j) for j in col_starts]
        return np.array(parts), row_lengths, col_widths
Beispiel #2
0
    def magic(*args, **kwargs):
        """
        Call the wrapped function ``f`` and optionally show a progress bar.

        ``f`` is taken from the enclosing scope. When ``ProgressBar.get()``
        is truthy, ``call_progress_bar`` is started in a separate thread with
        the result of ``f`` and the line number of the ``<module>`` frame
        found on the current call stack. The thread is joined unless the
        code appears to run inside a Jupyter (zmq) shell.

        Parameters
        ----------
        *args : tuple
            Positional arguments forwarded to ``f``.
        **kwargs : dict
            Keyword arguments forwarded to ``f``.

        Returns
        -------
        object
            Whatever ``f(*args, **kwargs)`` returned.
        """
        result_parts = f(*args, **kwargs)
        if not ProgressBar.get():
            return result_parts

        # Walk outwards to the module-level frame. If the stack contains no
        # `<module>` frame, fall back to the outermost frame reached instead
        # of failing once `f_back` runs out.
        line_number = None
        frame = inspect.currentframe()
        try:
            while frame is not None:
                line_number = frame.f_lineno
                if frame.f_code.co_name == "<module>":
                    break
                frame = frame.f_back
        finally:
            # Drop the frame reference so it is not kept alive by this local.
            del frame

        t = threading.Thread(
            target=call_progress_bar,
            args=(result_parts, line_number),
        )
        t.start()

        # We need to know whether or not we are in a jupyter notebook
        try:
            from IPython import get_ipython
        except ImportError:
            # Without IPython installed this cannot be a notebook kernel.
            wait_for_bar = True
        else:
            try:
                wait_for_bar = "zmqshell" not in str(type(get_ipython()))
            except Exception:
                # Best effort: if detection fails, do not block the caller.
                wait_for_bar = False
        if wait_for_bar:
            t.join()
        return result_parts
Beispiel #3
0
 def update_bar(pbar, f):
     """
     Advance `pbar` by one step if the progress bar is enabled.

     Parameters
     ----------
     pbar : object
         Progress bar exposing ``update``; only touched when
         ``ProgressBar.get()`` is truthy.
     f : object
         Value passed through unchanged.

     Returns
     -------
     object
         The `f` argument, as given.
     """
     progress_enabled = ProgressBar.get()
     if progress_enabled:
         pbar.update(1)
     return f