Exemple #1
0
    def from_pandas(cls, df, return_dims=False):
        """
        Split a pandas DataFrame into a 2D grid of partitions.

        The frame is cut into blocks of at most ``row_chunksize`` rows by
        ``col_chunksize`` columns, where both sizes come from
        ``compute_chunksize(df, NPartitions.get())``. A copy of every block is
        passed to ``cls._partition_class.put`` and the results are collected
        row by row.

        Parameters
        ----------
        df : pandas.DataFrame
            The frame to distribute.
        return_dims : bool, default: False
            When True, also return the row lengths and column widths of the
            resulting grid.

        Returns
        -------
        numpy.ndarray or tuple
            ``np.array`` of the ``put`` results, or the tuple
            ``(array, row_lengths, col_widths)`` when `return_dims` is True.

        Raises
        ------
        ImportError
            If ``ProgressBar.get()`` is truthy but tqdm cannot be imported.
        """
        num_splits = NPartitions.get()
        put_func = cls._partition_class.put
        row_chunksize, col_chunksize = compute_chunksize(df, num_splits)

        n_rows = len(df)
        n_cols = len(df.columns)
        # Start offsets of every block; their counts are the exact grid shape.
        row_starts = range(0, n_rows, row_chunksize)
        col_starts = range(0, n_cols, col_chunksize)

        pbar = None
        if ProgressBar.get():
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                try:
                    from tqdm.autonotebook import tqdm as tqdm_notebook
                except ImportError as err:
                    raise ImportError(
                        "Please pip install tqdm to use the progress bar"
                    ) from err

            bar_format = (
                "{l_bar}{bar}{r_bar}"
                if os.environ.get("DEBUG_PROGRESS_BAR", "False") == "True" else
                "{desc}: {percentage:3.0f}%{bar} Elapsed time: {elapsed}, estimated remaining time: {remaining}"
            )
            # The total is the real number of blocks (one update per block).
            # Rounding len / chunksize undercounts whenever the last block is
            # partial. Keep a floor of 1 so the total is never zero.
            pbar = tqdm_notebook(
                total=max(1, len(row_starts) * len(col_starts)),
                desc="Distributing Dataframe",
                bar_format=bar_format,
            )

        def put_block(row_start, col_start):
            """Put one block of `df` and advance the progress bar, if any."""
            block = df.iloc[row_start:row_start + row_chunksize,
                            col_start:col_start + col_chunksize].copy()
            part = put_func(block)
            if pbar is not None:
                pbar.update(1)
            return part

        try:
            parts = [[put_block(i, j) for j in col_starts]
                     for i in row_starts]
        finally:
            # Close the bar even when a put fails, and only if one was created.
            if pbar is not None:
                pbar.close()

        if not return_dims:
            return np.array(parts)
        # Every block is full-sized except possibly the last one on each axis.
        row_lengths = [min(row_chunksize, n_rows - i) for i in row_starts]
        col_widths = [min(col_chunksize, n_cols - j) for j in col_starts]
        return np.array(parts), row_lengths, col_widths
Exemple #2
0
def call_progress_bar(result_parts, line_no):
    """
    Show a tqdm progress bar tracking completion of the given partitions.

    Bars are registered in ``progress_bars`` under the key
    ``"<cell execution count>-<line_no>"``. If a bar already exists for that
    key its total is extended by the number of new futures; otherwise a new
    bar is created. The function then blocks, advancing the bar once for each
    additional future that ``ray.wait`` reports as ready, and closes the bar
    once its count reaches its total.

    Parameters
    ----------
    result_parts : iterable of iterables
        2D collection of objects exposing an ``oid`` attribute; the ``oid``
        values are passed to ``ray.wait``.
    line_no : int
        Line number used in the bar key and description.

    Raises
    ------
    ImportError
        If tqdm (or IPython) cannot be imported.
    """
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        try:
            from tqdm.autonotebook import tqdm as tqdm_notebook
        except ImportError as err:
            raise ImportError(
                "Please pip install tqdm to use the progress bar") from err
        from IPython import get_ipython

    try:
        cell_no = get_ipython().execution_count
    # This happens if we are not in ipython or jupyter.
    # No progress bar is supported in that case.
    except AttributeError:
        return
    pbar_id = str(cell_no) + "-" + str(line_no)
    futures = [x.oid for row in result_parts for x in row]
    bar_format = (
        "{l_bar}{bar}{r_bar}"
        if os.environ.get("DEBUG_PROGRESS_BAR", "False") == "True" else
        "{desc}: {percentage:3.0f}%{bar} Elapsed time: {elapsed}, estimated remaining time: {remaining}"
    )
    # Hold the lock via a context manager so it is released even if updating
    # or creating the bar raises.
    with bar_lock:
        if pbar_id in progress_bars:
            pbar = progress_bars[pbar_id]
            if hasattr(pbar, "container"):
                # Bars with a widget container keep their own maximum on one
                # of the first two children: use the first if it has a `max`
                # attribute, else the second.
                children = pbar.container.children
                index = 0 if hasattr(children[0], "max") else 1
                children[index].max += len(futures)
            pbar.total += len(futures)
            pbar.refresh()
        else:
            pbar = tqdm_notebook(
                total=len(futures),
                desc="Estimated completion of line " + str(line_no),
                bar_format=bar_format,
            )
            progress_bars[pbar_id] = pbar

    threading.Thread(target=show_time_updates, args=(pbar, )).start()
    # Each call blocks until `done` futures are ready, so the bar advances by
    # one every time another future completes.
    for done in range(1, len(futures) + 1):
        ray.wait(futures, num_returns=done)
        pbar.update(1)
        pbar.refresh()
    if pbar.n == pbar.total:
        pbar.close()