Example #1
def make_progress() -> Progress:
    _time = 0.0

    def fake_time():
        nonlocal _time
        try:
            return _time
        finally:
            _time += 1

    console = Console(
        file=io.StringIO(),
        force_terminal=True,
        color_system="truecolor",
        width=80,
        legacy_windows=False,
        _environ={},
    )
    progress = Progress(console=console, get_time=fake_time, auto_refresh=False)
    task1 = progress.add_task("foo")
    task2 = progress.add_task("bar", total=30)
    progress.advance(task2, 16)
    task3 = progress.add_task("baz", visible=False)
    task4 = progress.add_task("egg")
    progress.remove_task(task4)
    task4 = progress.add_task("foo2", completed=50, start=False)
    progress.stop_task(task4)
    progress.start_task(task4)
    progress.update(
        task4, total=200, advance=50, completed=200, visible=True, refresh=True
    )
    progress.stop_task(task4)
    return progress
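A minimal sketch of how the helper above might be exercised (assumed usage, not from the original test suite): start the live display, refresh once, and read the captured ANSI output back from the StringIO-backed console.

progress = make_progress()
with progress:          # starts and stops the live display
    progress.refresh()  # render once, since auto_refresh is disabled
output = progress.console.file.getvalue()  # captured ANSI escape sequences
print(repr(output))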
Example #2
def extract_schema_from_collection(collection: Collection, partial: bool):
    """ Extracts a schema definition from a collection.

    If the extraction is partial, only the first document in the collection is
    used to create the schema.
    """

    schema = {"count": 0, "document": {}}
    if partial:
        count = 1
    else:
        count = collection.estimated_document_count()
    with progress:
        t = progress.add_task(collection.name, total=count)
        try:
            for document in collection.find():
                schema["count"] += 1
                schema["document"] = extract_schema_from_document(
                    document, schema["document"])
                progress.update(t, advance=1)
                if partial:
                    break
        except KeyboardInterrupt:
            return schema
    return schema
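A hypothetical call site for the function above, assuming pymongo plus the module-level progress bar and extract_schema_from_document helper that it relies on.

import pymongo
import rich.progress

progress = rich.progress.Progress()  # module-level bar referenced inside the function
client = pymongo.MongoClient("mongodb://localhost:27017")
schema = extract_schema_from_collection(client.mydb.users, partial=True)
print(schema["count"], "document(s) sampled")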
Example #3
def _parallel_executor(tasks: List[_Task]) -> bool:
    result_columns = rich.columns.Columns()
    results: Dict[str, rich.panel.Panel] = {}
    fail = False

    with _ProgressWithResults(
            rich.progress.SpinnerColumn(spinner_name='point'),
            '[bold blue]{task.description}',
            rich.progress.BarColumn(bar_width=80),
            '[progress.percentage]{task.completed}/{task.total}',
            result_columns=result_columns,
    ) as progress:
        task_progress = progress.add_task('running tasks...', total=len(tasks))

        pool = _ProcessPool()
        for task in tasks:
            pool.submit(task.name, task.func)
            status = rich.panel.Panel('running', title=f'[bold]{task.name}')
            results[task.name] = status
            result_columns.add_renderable(status)

        # wait for tasks to complete
        for name, process, out, exc in pool.as_completed():
            if process.exitcode:
                progress.print(f'[bold red]error executing: {name}')
                progress.print(out, end='', highlight=False)
                results[name].renderable = 'failed'
                results[name].style = 'red'
                fail = True
            else:
                progress.print(f'[bold green]successfully executed: {name}')
                results[name].renderable = 'success'
                results[name].style = 'green'
            progress.advance(task_progress)
    return fail
Example #4
def test_columns() -> None:

    console = Console(
        file=io.StringIO(),
        force_terminal=True,
        width=80,
        log_time_format="[TIME]",
        color_system="truecolor",
        legacy_windows=False,
        log_path=False,
        _environ={},
    )
    progress = Progress(
        "test",
        TextColumn("{task.description}"),
        BarColumn(bar_width=None),
        TimeRemainingColumn(),
        TimeElapsedColumn(),
        FileSizeColumn(),
        TotalFileSizeColumn(),
        DownloadColumn(),
        TransferSpeedColumn(),
        MofNCompleteColumn(),
        MofNCompleteColumn(separator=" of "),
        transient=True,
        console=console,
        auto_refresh=False,
        get_time=MockClock(),
    )
    task1 = progress.add_task("foo", total=10)
    task2 = progress.add_task("bar", total=7)
    with progress:
        for n in range(4):
            progress.advance(task1, 3)
            progress.advance(task2, 4)
        print("foo")
        console.log("hello")
        console.print("world")
        progress.refresh()
    from .render import replace_link_ids

    result = replace_link_ids(console.file.getvalue())
    print(repr(result))
    expected = "\x1b[?25ltest foo \x1b[38;5;237m━━━━━━━━━━\x1b[0m \x1b[36m-:--:--\x1b[0m \x1b[33m0:00:07\x1b[0m \x1b[32m0 bytes\x1b[0m \x1b[32m10 bytes\x1b[0m \x1b[32m0/10 bytes\x1b[0m \x1b[31m?\x1b[0m \x1b[32m 0/10\x1b[0m \x1b[32m 0 of 10\x1b[0m\ntest bar \x1b[38;5;237m━━━━━━━━━━\x1b[0m \x1b[36m-:--:--\x1b[0m \x1b[33m0:00:18\x1b[0m \x1b[32m0 bytes\x1b[0m \x1b[32m7 bytes \x1b[0m \x1b[32m0/7 bytes \x1b[0m \x1b[31m?\x1b[0m \x1b[32m0/7  \x1b[0m \x1b[32m0 of 7  \x1b[0m\r\x1b[2K\x1b[1A\x1b[2Kfoo\ntest foo \x1b[38;5;237m━━━━━━━━━━\x1b[0m \x1b[36m-:--:--\x1b[0m \x1b[33m0:00:07\x1b[0m \x1b[32m0 bytes\x1b[0m \x1b[32m10 bytes\x1b[0m \x1b[32m0/10 bytes\x1b[0m \x1b[31m?\x1b[0m \x1b[32m 0/10\x1b[0m \x1b[32m 0 of 10\x1b[0m\ntest bar \x1b[38;5;237m━━━━━━━━━━\x1b[0m \x1b[36m-:--:--\x1b[0m \x1b[33m0:00:18\x1b[0m \x1b[32m0 bytes\x1b[0m \x1b[32m7 bytes \x1b[0m \x1b[32m0/7 bytes \x1b[0m \x1b[31m?\x1b[0m \x1b[32m0/7  \x1b[0m \x1b[32m0 of 7  \x1b[0m\r\x1b[2K\x1b[1A\x1b[2K\x1b[2;36m[TIME]\x1b[0m\x1b[2;36m \x1b[0mhello                                                                    \ntest foo \x1b[38;5;237m━━━━━━━━━━\x1b[0m \x1b[36m-:--:--\x1b[0m \x1b[33m0:00:07\x1b[0m \x1b[32m0 bytes\x1b[0m \x1b[32m10 bytes\x1b[0m \x1b[32m0/10 bytes\x1b[0m \x1b[31m?\x1b[0m \x1b[32m 0/10\x1b[0m \x1b[32m 0 of 10\x1b[0m\ntest bar \x1b[38;5;237m━━━━━━━━━━\x1b[0m \x1b[36m-:--:--\x1b[0m \x1b[33m0:00:18\x1b[0m \x1b[32m0 bytes\x1b[0m \x1b[32m7 bytes \x1b[0m \x1b[32m0/7 bytes \x1b[0m \x1b[31m?\x1b[0m \x1b[32m0/7  \x1b[0m \x1b[32m0 of 7  \x1b[0m\r\x1b[2K\x1b[1A\x1b[2Kworld\ntest foo \x1b[38;5;237m━━━━━━━━━━\x1b[0m \x1b[36m-:--:--\x1b[0m \x1b[33m0:00:07\x1b[0m \x1b[32m0 bytes\x1b[0m \x1b[32m10 bytes\x1b[0m \x1b[32m0/10 bytes\x1b[0m \x1b[31m?\x1b[0m \x1b[32m 0/10\x1b[0m \x1b[32m 0 of 10\x1b[0m\ntest bar \x1b[38;5;237m━━━━━━━━━━\x1b[0m \x1b[36m-:--:--\x1b[0m \x1b[33m0:00:18\x1b[0m \x1b[32m0 bytes\x1b[0m \x1b[32m7 bytes \x1b[0m \x1b[32m0/7 bytes \x1b[0m \x1b[31m?\x1b[0m \x1b[32m0/7  \x1b[0m \x1b[32m0 of 7  \x1b[0m\r\x1b[2K\x1b[1A\x1b[2Ktest foo \x1b[38;2;114;156;31m━━━━━━━\x1b[0m \x1b[36m0:00:00\x1b[0m \x1b[33m0:00:34\x1b[0m \x1b[32m12     \x1b[0m \x1b[32m10     \x1b[0m \x1b[32m12/10   \x1b[0m \x1b[31m1      \x1b[0m \x1b[32m12/10\x1b[0m \x1b[32m12 of 10\x1b[0m\n                                 \x1b[32mbytes  \x1b[0m \x1b[32mbytes  \x1b[0m \x1b[32mbytes   \x1b[0m \x1b[31mbyte/s \x1b[0m               \ntest bar \x1b[38;2;114;156;31m━━━━━━━\x1b[0m \x1b[36m0:00:00\x1b[0m \x1b[33m0:00:29\x1b[0m \x1b[32m16     \x1b[0m \x1b[32m7 bytes\x1b[0m \x1b[32m16/7    \x1b[0m \x1b[31m2      \x1b[0m \x1b[32m16/7 \x1b[0m \x1b[32m16 of 7 \x1b[0m\n                                 \x1b[32mbytes  \x1b[0m         \x1b[32mbytes   \x1b[0m \x1b[31mbytes/s\x1b[0m               \r\x1b[2K\x1b[1A\x1b[2K\x1b[1A\x1b[2K\x1b[1A\x1b[2Ktest foo \x1b[38;2;114;156;31m━━━━━━━\x1b[0m \x1b[36m0:00:00\x1b[0m \x1b[33m0:00:34\x1b[0m \x1b[32m12     \x1b[0m \x1b[32m10     \x1b[0m \x1b[32m12/10   \x1b[0m \x1b[31m1      \x1b[0m \x1b[32m12/10\x1b[0m \x1b[32m12 of 10\x1b[0m\n                                 \x1b[32mbytes  \x1b[0m \x1b[32mbytes  \x1b[0m \x1b[32mbytes   \x1b[0m \x1b[31mbyte/s \x1b[0m               \ntest bar \x1b[38;2;114;156;31m━━━━━━━\x1b[0m \x1b[36m0:00:00\x1b[0m \x1b[33m0:00:29\x1b[0m \x1b[32m16     \x1b[0m \x1b[32m7 bytes\x1b[0m \x1b[32m16/7    \x1b[0m \x1b[31m2      \x1b[0m \x1b[32m16/7 \x1b[0m \x1b[32m16 of 7 \x1b[0m\n                                 \x1b[32mbytes  \x1b[0m         \x1b[32mbytes   \x1b[0m \x1b[31mbytes/s\x1b[0m               
\n\x1b[?25h\r\x1b[1A\x1b[2K\x1b[1A\x1b[2K\x1b[1A\x1b[2K\x1b[1A\x1b[2K"

    assert result == expected
Example #5
async def generate_site(
    *,
    theme: Theme,
    progress: rich.progress.Progress,
) -> None:
    task = progress.add_task(theme.name, total=5)

    env = IsolatedEnvironment(theme.name)
    destination_path = DESTINATION["sites"] / theme.name

    try:
        await env.create(delete="CI" in os.environ)
        progress.advance(task, 1)

        progress.log(f"[yellow]{theme.name}[reset]: Installing packages...")
        await env.install("--pre", "sphinx")
        progress.advance(task, 1)
        await env.install(theme.pypi_package)
        progress.advance(task, 1)

        render_conf_template(theme, env.path / "conf.py")
        progress.advance(task, 1)

        if destination_path.exists():
            shutil.rmtree(destination_path)

        progress.log(f"[yellow]{theme.name}[reset]: Building site...")
        returncode, output = await env.run(
            "sphinx-build",
            "-v",
            "-b=dirhtml",
            f"-c={env.path}",
            str(BUILD["sources"]),
            str(destination_path),
        )
        progress.advance(task, 1)

        if returncode:
            stdout, stderr = output
            message = [
                " stdout ".center(88, "="),
                stdout.decode(),
                " stderr ".center(88, "="),
                stderr.decode(),
            ]
            raise Exception("\n".join(message))
    except Exception as e:
        progress.log(f"Fail: [red]{theme.name}[reset]\n\t{e}")
        destination_path.mkdir(parents=True, exist_ok=True)
        (destination_path / "index.html").write_text(get_error_page(theme, e))
    else:
        progress.log(f"Done: [green]{theme.name}[reset]")
    finally:
        progress.remove_task(task)
Example #6
    def index_object(self, engine, bucket, obj, progress=None, overall=None):
        """
        Read a file in S3, index it, and insert records into the table.
        """
        key, version, size = obj['Key'], obj['ETag'].strip('"'), obj['Size']
        key_id = self.insert_key(engine, key, version)

        # read the file from s3
        content = read_object(bucket, key)
        start_offset = 0
        records = {}

        # per-file progress bar
        rel_key = relative_key(key, self.s3_prefix)
        file_progress = progress and progress.add_task(f'[yellow]{rel_key}[/]',
                                                       total=size)

        # process each line (record)
        for line_num, line in enumerate(content.iter_lines()):
            row = orjson.loads(line)
            end_offset = start_offset + len(line) + 1  # newline

            try:
                for key_tuple in self.schema.index_builder(row):
                    if key_tuple in records:
                        records[key_tuple]['end_offset'] = end_offset
                    else:
                        records[key_tuple] = {
                            'key': key_id,
                            'start_offset': start_offset,
                            'end_offset': end_offset,
                        }
            except (KeyError, ValueError) as e:
                logging.warning('%s; skipping...', e)

            # update progress
            if progress:
                progress.update(file_progress, completed=end_offset)

            # track current file offset
            start_offset = end_offset

        # done with this object; tick the overall progress
        if progress:
            progress.remove_task(file_progress)
            progress.advance(overall, advance=size)

        # NOTE: Because this is called as a job, be sure to return an iterator
        #       and not the records themselves, since the records would otherwise
        #       be kept in memory for the entire duration of indexing.
        return key, ({
            **self.schema.column_values(k),
            **r
        } for k, r in records.items())
Example #7
    def lint_project(self,
                     calling_class,
                     check_functions: list = None,
                     custom_check_files: bool = False,
                     is_subclass_calling=True) -> None:
        """Main linting function.
        Takes the template directory as the primary input and iterates through
        the different linting checks in order. Collects any warnings or errors
        and returns a summary at completion. Raises an exception if there is a
        critical error that makes the rest of the tests pointless (e.g. no
        project script). Results from this function are printed by the main script.

        :param calling_class: The class that calls the function -> used to get the class methods, which are the linting methods
        :param check_functions: List of functions of the calling class that should be checked. If not set, the default TemplateLinter check functions are called
        :param custom_check_files: Set to true if TemplateLinter check_files_exist should not be run
        :param is_subclass_calling: Indicates whether a domain specific linter calls the linting or not
        """
        # Called on its own, so not from a subclass -> run general linting
        if check_functions is None:
            # Fetch all general linting functions
            check_functions = [
                func for func in dir(TemplateLinter)
                if (callable(getattr(TemplateLinter, func))
                    and not func.startswith('_'))
            ]
            # Remove internal functions
            check_functions = list(
                set(check_functions).difference(
                    {'lint_project', 'print_results', 'check_version_match'}))
            log.debug(
                f'Linting functions of general linting are:\n {check_functions}'
            )
        # Some templates (e.g. latex based) do not adhere to the common programming based templates and therefore do not need to check for e.g. docs
        # or lint changelog
        if custom_check_files:
            check_functions.remove('check_files_exist')
            check_functions.remove('lint_changelog')

        progress = rich.progress.Progress(
            "[bold green]{task.description}",
            rich.progress.BarColumn(bar_width=None),
            "[bold yellow]{task.completed} of {task.total}[reset] [bold green]{task.fields[func_name]}",
        )
        with progress:
            lint_progress = progress.add_task("Running lint checks",
                                              total=len(check_functions),
                                              func_name=check_functions)
            for fun_name in check_functions:
                log.debug(f'Running linting function: {fun_name}')
                progress.update(lint_progress, advance=1, func_name=fun_name)
                if fun_name == 'check_files_exist':
                    getattr(calling_class, fun_name)(is_subclass_calling)
                else:
                    getattr(calling_class, fun_name)()
Example #8
    def delete_stale_keys(self, engine, objects, console=None):
        """
        Deletes all records indexed where the key...

         - no longer exists
         - has the wrong version
         - hasn't been fully indexed
        """
        logging.info('Finding stale keys...')
        keys = self.lookup_keys(engine)

        # all keys are considered stale initially
        stale_ids = set(map(lambda k: k['id'], keys.values()))
        indexed_keys = set()

        # loop over all the valid objects to be indexed
        for obj in objects:
            key, version = obj['Key'], obj['ETag'].strip('"')
            k = keys.get(key)

            # is this key already built and match versions?
            if k and k['version'] == version:
                stale_ids.remove(k['id'])
                indexed_keys.add(key)

        # delete stale keys
        if stale_ids:
            # if all the keys are stale, just drop the table
            if not indexed_keys:
                logging.info(f'Deleting table...')
                self.prepare(engine, rebuild=True)
            else:
                with rich.progress.Progress(console=console) as progress:
                    task = progress.add_task('[red]Deleting...[/]',
                                             total=len(stale_ids))
                    n = 0

                    # delete all the keys from the table
                    for kid in stale_ids:
                        sql = f'DELETE FROM {self.table.name} WHERE `key` = %s'
                        n += engine.execute(sql, kid).rowcount

                        # remove the key from the __Keys table
                        self.delete_key(engine, key)
                        progress.advance(task)

                    # show what was done
                    logging.info(f'Deleted {n:,} records')
        else:
            logging.info('No stale keys; delete skipped')

        # filter the objects that still need to be indexed
        return [o for o in objects if o['Key'] not in indexed_keys]
Example #9
def test_track_thread() -> None:
    progress = Progress()
    task_id = progress.add_task("foo")
    track_thread = _TrackThread(progress, task_id, 0.1)
    assert track_thread.completed == 0
    from time import sleep

    with track_thread:
        track_thread.completed = 1
        sleep(0.3)
        assert progress.tasks[task_id].completed >= 1
        track_thread.completed += 1
Example #10
    async def _download_jar(self) -> None:
        log.info("Downloading Lavalink.jar...")
        async with aiohttp.ClientSession(json_serialize=json.dumps) as session:
            async with session.get(LAVALINK_DOWNLOAD_URL) as response:
                if response.status == 404:
                    # A 404 means our LAVALINK_DOWNLOAD_URL is invalid, so likely the jar version
                    # hasn't been published yet
                    raise LavalinkDownloadFailed(
                        f"Lavalink jar version {JAR_VERSION}_{JAR_BUILD} hasn't been published "
                        "yet",
                        response=response,
                        should_retry=False,
                    )
                elif 400 <= response.status < 600:
                    # Other bad responses should be raised but we should retry just in case
                    raise LavalinkDownloadFailed(response=response,
                                                 should_retry=True)
                fd, path = tempfile.mkstemp()
                file = open(fd, "wb")
                nbytes = 0
                with rich.progress.Progress(
                        rich.progress.SpinnerColumn(),
                        rich.progress.TextColumn(
                            "[progress.description]{task.description}"),
                        rich.progress.BarColumn(),
                        rich.progress.TextColumn(
                            "[progress.percentage]{task.percentage:>3.0f}%"),
                        rich.progress.TimeRemainingColumn(),
                        rich.progress.TimeElapsedColumn(),
                ) as progress:
                    progress_task_id = progress.add_task(
                        "[red]Downloading Lavalink.jar",
                        total=response.content_length)
                    try:
                        chunk = await response.content.read(1024)
                        while chunk:
                            chunk_size = file.write(chunk)
                            nbytes += chunk_size
                            progress.update(progress_task_id,
                                            advance=chunk_size)
                            chunk = await response.content.read(1024)
                        file.flush()
                    finally:
                        file.close()

                shutil.move(path,
                            str(LAVALINK_JAR_FILE),
                            copy_function=shutil.copyfile)

        log.info("Successfully downloaded Lavalink.jar (%s bytes written)",
                 format(nbytes, ","))
        await self._is_up_to_date()
Example #11
    def singularity_pull_image(self, container, out_path, cache_path,
                               progress):
        """Pull a singularity image using ``singularity pull``

        Attempt to use a local installation of singularity to pull the image.

        Args:
            container (str): A pipeline's container name. Usually it is of similar format
                to ``nfcore/name:version``.

        Raises:
            Various exceptions possible from `subprocess` execution of Singularity.
        """
        output_path = cache_path or out_path

        # Pull using singularity
        address = "docker://{}".format(container.replace("docker://", ""))
        singularity_command = [
            "singularity", "pull", "--name", output_path, address
        ]
        log.debug("Building singularity image: {}".format(address))
        log.debug("Singularity command: {}".format(
            " ".join(singularity_command)))

        # Progress bar to show that something is happening
        task = progress.add_task(container,
                                 start=False,
                                 total=False,
                                 progress_type="singularity_pull",
                                 current_log="")

        # Run the singularity pull command
        proc = subprocess.Popen(
            singularity_command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
            bufsize=1,
        )
        for line in proc.stdout:
            log.debug(line.strip())
            progress.update(task, current_log=line.strip())

        # Copy cached download if we are using the cache
        if cache_path:
            log.debug("Copying {} from cache: '{}'".format(
                container, os.path.basename(out_path)))
            progress.update(
                task, current_log="Copying from cache to target directory")
            shutil.copyfile(cache_path, out_path)

        progress.remove_task(task)
Example #12
def test_expand_bar() -> None:
    console = Console(
        file=io.StringIO(),
        force_terminal=True,
        width=10,
        color_system="truecolor",
        legacy_windows=False,
        _environ={},
    )
    progress = Progress(
        BarColumn(bar_width=None),
        console=console,
        get_time=lambda: 1.0,
        auto_refresh=False,
    )
    progress.add_task("foo")
    with progress:
        pass
    expected = "\x1b[?25l\x1b[38;5;237m━━━━━━━━━━\x1b[0m\r\x1b[2K\x1b[38;5;237m━━━━━━━━━━\x1b[0m\n\x1b[?25h"
    render_result = console.file.getvalue()
    print("RESULT\n", repr(render_result))
    print("EXPECTED\n", repr(expected))
    assert render_result == expected
Example #13
def test_progress_with_none_total_renders_a_pulsing_bar() -> None:
    console = Console(
        file=io.StringIO(),
        force_terminal=True,
        width=10,
        color_system="truecolor",
        legacy_windows=False,
        _environ={},
    )
    progress = Progress(
        BarColumn(bar_width=None),
        console=console,
        get_time=lambda: 1.0,
        auto_refresh=False,
    )
    progress.add_task("foo", total=None)
    with progress:
        pass
    expected = "\x1b[?25l\x1b[38;2;153;48;86m━\x1b[0m\x1b[38;2;183;44;94m━\x1b[0m\x1b[38;2;209;42;102m━\x1b[0m\x1b[38;2;230;39;108m━\x1b[0m\x1b[38;2;244;38;112m━\x1b[0m\x1b[38;2;249;38;114m━\x1b[0m\x1b[38;2;244;38;112m━\x1b[0m\x1b[38;2;230;39;108m━\x1b[0m\x1b[38;2;209;42;102m━\x1b[0m\x1b[38;2;183;44;94m━\x1b[0m\r\x1b[2K\x1b[38;2;153;48;86m━\x1b[0m\x1b[38;2;183;44;94m━\x1b[0m\x1b[38;2;209;42;102m━\x1b[0m\x1b[38;2;230;39;108m━\x1b[0m\x1b[38;2;244;38;112m━\x1b[0m\x1b[38;2;249;38;114m━\x1b[0m\x1b[38;2;244;38;112m━\x1b[0m\x1b[38;2;230;39;108m━\x1b[0m\x1b[38;2;209;42;102m━\x1b[0m\x1b[38;2;183;44;94m━\x1b[0m\n\x1b[?25h"
    render_result = console.file.getvalue()
    print("RESULT\n", repr(render_result))
    print("EXPECTED\n", repr(expected))
    assert render_result == expected
Example #14
async def _my_stub(theme: Theme, progress: rich.progress.Progress):
    first = random.randint(1, 10) * 0.1
    second = random.randint(1, 10) * 0.1

    task = progress.add_task(theme.name, total=first + second)

    await asyncio.sleep(first)
    progress.advance(task, first)

    await asyncio.sleep(second)
    progress.advance(task, second)

    progress.log(f"Done: {theme}")
    progress.remove_task(task)
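A short sketch of how coroutines like the one above might be driven concurrently under a single shared bar (an assumed driver, not part of the original project).

import asyncio
import rich.progress

async def _run_all(themes):
    with rich.progress.Progress() as progress:
        await asyncio.gather(*(_my_stub(theme, progress) for theme in themes))

# asyncio.run(_run_all(themes))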
Example #15
    async def migrate_to(
        cls,
        new_driver_cls: Type["BaseDriver"],
        all_custom_group_data: Dict[str, Dict[str, Dict[str, int]]],
    ) -> None:
        """Migrate data from this backend to another.

        Both drivers must be initialized beforehand.

        This will only move the data - no instance metadata is modified
        as a result of this operation.

        Parameters
        ----------
        new_driver_cls
            Subclass of `BaseDriver`.
        all_custom_group_data : Dict[str, Dict[str, Dict[str, int]]]
            Dict mapping cog names, to cog IDs, to custom groups, to
            primary key lengths.

        """
        # Backend-agnostic method of migrating from one driver to another.
        with rich.progress.Progress(
            rich.progress.SpinnerColumn(),
            rich.progress.TextColumn("[progress.description]{task.description}"),
            RichIndefiniteBarColumn(),
            rich.progress.TextColumn("{task.completed} cogs processed"),
            rich.progress.TimeElapsedColumn(),
        ) as progress:
            cog_count = 0
            tid = progress.add_task("[yellow]Migrating", completed=cog_count, total=cog_count + 1)
            async for cog_name, cog_id in cls.aiter_cogs():
                progress.console.print(f"Working on {cog_name}...")

                this_driver = cls(cog_name, cog_id)
                other_driver = new_driver_cls(cog_name, cog_id)
                custom_group_data = all_custom_group_data.get(cog_name, {}).get(cog_id, {})
                exported_data = await this_driver.export_data(custom_group_data)
                await other_driver.import_data(exported_data, custom_group_data)

                cog_count += 1
                progress.update(tid, completed=cog_count, total=cog_count + 1)
            progress.update(tid, total=cog_count)
        print()
Example #16
def download_response(response: Response, download: typing.BinaryIO) -> None:
    console = rich.console.Console()
    console.print()
    content_length = response.headers.get("Content-Length")
    with rich.progress.Progress(
        "[progress.description]{task.description}",
        "[progress.percentage]{task.percentage:>3.0f}%",
        rich.progress.BarColumn(bar_width=None),
        rich.progress.DownloadColumn(),
        rich.progress.TransferSpeedColumn(),
    ) as progress:
        description = f"Downloading [bold]{rich.markup.escape(download.name)}"
        download_task = progress.add_task(
            description,
            total=int(content_length or 0),
            start=content_length is not None,
        )
        for chunk in response.iter_bytes():
            download.write(chunk)
            progress.update(download_task, completed=response.num_bytes_downloaded)
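A hypothetical caller for the function above, assuming httpx: the response must be streamed so that response.num_bytes_downloaded advances as chunks are written out.

import httpx

with httpx.stream("GET", "https://example.com/archive.zip") as response:
    with open("archive.zip", "wb") as download:
        download_response(response, download)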
Example #17
def download_response(response: httpx.Response) -> None:
    console = rich.console.Console()
    syntax = rich.syntax.Syntax("", "http", theme="ansi_dark", word_wrap=True)
    console.print(syntax)

    filename = get_download_filename(response)
    content_length = response.headers.get("Content-Length")
    kwargs = {"total": int(content_length)} if content_length else {}
    with open(filename, mode="bw") as download_file:
        with rich.progress.Progress(
            "[progress.description]{task.description}",
            "[progress.percentage]{task.percentage:>3.0f}%",
            rich.progress.BarColumn(bar_width=None),
            rich.progress.DownloadColumn(),
            rich.progress.TransferSpeedColumn(),
        ) as progress:
            description = f"Downloading [bold]{filename}"
            download_task = progress.add_task(description, **kwargs)  # type: ignore
            for chunk in response.iter_bytes():
                download_file.write(chunk)
                progress.update(download_task, completed=response.num_bytes_downloaded)
Example #18
def test_wrap_file_task_total() -> None:
    console = Console(
        file=io.StringIO(),
        force_terminal=True,
        width=60,
        color_system="truecolor",
        legacy_windows=False,
        _environ={},
    )
    progress = Progress(console=console, )

    fd, filename = tempfile.mkstemp()
    with os.fdopen(fd, "wb") as f:
        total = f.write(b"Hello, World!")
    try:
        with progress:
            with open(filename, "rb") as file:
                task_id = progress.add_task("Reading", total=total)
                with progress.wrap_file(file, task_id=task_id) as f:
                    assert f.read() == b"Hello, World!"
    finally:
        os.remove(filename)
Example #19
def test_progress_max_refresh() -> None:
    """Test max_refresh argument."""
    time = 0.0

    def get_time() -> float:
        nonlocal time
        try:
            return time
        finally:
            time = time + 1.0

    console = Console(
        color_system=None,
        width=80,
        legacy_windows=False,
        force_terminal=True,
        _environ={},
    )
    column = TextColumn("{task.description}")
    column.max_refresh = 3
    progress = Progress(
        column,
        get_time=get_time,
        auto_refresh=False,
        console=console,
    )
    console.begin_capture()
    with progress:
        task_id = progress.add_task("start")
        for tick in range(6):
            progress.update(task_id, description=f"tick {tick}")
            progress.refresh()
    result = console.end_capture()
    print(repr(result))
    assert (
        result
        == "\x1b[?25l\r\x1b[2Kstart\r\x1b[2Kstart\r\x1b[2Ktick 1\r\x1b[2Ktick 1\r\x1b[2Ktick 3\r\x1b[2Ktick 3\r\x1b[2Ktick 5\r\x1b[2Ktick 5\n\x1b[?25h"
    )
Example #20
def test_reset() -> None:
    progress = Progress()
    task_id = progress.add_task("foo")
    progress.advance(task_id, 1)
    progress.advance(task_id, 1)
    progress.advance(task_id, 1)
    progress.advance(task_id, 7)
    task = progress.tasks[task_id]
    assert task.completed == 10
    progress.reset(
        task_id,
        total=200,
        completed=20,
        visible=False,
        description="bar",
        example="egg",
    )
    assert task.total == 200
    assert task.completed == 20
    assert task.visible == False
    assert task.description == "bar"
    assert task.fields == {"example": "egg"}
    assert not task._progress
Example #21
def get_filelist(run_module_names):
    """
    Go through all supplied search directories and assemble a master
    list of files to search. Then fire search functions for each file.
    """
    # Prep search patterns
    spatterns = [{}, {}, {}, {}, {}, {}, {}]
    runtimes["sp"] = defaultdict()
    ignored_patterns = []
    skipped_patterns = []
    for key, sps in config.sp.items():
        mod_name = key.split("/", 1)[0]
        if mod_name.lower() not in [m.lower() for m in run_module_names]:
            ignored_patterns.append(key)
            continue
        files[key] = list()
        if not isinstance(sps, list):
            sps = [sps]

        # Warn if we have any unrecognised search pattern keys
        expected_sp_keys = [
            "fn",
            "fn_re",
            "contents",
            "contents_re",
            "num_lines",
            "shared",
            "skip",
            "max_filesize",
            "exclude_fn",
            "exclude_fn_re",
            "exclude_contents",
            "exclude_contents_re",
        ]
        unrecognised_keys = [
            y for x in sps for y in x.keys() if y not in expected_sp_keys
        ]
        if len(unrecognised_keys) > 0:
            logger.warning(
                "Unrecognised search pattern keys for '{}': {}".format(
                    key, ", ".join(unrecognised_keys)))

        # Check if we are skipping this search key
        if any([x.get("skip") for x in sps]):
            skipped_patterns.append(key)

        # Split search patterns according to speed of execution.
        if any([x for x in sps if "contents_re" in x]):
            if any([x for x in sps if "num_lines" in x]):
                spatterns[4][key] = sps
            elif any([x for x in sps if "max_filesize" in x]):
                spatterns[5][key] = sps
            else:
                spatterns[6][key] = sps
        elif any([x for x in sps if "contents" in x]):
            if any([x for x in sps if "num_lines" in x]):
                spatterns[1][key] = sps
            elif any([x for x in sps if "max_filesize" in x]):
                spatterns[2][key] = sps
            else:
                spatterns[3][key] = sps
        else:
            spatterns[0][key] = sps

    if len(ignored_patterns) > 0:
        logger.debug(
            "Ignored {} search patterns as didn't match running modules.".
            format(len(ignored_patterns)))

    if len(skipped_patterns) > 0:
        logger.info("Skipping {} file search patterns".format(
            len(skipped_patterns)))
        logger.debug("Skipping search patterns: {}".format(
            ", ".join(skipped_patterns)))

    def add_file(fn, root):
        """
        Function applied to each file found when walking the analysis
        directories. Runs through all search patterns and returns True
        if a match is found.
        """
        f = {"fn": fn, "root": root}

        # Check that this is a file and not a pipe or anything weird
        if not os.path.isfile(os.path.join(root, fn)):
            file_search_stats["skipped_not_a_file"] += 1
            return False

        # Check that we don't want to ignore this file
        i_matches = [
            n for n in config.fn_ignore_files if fnmatch.fnmatch(fn, n)
        ]
        if len(i_matches) > 0:
            logger.debug(
                "Ignoring file as matched an ignore pattern: {}".format(fn))
            file_search_stats["skipped_ignore_pattern"] += 1
            return False

        # Limit search to small files, to avoid 30GB FastQ files etc.
        try:
            f["filesize"] = os.path.getsize(os.path.join(root, fn))
        except (IOError, OSError, ValueError, UnicodeDecodeError):
            logger.debug(
                "Couldn't read file when checking filesize: {}".format(fn))
        else:
            if f["filesize"] > config.log_filesize_limit:
                file_search_stats["skipped_filesize_limit"] += 1
                return False

        # Test file for each search pattern
        file_matched = False
        for patterns in spatterns:
            for key, sps in patterns.items():
                start = time.time()
                for sp in sps:
                    if search_file(sp, f, key):
                        # Check that we shouldn't exclude this file
                        if not exclude_file(sp, f):
                            # Looks good! Remember this file
                            files[key].append(f)
                            file_search_stats[key] = file_search_stats.get(
                                key, 0) + 1
                            file_matched = True
                        # Don't keep searching this file for other modules
                        if not sp.get("shared", False):
                            runtimes["sp"][key] = runtimes["sp"].get(
                                key, 0) + (time.time() - start)
                            return True
                        # Don't look at other patterns for this module
                        else:
                            break
                runtimes["sp"][key] = runtimes["sp"].get(
                    key, 0) + (time.time() - start)

        return file_matched

    # Go through the analysis directories and get file list
    multiqc_installation_dir_files = [
        "LICENSE",
        "CHANGELOG.md",
        "Dockerfile",
        "MANIFEST.in",
        ".gitmodules",
        "README.md",
        "CSP.txt",
        "setup.py",
        ".gitignore",
    ]
    total_sp_starttime = time.time()
    for path in config.analysis_dir:
        if os.path.islink(path) and config.ignore_symlinks:
            file_search_stats["skipped_symlinks"] += 1
            continue
        elif os.path.isfile(path):
            searchfiles.append([os.path.basename(path), os.path.dirname(path)])
        elif os.path.isdir(path):
            for root, dirnames, filenames in os.walk(
                    path, followlinks=(not config.ignore_symlinks),
                    topdown=True):
                bname = os.path.basename(root)

                # Skip any sub-directories matching ignore params
                orig_dirnames = dirnames[:]
                for n in config.fn_ignore_dirs:
                    dirnames[:] = [
                        d for d in dirnames
                        if not fnmatch.fnmatch(d, n.rstrip(os.sep))
                    ]
                    if len(orig_dirnames) != len(dirnames):
                        removed_dirs = [
                            os.path.join(root, d)
                            for d in set(orig_dirnames).symmetric_difference(
                                set(dirnames))
                        ]
                        logger.debug(
                            "Ignoring directory as matched fn_ignore_dirs: {}".
                            format(", ".join(removed_dirs)))
                        orig_dirnames = dirnames[:]
                for n in config.fn_ignore_paths:
                    dirnames[:] = [
                        d for d in dirnames if not fnmatch.fnmatch(
                            os.path.join(root, d), n.rstrip(os.sep))
                    ]
                    if len(orig_dirnames) != len(dirnames):
                        removed_dirs = [
                            os.path.join(root, d)
                            for d in set(orig_dirnames).symmetric_difference(
                                set(dirnames))
                        ]
                        logger.debug(
                            "Ignoring directory as matched fn_ignore_paths: {}"
                            .format(", ".join(removed_dirs)))

                # Skip *this* directory if matches ignore params
                d_matches = [
                    n for n in config.fn_ignore_dirs
                    if fnmatch.fnmatch(bname, n.rstrip(os.sep))
                ]
                if len(d_matches) > 0:
                    logger.debug(
                        "Ignoring directory as matched fn_ignore_dirs: {}".
                        format(bname))
                    continue
                p_matches = [
                    n for n in config.fn_ignore_paths
                    if fnmatch.fnmatch(root, n.rstrip(os.sep))
                ]
                if len(p_matches) > 0:
                    logger.debug(
                        "Ignoring directory as matched fn_ignore_paths: {}".
                        format(root))
                    continue

                # Sanity check - make sure that we're not just running in the installation directory
                if len(filenames) > 0 and all(
                    [fn in filenames
                     for fn in multiqc_installation_dir_files]):
                    logger.error(
                        "Error: MultiQC is running in source code directory! {}"
                        .format(root))
                    logger.warning(
                        "Please see the docs for how to use MultiQC: https://multiqc.info/docs/#running-multiqc"
                    )
                    dirnames[:] = []
                    filenames[:] = []
                    continue

                # Search filenames in this directory
                for fn in filenames:
                    searchfiles.append([fn, root])

    # Search through collected files
    progress_obj = rich.progress.Progress(
        "[blue]|[/]      ",
        rich.progress.SpinnerColumn(),
        "[blue]{task.description}[/] |",
        rich.progress.BarColumn(),
        "[progress.percentage]{task.percentage:>3.0f}%",
        "[green]{task.completed}/{task.total}",
        "[dim]{task.fields[s_fn]}",
    )
    with progress_obj as progress:
        mqc_task = progress.add_task("searching",
                                     total=len(searchfiles),
                                     s_fn="")
        for sf in searchfiles:
            progress.update(mqc_task,
                            advance=1,
                            s_fn=os.path.join(sf[1], sf[0])[-50:])
            if not add_file(sf[0], sf[1]):
                file_search_stats["skipped_no_match"] += 1
        progress.update(mqc_task, s_fn="")

    runtimes["total_sp"] = time.time() - total_sp_starttime
Example #22
    def get_singularity_images(self):
        """Loop through container names and download Singularity images"""

        if len(self.containers) == 0:
            log.info("No container names found in workflow")
        else:
            with DownloadProgress() as progress:
                task = progress.add_task("all_containers",
                                         total=len(self.containers),
                                         progress_type="summary")

                # Organise containers based on what we need to do with them
                containers_exist = []
                containers_cache = []
                containers_download = []
                containers_pull = []
                for container in self.containers:

                    # Fetch the output and cached filenames for this container
                    out_path, cache_path = self.singularity_image_filenames(
                        container)

                    # Check that the directories exist
                    out_path_dir = os.path.dirname(out_path)
                    if not os.path.isdir(out_path_dir):
                        log.debug(
                            f"Output directory not found, creating: {out_path_dir}"
                        )
                        os.makedirs(out_path_dir)
                    if cache_path:
                        cache_path_dir = os.path.dirname(cache_path)
                        if not os.path.isdir(cache_path_dir):
                            log.debug(
                                f"Cache directory not found, creating: {cache_path_dir}"
                            )
                            os.makedirs(cache_path_dir)

                    # We already have the target file in place, return
                    if os.path.exists(out_path):
                        containers_exist.append(container)
                        continue

                    # We have a copy of this in the NXF_SINGULARITY_CACHE dir
                    if cache_path and os.path.exists(cache_path):
                        containers_cache.append(
                            [container, out_path, cache_path])
                        continue

                    # Direct download within Python
                    if container.startswith("http"):
                        containers_download.append(
                            [container, out_path, cache_path])
                        continue

                    # Pull using singularity
                    containers_pull.append([container, out_path, cache_path])

                # Exit if we need to pull images and Singularity is not installed
                if len(containers_pull) > 0 and shutil.which(
                        "singularity") is None:
                    raise OSError(
                        "Singularity is needed to pull images, but it is not installed"
                    )

                # Go through each method of fetching containers in order
                for container in containers_exist:
                    progress.update(task, description="Image file exists")
                    progress.update(task, advance=1)

                for container in containers_cache:
                    progress.update(
                        task,
                        description=f"Copying singularity images from cache")
                    self.singularity_copy_cache_image(*container)
                    progress.update(task, advance=1)

                with concurrent.futures.ThreadPoolExecutor(
                        max_workers=self.parallel_downloads) as pool:
                    progress.update(
                        task, description="Downloading singularity images")

                    # Kick off concurrent downloads
                    future_downloads = [
                        pool.submit(self.singularity_download_image,
                                    *container, progress)
                        for container in containers_download
                    ]

                    # Make ctrl-c work with multi-threading
                    self.kill_with_fire = False

                    try:
                        # Iterate over each threaded download, waiting for them to finish
                        for future in concurrent.futures.as_completed(
                                future_downloads):
                            try:
                                future.result()
                            except Exception:
                                raise
                            else:
                                try:
                                    progress.update(task, advance=1)
                                except Exception as e:
                                    log.error(
                                        f"Error updating progress bar: {e}")

                    except KeyboardInterrupt:
                        # Cancel the future threads that haven't started yet
                        for future in future_downloads:
                            future.cancel()
                        # Set the variable that the threaded function looks for
                        # Will trigger an exception from each thread
                        self.kill_with_fire = True
                        # Re-raise exception on the main thread
                        raise

                for container in containers_pull:
                    progress.update(task,
                                    description="Pulling singularity images")
                    try:
                        self.singularity_pull_image(*container, progress)
                    except RuntimeWarning as r:
                        # Raise exception if this is not possible
                        log.error(
                            "Not able to pull image. Service might be down or internet connection is dead."
                        )
                        raise r
                    progress.update(task, advance=1)
Example #23
    def singularity_download_image(self, container, out_path, cache_path,
                                   progress):
        """Download a singularity image from the web.

        Use native Python to download the file.

        Args:
            container (str): A pipeline's container name. Usually it is of similar format
                to ``https://depot.galaxyproject.org/singularity/name:version``
            out_path (str): The final target output path
            cache_path (str, None): The NXF_SINGULARITY_CACHEDIR path if set, None if not
            progress (Progress): Rich progress bar instance to add tasks to.
        """
        log.debug(f"Downloading Singularity image: '{container}'")

        # Set output path to save file to
        output_path = cache_path or out_path
        output_path_tmp = f"{output_path}.partial"
        log.debug(f"Downloading to: '{output_path_tmp}'")

        # Set up progress bar
        nice_name = container.split("/")[-1][:50]
        task = progress.add_task(nice_name,
                                 start=False,
                                 total=False,
                                 progress_type="download")
        try:
            # Delete temporary file if it already exists
            if os.path.exists(output_path_tmp):
                os.remove(output_path_tmp)

            # Open file handle and download
            with open(output_path_tmp, "wb") as fh:
                # Disable caching as this breaks streamed downloads
                with requests_cache.disabled():
                    r = requests.get(container,
                                     allow_redirects=True,
                                     stream=True,
                                     timeout=60 * 5)
                    filesize = r.headers.get("Content-length")
                    if filesize:
                        progress.update(task, total=int(filesize))
                        progress.start_task(task)

                    # Stream download
                    for data in r.iter_content(chunk_size=4096):
                        # Check that the user didn't hit ctrl-c
                        if self.kill_with_fire:
                            raise KeyboardInterrupt
                        progress.update(task, advance=len(data))
                        fh.write(data)

            # Rename partial filename to final filename
            os.rename(output_path_tmp, output_path)
            output_path_tmp = None

            # Copy cached download if we are using the cache
            if cache_path:
                log.debug("Copying {} from cache: '{}'".format(
                    container, os.path.basename(out_path)))
                progress.update(
                    task, description="Copying from cache to target directory")
                shutil.copyfile(cache_path, out_path)

            progress.remove_task(task)

        except:
            # Kill the progress bars
            for t in progress.task_ids:
                progress.remove_task(t)
            # Try to delete the incomplete download
            log.debug(
                f"Deleting incompleted singularity image download:\n'{output_path_tmp}'"
            )
            if output_path_tmp and os.path.exists(output_path_tmp):
                os.remove(output_path_tmp)
            if output_path and os.path.exists(output_path):
                os.remove(output_path)
            # Re-raise the caught exception
            raise
Example #24
def main():
    parser = argparse.ArgumentParser(prog='hasher', description='hash files.')
    parser.add_argument('algo',
                        type=str,
                        choices=hashlib.algorithms_available,
                        help='one of these hash algorithms')
    parser.add_argument('input',
                        type=str,
                        help='file path, omit if reading from stdin',
                        nargs='*')
    parser.add_argument('--version',
                        action='version',
                        version=f'%(prog)s {__version__}')
    parser.add_argument('-b',
                        '--buffer-size',
                        default=65536,
                        type=int,
                        nargs='?',
                        help='buffer size. default 65536')
    parser.add_argument('-c',
                        '--checksum_file',
                        type=str,
                        nargs='?',
                        help='checksum file to check against')
    parser.add_argument('-g',
                        '--glob',
                        action='store_true',
                        help='treat input as glob pattern')
    parser.add_argument('-p',
                        '--parallel',
                        default=1,
                        type=int,
                        nargs='?',
                        help='parallel count')
    parser.add_argument('--progress',
                        action='store_true',
                        help='print progress bar to stderr')
    args = parser.parse_args()

    checksum_file = args.checksum_file
    if checksum_file and not os.path.exists(checksum_file):
        raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT),
                                checksum_file)

    # If not connected to a tty device, i.e. terminal, ignore input argument and read from stdin.
    hashed_lst = []
    if not sys.stdin.isatty():
        # read from stdin
        hashed = _hash_stream(args.algo, lambda: sys.stdin.buffer.read(4096))
        hashed_lst.append(
            Hashed(input_name='-',
                   algo=hashed.name,
                   hex=hashed.hexdigest(),
                   mode='b'))
        _print_hashed(hashed_lst)  # print result
    else:
        # read from files
        paths = []
        expected_hashes: typ.Dict[str, Hashed] = None
        # if checksum file provided, hash file in it. Otherwise use input.
        if checksum_file:
            matches = [m for m in parse_checksum_file(checksum_file)]
            paths = [m.groupdict()['input_name'] for m in matches]
            # construct dict from parsed values
            # input_name -> Hashed(input_name, hex, mode)
            expected_hashes = {
                d['input_name']: Hashed(input_name=d['input_name'],
                                        hex=d['hex'],
                                        mode='b' if d['mode'] == '*' else 't')
                for d in [m.groupdict() for m in matches]
            }
        else:
            if args.glob:
                [
                    paths.extend(glob.glob(path, recursive=True))
                    for path in args.input
                ]
            else:
                paths = args.input

        rich_print = False
        if args.progress:
            try:
                import rich.console
                import rich.progress
                rich_print = True
            except ImportError as e:
                print(f'Warning: Failed to import \'rich\'. {e}',
                      file=sys.stderr)

        if rich_print:
            console = rich.console.Console(stderr=True)
            with rich.progress.Progress(console=console,
                                        transient=True) as progress:
                task_id = progress.add_task(total=len(paths),
                                            description='Hashing...')
                _hash_paths(paths, hashed_lst, args, task_id, progress)
        else:
            _hash_paths(paths, hashed_lst, args)

        if expected_hashes is not None:
            """Match hex from file to actual file from filesystem.
            Result could be one of [match, mismatch, file not found]"""
            mismatch = 0
            not_found = 0
            ok = 0

            actual_hashes: typ.Dict[str, Hashed] = {
                h.input_name: h
                for h in hashed_lst
            }
            for filename, expected in expected_hashes.items():
                if filename not in actual_hashes:
                    # should not happen
                    print(f'{filename}: No candidate?', file=sys.stderr)
                    not_found += 1
                    continue
                actual = actual_hashes[filename]
                if actual.err is not None and len(actual.err) > 0:
                    print(f'{filename}: {actual.err}', file=sys.stderr)
                    not_found += 1
                else:
                    if expected.hex == actual.hex:
                        print(f'{filename}: OK')
                        ok += 1
                    else:
                        print(f'{filename}: Mismatch')
                        mismatch += 1

            print(f'Total {len(expected_hashes.keys())} files',
                  file=sys.stderr)
            print(f'{ok} file{"s" if ok > 1 else ""} OK', file=sys.stderr)
            if not_found > 0:
                print(
                    f'{not_found} file{"s" if not_found > 1 else ""} cannot be found',
                    file=sys.stderr)
            if mismatch > 0:
                print(
                    f'{mismatch} file{"s" if mismatch > 1 else ""} checksum mismatch',
                    file=sys.stderr)
        else:
            _print_hashed(hashed_lst)
Example #25
def run(prefix, archive, program, prefix_specified, copy_threshold,
        distance_threshold):
    start = timer()
    failed_runs = []
    error_runs = []
    lagrange_runner = lagrange.lagrange(program)

    console = rich.console.Console()
    linreg_xs = []
    linreg_ys = []

    with rich.progress.Progress() as progress:
        jobs = directory.extractTarFileAndMakeDirectories(
            archive, prefix, progress)
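        # The extraction helper is handed the Progress instance, presumably so
        # it can register its own task while unpacking the archive.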

        random.shuffle(jobs)

        work_task = progress.add_task("[red]Running...", total=len(jobs))
        for expected, experiment in jobs:
            try:
                experiment.runExperiment(lagrange_runner)
            except directory.ExperimentFilesMissing:
                error_runs.append(experiment)
            progress.update(work_task, advance=1.0)

        check_task = progress.add_task("[red]Checking...", total=len(jobs))

        for expected, experiment in jobs:
            # advance up front so experiments skipped below still count toward the bar
            progress.update(check_task, advance=1.0)
            if experiment.failed():
                continue
            parameter_diff = expected.parameterVectorDifference(experiment)
            try:
                dist = expected.metricCompare(experiment)
            except Exception:
                experiment.setFailed()
                continue

            linreg_xs.append(parameter_diff)
            linreg_ys.append(dist)
            if dist > distance_threshold:
                failed_runs.append(
                    directory.ExperimentWithDistance(experiment, dist))

    linreg_rsquared = 0.0  # default so the threshold check below cannot hit a NameError
    if len(linreg_xs) > 0:
        linreg_result = LinearRegression().fit(linreg_xs, linreg_ys)
        linreg_rsquared = linreg_result.score(linreg_xs, linreg_ys)
        console.print("Parameter error regression R^2: {}".format(
            linreg_rsquared))

    with open(os.path.join(prefix, "failed_paths.yaml"), "w") as outfile:
        yaml.add_representer(directory.ExpectedTrialDirectory,
                             directory.DirectoryRepresenter)
        yaml.add_representer(directory.ExperimentTrialDirectory,
                             directory.DirectoryRepresenter)
        yaml.add_representer(directory.ExperimentWithDistance,
                             directory.ExperimentWithDistanceRepresenter)
        outfile.write(
            yaml.dump({
                "failed-runs": failed_runs,
                "error-runs": error_runs
            }))

    if len(failed_runs) != 0 or len(error_runs) != 0:
        if len(failed_runs) != 0:
            console.print(
                "Tests that completed, but gave a wrong result (top 10):",
                sorted(failed_runs, key=lambda a: a._dist)[-10:])
            console.print(
                "Total of {} ({}%) jobs resulted in errors over tolerance".
                format(len(failed_runs),
                       len(failed_runs) / len(jobs) * 100))
        if len(error_runs) != 0:
            console.print("Tests that failed to complete:",
                          sorted(error_runs, key=lambda d: d._path))
            console.print("Total of {} ({}%) jobs failed to run".format(
                len(error_runs),
                len(error_runs) / len(jobs) * 100))
        if not prefix_specified and (
            (len(failed_runs) > copy_threshold and not linreg_rsquared > 0.95)
                or len(error_runs) != 0):
            basename = os.path.split(prefix)[1]
            new_prefix = os.path.abspath(os.path.join(os.getcwd(), basename))
            console.print(
                "Copying the failed directories to {}".format(new_prefix))
            shutil.copytree(prefix, new_prefix)

    else:
        console.print("[bold green]All Clear!")
    end = timer()
    console.print("Testing took {:.3f} seconds".format(end - start))
Example #26
0
    def build(self, config, engine, use_lambda=False, workers=3, console=None):
        """
        Builds the index table for objects in S3.
        """
        logging.info('Finding keys in %s...', self.s3_prefix)
        s3_objects = list(
            list_objects(config.s3_bucket, self.s3_prefix, exclude='_SUCCESS'))

        # delete all stale keys; get the list of objects left to index
        objects = self.delete_stale_keys(engine, s3_objects, console=console)

        # calculate the total size of all the objects
        total_size = sum(obj['Size'] for obj in objects)

        # progress format
        p_fmt = [
            "[progress.description]{task.description}",
            rich.progress.BarColumn(),
            rich.progress.FileSizeColumn(),
            "[progress.percentage]{task.percentage:>3.0f}%"
        ]
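        # FileSizeColumn renders task.completed as a human-readable size, which
        # is why the overall task below is tracked in bytes (total_size).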

        if objects:
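            # Drop the index up front: bulk inserts are cheaper without index
            # maintenance, and the index is rebuilt once indexing completes.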
            self.schema.drop_index(engine, self.table)

            # as each job finishes...
            with rich.progress.Progress(*p_fmt, console=console) as progress:
                overall = progress.add_task('[green]Indexing keys...[/]',
                                            total=total_size)

                # read several files in parallel
                pool = concurrent.futures.ThreadPoolExecutor(
                    max_workers=workers)

                # index the objects remotely using lambda or locally
                if use_lambda:
                    self.index_objects_remote(
                        config,
                        engine,
                        pool,
                        objects,
                        progress,
                        overall,
                    )
                else:
                    self.index_objects_local(
                        config,
                        engine,
                        pool,
                        objects,
                        progress,
                        overall,
                    )

                # finally, build the index after all inserts are done
                logging.info('Building table index...')

            # each table knows how to build its own index
            self.schema.create_index(engine, self.table)

        # set the built flag for the index
        self.set_built_flag(engine, True)

        # done indexing
        logging.info('Index is up to date')
Example #27
0
def make_report(spec: Spec, storage: Storage, output: Path) -> None:
    print(storage.get_branches())
    msg.info("Begin report generation")
    global github_project
    github_project = spec.github_project

    with rich.progress.Progress() as progress:
        task = progress.add_task("Creating dataframe",
                                 total=storage.num_runs())

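        # storage.dataframe presumably invokes this callback once per run
        # (the task total above is storage.num_runs()), advancing the bar.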
        def update():
            progress.advance(task)

        df, metrics_by_group = storage.dataframe(with_metrics=True,
                                                 progress_callback=update)

    metrics_by_group = {
        g: list(filter(lambda m: spec.report_filter(m, df), ms))
        for g, ms in metrics_by_group.items()
    }

    msg.good("Dataframe created")

    env = make_environment()
    env.globals["metrics"] = metrics_by_group
    env.globals["github_project"] = spec.github_project

    copy_static(output)

    global current_url
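    # current_url is presumably consumed by template helpers to build links
    # relative to the page being rendered; push_url below overrides it per group.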

    # start page
    tpl = env.get_template("index.html.j2")
    current_url = "/"
    (output / "index.html").write_text(tpl.render())

    group_tpl = env.get_template("group.html.j2")

    for group, metrics in metrics_by_group.items():
        msg.info(f"Group: {group}")

        with ProcessPoolExecutor() as ex:
            futures = [
                ex.submit(
                    process_metric,
                    m,
                    df,
                    output,
                    metrics_by_group,
                    spec.github_project,
                    spec.report_num_commits,
                ) for m in metrics
            ]
            for f in rich.progress.track(as_completed(futures),
                                         total=len(futures)):
                metric = f.result()
                print(metric.name)
            msg.good(f"Completed group {group}")

        # for metric in rich.progress.track(metrics):
        #     process_metric(metric, df, output, env)

        url = group_url(group)
        page = output / url / "index.html"

        with push_url(url):
            page.write_text(group_tpl.render(group=group))
Example #28
0
    def __init__(self,
                 kernel,
                 bin,
                 profile,
                 mutop,
                 timeout=30,
                 fitness='time',
                 popsize=128,
                 llvm_src_filename='cuda-device-only-kernel.ll',
                 use_fitness_map=True,
                 combine_positive_epistasis=False,
                 CXPB=0.8,
                 MUPB=0.1,
                 err_rate='0.01',
                 global_seed=None):
        self.CXPB = CXPB
        self.MUPB = MUPB
        self.err_rate = err_rate
        self.kernels = kernel
        self.appBinary = bin
        self.timeout = timeout
        self.fitness_function = fitness
        self.use_fitness_map = use_fitness_map
        self.combine_positive_epistasis = combine_positive_epistasis
        self.popsize = popsize
        self.mutop = mutop.split(',')
        self.rng = {}
        if global_seed is not None:
            random.seed(global_seed)

        try:
            with open(llvm_src_filename, 'r') as f:
                self.initSrcEnc = f.read().encode()
        except IOError:
            print("File {} does not exist".format(llvm_src_filename))
            exit(1)

        self.verifier = profile['verify']

        # Tools initialization
        # Detect GPU property
        cuda.init()
        # TODO: check if there are multiple GPUs.
        SM_MAJOR, SM_MINOR = cuda.Device(0).compute_capability()
        self.mgpu = 'sm_' + str(SM_MAJOR) + str(SM_MINOR)
        print(f'[Initializing GEVO] GPU compute capability: {self.mgpu}')

        # check Nvidia Profiler exists
        self.nvprof_path = shutil.which('nvprof')
        if self.nvprof_path is None:
            raise Exception('nvprof cannot be found')
        print(f'[Initializing GEVO] nvprof detected: {self.nvprof_path}')

        # Minimize both performance and error
        creator.create("FitnessMin", base.Fitness, weights=(-1.0, -1.0))
        creator.create("Individual",
                       irind.llvmIRrep,
                       fitness=creator.FitnessMin)
        self.history = tools.History()
        self.toolbox = base.Toolbox()
        self.toolbox.register('mutate', self.mutLLVM)
        self.toolbox.register('mate', self.cxOnePointLLVM)
        # self.toolbox.register('select', tools.selDoubleTournament, fitness_size=2, parsimony_size=1.4, fitness_first=True)
        self.toolbox.register('select', tools.selNSGA2)
        self.toolbox.register('individual',
                              creator.Individual,
                              srcEnc=self.initSrcEnc,
                              mgpu=self.mgpu)
        self.toolbox.register('population', tools.initRepeat, list,
                              self.toolbox.individual)
        # Decorate the variation operators
        self.toolbox.decorate("mate", self.history.decorator)
        self.toolbox.decorate("mutate", self.history.decorator)

        self.stats = tools.Statistics(lambda ind: ind.fitness.values)
        self.stats.register("min", min)
        self.stats.register("max", max)

        self.logbook = tools.Logbook()
        self.paretof = tools.ParetoFront()
        self.logbook.header = "gen", "evals", "min", "max"

        # Set up testcase
        self.origin = creator.Individual(self.initSrcEnc, self.mgpu)
        self.origin.ptx(self.cudaPTX)
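        # Build the test-case argument matrix as a cartesian product of every
        # argument's candidate values. An argument with a 'bond' entry does not
        # expand the product; its value is chosen to line up with the value
        # already picked for the argument it is bonded to.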
        arg_array = [[]]
        for i, arg in enumerate(profile['args']):
            if arg.get('bond', None) is None:
                arg_array_next = [
                    e[:] for e in arg_array for _ in range(len(arg['value']))
                ]
                arg_array = arg_array_next
                for e1, e2 in zip(arg_array, cycle(arg['value'])):
                    e1.append(e2)
            else:
                for e in arg_array:
                    bonded_arg = arg['bond'][0]
                    bonded_idx = profile['args'][bonded_arg]['value'].index(
                        e[bonded_arg])
                    e.append(arg['value'][bonded_idx])

        arg_array = [[str(e) for e in args] for args in arg_array]

        self.testcase = []
        for i in range(len(arg_array)):
            self.testcase.append(
                self._testcase(self, i, kernel, bin, profile['verify']))
        with rich.progress.Progress(
                "[Initializing GEVO] Evaluate original program with test cases",
                "({task.completed} / {task.total})",
                rich.progress.TimeElapsedColumn()) as progress:
            task = progress.add_task("", total=len(arg_array))
            for tc, arg in zip(self.testcase, arg_array):
                tc.args = arg
                tc.evaluate()
                progress.update(task, advance=1)

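        # Aggregate the original program's fitness as (mean fitness across test
        # cases, worst-case error); with fitness='time' the first term is
        # presumably the average runtime.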
        self.ofits = [tc.fitness[0] for tc in self.testcase]
        self.oerrs = [tc.fitness[1] for tc in self.testcase]
        self.origin.fitness.values = (sum(self.ofits) / len(self.ofits),
                                      max(self.oerrs))
        self.editFitMap[tuple()] = self.origin.fitness.values
        print(
            f"Average fitness of the original program: ({self.origin.fitness.values[0]:.2f}, {self.origin.fitness.values[1]:.2f})"
        )
        print("Individual test cases:")
        for fit, err in zip(self.ofits, self.oerrs):
            print(f"\t({fit:.2f}, {err:.2f})")
        self.positive_epistasis = {}
        self.negative_epistasis = {}
        self.need_discussion = {}