Exemple #1
0
    def _get_poetry_env(project_dir: Path):
        """Return the result of ``EnvManager.create_venv`` for ``project_dir``.

        The Poetry project is loaded from ``project_dir`` through
        ``Factory().create_poetry``.  The Poetry and clikit imports are done
        inside the function body rather than at module level.
        """
        from clikit.io import ConsoleIO
        from poetry.factory import Factory
        from poetry.utils.env import EnvManager

        project = Factory().create_poetry(project_dir)
        env_manager = EnvManager(project)
        # TODO: unify ConsoleIO with ui.output
        console = ConsoleIO()
        return env_manager.create_venv(console)
Exemple #2
0
def bundle(zip_file,
           source_dir,
           *more_root_files,
           build_dir=None,
           clean=False,
           verbose=False):
    """Bundle the package into a ZIP file.

    Parameters
    ----------
    zip_file :
        Destination archive, passed through to ``_zip``.
    source_dir :
        Project directory handed to Poetry; also the base directory against
        which ``more_root_files`` are resolved.  May be a ``str`` or a path
        object.
    *more_root_files :
        Extra paths, relative to ``source_dir``, passed to ``_zip`` after the
        build directory.
    build_dir :
        Passed to ``_build_directory``, which yields the directory actually
        used for the build.
    clean :
        Forwarded to ``_sane_env``.
    verbose :
        When true, the console verbosity is set to ``DEBUG``.
    """
    io = ConsoleIO()
    # Honour the ``verbose`` parameter; this function receives no ``args``
    # object, so the flag has to come from the keyword argument.
    if verbose:
        io.set_verbosity(DEBUG)

    with _build_directory(build_dir) as build_dir:
        build_dir = Path(build_dir)
        poetry = Factory().create_poetry(cwd=source_dir, io=io)
        env = _sane_env(poetry, build_dir, io, clean=clean)
        _install_deps(poetry, env, io)
        _install_package(poetry, env, io)
        # Coerce to Path so that a plain string ``source_dir`` can be joined
        # with the extra file names.
        source_root = Path(source_dir)
        extra_files = [source_root / name for name in more_root_files]
        _zip(zip_file, build_dir, *extra_files)
Exemple #3
0
    async def go():
        """Build the Stan program through httpstan and return a ``Model``.

        Status text is written to the console's error stream.  A
        ``RuntimeError`` carrying the server's message is raised when the
        model-creation request is not answered with 201 or when the final
        parameter query is not answered with 200.
        """
        console = ConsoleIO()
        console.error("<info>Building...</info>")
        async with stan.common.HttpstanClient() as client:
            model_name = httpstan.models.calculate_model_name(program_code)

            # Check to see if model is in cache: any status other than 404
            # from the params endpoint counts as "in cache".
            resp = await client.post(f"/{model_name}/params",
                                     json={"data": data})
            model_in_cache = resp.status != 404
            if model_in_cache:
                console.error("\n")
            else:
                console.error(" This may take some time.\n")

            # Note: during compilation `httpstan` redirects stderr to /dev/null, making `print` impossible.
            resp = await client.post("/models",
                                     json={"program_code": program_code})
            if resp.status != 201:
                raise RuntimeError(resp.json()["message"])
            assert model_name == resp.json()["name"]
            if resp.json().get("stanc_warnings"):
                console.error_line(
                    "<comment>Messages from <fg=cyan;options=bold>stanc</>:</comment>"
                )
                console.error_line(resp.json()["stanc_warnings"])

            # Query the parameter metadata now that the model exists.
            resp = await client.post(f"/{model_name}/params",
                                     json={"data": data})
            if resp.status != 200:
                raise RuntimeError(resp.json()["message"])
            params_list = resp.json()["params"]
            # Parameter names are expected to be unique.
            assert len(params_list) == len(
                {entry["name"] for entry in params_list})
            param_names, dims = zip(*[(entry["name"], entry["dims"])
                                      for entry in params_list])
            # Flatten the per-parameter constrained names into one tuple.
            constrained_param_names = ()
            for entry in params_list:
                constrained_param_names += tuple(entry["constrained_names"])

            if model_in_cache:
                console.error("<comment>Found model in cache.</comment> ")
            console.error_line("<info>Done.</info>")
            return Model(model_name, program_code, data, param_names,
                         constrained_param_names, dims, random_seed)
Exemple #4
0
        async def go():
            """Run sampling through httpstan and return the resulting fit.

            Posts one fit request per entry in ``payloads``, polls every
            operation until it reports done while driving a progress bar on
            the console's error stream, downloads each fit's output, prints
            logger messages that are not routine iteration/timing lines, and
            builds a ``stan.fit.Fit`` that is then passed through each
            plugin's ``on_post_fit`` hook.

            Raises
            ------
            ValueError
                If a fit request is answered with status 422.
            RuntimeError
                If a fit request is answered with any other non-201 status,
                if an operation's result is an error, or if downloading or
                deleting a fit returns an unexpected status.
            """
            io = ConsoleIO()
            io.error_line("<info>Sampling...</info>")
            progress_bar = ProgressBar(io)
            progress_bar.set_format("very_verbose")

            # Captures the current and maximum iteration numbers from progress
            # text of the form "Iteration: <current> / <max>".
            current_and_max_iterations_re = re.compile(
                r"Iteration:\s+(\d+)\s+/\s+(\d+)")
            async with stan.common.HttpstanClient() as client:
                # Start one fit operation per payload.
                operations = []
                for payload in payloads:
                    resp = await client.post(f"/{self.model_name}/fits",
                                             json=payload)
                    if resp.status == 422:
                        raise ValueError(str(resp.json()))
                    elif resp.status != 201:
                        raise RuntimeError(resp.json()["message"])
                    assert resp.status == 201
                    operations.append(resp.json())

                # poll to get progress for each chain until all chains finished
                current_iterations = {}  # operation name -> latest iteration seen
                while not all(operation["done"] for operation in operations):
                    for operation in operations:
                        if operation["done"]:
                            continue
                        resp = await client.get(f"/{operation['name']}")
                        assert resp.status != 404
                        # Refresh the stored operation in place with the latest state.
                        operation.update(resp.json())
                        progress_message = operation["metadata"].get(
                            "progress")
                        if not progress_message:
                            continue
                        # Only the first regex match is used; `pop(0)` raises
                        # IndexError if the message contains no match.
                        iteration, iteration_max = map(
                            int,
                            current_and_max_iterations_re.findall(
                                progress_message).pop(0))
                        if not progress_bar.get_max_steps(
                        ):  # i.e., has not started
                            progress_bar.start(max=iteration_max * num_chains)
                        current_iterations[operation["name"]] = iteration
                        # Overall progress is the sum of per-operation iterations.
                        progress_bar.set_progress(
                            sum(current_iterations.values()))
                    # Pause briefly between polling rounds.
                    await asyncio.sleep(0.01)
                # Sampling has finished. But we do not call `progress_bar.finish()` right
                # now. First we write informational messages to the screen, then we
                # redraw the (complete) progress bar. Only after that do we call `finish`.

                # Download the output of every finished operation.
                stan_outputs = []
                for operation in operations:
                    fit_name = operation["result"].get("name")
                    if fit_name is None:  # operation["result"] is an error
                        # An error result is not expected to carry a 2xx code.
                        assert not str(operation["result"]["code"]).startswith(
                            "2"), operation
                        raise RuntimeError(operation["result"]["message"])
                    resp = await client.get(f"/{fit_name}")
                    if resp.status != 200:
                        raise RuntimeError((resp.json())["message"])
                    stan_outputs.append(resp.content)

                    # clean up after ourselves when fit is uncacheable (no random seed)
                    if self.random_seed is None:
                        resp = await client.delete(f"/{fit_name}")
                        if resp.status not in {200, 202, 204}:
                            raise RuntimeError((resp.json())["message"])

            stan_outputs = tuple(
                stan_outputs)  # Fit constructor expects a tuple.

            def is_nonempty_logger_message(msg: simdjson.Object):
                """Return True for a `logger` message whose first value is not just "info:"."""
                return msg["topic"] == "logger" and msg["values"][0] != "info:"

            def is_iteration_or_elapsed_time_logger_message(
                    msg: simdjson.Object):
                """Return True for routine iteration or elapsed-time logger lines."""
                # Assumes `msg` is a message with topic `logger`.
                text = msg["values"][0]
                return (
                    text.startswith("info:Iteration:")
                    or text.startswith("info: Elapsed Time:")
                    # this detects lines following "Elapsed Time:", part of a multi-line Stan message
                    or text.startswith("info:" + " " * 15))

            # Collect logger messages that are neither empty nor routine.
            parser = simdjson.Parser()
            nonstandard_logger_messages = []
            for stan_output in stan_outputs:
                for line in stan_output.splitlines():
                    # Do not attempt to parse non-logger messages. Draws could contain nan or inf values.
                    # simdjson cannot parse lines containing such values.
                    if b'"logger"' not in line:
                        continue
                    msg = parser.parse(line)
                    if is_nonempty_logger_message(
                            msg
                    ) and not is_iteration_or_elapsed_time_logger_message(msg):
                        # Store a plain dict copy of the parsed message.
                        nonstandard_logger_messages.append(msg.as_dict())
            del parser  # simdjson.Parser is no longer used at this point.

            # Remove the progress bar, print collected messages, then redraw it.
            progress_bar.clear()
            # NOTE(review): relies on the private `_last_messages_length`
            # attribute of the progress bar.
            io.error("\x08" * progress_bar._last_messages_length
                     )  # move left to start of line
            if nonstandard_logger_messages:
                io.error_line(
                    "<comment>Messages received during sampling:</comment>")
                for msg in nonstandard_logger_messages:
                    # Replace the "info:" / "error:" prefixes with indentation.
                    text = msg["values"][0].replace("info:", "  ").replace(
                        "error:", "  ")
                    if text.strip():
                        io.error_line(f"{text}")
            progress_bar.display()  # re-draw the (complete) progress bar
            progress_bar.finish()
            io.error_line("\n<info>Done.</info>")

            fit = stan.fit.Fit(
                stan_outputs,
                num_chains,
                self.param_names,
                self.constrained_param_names,
                self.dims,
                num_warmup,
                num_samples,
                num_thin,
                save_warmup,
            )

            # Each plugin may replace the fit with its own return value.
            for entry_point in stan.plugins.get_plugins():
                Plugin = entry_point.load()
                fit = Plugin().on_post_fit(fit)
            return fit
Exemple #5
0
 def io(self):
     """Return a newly constructed ``ConsoleIO`` on every call."""
     console = ConsoleIO()
     return console
Exemple #6
0
    async def go():
        """Build the Stan program through httpstan and return a ``Model``.

        While the build request is in flight, an elapsed-time line is
        refreshed roughly every 0.1 s on terminals that support ANSI; on other
        terminals a single "Building..." line is written up front.

        Returns
        -------
        Model
            Constructed from the model name, program code, data, parameter
            names, constrained parameter names, dims and random seed.

        Raises
        ------
        ValueError
            If the server's error message wraps a ``ValueError`` whose text
            starts with "Semantic error" or "Syntax error".
        RuntimeError
            For any other non-201 answer to the build request, or a non-200
            answer to the final parameter query.
        """
        io = ConsoleIO()
        # hack: use stdout instead of stderr because httpstan silences stderr during compilation
        building_output = io.section().output
        if not io.supports_ansi():
            building_output.write("<comment>Building...</comment>")
        async with stan.common.HttpstanClient() as client:
            # Check to see if model is in cache.
            model_name = httpstan.models.calculate_model_name(program_code)
            resp = await client.post(f"/{model_name}/params",
                                     json={"data": data})
            model_in_cache = resp.status != 404

            # Run the build request as a task so the elapsed time can be
            # refreshed while waiting for it.
            task = asyncio.create_task(
                client.post("/models", json={"program_code": program_code}))
            start = time.time()
            while True:
                done, _ = await asyncio.wait({task}, timeout=0.1)
                if done:
                    break
                if io.supports_ansi():
                    building_output.clear()
                    building_output.write(
                        f"<comment>Building:</comment> {time.time() - start:0.1f}s"
                    )
            if io.supports_ansi():
                building_output.clear()
            else:
                building_output.write("\n")
            # now that httpstan has released stderr, we can use error_output again
            building_output = io.section().error_output
            resp = task.result()

            if resp.status != 201:
                # The server message is expected to embed `ValueError('...')`.
                match = re.search(r"""ValueError\(['"](.*)['"]\)""",
                                  resp.json()["message"])
                if not match:  # unknown error, should not happen
                    raise RuntimeError(resp.json()["message"])
                # Turn escape sequences such as a literal "\n" back into
                # real characters.
                exception_body = match.group(1).encode().decode(
                    "unicode_escape")
                error_type_match = re.match(r"(Semantic|Syntax) error",
                                            exception_body)
                if error_type_match:
                    error_type = error_type_match.group(0)
                    # Everything after the first line; empty when the message
                    # has only one line (indexing a split result would raise
                    # IndexError there and hide the ValueError below).
                    _, _, exception_body_without_first_line = (
                        exception_body.partition("\n"))
                    building_output.write_line(
                        f"<info>Building:</info> <error>{error_type}:</error>")
                    building_output.write_line(
                        f"<error>{exception_body_without_first_line}</error>")
                    raise ValueError(error_type)
                else:
                    raise RuntimeError(exception_body)
            if io.supports_ansi():
                building_output.clear()
            else:
                building_output.write("\n")
            if model_in_cache:
                building_output.write(
                    "<info>Building:</info> found in cache, done.")
            else:
                building_output.write(
                    f"<info>Building:</info> {time.time() - start:0.1f}s, done."
                )
            assert model_name == resp.json()["name"]
            if resp.json().get("stanc_warnings"):
                io.error_line(
                    "<comment>Messages from <fg=cyan;options=bold>stanc</>:</comment>"
                )
                io.error_line(resp.json()["stanc_warnings"])

            # Query the parameter metadata now that the model exists.
            resp = await client.post(f"/{model_name}/params",
                                     json={"data": data})
            if resp.status != 200:
                raise RuntimeError(resp.json()["message"])
            params_list = resp.json()["params"]
            # Parameter names are expected to be unique.
            assert len({param["name"]
                        for param in params_list}) == len(params_list)
            param_names, dims = zip(*((param["name"], param["dims"])
                                      for param in params_list))
            # Flatten the per-parameter constrained names into one tuple.
            constrained_param_names = sum((tuple(param["constrained_names"])
                                           for param in params_list), ())
            return Model(model_name, program_code, data, param_names,
                         constrained_param_names, dims, random_seed)
Exemple #7
0
        async def go():
            """Run sampling through httpstan and return the resulting fit.

            Posts one fit request per entry in ``payloads``, polls every
            operation until it reports done while rewriting a
            "Sampling: N%" line on the console's error output, downloads each
            fit's output, prints logger messages that are not routine
            iteration/timing lines, and builds a ``stan.fit.Fit`` that is then
            passed through each plugin's ``on_post_sample`` hook.

            Raises
            ------
            ValueError
                If a fit request is answered with status 422.
            RuntimeError
                If a fit request is answered with any other non-201 status,
                if an operation's result is an error (including
                "Initialization failed."), or if downloading or deleting a
                fit returns an unexpected status.
            """
            io = ConsoleIO()
            sampling_output = io.section().error_output
            percent_complete = 0
            sampling_output.write_line(
                f"<comment>Sampling:</comment> {percent_complete:3.0f}%")

            # Captures the current and maximum iteration numbers from progress
            # text of the form "Iteration: <current> / <max>".
            current_and_max_iterations_re = re.compile(
                r"Iteration:\s+(\d+)\s+/\s+(\d+)")
            async with stan.common.HttpstanClient() as client:
                # Start one fit operation per payload.
                operations = []
                for payload in payloads:
                    resp = await client.post(f"/{self.model_name}/fits",
                                             json=payload)
                    if resp.status == 422:
                        raise ValueError(str(resp.json()))
                    elif resp.status != 201:
                        raise RuntimeError(resp.json()["message"])
                    assert resp.status == 201
                    operations.append(resp.json())

                # poll to get progress for each chain until all chains finished
                current_iterations = {}  # operation name -> latest iteration seen
                while not all(operation["done"] for operation in operations):
                    for operation in operations:
                        if operation["done"]:
                            continue
                        resp = await client.get(f"/{operation['name']}")
                        assert resp.status != 404
                        # Refresh the stored operation in place with the latest state.
                        operation.update(resp.json())
                        progress_message = operation["metadata"].get(
                            "progress")
                        if not progress_message:
                            continue
                        # Only the first regex match is used; `pop(0)` raises
                        # IndexError if the message contains no match.
                        iteration, iteration_max = map(
                            int,
                            current_and_max_iterations_re.findall(
                                progress_message).pop(0))
                        current_iterations[operation["name"]] = iteration
                        # `iterations_count` and `total_iterations` are first
                        # bound here; they are read again after the loop.
                        iterations_count = sum(current_iterations.values())
                        total_iterations = iteration_max * num_chains
                        percent_complete = 100 * iterations_count / total_iterations
                        sampling_output.clear() if io.supports_ansi(
                        ) else sampling_output.write("\n")
                        sampling_output.write_line(
                            f"<comment>Sampling:</comment> {round(percent_complete):3.0f}% ({iterations_count}/{total_iterations})"
                        )
                    # Pause briefly between polling rounds.
                    await asyncio.sleep(0.01)

                # Fewer operations reported progress than there are chains;
                # this is treated as the fit having come from the cache.
                fit_in_cache = len(current_iterations) < num_chains

                # Download the output of every finished operation.
                stan_outputs = []
                for operation in operations:
                    fit_name = operation["result"].get("name")
                    if fit_name is None:  # operation["result"] is an error
                        # An error result is not expected to carry a 2xx code.
                        assert not str(operation["result"]["code"]).startswith(
                            "2"), operation
                        message = operation["result"]["message"]
                        if """ValueError('Initialization failed.')""" in message:
                            # NOTE(review): unlike the other `clear()` calls,
                            # this one is not guarded by `io.supports_ansi()`.
                            sampling_output.clear()
                            sampling_output.write_line(
                                "<info>Sampling:</info> <error>Initialization failed.</error>"
                            )
                            raise RuntimeError("Initialization failed.")
                        raise RuntimeError(message)

                    resp = await client.get(f"/{fit_name}")
                    if resp.status != 200:
                        raise RuntimeError((resp.json())["message"])
                    stan_outputs.append(resp.content)

                    # clean up after ourselves when fit is uncacheable (no random seed)
                    if self.random_seed is None:
                        resp = await client.delete(f"/{fit_name}")
                        if resp.status not in {200, 202, 204}:
                            raise RuntimeError((resp.json())["message"])

                sampling_output.clear() if io.supports_ansi(
                ) else sampling_output.write("\n")
                # The f-string branch is only evaluated when `fit_in_cache` is
                # false, i.e. when every chain reported progress in the loop
                # above and the two counters were therefore assigned.
                sampling_output.write_line(
                    "<info>Sampling:</info> 100%, done." if fit_in_cache else
                    f"<info>Sampling:</info> {percent_complete:3.0f}% ({iterations_count}/{total_iterations}), done."
                )
                if not io.supports_ansi():
                    sampling_output.write("\n")

            stan_outputs = tuple(
                stan_outputs)  # Fit constructor expects a tuple.

            def is_nonempty_logger_message(msg: simdjson.Object):
                """Return True for a `logger` message whose first value is not just "info:"."""
                return msg["topic"] == "logger" and msg["values"][0] != "info:"

            def is_iteration_or_elapsed_time_logger_message(
                    msg: simdjson.Object):
                """Return True for routine iteration or elapsed-time logger lines."""
                # Assumes `msg` is a message with topic `logger`.
                text = msg["values"][0]
                return (
                    text.startswith("info:Iteration:")
                    or text.startswith("info: Elapsed Time:")
                    # this detects lines following "Elapsed Time:", part of a multi-line Stan message
                    or text.startswith("info:" + " " * 15))

            # Collect logger messages that are neither empty nor routine.
            parser = simdjson.Parser()
            nonstandard_logger_messages = []
            for stan_output in stan_outputs:
                for line in stan_output.splitlines():
                    # Do not attempt to parse non-logger messages. Draws could contain nan or inf values.
                    # simdjson cannot parse lines containing such values.
                    if b'"logger"' not in line:
                        continue
                    msg = parser.parse(line)
                    if is_nonempty_logger_message(
                            msg
                    ) and not is_iteration_or_elapsed_time_logger_message(msg):
                        # Store a plain dict copy of the parsed message.
                        nonstandard_logger_messages.append(msg.as_dict())
            del parser  # simdjson.Parser is no longer used at this point.

            if nonstandard_logger_messages:
                io.error_line(
                    "<comment>Messages received during sampling:</comment>")
                for msg in nonstandard_logger_messages:
                    # Replace the "info:" / "error:" prefixes with indentation.
                    text = msg["values"][0].replace("info:", "  ").replace(
                        "error:", "  ")
                    if text.strip():
                        io.error_line(f"{text}")

            fit = stan.fit.Fit(
                stan_outputs,
                num_chains,
                self.param_names,
                self.constrained_param_names,
                self.dims,
                num_warmup,
                num_samples,
                num_thin,
                save_warmup,
            )

            # Each plugin may replace the fit with its own return value.
            for entry_point in stan.plugins.get_plugins():
                Plugin = entry_point.load()
                fit = Plugin().on_post_sample(fit)
            return fit
Exemple #8
0
 async def go():
     """Build the Stan program on an httpstan server and return a ``Model``.

     Status text is written to the console's error stream.  A
     ``RuntimeError`` carrying the server's message is raised when the
     model-creation request is not answered with 201 or when the final
     parameter query is not answered with 200.
     """
     console = ConsoleIO()
     console.error("<info>Building...</info>")
     async with stan.common.httpstan_server() as (host, port):
         model_name = httpstan.models.calculate_model_name(program_code)
         base_url = f"http://{host}:{port}"
         params_url = f"{base_url}/v1/{model_name}/params"

         # Check to see if model is in cache: any status other than 404 from
         # the params endpoint counts as "in cache".
         async with aiohttp.request("POST", params_url,
                                    json={"data": data}) as resp:
             model_in_cache = resp.status != 404
         if model_in_cache:
             console.error("\n")
         else:
             console.error(" This may take some time.\n")

         # Note: during compilation `httpstan` redirects stderr to /dev/null, making `print` impossible.
         async with aiohttp.request("POST",
                                    f"{base_url}/v1/models",
                                    json={"program_code": program_code}) as resp:
             created = await resp.json()
             if resp.status != 201:
                 raise RuntimeError(created["message"])
             assert model_name == created["name"]
             if created.get("stanc_warnings"):
                 console.error_line(
                     "<comment>Messages from <fg=cyan;options=bold>stanc</>:</comment>"
                 )
                 console.error_line(created["stanc_warnings"])

         # Query the parameter metadata now that the model exists.
         async with aiohttp.request("POST", params_url,
                                    json={"data": data}) as resp:
             params_body = await resp.json()
             if resp.status != 200:
                 raise RuntimeError(params_body["message"])
             params_list = params_body["params"]

         # Parameter names are expected to be unique.
         assert len(params_list) == len(
             {entry["name"] for entry in params_list})
         param_names, dims = zip(*[(entry["name"], entry["dims"])
                                   for entry in params_list])
         # Flatten the per-parameter constrained names into one tuple.
         constrained_param_names = ()
         for entry in params_list:
             constrained_param_names += tuple(entry["constrained_names"])

         if model_in_cache:
             console.error("<comment>Found model in cache.</comment> ")
         console.error_line("<info>Done.</info>")
         return Model(model_name, program_code, data, param_names,
                      constrained_param_names, dims, random_seed)
def solve_pypi(
    pip_specs: Dict[str, src_parser.Dependency],
    use_latest: List[str],
    pip_locked: Dict[str, src_parser.LockedDependency],
    conda_locked: Dict[str, src_parser.LockedDependency],
    python_version: str,
    platform: str,
    verbose: bool = False,
) -> Dict[str, src_parser.LockedDependency]:
    """
    Solve pip dependencies for the given platform

    Parameters
    ----------
    pip_specs :
        PEP440 package specifications
    use_latest :
        Names of packages to update to the latest version compatible with pip_specs
    pip_locked :
        Previous solution for the given platform (pip packages only)
    conda_locked :
        Current solution of conda-only specs for the given platform
    python_version :
        Version of Python in conda_locked
    platform :
        Target platform
    verbose :
        Print chatter from solver

    Returns
    -------
    The locked pip dependencies selected by the solver, keyed by package name.

    Raises
    ------
    ValueError
        If a direct-URL dependency's source URL does not carry exactly one
        ``<hash type>=<digest>`` pair in its fragment.

    """
    # Poetry solves for a root package, so wrap the requested specs in a dummy one.
    dummy_package = ProjectPackage("_dummy_package_", "0.0.0")
    for spec in pip_specs.values():
        dummy_package.add_dependency(get_dependency(spec))

    pypi = PyPiRepository()
    pool = Pool(repositories=[pypi])

    installed = Repository()
    locked = Repository()

    # PyPI name -> version for every conda package that has a PyPI equivalent
    python_packages = {}
    for dep in conda_locked.values():
        if dep.name.startswith("__"):
            continue
        try:
            pypi_name = conda_name_to_pypi_name(dep.name).lower()
        except KeyError:
            continue
        # Prefer the Python package when its name collides with the Conda package
        # for the underlying library, e.g. python-xxhash (pypi: xxhash) over xxhash
        # (pypi: no equivalent)
        if pypi_name not in python_packages or pypi_name != dep.name:
            python_packages[pypi_name] = dep.version
    # treat conda packages as both locked and installed
    for name, version in python_packages.items():
        for repo in (locked, installed):
            repo.add_package(Package(name=name, version=version))
    # treat pip packages as locked only
    for spec in pip_locked.values():
        locked.add_package(get_package(spec))

    if verbose:
        io = ConsoleIO()
        io.set_verbosity(VERY_VERBOSE)
    else:
        io = NullIO()
    s = Solver(
        dummy_package,
        pool=pool,
        installed=installed,
        locked=locked,
        io=io,
    )
    # Only previously locked pip packages can be asked to update.
    to_update = list({spec.name
                      for spec in pip_locked.values()
                      }.intersection(use_latest))
    env = PlatformEnv(python_version, platform)
    # find platform-specific solution (e.g. dependencies conditioned on markers)
    with s.use_environment(env):
        result = s.solve(use_latest=to_update)

    chooser = Chooser(pool, env=env)

    # Extract distributions from Poetry package plan, ignoring uninstalls
    # (usually: conda package with no pypi equivalent) and skipped ops
    # (already installed)
    requirements: List[src_parser.LockedDependency] = []
    for op in result:
        if isinstance(op, Uninstall) or op.skipped:
            continue

        source: Optional[src_parser.DependencySource] = None
        if op.package.source_type == "url":
            # Take direct references verbatim
            url, fragment = urldefrag(op.package.source_url)
            # The fragment is expected to look like "<hash type>=<digest>".
            fragment_parts = fragment.split("=")
            if len(fragment_parts) != 2:
                raise ValueError(
                    f"Cannot read a '<hash type>=<digest>' fragment from the "
                    f"source URL of {op.package.name}: "
                    f"{op.package.source_url!r}")
            hash_type, digest = fragment_parts
            locked_hash = src_parser.HashModel(**{hash_type: digest})
            source = src_parser.DependencySource(type="url",
                                                 url=op.package.source_url)
        else:
            # Choose the most specific distribution for the target
            link = chooser.choose_for(op.package)
            url = link.url_without_fragment
            locked_hash = src_parser.HashModel(**{link.hash_name: link.hash})

        requirements.append(
            src_parser.LockedDependency(
                name=op.package.name,
                version=str(op.package.version),
                manager="pip",
                source=source,
                platform=platform,
                dependencies={
                    dep.name: str(dep.constraint)
                    for dep in op.package.requires
                },
                url=url,
                hash=locked_hash,
            ))

    # use PyPI names of conda packages to walk the dependency tree and propagate
    # categories from explicit to transitive dependencies
    planned = {dep.name: dep for dep in requirements}

    # prefer conda packages so add them afterwards
    for conda_name, dep in conda_locked.items():
        try:
            pypi_name = conda_name_to_pypi_name(conda_name).lower()
        except KeyError:
            # no conda-name found, assuming conda packages do NOT intersect with the pip package
            continue
        planned[pypi_name] = dep

    src_parser._apply_categories(requested=pip_specs, planned=planned)

    return {dep.name: dep for dep in requirements}