Beispiel #1
0
def load_project_config(path: Path,
                        interpolate: bool = True) -> Dict[str, Any]:
    """Read and validate the project file found in a project directory, and
    create any directories listed in it that are missing. A missing or
    invalid project file is reported via the printer and terminates the
    process with exit code 1.

    path (Path): The project directory containing the project file.
    interpolate (bool): If True, run variable substitution on the loaded
        config before returning it.
    RETURNS (Dict[str, Any]): The loaded (and optionally interpolated) config.
    """
    project_file = path / PROJECT_FILE
    if not project_file.exists():
        msg.fail(f"Can't find {PROJECT_FILE}", project_file, exits=1)
    invalid_err = f"Invalid {PROJECT_FILE}. Double-check that the YAML is correct."
    try:
        config = srsly.read_yaml(project_file)
    except ValueError as e:
        msg.fail(invalid_err, e, exits=1)
    schema_errors = validate(ProjectConfigSchema, config)
    if schema_errors:
        msg.fail(invalid_err)
        print("\n".join(schema_errors))
        sys.exit(1)
    validate_project_version(config)
    validate_project_commands(config)
    # Create configured directories one at a time: an earlier mkdir (with
    # parents=True) may already have created a later entry, so the existence
    # check has to happen right before each creation.
    for subdir in config.get("directories", []):
        target = path / subdir
        if target.exists():
            continue
        target.mkdir(parents=True)
    if not interpolate:
        return config
    # Keep the return outside the context manager so that whatever config is
    # bound after the block is what callers receive.
    with show_validation_error(title="project.yml validation error",
                               hint_fill=False):
        config = substitute_project_variables(config)
    return config
def main(template_path, output=None, data_path=None):
    """Compile a jinja2 template into an ES6 JavaScript module.

    When an output path is given, the file written contains a generated-file
    header, the compiled template and an `export const DATA = ...` line. When
    no output path is given, only the compiled template is printed to stdout.

    template_path (Path): Path to the .jinja file.
    output (Optional[Path]): Path of the .js module to write (stdout if unset).
    data_path (Optional[Path]): Optional JSON or YAML file with additional data
        to be included in the written JS module as the exported variable DATA.
    """
    if data_path is None:
        data = "{}"
    else:
        # Pick the reader by file extension, then re-serialize as compact JSON
        is_yaml = data_path.suffix in (".yml", ".yaml")
        reader = srsly.read_yaml if is_yaml else srsly.read_json
        data = srsly.json_dumps(reader(data_path))
    template_path = Path(template_path)
    tpl_file = template_path.parts[-1]
    compiler = JinjaToJS(
        template_path.parent,
        tpl_file,
        js_module_format="es6",
    )
    header = f"// This file was auto-generated by {__file__} based on {tpl_file}"
    data_str = f"export const DATA = {data}"
    result = compiler.get_output()
    if output is None:
        print(result)
        return
    with output.open("w", encoding="utf8") as f:
        f.write(f"{header}\n{result}\n{data_str}")
    print(f"Updated {output.parts[-1]}")
Beispiel #3
0
def check_rerun(
    project_dir: Path,
    command: Dict[str, Any],
    *,
    check_spacy_version: bool = True,
    check_spacy_commit: bool = False,
) -> bool:
    """Decide whether a command needs to be executed again, based on its
    settings and on the entry recorded for it in the project lockfile.

    project_dir (Path): The current project directory.
    command (Dict[str, Any]): The command, as defined in the project.yml.
    check_spacy_version (bool): Rerun if the spaCy version stored in the
        lockfile entry is not a minor-version match for the current one.
    check_spacy_commit (bool): Rerun if the spaCy commit stored in the
        lockfile entry differs from the current GIT_VERSION.
    RETURNS (bool): Whether to re-run the command.
    """
    # Commands flagged with no_skip are never skipped
    if command.get("no_skip", False):
        return True
    lock_path = project_dir / PROJECT_LOCK
    # Without a lockfile there is nothing to compare against
    if not lock_path.exists():
        return True
    lock_data = srsly.read_yaml(lock_path)
    # The lockfile has no record of this command yet
    if command["name"] not in lock_data:
        return True
    entry = lock_data[command["name"]]
    # A recorded entry without outputs always triggers a run (otherwise such
    # commands would always be skipped)
    if not entry.get("outs", []):
        return True
    # Compare the recorded spaCy version / commit hash with the current ones
    spacy_v = entry.get("spacy_version")
    commit = entry.get("spacy_git_version")
    if check_spacy_version:
        if not is_minor_version_match(spacy_v, about.__version__):
            info = f"({spacy_v} in {PROJECT_LOCK}, {about.__version__} current)"
            msg.info(
                f"Re-running '{command['name']}': spaCy minor version changed {info}"
            )
            return True
    if check_spacy_commit and commit != GIT_VERSION:
        info = f"({commit} in {PROJECT_LOCK}, {GIT_VERSION} current)"
        msg.info(
            f"Re-running '{command['name']}': spaCy commit changed {info}")
        return True
    # Build the entry the current command would produce and compare it with
    # the recorded one, ignoring the spaCy version fields. Equal hashes mean
    # inputs/outputs, hashes and scripts are unchanged, so no rerun is needed.
    fresh_entry = get_lock_entry(project_dir, command)
    exclude = ["spacy_version", "spacy_git_version"]
    fresh_hash = get_hash(fresh_entry, exclude=exclude)
    recorded_hash = get_hash(entry, exclude=exclude)
    return fresh_hash != recorded_hash
Beispiel #4
0
def update_lockfile(project_dir: Path, command: Dict[str, Any]) -> None:
    """Record the current command in the project lockfile after it has run.
    If no lockfile exists yet, an empty one is created first. The entry
    stored under the command's name is whatever get_lock_entry produces for
    it, and any previous entry for that name is replaced.

    project_dir (Path): The current project directory.
    command (Dict[str, Any]): The command, as defined in the project.yml.
    """
    lock_path = project_dir / PROJECT_LOCK
    if lock_path.exists():
        lock_data = srsly.read_yaml(lock_path)
    else:
        # Start from an empty lockfile on disk and in memory
        srsly.write_yaml(lock_path, {})
        lock_data = {}
    lock_data[command["name"]] = get_lock_entry(project_dir, command)
    srsly.write_yaml(lock_path, lock_data)
Beispiel #5
0
from pathlib import Path
from wasabi import Printer, diff_strings
from thinc.api import Config
import srsly
import re
from jinja2 import Template

from .. import util
from ..language import DEFAULT_CONFIG_PRETRAIN_PATH
from ..schemas import RecommendationSchema
from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND
from ._util import string_to_list, import_code

# Directory named "templates" that sits next to this module.
ROOT = Path(__file__).parent.joinpath("templates")
# Path to the quickstart training Jinja template inside that directory.
TEMPLATE_PATH = ROOT.joinpath("quickstart_training.jinja")
# Contents of the recommendations YAML file, read once at import time.
RECOMMENDATIONS = srsly.read_yaml(
    ROOT.joinpath("quickstart_training_recommendations.yml")
)


class Optimizations(str, Enum):
    """Closed set of optimization targets: "efficiency" or "accuracy".

    Mixing in str makes each member a string equal to its value.
    """
    efficiency = "efficiency"
    accuracy = "accuracy"


@init_cli.command("config")
def init_config_cli(
    # fmt: off
    output_file: Path = Arg(
        ...,
        help=
        "File to save config.cfg to or - for stdout (will only output config and no additional logging info)",
        allow_dash=True),
Beispiel #6
0
def config_context() -> DDRPRegistry:
    """Load `ddrp.yml` (a path relative to the current working directory)
    and parse its contents into a DDRPRegistry.

    RETURNS (DDRPRegistry): The result of DDRPRegistry.parse_obj on the
        loaded YAML content.
    """
    raw_config = read_yaml(Path("ddrp.yml"))
    return DDRPRegistry.parse_obj(raw_config)