def load_project_config(path: Path, interpolate: bool = True) -> Dict[str, Any]:
    """Load the project.yml file from a directory and validate it. Also make
    sure that all directories defined in the config exist.

    path (Path): The path to the project directory.
    interpolate (bool): Whether to substitute project variables.
    RETURNS (Dict[str, Any]): The loaded project.yml.
    """
    config_path = path / PROJECT_FILE
    if not config_path.exists():
        msg.fail(f"Can't find {PROJECT_FILE}", config_path, exits=1)
    invalid_err = f"Invalid {PROJECT_FILE}. Double-check that the YAML is correct."
    try:
        config = srsly.read_yaml(config_path)
    except ValueError as e:
        msg.fail(invalid_err, e, exits=1)
    errors = validate(ProjectConfigSchema, config)
    if errors:
        msg.fail(invalid_err)
        print("\n".join(errors))
        sys.exit(1)
    validate_project_version(config)
    validate_project_commands(config)
    # Make sure directories defined in config exist. exist_ok=True avoids a
    # TOCTOU race between a separate exists() check and mkdir() if another
    # process creates the directory in between, and is a no-op when the
    # directory is already there.
    for subdir in config.get("directories", []):
        dir_path = path / subdir
        dir_path.mkdir(parents=True, exist_ok=True)
    if interpolate:
        err = "project.yml validation error"
        with show_validation_error(title=err, hint_fill=False):
            config = substitute_project_variables(config)
    return config
def main(template_path, output=None, data_path=None):
    """Convert a jinja2 template to a JavaScript module.

    template_path (Path): Path to .jinja file.
    output (Optional[Path]): Path to output .js module (stdout if unset).
    data_path (Optional[Path]): Optional JSON or YAML file with additional data
        to be included in the JS module as the exported variable DATA.
    """
    # Default: an empty JS object literal if no data file is provided
    data = "{}"
    if data_path is not None:
        if data_path.suffix in (".yml", ".yaml"):
            data = srsly.read_yaml(data_path)
        else:
            data = srsly.read_json(data_path)
        data = srsly.json_dumps(data)  # dump and load for compactness
    template_path = Path(template_path)
    tpl_file = template_path.name  # final path component (idiomatic parts[-1])
    compiler = JinjaToJS(template_path.parent, tpl_file, js_module_format="es6")
    header = f"// This file was auto-generated by {__file__} based on {tpl_file}"
    data_str = f"export const DATA = {data}"
    result = compiler.get_output()
    if output is not None:
        with output.open("w", encoding="utf8") as f:
            f.write(f"{header}\n{result}\n{data_str}")
        print(f"Updated {output.name}")
    else:
        print(result)
def check_rerun( project_dir: Path, command: Dict[str, Any], *, check_spacy_version: bool = True, check_spacy_commit: bool = False, ) -> bool: """Check if a command should be rerun because its settings or inputs/outputs changed. project_dir (Path): The current project directory. command (Dict[str, Any]): The command, as defined in the project.yml. strict_version (bool): RETURNS (bool): Whether to re-run the command. """ # Always rerun if no-skip is set if command.get("no_skip", False): return True lock_path = project_dir / PROJECT_LOCK if not lock_path.exists(): # We don't have a lockfile, run command return True data = srsly.read_yaml(lock_path) if command["name"] not in data: # We don't have info about this command return True entry = data[command["name"]] # Always run commands with no outputs (otherwise they'd always be skipped) if not entry.get("outs", []): return True # Always rerun if spaCy version or commit hash changed spacy_v = entry.get("spacy_version") commit = entry.get("spacy_git_version") if check_spacy_version and not is_minor_version_match( spacy_v, about.__version__): info = f"({spacy_v} in {PROJECT_LOCK}, {about.__version__} current)" msg.info( f"Re-running '{command['name']}': spaCy minor version changed {info}" ) return True if check_spacy_commit and commit != GIT_VERSION: info = f"({commit} in {PROJECT_LOCK}, {GIT_VERSION} current)" msg.info( f"Re-running '{command['name']}': spaCy commit changed {info}") return True # If the entry in the lockfile matches the lockfile entry that would be # generated from the current command, we don't rerun because it means that # all inputs/outputs, hashes and scripts are the same and nothing changed lock_entry = get_lock_entry(project_dir, command) exclude = ["spacy_version", "spacy_git_version"] return get_hash(lock_entry, exclude=exclude) != get_hash(entry, exclude=exclude)
def update_lockfile(project_dir: Path, command: Dict[str, Any]) -> None:
    """Update the lockfile after running a command. Will create a lockfile if
    it doesn't yet exist and will add an entry for the current command, its
    script and dependencies/outputs.

    project_dir (Path): The current project directory.
    command (Dict[str, Any]): The command, as defined in the project.yml.
    """
    lock_path = project_dir / PROJECT_LOCK
    # Load existing lockfile data, or start fresh. No need to write an empty
    # file first (as a placeholder): the single write below creates the file
    # either way, avoiding a redundant write.
    data = srsly.read_yaml(lock_path) if lock_path.exists() else {}
    data[command["name"]] = get_lock_entry(project_dir, command)
    srsly.write_yaml(lock_path, data)
from pathlib import Path from wasabi import Printer, diff_strings from thinc.api import Config import srsly import re from jinja2 import Template from .. import util from ..language import DEFAULT_CONFIG_PRETRAIN_PATH from ..schemas import RecommendationSchema from ._util import init_cli, Arg, Opt, show_validation_error, COMMAND from ._util import string_to_list, import_code ROOT = Path(__file__).parent / "templates" TEMPLATE_PATH = ROOT / "quickstart_training.jinja" RECOMMENDATIONS = srsly.read_yaml(ROOT / "quickstart_training_recommendations.yml") class Optimizations(str, Enum): efficiency = "efficiency" accuracy = "accuracy" @init_cli.command("config") def init_config_cli( # fmt: off output_file: Path = Arg( ..., help= "File to save config.cfg to or - for stdout (will only output config and no additional logging info)", allow_dash=True),
def config_context(config_path: Path = Path("ddrp.yml")) -> DDRPRegistry:
    """Load and parse the DDRP registry configuration from a YAML file.

    config_path (Path): Path to the YAML config file. Defaults to "ddrp.yml"
        in the current working directory, preserving the previous hard-coded
        behavior while allowing callers to point at another location.
    RETURNS (DDRPRegistry): The parsed registry object.
    """
    config_dict = read_yaml(config_path)
    return DDRPRegistry.parse_obj(config_dict)