Ejemplo n.º 1
0
def print_module_info(module_types, module_names, snake_storage, reverse_config_usage):
    """Print module information, limited to the requested module types and module names.

    Args:
        module_types: Names of the module types to include ("annotators", "importers", "exporters",
            "installers"). An empty value, or one containing "all", selects every type.
        module_names: Names of the modules to show. They are lowercased before being matched against the
            module keys. When empty, all modules of the selected types are printed.
        snake_storage: Object providing the all_annotators, all_importers, all_exporters and all_installers
            mappings.
        reverse_config_usage: Passed on unchanged to print_modules.
    """
    modules_by_type = {
        "annotators": snake_storage.all_annotators,
        "importers": snake_storage.all_importers,
        "exporters": snake_storage.all_exporters,
        "installers": snake_storage.all_installers
    }

    if not module_types or "all" in module_types:
        module_types = modules_by_type.keys()

    wanted_names = [name.lower() for name in module_names]

    # No specific modules requested: print everything for each selected type
    if not wanted_names:
        for module_type in module_types:
            print_modules(modules_by_type.get(module_type), module_type, reverse_config_usage, snake_storage)
        return

    # Specific modules requested: print the matching ones and remember names that never matched
    unmatched = wanted_names
    for module_type in module_types:
        candidates = modules_by_type.get(module_type)
        selected = {name: content for name, content in candidates.items() if name in wanted_names}
        if not selected:
            continue
        unmatched = [name for name in unmatched if name not in selected]
        print_modules(selected, module_type, reverse_config_usage, snake_storage)

    if unmatched:
        plural = "s" if len(unmatched) > 1 else ""
        console.print("[red]Module{} not found: {}[/red]".format(plural, ", ".join(unmatched)))
Ejemplo n.º 2
0
def prettyprint_yaml(in_dict):
    """Pretty-print a dictionary as syntax-highlighted YAML.

    Lists are indented, and repeated objects are written out in full instead of as YAML
    anchors/aliases.

    Args:
        in_dict: The data to dump as YAML and print to the console.
    """
    from rich.syntax import Syntax
    import yaml

    class MyDumper(yaml.Dumper):
        """Customized YAML dumper that indents lists and never emits aliases."""

        def increase_indent(self, flow=False, indentless=False):
            """Force indentation by never forwarding the 'indentless' flag."""
            return super().increase_indent(flow)

        def ignore_aliases(self, data):
            """Resolve aliases and replace them with their anchors' contents.

            This is overridden on the local dumper only, so that the global yaml.Dumper (and thereby every
            other yaml.dump call in the process) is left untouched.
            """
            return True

    yaml_str = yaml.dump(in_dict, default_flow_style=False, Dumper=MyDumper, indent=4, allow_unicode=True)
    # Print syntax highlighted
    console.print(Syntax(yaml_str, "yaml"))
Ejemplo n.º 3
0
def print_annotation_classes():
    """Print a two-column table listing the available annotation classes."""
    print()
    class_table = Table(title="Available annotation classes", box=box.SIMPLE, show_header=False, title_justify="left")
    class_table.add_column(no_wrap=True)
    class_table.add_column()

    # The same column titles are used for both sections of the table
    column_titles = ("  [i]Class[/i]", "[i]Annotation[/i]")

    # Section 1: classes from the "module_classes" registry entry (each value is joined line by line)
    class_table.add_row("[b]Defined by pipeline modules[/b]")
    class_table.add_row(*column_titles)
    for class_name, annotations in registry.annotation_classes["module_classes"].items():
        class_table.add_row("  " + class_name, "\n".join(annotations))

    # Section 2: classes from the "config_classes" registry entry, only shown when there are any
    config_classes = registry.annotation_classes["config_classes"]
    if config_classes:
        class_table.add_row()
        class_table.add_row("[b]From config[/b]")
        class_table.add_row(*column_titles)
        for class_name, annotation in config_classes.items():
            class_table.add_row("  " + class_name, annotation)

    console.print(class_table)
Ejemplo n.º 4
0
def print_module_summary(snake_storage):
    """Print a table with the name and description of every module, grouped by module type.

    Args:
        snake_storage: Object providing the all_annotators, all_importers, all_exporters and all_installers
            mappings.
    """
    modules_by_type = {
        "annotators": snake_storage.all_annotators,
        "importers": snake_storage.all_importers,
        "exporters": snake_storage.all_exporters,
        "installers": snake_storage.all_installers
    }

    print()
    summary = Table(title="Available modules", box=box.SIMPLE, show_header=False, title_justify="left")
    summary.add_column(no_wrap=True)
    summary.add_column()

    for type_name, type_modules in modules_by_type.items():
        summary.add_row(f"[b]{type_name.upper()}[/b]")
        for name in sorted(type_modules):
            registered_description = registry.modules[name].description or ""
            # Modules in the "custom." namespace get their description from a dedicated helper
            if name.startswith("custom."):
                shown_description = get_custom_module_description(name)
            else:
                shown_description = registered_description
            summary.add_row("  " + name, shown_description)
        # Empty row as spacing between module types
        summary.add_row()

    console.print(summary)

    usage_hints = (
        "For more details about a specific module run [green]'sparv modules \\[module name]'[/green].",
        "For more details about all modules of a specific type run [green]'sparv modules --\\[module type]'[/green].",
    )
    for hint in usage_hints:
        console.print(hint, highlight=False)
Ejemplo n.º 5
0
def copy_resource_files(data_dir: pathlib.Path):
    """Copy the packaged resource files into data_dir, backing up existing files that differ.

    Args:
        data_dir: Target directory. The directory structure of the resources is recreated below it.
    """
    resources_dir = pathlib.Path(pkg_resources.resource_filename("sparv", "resources"))

    for source in resources_dir.rglob("*"):
        relative = source.relative_to(resources_dir)
        target = data_dir / relative

        if source.is_dir():
            target.mkdir(parents=True, exist_ok=True)
            continue

        # New file: nothing to compare against or back up
        if not target.is_file():
            shutil.copy(source, target)
            continue

        # Existing file that compares equal: leave it alone
        if filecmp.cmp(source, target):
            continue

        # Existing file that differs: keep the old version as "<name>.bak" before overwriting it
        backup = data_dir / relative.parent / (relative.name + ".bak")
        shutil.copy(target, backup)
        console.print(f"{relative} has been updated and a backup was created")
        shutil.copy(source, target)
Ejemplo n.º 6
0
 def error(msg):
     """Print msg to the console, styled in red."""
     styled = Text(msg, style="red")
     console.print(styled)
Ejemplo n.º 7
0
 def warning(msg):
     """Print msg to the console, styled in yellow."""
     styled = Text(msg, style="yellow")
     console.print(styled)
Ejemplo n.º 8
0
 def info(msg):
     """Print msg to the console, styled in green."""
     styled = Text(msg, style="green")
     console.print(styled)
Ejemplo n.º 9
0
def run(sparv_datadir: Optional[str] = None):
    """Query user about data dir path unless provided by argument, and populate path with files.

    Args:
        sparv_datadir: Path to use as data directory. When given, no questions are asked, and the path is
            used even if the data directory environment variable is set.

    The process is terminated through sys.exit if the user interrupts or aborts the setup, or (with exit
    status 1) if the directories cannot be created.
    """
    default_dir = pathlib.Path(appdirs.user_data_dir("sparv"))
    current_dir = paths.get_data_path()
    path: pathlib.Path
    using_env = bool(os.environ.get(paths.data_dir_env))

    if sparv_datadir:
        # Specifying a path on the command line will perform the setup using that path, even if the environment
        # variable is set
        using_env = False
        path = pathlib.Path(sparv_datadir)
    else:
        console.print(
            "\n[b]Sparv Data Directory Setup[/b]\n\n"
            f"Current data directory: [green]{current_dir or '<not set>'}[/green]\n\n"
            "Sparv needs a place to store its configuration files, language models and other data. "
            "After selecting the directory you want to use for this purpose, Sparv will populate it with a default "
            "config file and presets. Any existing files in the target directory will be backed up. Any previous "
            "backups will be overwritten.")
        console.print(
            Padding(
                "[b]Tip:[/b] This process can also be completed non-interactively. Run 'sparv setup --help' for details. "
                f"You may also override the data directory setting using the environment variable '{paths.data_dir_env}'.",
                (1, 4)))

        if using_env:
            # The environment variable decides the path; only ask for confirmation
            try:
                cont = Confirm.ask(
                    f"[b red]NOTE:[/b red] Sparv's data directory is currently set to '{current_dir}' using the "
                    f"environment variable '{paths.data_dir_env}'. This variable takes precedence over any previous "
                    f"path set using this setup process. To change the path, either edit the environment variable, or "
                    f"delete the variable and rerun the setup command.\n"
                    "Do you want to continue the setup process using the above path?"
                )
            except KeyboardInterrupt:
                console.print("\nSetup interrupted.")
                sys.exit()
            if not cont:
                console.print("\nSetup aborted.")
                sys.exit()
            path = current_dir
        else:
            # Ask user for path
            if current_dir:
                msg = f" Leave empty to continue using '{current_dir}':"
            else:
                msg = f" Leave empty to use the default which is '{default_dir}':"

            try:
                console.print(
                    f"Enter the path to the directory you want to use.{msg}")
                path_str = input().strip()
            except KeyboardInterrupt:
                console.print("\nSetup interrupted.")
                sys.exit()
            if path_str:
                path = pathlib.Path(path_str)
            else:
                # Empty answer: keep the current directory if there is one, otherwise use the default
                path = current_dir or default_dir

    try:
        # Expand any "~"
        path = path.expanduser()
        # Create directories
        dirs = [
            paths.bin_dir.name, paths.config_dir.name, paths.models_dir.name
        ]
        path.mkdir(parents=True, exist_ok=True)
        for d in dirs:
            (path / d).mkdir(exist_ok=True)
    except Exception:
        # Deliberately not a bare 'except': KeyboardInterrupt and SystemExit must propagate instead of being
        # reported as a directory creation problem
        console.print(
            "\nAn error occurred while trying to create the directories. "
            "Make sure the path you entered is correct, and that you have the necessary read/write permissions."
        )
        sys.exit(1)

    if not using_env:
        # Save data dir setting to config file
        config_dict = {"sparv_data": str(path)}

        paths.sparv_config_file.parent.mkdir(parents=True, exist_ok=True)
        with open(paths.sparv_config_file, "w") as f:
            yaml.dump(config_dict, f)

    copy_resource_files(path)

    # Save Sparv version number to a file in data dir
    (path / VERSION_FILE).write_text(__version__)

    console.print(
        f"\nSetup completed. The Sparv data directory is set to '{path}'.")
Ejemplo n.º 10
0
def print_error(msg: str):
    """Print msg as an error: red, preceded by a line break and without automatic highlighting."""
    markup = f"[red]\n{msg}[/red]"
    console.print(markup, highlight=False)
Ejemplo n.º 11
0
def print_modules(modules: dict, module_type: str, reverse_config_usage: dict, snake_storage: snake_utils.SnakeStorage,
                  print_params: bool = False):
    """Print detailed information about the given modules and their functions.

    Args:
        modules: Mapping from module name to a mapping of function names to function info (with the key
            "description" and optionally "params" and "annotations").
        module_type: Name of the module type, printed as a header.
        reverse_config_usage: Mapping from "<module name>:<function name>" to the config variables used by
            that function.
        snake_storage: Storage object; its all_custom_annotators attribute is consulted for custom annotators.
        print_params: Whether to print function arguments. Arguments of custom annotators are always printed.
    """
    custom_annotations = snake_storage.all_custom_annotators

    # Box styles
    left_line = box.Box("    \n┃   \n┃   \n┃   \n┃   \n┃   \n┃   \n    ")
    box_style = box.Box("    \n  │ \n╶─┼╴\n  │ \n╶─┼╴\n╶─┼╴\n  │ \n    \n")

    def make_table(title, table_box):
        """Create a table with the look shared by all tables printed by this function."""
        return Table(title=title, box=table_box, show_header=False, title_justify="left", padding=(0, 2),
                     pad_edge=False, border_style="bright_black")

    def annotations_box(anns):
        """Use the lined box only when at least one annotation has a description."""
        return box_style if any(a[1] for a in anns) else box.SIMPLE

    def print_indented(table):
        """Print a table with a left padding of four."""
        console.print(Padding(table, (0, 0, 0, 4)))

    # Module type header
    print()
    console.print(f"  [b]{module_type.upper()}[/b]", style="reverse", justify="left")  # 'justify' to fill entire width
    print()

    for index, module_name in enumerate(sorted(modules)):
        # Separator between modules (not before the first one)
        if index > 0:
            console.print(Rule())

        # Module name header
        console.print(f"\n[bright_black]:[/][dim]:[/]: [b]{module_name.upper()}[/b]\n")

        # Module description: the registered one, or for custom modules without one, the custom description
        description = registry.modules[module_name].description
        if not description:
            description = get_custom_module_description(module_name) if module_name.startswith("custom.") else None
        if description:
            console.print(Padding(description, (0, 4, 1, 4)))

        functions = modules[module_name]
        for f_name in sorted(functions):
            f_info = functions[f_name]

            # Function name and description
            f_desc = f_info["description"]
            header = Panel(f"[b]{f_name.upper()}[/b]\n[i]{f_desc}[/i]", box=left_line, padding=(0, 1),
                           border_style="bright_green")
            console.print(Padding(header, (0, 2)))

            # Get parameters. For custom annotations, the custom parameters replace the regular ones
            params = f_info.get("params", {})
            custom_params = None
            custom_entry = custom_annotations.get(module_name, {}).get(f_name)
            if custom_entry:
                custom_params = custom_entry.get("params", {})
                params = custom_params

            # Annotations
            f_anns = f_info.get("annotations", {})
            if f_anns:
                table = make_table("[b]Annotations[/b]", annotations_box(f_anns))
                table.add_column(no_wrap=True)
                table.add_column()
                for f_ann in sorted(f_anns):
                    class_line = f"\n  [i dim]class:[/] <{f_ann[0].cls}>" if f_ann[0].cls else ""
                    table.add_row("• " + f_ann[0].name + class_line, f_ann[1] or "")
                print_indented(table)
            elif custom_params:
                # Print info about custom annotators
                table = make_table("[b]Annotations[/b]", annotations_box(f_anns))
                table.add_column()
                table.add_row("In order to use this annotator you first need to declare it in the 'custom_annotations' "
                              "section of your corpus configuration and specify its arguments.")
                print_indented(table)

            # Config variables
            f_config = reverse_config_usage.get(f"{module_name}:{f_name}")
            if f_config:
                console.print()
                table = make_table("[b]Configuration variables used[/b]", box_style)
                table.add_column(no_wrap=True)
                table.add_column()
                for config_key in sorted(f_config):
                    table.add_row("• " + config_key[0], config_key[1] or "")
                print_indented(table)

            # Arguments
            if (print_params and params) or custom_params:
                table = make_table("[b]Arguments[/b]", box_style)
                table.add_column(no_wrap=True)
                table.add_column()
                for p, (default, typ, li, optional) in params.items():
                    opt_str = "(optional) " if optional else ""
                    typ_str = "list of " + typ.__name__ if li else typ.__name__
                    def_str = f", default: {default!r}" if default is not None else ""
                    table.add_row("• " + p, f"{opt_str}{typ_str}{def_str}")
                print_indented(table)
            print()
Ejemplo n.º 12
0
def rule_helper(rule: RuleStorage,
                config: dict,
                storage: SnakeStorage,
                config_missing: bool = False,
                custom_rule_obj: Optional[dict] = None) -> bool:
    """
    Populate rule with Snakemake input, output and parameter list.

    Return True if a Snakemake rule should be created.

    Args:
        rule: Object containing snakemake rule parameters.
        config: Dictionary containing the corpus configuration.
        storage: Object for saving information for all rules.
        config_missing: True if there is no corpus config file.
        custom_rule_obj: Custom annotation dictionary from corpus config.

    Returns:
        True if the rule was populated and registered in storage, False if it should be skipped.

    Raises:
        util.SparvErrorMessage: If a parameter of a custom rule has neither a value in custom_rule_obj nor a
            default value.
    """
    # Only create certain rules when config is missing
    if config_missing and not rule.modelbuilder:
        return False

    # Skip any annotator that is not available for the selected corpus language
    if rule.annotator_info["language"] and sparv_config.get("metadata.language") and \
            sparv_config.get("metadata.language") not in rule.annotator_info["language"]:
        return False

    # Get this function's parameters
    params = OrderedDict(
        inspect.signature(rule.annotator_info["function"]).parameters)
    param_dict = make_param_dict(params)

    if rule.importer:
        rule.inputs.append(
            Path(get_source_path(), "{doc}." + rule.file_extension))
        storage.all_importers.setdefault(rule.module_name, {}).setdefault(
            rule.f_name, {
                "description": rule.description,
                "params": param_dict
            })
        if rule.target_name == sparv_config.get("import.importer"):
            # Importers always generate corpus text file
            rule.outputs.append(paths.work_dir / "{doc}" / io.TEXT_FILE)
            # If importer guarantees other outputs, add them to outputs list
            if rule.import_outputs:
                if isinstance(rule.import_outputs, Config):
                    rule.import_outputs = sparv_config.get(
                        rule.import_outputs, rule.import_outputs.default)
                annotations_ = set()
                renames = {}
                # Annotation list needs to be sorted to handle plain annotations before attributes
                for ann, target in sorted(
                        util.parse_annotation_list(rule.import_outputs)):
                    # Handle annotations renamed during import
                    if target:
                        source_ann, source_attr = BaseAnnotation(ann).split()
                        if not source_attr:
                            renames[ann] = target
                            ann = target
                        else:
                            ann = io.join_annotation(
                                renames.get(source_ann, source_ann), target)
                    annotations_.add(ann)

                for element in annotations_:
                    rule.outputs.append(paths.work_dir /
                                        get_annotation_path(element))

            # If import.document_annotation has been specified, add it to outputs if not already there
            if sparv_config.get("import.document_annotation"):
                doc_ann_file = paths.work_dir / get_annotation_path(
                    sparv_config.get("import.document_annotation"))
                if doc_ann_file not in rule.outputs:
                    rule.outputs.append(doc_ann_file)

    if rule.exporter:
        storage.all_exporters.setdefault(rule.module_name, {}).setdefault(
            rule.f_name, {
                "description": rule.description,
                "params": param_dict
            })
    elif rule.installer:
        storage.all_installers.setdefault(rule.module_name, {}).setdefault(
            rule.f_name, {
                "description": rule.description,
                "params": param_dict
            })

    output_dirs = set()  # Directories where export files are stored
    custom_params = set()  # Custom rule parameters not yet matched against a function parameter

    if custom_rule_obj:
        if custom_rule_obj.get("params"):
            name_custom_rule(rule, storage)
            custom_params = set(custom_rule_obj.get("params").keys())
        else:
            # This rule has already been populated, so don't process it again
            return False

    # Go through function parameters and handle based on type
    for param_name, param in params.items():
        param_default_empty = param.default == inspect.Parameter.empty
        param_value: Any

        # Get parameter value, either from custom rule object or default value
        if custom_rule_obj:
            if param_name in custom_rule_obj["params"]:
                param_value = custom_rule_obj["params"][param_name]
                custom_params.remove(param_name)
            elif not param_default_empty:
                param_value = copy.deepcopy(param.default)
            else:
                raise util.SparvErrorMessage(
                    f"Parameter '{param_name}' in custom rule '{rule.full_name}' has no value!",
                    "sparv", "config")
        else:
            if param_default_empty:
                # This is probably an unused custom rule, so don't process it any further,
                # but save it in all_custom_annotators and all_annotators
                storage.all_custom_annotators.setdefault(
                    rule.module_name, {}).setdefault(rule.f_name, {
                        "description": rule.description,
                        "params": param_dict
                    })
                storage.custom_targets.append(
                    (rule.target_name, rule.description))
                storage.all_annotators.setdefault(
                    rule.module_name, {}).setdefault(
                        rule.f_name, {
                            "description": rule.description,
                            "annotations": [],
                            "params": param_dict
                        })
                return False
            else:
                param_value = copy.deepcopy(param.default)

        param_type, param_list, param_optional = registry.get_type_hint_type(
            param.annotation)

        # Output
        if issubclass(param_type, BaseOutput):
            if not isinstance(param_value, BaseOutput):
                if not param_value:
                    return False
                param_value = param_type(param_value)
            rule.configs.update(
                registry.find_config_variables(param_value.name))
            rule.classes.update(registry.find_classes(param_value.name))
            missing_configs = param_value.expand_variables(rule.full_name)
            rule.missing_config.update(missing_configs)
            ann_path = get_annotation_path(param_value,
                                           data=param_type.data,
                                           common=param_type.common)
            if param_type.all_docs:
                rule.outputs.extend(
                    map(
                        Path,
                        expand(escape_wildcards(paths.work_dir / ann_path),
                               doc=get_source_files(storage.source_files))))
            elif param_type.common:
                rule.outputs.append(paths.work_dir / ann_path)
                if rule.installer:
                    storage.install_outputs[rule.target_name].append(
                        paths.work_dir / ann_path)
            else:
                rule.outputs.append(
                    get_annotation_path(param_value, data=param_type.data))
            rule.parameters[param_name] = param_value
            if "{" in param_value:
                rule.wildcard_annotations.append(param_name)
            if rule.annotator:
                storage.all_annotators.setdefault(
                    rule.module_name, {}).setdefault(
                        rule.f_name, {
                            "description": rule.description,
                            "annotations": [],
                            "params": param_dict
                        })
                storage.all_annotators[rule.module_name][
                    rule.f_name]["annotations"].append(
                        (param_value, param_value.description))
        # ModelOutput
        elif param_type == ModelOutput:
            rule.configs.update(
                registry.find_config_variables(param_value.name))
            rule.classes.update(registry.find_classes(param_value.name))
            rule.missing_config.update(
                param_value.expand_variables(rule.full_name))
            model_path = param_value.path
            rule.outputs.append(model_path)
            rule.parameters[param_name] = ModelOutput(str(model_path))
            storage.model_outputs.append(model_path)
        # Annotation
        elif issubclass(param_type, BaseAnnotation):
            if not isinstance(param_value, BaseAnnotation):
                if not param_value:
                    return False
                param_value = param_type(param_value)
            rule.configs.update(
                registry.find_config_variables(param_value.name))
            rule.classes.update(registry.find_classes(param_value.name))
            missing_configs = param_value.expand_variables(rule.full_name)
            # An optional annotation that is empty or lacks config is passed on as None instead of as an input
            if (not param_value or missing_configs) and param_optional:
                rule.parameters[param_name] = None
                continue
            rule.missing_config.update(missing_configs)
            ann_path = get_annotation_path(param_value,
                                           data=param_type.data,
                                           common=param_type.common)
            if param_type.all_docs:
                rule.inputs.extend(
                    expand(escape_wildcards(paths.work_dir / ann_path),
                           doc=get_source_files(storage.source_files)))
            elif rule.exporter or rule.installer or param_type.common:
                rule.inputs.append(paths.work_dir / ann_path)
            else:
                rule.inputs.append(ann_path)

            rule.parameters[param_name] = param_value
            if "{" in param_value:
                rule.wildcard_annotations.append(param_name)
        # ExportAnnotations
        elif param_type in (ExportAnnotations, ExportAnnotationsAllDocs):
            if not isinstance(param_value, param_type):
                param_value = param_type(param_value)
            rule.parameters[param_name] = param_value

            source = param.default.config_name
            annotations = sparv_config.get(f"{source}", [])
            if not annotations:
                rule.missing_config.add(f"{source}")
            export_annotations = util.parse_annotation_list(
                annotations, add_plain_annotations=False)
            annotation_type = Annotation if param_type == ExportAnnotations else AnnotationAllDocs
            plain_annotations = set()
            possible_plain_annotations = []
            for i, (export_annotation_name,
                    export_name) in enumerate(export_annotations):
                annotation = annotation_type(export_annotation_name)
                rule.configs.update(
                    registry.find_config_variables(annotation.name))
                rule.classes.update(registry.find_classes(annotation.name))
                rule.missing_config.update(
                    annotation.expand_variables(rule.full_name))
                export_annotations[i] = (annotation, export_name)
                plain_name, attr = annotation.split()
                if not attr:
                    plain_annotations.add(plain_name)
                else:
                    if plain_name not in possible_plain_annotations:
                        possible_plain_annotations.append(plain_name)
            # Add plain annotations where needed
            for a in possible_plain_annotations:
                if a not in plain_annotations:
                    export_annotations.append((annotation_type(a), None))

            for annotation, export_name in export_annotations:
                if param.default.is_input:
                    if param_type == ExportAnnotationsAllDocs:
                        rule.inputs.extend(
                            expand(escape_wildcards(
                                paths.work_dir /
                                get_annotation_path(annotation.name)),
                                   doc=get_source_files(storage.source_files)))
                    else:
                        rule.inputs.append(
                            paths.work_dir /
                            get_annotation_path(annotation.name))
                rule.parameters[param_name].append((annotation, export_name))
        # SourceAnnotations
        elif param_type == SourceAnnotations:
            rule.parameters[param_name] = sparv_config.get(
                f"{param.default.config_name}", None)
        # Corpus
        elif param.annotation == Corpus:
            rule.parameters[param_name] = Corpus(
                sparv_config.get("metadata.id"))
        # Language
        elif param.annotation == Language:
            rule.parameters[param_name] = Language(
                sparv_config.get("metadata.language"))
        # Document
        elif param.annotation == Document:
            rule.docs.append(param_name)
        # AllDocuments (all source documents)
        elif param_type == AllDocuments:
            rule.parameters[param_name] = AllDocuments(
                get_source_files(storage.source_files))
        # Text
        elif param_type == Text:
            text_path = Path("{doc}") / io.TEXT_FILE
            if rule.exporter or rule.installer:
                rule.inputs.append(paths.work_dir / text_path)
            else:
                rule.inputs.append(text_path)
            rule.parameters[param_name] = param_value
        # Model
        elif param_type == Model:
            if param_value is not None:
                if param_list:
                    rule.parameters[param_name] = []
                    for model in param_value:
                        if not isinstance(model, Model):
                            # Wrap the individual list element (not the whole list) in a Model
                            model = Model(model)
                        rule.configs.update(
                            registry.find_config_variables(model.name))
                        rule.classes.update(registry.find_classes(model.name))
                        rule.missing_config.update(
                            model.expand_variables(rule.full_name))
                        rule.inputs.append(model.path)
                        rule.parameters[param_name].append(
                            Model(str(model.path)))
                else:
                    if not isinstance(param_value, Model):
                        param_value = Model(param_value)
                    rule.configs.update(
                        registry.find_config_variables(param_value.name))
                    rule.classes.update(registry.find_classes(
                        param_value.name))
                    rule.missing_config.update(
                        param_value.expand_variables(rule.full_name))
                    rule.inputs.append(param_value.path)
                    rule.parameters[param_name] = Model(str(param_value.path))
        # Binary
        elif param.annotation in (Binary, BinaryDir):
            rule.configs.update(registry.find_config_variables(param.default))
            rule.classes.update(registry.find_classes(param.default))
            param_value, missing_configs = registry.expand_variables(
                param.default, rule.full_name)
            rule.missing_config.update(missing_configs)
            binary = util.find_binary(param_value,
                                      executable=False,
                                      allow_dir=param.annotation == BinaryDir)
            if not binary:
                rule.missing_binaries.add(param_value)
            # Fall back to the unresolved value when the binary could not be found
            binary = Path(binary if binary else param_value)
            rule.inputs.append(binary)
            rule.parameters[param_name] = param.annotation(binary)
        # Source
        elif param.annotation == Source:
            rule.parameters[param_name] = Source(get_source_path())
        # Export
        elif param.annotation == Export:
            rule.configs.update(registry.find_config_variables(param.default))
            rule.classes.update(registry.find_classes(param.default))
            param_value, missing_configs = registry.expand_variables(
                param.default, rule.full_name)
            rule.missing_config.update(missing_configs)
            if param.default.absolute_path:
                export_path = Path(param_value)
            else:
                export_path = paths.export_dir / param_value
            output_dirs.add(export_path.parent)
            rule.outputs.append(export_path)
            rule.parameters[param_name] = Export(str(export_path))
            if "{doc}" in rule.parameters[param_name]:
                rule.doc_annotations.append(param_name)
            if "{" in param_value:
                rule.wildcard_annotations.append(param_name)
        # ExportInput
        elif param.annotation == ExportInput:
            rule.configs.update(registry.find_config_variables(param.default))
            rule.classes.update(registry.find_classes(param.default))
            param_value, missing_configs = registry.expand_variables(
                param.default, rule.full_name)
            rule.missing_config.update(missing_configs)
            if param.default.absolute_path:
                rule.parameters[param_name] = ExportInput(param_value)
            else:
                rule.parameters[param_name] = ExportInput(paths.export_dir /
                                                          param_value)
            if param.default.all_docs:
                rule.inputs.extend(
                    expand(escape_wildcards(rule.parameters[param_name]),
                           doc=get_source_files(storage.source_files)))
            else:
                rule.inputs.append(Path(rule.parameters[param_name]))
            if "{" in rule.parameters[param_name]:
                rule.wildcard_annotations.append(param_name)
        # Config
        elif isinstance(param_value, Config):
            rule.configs.add(param_value.name)
            config_value = sparv_config.get(param_value, sparv_config.Unset)
            if config_value is sparv_config.Unset:
                if param_value.default is not None:
                    config_value = param_value.default
                elif param_optional:
                    config_value = None
                else:
                    rule.missing_config.add(param_value)
            rule.parameters[param_name] = config_value
        # Everything else
        else:
            rule.parameters[param_name] = param_value

    # For custom rules, warn the user of any unknown parameters
    if custom_params:
        print_sparv_warning(
            "The parameter{} '{}' used in one of your custom rules "
            "do{} not exist in {}.".format(
                "s" if len(custom_params) > 1 else "",
                "', '".join(custom_params),
                "es" if len(custom_params) == 1 else "", rule.full_name))

    storage.all_rules.append(rule)

    # Add to rule lists in storage
    update_storage(storage, rule)

    # Add exporter dirs (used for informing user)
    if rule.exporter:
        if rule.abstract:
            output_dirs = {p.parent for p in rule.inputs}
        rule.export_dirs = [str(p / "_")[:-1] for p in output_dirs]

    # Entries of the form "<...>" are reported as missing classes, everything else as missing configs
    if rule.missing_config:
        log_handler.messages["missing_configs"][rule.full_name].update(
            [c for c in rule.missing_config if not c.startswith("<")])
        log_handler.messages["missing_classes"][rule.full_name].update(
            [c[1:-1] for c in rule.missing_config if c.startswith("<")])

    if rule.missing_binaries:
        log_handler.messages["missing_binaries"][rule.full_name].update(
            rule.missing_binaries)

    # Debug output: list the inputs, outputs and parameters of the rule
    if config.get("debug"):
        print()
        console.print("[b]{}:[/b] {}".format(rule.module_name.upper(),
                                             rule.f_name))
        print()
        console.print("    [b]INPUTS[/b]")
        for i in rule.inputs:
            print("        {}".format(i))
        print()
        console.print("    [b]OUTPUTS[/b]")
        for o in rule.outputs:
            print("        {}".format(o))
        print()
        console.print("    [b]PARAMETERS[/b]")
        for p in rule.parameters:
            print("        {} = {!r}".format(p, rule.parameters[p]))
        print()
        print()

    return True
Ejemplo n.º 13
0
def print_sparv_info(msg):
    """Print msg as a Sparv info message: green and without automatic highlighting."""
    markup = f"[green]{msg}[/green]"
    console.print(markup, highlight=False)
Ejemplo n.º 14
0
def print_sparv_warning(msg):
    """Print msg as a Sparv warning: yellow, prefixed with "WARNING: " and without automatic highlighting."""
    markup = f"[yellow]WARNING: {msg}[/yellow]"
    console.print(markup, highlight=False)