Beispiel #1
0
def validate_manifest(ctx, manifest_path, data_type, json_schema):
    """
    Validate a manifest against the configured data model and echo the outcome.

    Args:
        ctx: Not used in this function body (presumably the Click context).
        manifest_path: Forwarded as ``manifestPath`` to the validator.
        data_type: Root node to validate against; resolved through
            ``fill_in_from_config`` with the ("manifest", "data_type") keys.
        json_schema: Validation schema; resolved through
            ``fill_in_from_config`` with ``allow_none=True``.

    Returns:
        None. The validation result is written with ``click.echo``.
    """
    # Resolve the CLI arguments first, in the same order as before, so any
    # error raised by the config helpers surfaces at the same point.
    data_type = fill_in_from_config(
        "data_type", data_type, ("manifest", "data_type")
    )
    json_schema = fill_in_from_config(
        "json_schema",
        json_schema,
        ("model", "input", "validation_schema"),
        allow_none=True,
    )

    # The data model location and its file type always come from the config.
    model_location = get_from_config(
        CONFIG.DATA, ("model", "input", "location")
    )
    model_location_type = get_from_config(
        CONFIG.DATA, ("model", "input", "file_type")
    )

    model = MetadataModel(
        inputMModelLocation=model_location,
        inputMModelLocationType=model_location_type,
    )
    validation_errors = model.validateModelManifest(
        manifestPath=manifest_path,
        rootNode=data_type,
        jsonSchema=json_schema,
    )

    # Anything truthy returned by the validator is reported verbatim.
    if validation_errors:
        click.echo(validation_errors)
        return

    success_message = (
        "Your manifest has been validated successfully. "
        "There are no errors in your manifest, and it can "
        "be submitted without any modifications."
    )
    click.echo(success_message)
Beispiel #2
0
    def test_fill_in_from_config(self, mocker):
        """Exercise ``cli_utils.fill_in_from_config`` with a patched config.

        Covers three situations: an explicit value is returned as-is, a
        ``None`` value is replaced by the config entry found under the given
        keys, and a ``None`` value with keys absent from the config raises
        ``MissingConfigAndArgumentValueError``.
        """
        explicit_value = "/path/to/one"
        config_value = "/path/to/two"
        valid_keys = ["model", "path"]
        invalid_keys = ["model", "file"]

        # Replace the global configuration with a minimal nested mapping.
        mocker.patch(
            "schematic.CONFIG.DATA", {"model": {"path": config_value}}
        )

        # The explicit-value call is issued twice, exactly as before.
        from_arg_first = cli_utils.fill_in_from_config(
            "jsonld", explicit_value, valid_keys
        )
        from_arg_second = cli_utils.fill_in_from_config(
            "jsonld", explicit_value, valid_keys
        )
        from_config = cli_utils.fill_in_from_config(
            "jsonld_none", None, valid_keys
        )

        assert from_arg_first == explicit_value
        assert from_arg_second == explicit_value
        assert from_config == config_value

        # No argument value and no matching config entry: must raise.
        with pytest.raises(MissingConfigAndArgumentValueError):
            cli_utils.fill_in_from_config("jsonld_none", None, invalid_keys)
Beispiel #3
0
def get_manifest(ctx, title, data_type, jsonld, dataset_id, sheet_url,
                 output_csv, use_annotations, oauth, json_schema):
    """
    Generate a manifest template and report where it can be found.

    Args:
        ctx: Not used in this function body (presumably the Click context).
        title: Manifest title; resolved through ``fill_in_from_config``
            with ``allow_none=True``.
        data_type: Root of the manifest; resolved through
            ``fill_in_from_config``.
        jsonld: Path to the data model; resolved through
            ``fill_in_from_config``. Also used to derive the default CSV name.
        dataset_id: Forwarded to ``ManifestGenerator.get_manifest``.
        sheet_url: Forwarded to ``ManifestGenerator.get_manifest``; when
            truthy the result is echoed instead of being written to CSV.
        output_csv: Destination CSV path; derived from ``jsonld`` and
            ``data_type`` when ``None``.
        use_annotations: Forwarded to ``ManifestGenerator``.
        oauth: Forwarded to ``ManifestGenerator``.
        json_schema: Validation schema; resolved through
            ``fill_in_from_config`` with ``allow_none=True``.

    Returns:
        Whatever ``ManifestGenerator.get_manifest`` returned.
    """
    # Arguments left unset on the command line may be supplied by config.yml.
    data_type = fill_in_from_config(
        "data_type", data_type, ("manifest", "data_type")
    )
    jsonld = fill_in_from_config(
        "jsonld", jsonld, ("model", "input", "location")
    )
    title = fill_in_from_config(
        "title", title, ("manifest", "title"), allow_none=True
    )
    json_schema = fill_in_from_config(
        "json_schema",
        json_schema,
        ("model", "input", "validation_schema"),
        allow_none=True,
    )

    generator = ManifestGenerator(
        path_to_json_ld=jsonld,
        title=title,
        root=data_type,
        oauth=oauth,
        use_annotations=use_annotations,
    )
    manifest = generator.get_manifest(
        dataset_id=dataset_id,
        sheet_url=sheet_url,
        json_schema=json_schema,
    )

    # Sheet URL requested: print the result and stop, no CSV is written.
    if sheet_url:
        logger.info("Find the manifest template using this Google Sheet URL:")
        click.echo(manifest)
        return manifest

    # Only dataframes are written to disk; anything else is returned as-is.
    if not isinstance(manifest, pd.DataFrame):
        return manifest

    if output_csv is None:
        # Default file name: the model path without its extension, and
        # without a trailing ".model" component if one is present.
        stem = os.path.splitext(jsonld)[0]
        stem_root, stem_ext = os.path.splitext(stem)
        if stem_ext == ".model":
            stem = stem_root
        output_csv = f"{stem}.{data_type}.manifest.csv"

    logger.info(
        f"Find the manifest template using this CSV file path: {output_csv}"
    )
    manifest.to_csv(output_csv, index=False)

    return manifest
Beispiel #4
0
def validate_manifest(ctx, manifest_path, data_type, json_schema,
                      restrict_rules, project_scope):
    """
    Validate a manifest against the configured data model and echo the outcome.

    Args:
        ctx: Not used in this function body (presumably the Click context).
        manifest_path: Forwarded as ``manifestPath`` to the validator.
        data_type: Data type(s) to validate against; resolved through
            ``fill_in_from_config``. Exactly one data type must be given,
            either as a bare string or as a one-element sequence.
        json_schema: Validation schema; resolved through
            ``fill_in_from_config`` with ``allow_none=True``.
        restrict_rules: Forwarded unchanged to ``validateModelManifest``.
        project_scope: Forwarded unchanged to ``validateModelManifest``.

    Returns:
        None. The validation result is written with ``click.echo``.

    Raises:
        click.UsageError: If ``data_type`` does not contain exactly one entry.
    """
    data_type = fill_in_from_config("data_type", data_type,
                                    ("manifest", "data_type"))

    # A bare string is one data type. Wrap it so that the indexing below
    # selects the whole name rather than its first character.
    if isinstance(data_type, str):
        data_type = [data_type]

    # The previous check evaluated ``len(data_type) == 1`` and discarded the
    # result, so multiple data types were silently accepted. Enforce it.
    try:
        has_single_data_type = len(data_type) == 1
    except TypeError:
        # Unsized value (e.g. None): cannot be a single data type.
        has_single_data_type = False

    if not has_single_data_type:
        message = (
            "Can only validate a single data_type at a time. "
            "Please provide a single data_type"
        )
        logger.error(message)
        raise click.UsageError(message)

    data_type = data_type[0]

    json_schema = fill_in_from_config(
        "json_schema",
        json_schema,
        ("model", "input", "validation_schema"),
        allow_none=True,
    )

    # The data model location and its file type always come from the config.
    jsonld = get_from_config(CONFIG.DATA, ("model", "input", "location"))

    model_file_type = get_from_config(CONFIG.DATA,
                                      ("model", "input", "file_type"))

    metadata_model = MetadataModel(inputMModelLocation=jsonld,
                                   inputMModelLocationType=model_file_type)

    # The validator returns an (errors, warnings) pair; only the errors are
    # reported by this command.
    errors, _warnings = metadata_model.validateModelManifest(
        manifestPath=manifest_path,
        rootNode=data_type,
        jsonSchema=json_schema,
        restrict_rules=restrict_rules,
        project_scope=project_scope,
    )

    if not errors:
        click.echo("Your manifest has been validated successfully. "
                   "There are no errors in your manifest, and it can "
                   "be submitted without any modifications.")
    else:
        click.echo(errors)
Beispiel #5
0
def get_manifest(
    ctx,
    title,
    data_type,
    jsonld,
    dataset_id,
    sheet_url,
    output_csv,
    use_annotations,
    oauth,
    json_schema,
    output_xlsx,
    alphabetize_valid_values,
):
    """
    Generate one or more manifest templates and report where they can be found.

    Args:
        ctx: Not used in this function body (presumably the Click context).
        title: Manifest title; resolved through ``fill_in_from_config``
            with ``allow_none=True``.
        data_type: A single data type (string) or a sequence of data types;
            resolved through ``fill_in_from_config``. If the first entry is
            ``'all manifests'``, one manifest is generated for every node of
            the model's ``requiresComponent`` graph.
        jsonld: Path to the data model; resolved through
            ``fill_in_from_config``. Also used to derive the default CSV name.
        dataset_id: Forwarded to ``ManifestGenerator.get_manifest``.
        sheet_url: Forwarded to ``ManifestGenerator.get_manifest``; when
            truthy the result is also echoed.
        output_csv: Destination CSV path; derived from ``jsonld`` and the
            data type when both output paths are ``None``.
        use_annotations: Forwarded to ``ManifestGenerator``.
        oauth: Forwarded to ``ManifestGenerator``.
        json_schema: Validation schema; resolved through
            ``fill_in_from_config`` with ``allow_none=True``.
        output_xlsx: Destination Excel path; when truthy the manifest is
            exported to Excel and no CSV is written.
        alphabetize_valid_values: Forwarded to ``ManifestGenerator``.

    Returns:
        The result of the last manifest generated, or ``None`` if no
        manifest was generated at all.
    """
    # optional parameters that need to be passed to ManifestGenerator()
    # can be read from config.yml as well
    data_type = fill_in_from_config("data_type", data_type,
                                    ("manifest", "data_type"))
    jsonld = fill_in_from_config("jsonld", jsonld,
                                 ("model", "input", "location"))
    title = fill_in_from_config("title",
                                title, ("manifest", "title"),
                                allow_none=True)
    json_schema = fill_in_from_config(
        "json_schema",
        json_schema,
        ("model", "input", "validation_schema"),
        allow_none=True,
    )

    def create_single_manifest(data_type, manifest_title,
                               output_csv=None, output_xlsx=None):
        """Generate and export the manifest for one data type."""
        # The title is passed in explicitly instead of being read from a
        # variable that the enclosing function only binds later in its loops.
        manifest_generator = ManifestGenerator(
            path_to_json_ld=jsonld,
            title=manifest_title,
            root=data_type,
            oauth=oauth,
            use_annotations=use_annotations,
            alphabetize_valid_values=alphabetize_valid_values,
        )

        # call get_manifest() on manifest_generator
        result = manifest_generator.get_manifest(
            dataset_id=dataset_id,
            sheet_url=sheet_url,
            json_schema=json_schema,
        )

        if sheet_url:
            logger.info(
                "Find the manifest template using this Google Sheet URL:")
            click.echo(result)

        if output_csv is None and output_xlsx is None:
            # Default file name: the model path without its extension, and
            # without a trailing ".model" component if one is present.
            prefix, _ = os.path.splitext(jsonld)
            prefix_root, prefix_ext = os.path.splitext(prefix)
            if prefix_ext == ".model":
                prefix = prefix_root
            output_csv = f"{prefix}.{data_type}.manifest.csv"
        elif output_xlsx:
            # An Excel destination takes precedence: export and return
            # before the CSV export below is reached.
            export_manifest_excel(output_excel=output_xlsx, manifest=result)
            logger.info(
                f"Find the manifest template using this Excel file path: {output_xlsx}"
            )
            return result

        export_manifest_csv(file_name=output_csv, manifest=result)
        logger.info(
            f"Find the manifest template using this CSV file path: {output_csv}"
        )
        return result

    # A bare string is one data type; normalise to a list for the loop below.
    if isinstance(data_type, str):
        data_type = [data_type]

    # Bound up front so the final return cannot hit an unbound name when
    # there is nothing to iterate over.
    result = None

    if data_type[0] == 'all manifests':
        sg = SchemaGenerator(path_to_json_ld=jsonld)
        component_digraph = sg.se.get_digraph_by_edge_type('requiresComponent')
        for component in component_digraph.nodes():
            # No output paths are passed here, so each component gets the
            # default CSV name derived inside the helper.
            result = create_single_manifest(
                data_type=component,
                manifest_title=f'{title}.{component}.manifest',
            )
    else:
        several_requested = len(data_type) > 1
        for dt in data_type:
            # Only disambiguate the title when several manifests are made.
            if several_requested:
                manifest_title = f'{title}.{dt}.manifest'
            else:
                manifest_title = title
            result = create_single_manifest(data_type=dt,
                                            manifest_title=manifest_title,
                                            output_csv=output_csv,
                                            output_xlsx=output_xlsx)

    return result