Esempio n. 1
0
def test_record_package_schema_with_schema_base_url():
    """Check that ``schema_base_url`` rewrites ``id`` and release ``$ref`` values.

    Builds the record package schema with a custom base URL and verifies that
    the package ``id`` and both references to the release schema point to it.
    """
    base_url = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/'
    expected_id = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/record-package-schema.json'  # noqa: E501
    expected_ref = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/release-schema.json'  # noqa: E501

    profile = ProfileBuilder('1__1__4', {}, schema_base_url=base_url)
    schema = profile.record_package_schema()

    # Changes `id` and `$ref`.
    assert schema['id'] == expected_id

    record_properties = schema['definitions']['record']['properties']
    assert record_properties['compiledRelease']['$ref'] == expected_ref

    # The second `oneOf` alternative of `releases` is the one carrying the
    # `$ref` to the release schema.
    embedded_releases = record_properties['releases']['oneOf'][1]
    assert embedded_releases['items']['$ref'] == expected_ref
Esempio n. 2
0
File: cli.py Progetto: lttga/test2
def cli(
    filename,
    schema,
    selection,
    split,
    threshold,
    state_file,
    xlsx,
    csv,
    combine,
    unnest,
    unnest_file,
    only,
    only_file,
    repeat,
    repeat_file,
    count,
    human,
    language,
):
    """Spoonbill cli entry point"""
    # NOTE(review): the docstring above is deliberately left untouched. This
    # function looks like a click command (its decorators are outside this
    # view), and click would show the docstring as the command help text.
    #
    # Stages:
    #   1. validate options and detect the input file format;
    #   2. resolve the schema, falling back to CURRENT_SCHEMA_TAG;
    #   3. analyze the input file, or restore an analysis from `state_file`;
    #   4. build per-table flatten options and flatten the input file.
    #
    # Raises click.UsageError for mutually exclusive options,
    # click.BadParameter for missing output locations and ValueError for a
    # schema without a title.

    # Reject mutually exclusive options before any expensive work is done, so
    # a wrong command line does not cost a full analysis pass over the input.
    if unnest and unnest_file:
        raise click.UsageError(
            _("Conflicting options: unnest and unnest-file"))
    if repeat and repeat_file:
        raise click.UsageError(
            _("Conflicting options: repeat and repeat-file"))
    if only and only_file:
        raise click.UsageError(_("Conflicting options: only and only-file"))

    click.echo(_("Detecting input file format"))
    # TODO: handle line separated json
    # TODO: handle single release/record
    input_format, _is_concatenated, _is_array = detect_format(filename)

    # The csv target itself must exist, while for xlsx only the parent
    # directory is required to exist.
    if csv:
        csv = pathlib.Path(csv).resolve()
        if not csv.exists():
            raise click.BadParameter(
                _("Desired location {} does not exists").format(csv))
    if xlsx:
        xlsx = pathlib.Path(xlsx).resolve()
        if not xlsx.parent.exists():
            raise click.BadParameter(
                _("Desired location {} does not exists").format(xlsx.parent))

    click.echo(
        _("Input file is {}").format(click.style(input_format, fg="green")))

    # Keep what the user explicitly asked to combine: `combine` itself is
    # replaced by the default table names further down.
    combine_choice = combine if combine else ""

    if "package" not in input_format:
        # TODO: fix this
        click.echo("Single releases are not supported by now")
        return

    if schema:
        schema = resolve_file_uri(schema)
    is_release = "release" in input_format
    root_key = "releases" if is_release else "records"
    if not schema:
        click.echo(
            _("No schema provided, using version {}").format(
                click.style(CURRENT_SCHEMA_TAG, fg="cyan")))
        profile = ProfileBuilder(CURRENT_SCHEMA_TAG, {})
        if is_release:
            schema = profile.release_package_schema()
        else:
            schema = profile.record_package_schema()

    title = schema.get("title", "").lower()
    if not title:
        raise ValueError(
            _("Incomplete schema, please make sure your data is correct"))
    if "package" in title:
        # TODO: is is a good way to get release/record schema
        schema = schema["properties"][root_key]["items"]

    path = pathlib.Path(filename)
    workdir = path.parent
    filename = path.name
    selection = selection or ROOT_TABLES.keys()
    combine = combine or COMBINED_TABLES.keys()
    root_tables = get_selected_tables(ROOT_TABLES, selection)
    combined_tables = get_selected_tables(COMBINED_TABLES, combine)

    if state_file:
        click.secho(_("Restoring from provided state file"), bold=True)
        analyzer = FileAnalyzer(workdir, state_file=state_file)
    else:
        click.secho(
            _("State file not supplied, going to analyze input file first"),
            bold=True)
        analyzer = FileAnalyzer(
            workdir,
            schema=schema,
            root_key=root_key,
            root_tables=root_tables,
            combined_tables=combined_tables,
            language=language,
            table_threshold=threshold,
        )
        click.echo(_("Analyze options:"))
        click.echo(
            _(" - table threshold => {}").format(
                click.style(str(threshold), fg="cyan")))
        click.echo(
            _(" - language        => {}").format(
                click.style(language, fg="cyan")))
        click.echo(
            _("Processing file: {}").format(click.style(str(path), fg="cyan")))
        total = path.stat().st_size
        progress = 0
        # Start at -1 so the summary below reports 0 objects, instead of
        # failing on an unbound name, when the analyzer yields nothing.
        number = -1
        # Progress bar not showing with small files
        # https://github.com/pallets/click/pull/1296/files
        with click.progressbar(width=0,
                               show_percent=True,
                               show_pos=True,
                               length=total) as bar:
            for read, number in analyzer.analyze_file(filename,
                                                      with_preview=True):
                bar.label = ANALYZED_LABEL.format(
                    click.style(str(number), fg="cyan"))
                # `read` is used as a running total, while the bar expects
                # the increment since the previous update.
                bar.update(read - progress)
                progress = read
        click.secho(_("Done processing. Analyzed objects: {}").format(
            click.style(str(number + 1), fg="red")),
                    fg="green")
        state_file = pathlib.Path(f"{filename}.state")
        state_file_path = workdir / state_file
        click.echo(
            _("Dumping analyzed data to '{}'").format(
                click.style(str(state_file_path.absolute()), fg="cyan")))
        analyzer.dump_to_file(state_file)

    click.echo(
        _("Flattening file: {}").format(click.style(str(path), fg="cyan")))

    options = {"selection": {}, "count": count}
    unnest = read_option_file(unnest, unnest_file)
    repeat = read_option_file(repeat, repeat_file)
    only = read_option_file(only, only_file)

    for name in selection:
        table = analyzer.spec[name]
        if table.total_rows == 0:
            click.echo(
                _("Ignoring empty table {}").format(click.style(name,
                                                                fg="red")))
            continue

        # Filter into per-table lists. Overwriting `unnest`/`only`/`repeat`
        # here would drop, for every following table, the requested columns
        # that do not exist in the current one.
        table_unnest = [col for col in unnest if col in table.combined_columns]
        if table_unnest:
            click.echo(
                _("Unnesting columns {} for table {}").format(
                    click.style(",".join(table_unnest), fg="cyan"),
                    click.style(name, fg="cyan")))

        table_only = [col for col in only if col in table]
        if table_only:
            click.echo(
                _("Using only columns {} for table {}").format(
                    click.style(",".join(table_only), fg="cyan"),
                    click.style(name, fg="cyan")))

        table_repeat = [col for col in repeat if col in table]
        if table_repeat:
            click.echo(
                _("Repeating columns {} in all child table of {}").format(
                    click.style(",".join(table_repeat), fg="cyan"),
                    click.style(name, fg="cyan")))

        options["selection"][name] = {
            "split": split or table.should_split,
            "pretty_headers": human,
            "unnest": table_unnest,
            "only": table_only,
            "repeat": table_repeat,
        }
    options = FlattenOptions(**options)
    flattener = FileFlattener(
        workdir,
        options,
        analyzer.spec.tables,
        root_key=root_key,
        csv=csv,
        xlsx=xlsx,
        language=language,
    )

    flattened_tables = flattener.flattener.tables
    all_tables = chain(flattened_tables.keys(), combine_choice)

    click.echo(
        _("Going to export tables: {}").format(
            click.style(",".join(all_tables), fg="magenta")))

    click.echo(_("Processed tables:"))
    for table_name in flattened_tables.keys():
        table_info = flattened_tables[table_name]
        message = _("{}: {} rows").format(table_name, table_info.total_rows)
        if not table_info.is_root:
            # Visually attach child tables to the root table listed above.
            message = "└-----" + message
        click.echo(message)

    click.echo(_("Flattening input file"))
    # Dedicated counter, so the `count` option is not shadowed and the summary
    # reports 0 when nothing is flattened.
    flattened = -1
    with click.progressbar(
            flattener.flatten_file(filename),
            length=analyzer.spec.total_items + 1,
            width=0,
            show_percent=True,
            show_pos=True,
    ) as bar:
        for flattened in bar:
            bar.label = FLATTENED_LABEL.format(
                click.style(str(flattened + 1), fg="cyan"))

    click.secho(_("Done flattening. Flattened objects: {}").format(
        click.style(str(flattened + 1), fg="red")),
                fg="green")