def test_release_package_schema_with_schema_base_url():
    """A custom ``schema_base_url`` is reflected in the package schema's ``id`` and ``$ref``."""
    base_url = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/'
    expected_id = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/release-package-schema.json'  # noqa: E501
    expected_ref = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/release-schema.json'  # noqa: E501

    package_schema = ProfileBuilder('1__1__4', {}, schema_base_url=base_url).release_package_schema()

    # The schema's own identifier is rewritten against the base URL ...
    assert package_schema['id'] == expected_id

    # ... and so is the reference from the package to the release schema.
    release_items = package_schema['properties']['releases']['items']
    assert release_items['$ref'] == expected_ref
def cli(
    filename,
    schema,
    selection,
    split,
    threshold,
    state_file,
    xlsx,
    csv,
    combine,
    unnest,
    unnest_file,
    only,
    only_file,
    repeat,
    repeat_file,
    count,
    human,
    language,
):
    """Spoonbill cli entry point.

    Detects the format of ``filename``, analyzes it with ``FileAnalyzer`` (or
    restores a previous analysis from ``state_file``), builds per-table
    flattening options and runs ``FileFlattener`` over the file, reporting
    progress on the console.

    Args:
        filename: Path of the input file.
        schema: Optional schema location, passed through ``resolve_file_uri``.
            When omitted, the release/record package schema for
            ``CURRENT_SCHEMA_TAG`` is built with ``ProfileBuilder``.
        selection: Names of root tables to export; defaults to all of
            ``ROOT_TABLES``.
        split: When truthy, every selected table is split; otherwise each
            table's own ``should_split`` is used.
        threshold: Passed to ``FileAnalyzer`` as ``table_threshold``.
        state_file: Previously dumped analyzer state; when given, the
            analysis pass is skipped.
        xlsx: Output xlsx path; its parent directory must exist.
        csv: Output csv location; it must exist.
        combine: Names of combined tables; defaults to all of
            ``COMBINED_TABLES``.
        unnest, unnest_file: Columns to unnest, given directly or via a file
            (mutually exclusive).
        only, only_file: Columns to keep, given directly or via a file
            (mutually exclusive).
        repeat, repeat_file: Columns to repeat in child tables, given directly
            or via a file (mutually exclusive).
        count: Stored under the ``"count"`` key of the flatten options.
        human: Stored as ``pretty_headers`` for every selected table.
        language: Passed to both the analyzer and the flattener.

    Raises:
        click.UsageError: If an option and its ``*_file`` counterpart are both
            supplied.
        click.BadParameter: If the csv location or the xlsx parent directory
            does not exist.
        ValueError: If the schema has no ``title``.
    """
    # Validate mutually exclusive options up front so that a usage error is
    # reported before the (potentially long) analysis pass, not after it.
    if unnest and unnest_file:
        raise click.UsageError(_("Conflicting options: unnest and unnest-file"))
    if repeat and repeat_file:
        raise click.UsageError(_("Conflicting options: repeat and repeat-file"))
    if only and only_file:
        raise click.UsageError(_("Conflicting options: only and only-file"))

    click.echo(_("Detecting input file format"))
    # TODO: handle line separated json
    # TODO: handle single release/record
    (
        input_format,
        _is_concatenated,
        _is_array,
    ) = detect_format(filename)

    if csv:
        csv = pathlib.Path(csv).resolve()
        if not csv.exists():
            raise click.BadParameter(_("Desired location {} does not exists").format(csv))
    if xlsx:
        xlsx = pathlib.Path(xlsx).resolve()
        if not xlsx.parent.exists():
            raise click.BadParameter(_("Desired location {} does not exists").format(xlsx.parent))

    click.echo(_("Input file is {}").format(click.style(input_format, fg="green")))
    is_package = "package" in input_format
    # Remember the user's explicit choice before `combine` receives its default below.
    combine_choice = combine or ""
    if not is_package:
        # TODO: fix this
        click.echo("Single releases are not supported by now")
        return

    if schema:
        schema = resolve_file_uri(schema)
    root_key = "releases" if "release" in input_format else "records"
    if not schema:
        click.echo(_("No schema provided, using version {}").format(click.style(CURRENT_SCHEMA_TAG, fg="cyan")))
        profile = ProfileBuilder(CURRENT_SCHEMA_TAG, {})
        if root_key == "releases":
            schema = profile.release_package_schema()
        else:
            schema = profile.record_package_schema()

    title = schema.get("title", "").lower()
    if not title:
        raise ValueError(_("Incomplete schema, please make sure your data is correct"))
    if "package" in title:
        # TODO: is this a good way to get release/record schema
        schema = schema["properties"][root_key]["items"]

    path = pathlib.Path(filename)
    workdir = path.parent
    filename = path.name
    selection = selection or ROOT_TABLES.keys()
    combine = combine or COMBINED_TABLES.keys()
    root_tables = get_selected_tables(ROOT_TABLES, selection)
    combined_tables = get_selected_tables(COMBINED_TABLES, combine)

    if state_file:
        click.secho(_("Restoring from provided state file"), bold=True)
        analyzer = FileAnalyzer(workdir, state_file=state_file)
    else:
        click.secho(_("State file not supplied, going to analyze input file first"), bold=True)
        analyzer = FileAnalyzer(
            workdir,
            schema=schema,
            root_key=root_key,
            root_tables=root_tables,
            combined_tables=combined_tables,
            language=language,
            table_threshold=threshold,
        )
        click.echo(_("Analyze options:"))
        click.echo(_(" - table threshold => {}").format(click.style(str(threshold), fg="cyan")))
        click.echo(_(" - language => {}").format(click.style(language, fg="cyan")))
        click.echo(_("Processing file: {}").format(click.style(str(path), fg="cyan")))
        total = path.stat().st_size
        progress = 0
        # Pre-set so the summary below works (reporting 0 objects) even when
        # the analyzer yields nothing.
        number = -1
        # Progress bar not showing with small files
        # https://github.com/pallets/click/pull/1296/files
        with click.progressbar(width=0, show_percent=True, show_pos=True, length=total) as bar:
            for read, number in analyzer.analyze_file(filename, with_preview=True):
                bar.label = ANALYZED_LABEL.format(click.style(str(number), fg="cyan"))
                # The progress bar expects increments, the analyzer reports absolute position.
                bar.update(read - progress)
                progress = read
        click.secho(
            _("Done processing. Analyzed objects: {}").format(click.style(str(number + 1), fg="red")),
            fg="green",
        )
        state_file = pathlib.Path(f"{filename}.state")
        state_file_path = workdir / state_file
        click.echo(_("Dumping analyzed data to '{}'").format(click.style(str(state_file_path.absolute()), fg="cyan")))
        analyzer.dump_to_file(state_file)

    click.echo(_("Flattening file: {}").format(click.style(str(path), fg="cyan")))

    options = {"selection": {}, "count": count}
    unnest = read_option_file(unnest, unnest_file)
    repeat = read_option_file(repeat, repeat_file)
    only = read_option_file(only, only_file)

    for name in selection:
        table = analyzer.spec[name]
        if table.total_rows == 0:
            click.echo(_("Ignoring empty table {}").format(click.style(name, fg="red")))
            continue

        # Filter into per-table lists. Re-assigning `unnest`/`only`/`repeat`
        # here would make every following table see only the columns that
        # survived the previous table's filter.
        table_unnest = [col for col in unnest if col in table.combined_columns]
        if table_unnest:
            click.echo(
                _("Unnesting columns {} for table {}").format(
                    click.style(",".join(table_unnest), fg="cyan"), click.style(name, fg="cyan")
                )
            )
        table_only = [col for col in only if col in table]
        if table_only:
            click.echo(
                _("Using only columns {} for table {}").format(
                    click.style(",".join(table_only), fg="cyan"), click.style(name, fg="cyan")
                )
            )
        table_repeat = [col for col in repeat if col in table]
        if table_repeat:
            click.echo(
                _("Repeating columns {} in all child table of {}").format(
                    click.style(",".join(table_repeat), fg="cyan"), click.style(name, fg="cyan")
                )
            )
        options["selection"][name] = {
            "split": split or table.should_split,
            "pretty_headers": human,
            "unnest": table_unnest,
            "only": table_only,
            "repeat": table_repeat,
        }

    options = FlattenOptions(**options)
    flattener = FileFlattener(
        workdir,
        options,
        analyzer.spec.tables,
        root_key=root_key,
        csv=csv,
        xlsx=xlsx,
        language=language,
    )

    tables = flattener.flattener.tables
    all_tables = chain(tables.keys(), combine_choice)
    click.echo(_("Going to export tables: {}").format(click.style(",".join(all_tables), fg="magenta")))

    click.echo(_("Processed tables:"))
    for name in tables.keys():
        message = _("{}: {} rows").format(name, tables[name].total_rows)
        if not tables[name].is_root:
            # Mark child tables with a tree-like prefix.
            message = "└-----" + message
        click.echo(message)

    click.echo(_("Flattening input file"))
    # Dedicated counter: the loop must not shadow the `count` option, and the
    # summary must still work if the flattener yields nothing.
    flattened = -1
    with click.progressbar(
        flattener.flatten_file(filename),
        length=analyzer.spec.total_items + 1,
        width=0,
        show_percent=True,
        show_pos=True,
    ) as bar:
        for flattened in bar:
            bar.label = FLATTENED_LABEL.format(click.style(str(flattened + 1), fg="cyan"))

    click.secho(
        _("Done flattening. Flattened objects: {}").format(click.style(str(flattened + 1), fg="red")),
        fg="green",
    )