def test_schema_ocds_extended_schema_file():
    data = SuppliedData.objects.create()
    with open(
            os.path.join(
                "tests",
                "fixtures",
                "tenders_releases_1_release_with_extensions_1_1.json",
            )) as fp:
        data.original_file.save("test.json", UploadedFile(fp))
        fp.seek(0)
        json_data = json.load(fp)
    schema = SchemaOCDS(release_data=json_data)
    assert not schema.extended

    schema.get_schema_obj()
    assert schema.extended
    assert not schema.extended_schema_file
    assert not schema.extended_schema_url

    schema.create_extended_schema_file(data.upload_dir(), data.upload_url())
    assert schema.extended_schema_file == os.path.join(data.upload_dir(),
                                                       "extended_schema.json")
    assert schema.extended_schema_url == os.path.join(data.upload_url(),
                                                      "extended_schema.json")

    json_data = json.loads(
        '{"version": "1.1", "extensions": [], "releases": [{"ocid": "xx"}]}')
    schema = SchemaOCDS(release_data=json_data)
    schema.get_schema_obj()
    schema.create_extended_schema_file(data.upload_dir(), data.upload_url())
    assert not schema.extended
    assert not schema.extended_schema_file
    assert not schema.extended_schema_url
Exemple #2
0
def explore_ocds(request, pk):
    context, db_data, error = explore_data_context(request, pk)
    if error:
        return error

    lib_cove_ocds_config = LibCoveOCDSConfig()
    lib_cove_ocds_config.config["current_language"] = translation.get_language(
    )
    lib_cove_ocds_config.config[
        "schema_version_choices"] = settings.COVE_CONFIG[
            "schema_version_choices"]
    lib_cove_ocds_config.config["schema_codelists"] = settings.COVE_CONFIG[
        "schema_codelists"]

    upload_dir = db_data.upload_dir()
    upload_url = db_data.upload_url()
    file_name = db_data.original_file.file.name
    file_type = context["file_type"]

    post_version_choice = request.POST.get("version")
    replace = False
    validation_errors_path = os.path.join(upload_dir,
                                          "validation_errors-3.json")

    if file_type == "json":
        # open the data first so we can inspect for record package
        with open(file_name, encoding="utf-8") as fp:
            try:
                json_data = json.load(fp,
                                      parse_float=Decimal,
                                      object_pairs_hook=OrderedDict)
            except UnicodeError as err:
                raise CoveInputDataError(
                    context={
                        'sub_title':
                        _("Sorry, we can't process that data"),
                        'link':
                        'index',
                        'link_text':
                        _('Try Again'),
                        'msg':
                        format_html(
                            _("The file that you uploaded doesn't appear to be well formed JSON. OCDS JSON follows the I-JSON format, which requires UTF-8 encoding. Ensure that your file uses UTF-8 encoding, then try uploading again."
                              '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                              '</span> <strong>Error message:</strong> {}'),
                            err),
                        'error':
                        format(err)
                    })
            except ValueError as err:
                raise CoveInputDataError(
                    context={
                        "sub_title":
                        _("Sorry, we can't process that data"),
                        "link":
                        "index",
                        "link_text":
                        _("Try Again"),
                        "msg":
                        format_html(
                            _(
                                "We think you tried to upload a JSON file, but it is not well formed JSON."
                                '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                                "</span> <strong>Error message:</strong> {}",
                            ),
                            err,
                        ),
                        "error":
                        format(err),
                    })

            if not isinstance(json_data, dict):
                raise CoveInputDataError(
                    context={
                        "sub_title":
                        _("Sorry, we can't process that data"),
                        "link":
                        "index",
                        "link_text":
                        _("Try Again"),
                        "msg":
                        _("OCDS JSON should have an object as the top level, the JSON you supplied does not."
                          ),
                    })

            version_in_data = json_data.get("version", "")
            db_data.data_schema_version = version_in_data
            select_version = post_version_choice or db_data.schema_version
            schema_ocds = SchemaOCDS(select_version=select_version,
                                     release_data=json_data,
                                     lib_cove_ocds_config=lib_cove_ocds_config,
                                     record_pkg="records" in json_data)

            if schema_ocds.missing_package:
                exceptions.raise_missing_package_error()
            if schema_ocds.invalid_version_argument:
                # This shouldn't happen unless the user sends random POST data.
                exceptions.raise_invalid_version_argument(post_version_choice)
            if schema_ocds.invalid_version_data:
                if isinstance(version_in_data, str) and re.compile(
                        "^\d+\.\d+\.\d+$").match(version_in_data):
                    exceptions.raise_invalid_version_data_with_patch(
                        version_in_data)
                else:
                    if not isinstance(version_in_data, str):
                        version_in_data = "{} (it must be a string)".format(
                            str(version_in_data))
                    context["unrecognized_version_data"] = version_in_data

            if schema_ocds.version != db_data.schema_version:
                replace = True
            if schema_ocds.extensions:
                schema_ocds.create_extended_schema_file(upload_dir, upload_url)
            url = schema_ocds.extended_schema_file or schema_ocds.schema_url

            if "records" in json_data:
                context["conversion"] = None
            else:
                # Replace the spreadsheet conversion only if it exists already.
                converted_path = os.path.join(upload_dir, "flattened")
                replace_converted = replace and os.path.exists(converted_path +
                                                               ".xlsx")

                with warnings.catch_warnings():
                    warnings.filterwarnings(
                        'ignore'
                    )  # flattentool uses UserWarning, so can't set a specific category

                    convert_json_context = convert_json(
                        upload_dir,
                        upload_url,
                        file_name,
                        lib_cove_ocds_config,
                        schema_url=url,
                        replace=replace_converted,
                        request=request,
                        flatten=request.POST.get("flatten"),
                    )

                context.update(convert_json_context)

    else:
        # Use the lowest release pkg schema version accepting 'version' field
        metatab_schema_url = SchemaOCDS(
            select_version="1.1",
            lib_cove_ocds_config=lib_cove_ocds_config).pkg_schema_url
        metatab_data = get_spreadsheet_meta_data(upload_dir, file_name,
                                                 metatab_schema_url, file_type)
        if "version" not in metatab_data:
            metatab_data["version"] = "1.0"
        else:
            db_data.data_schema_version = metatab_data["version"]

        select_version = post_version_choice or db_data.schema_version
        schema_ocds = SchemaOCDS(
            select_version=select_version,
            release_data=metatab_data,
            lib_cove_ocds_config=lib_cove_ocds_config,
        )

        # Unlike for JSON data case above, do not check for missing data package
        if schema_ocds.invalid_version_argument:
            # This shouldn't happen unless the user sends random POST data.
            exceptions.raise_invalid_version_argument(post_version_choice)
        if schema_ocds.invalid_version_data:
            version_in_data = metatab_data.get("version")
            if re.compile("^\d+\.\d+\.\d+$").match(version_in_data):
                exceptions.raise_invalid_version_data_with_patch(
                    version_in_data)
            else:
                context["unrecognized_version_data"] = version_in_data

        # Replace json conversion when user chooses a different schema version.
        if db_data.schema_version and schema_ocds.version != db_data.schema_version:
            replace = True

        if schema_ocds.extensions:
            schema_ocds.create_extended_schema_file(upload_dir, upload_url)
        url = schema_ocds.extended_schema_file or schema_ocds.schema_url
        pkg_url = schema_ocds.pkg_schema_url

        context.update(
            convert_spreadsheet(
                upload_dir,
                upload_url,
                file_name,
                file_type,
                lib_cove_ocds_config,
                schema_url=url,
                pkg_schema_url=pkg_url,
                replace=replace,
            ))

        with open(context["converted_path"], encoding="utf-8") as fp:
            json_data = json.load(fp,
                                  parse_float=Decimal,
                                  object_pairs_hook=OrderedDict)

    if replace:
        if os.path.exists(validation_errors_path):
            os.remove(validation_errors_path)

    context = common_checks_ocds(context, upload_dir, json_data, schema_ocds)

    if schema_ocds.json_deref_error:
        exceptions.raise_json_deref_error(schema_ocds.json_deref_error)

    context.update({
        "data_schema_version":
        db_data.data_schema_version,
        "first_render":
        not db_data.rendered,
        "validation_errors_grouped":
        group_validation_errors(context["validation_errors"]),
    })

    schema_version = getattr(schema_ocds, "version", None)
    if schema_version:
        db_data.schema_version = schema_version
    if not db_data.rendered:
        db_data.rendered = True

    db_data.save()

    if "records" in json_data:
        ocds_show_schema = SchemaOCDS(record_pkg=True)
        ocds_show_deref_schema = ocds_show_schema.get_schema_obj(deref=True)
        template = "cove_ocds/explore_record.html"
        if hasattr(json_data, "get") and hasattr(json_data.get("records"),
                                                 "__iter__"):
            context["records"] = json_data["records"]
        else:
            context["records"] = []
        if isinstance(json_data["records"],
                      list) and len(json_data["records"]) < 100:
            context["ocds_show_data"] = ocds_show_data(json_data,
                                                       ocds_show_deref_schema)
    else:
        ocds_show_schema = SchemaOCDS(record_pkg=False)
        ocds_show_deref_schema = ocds_show_schema.get_schema_obj(deref=True)
        template = "cove_ocds/explore_release.html"
        if hasattr(json_data, "get") and hasattr(json_data.get("releases"),
                                                 "__iter__"):
            context["releases"] = json_data["releases"]
            if (isinstance(json_data["releases"], list)
                    and len(json_data["releases"]) < 100):
                context["ocds_show_data"] = ocds_show_data(
                    json_data, ocds_show_deref_schema)

            # Parse release dates into objects so the template can format them.
            for release in context["releases"]:
                if hasattr(release, "get") and release.get("date"):
                    if validate_rfc3339(release["date"]):
                        release["date"] = parser.parse(release["date"])
                    else:
                        release["date"] = None
            if context.get("releases_aggregates"):
                date_fields = [
                    "max_award_date",
                    "max_contract_date",
                    "max_release_date",
                    "max_tender_date",
                    "min_award_date",
                    "min_contract_date",
                    "min_release_date",
                    "min_tender_date",
                ]
                for field in date_fields:
                    if context["releases_aggregates"].get(field):
                        if validate_rfc3339(
                                context["releases_aggregates"][field]):
                            context["releases_aggregates"][
                                field] = parser.parse(
                                    context["releases_aggregates"][field])
                        else:
                            context["releases_aggregates"][field] = None
        else:
            context["releases"] = []

    return render(request, template, context)
def ocds_json_output(
    output_dir,
    file,
    schema_version,
    convert,
    cache_schema=False,
    file_type=None,
    json_data=None,
    lib_cove_ocds_config=None,
    record_pkg=False,
):

    if not lib_cove_ocds_config:
        lib_cove_ocds_config = LibCoveOCDSConfig()

    # cache_schema is a deprecated option - now set cache_all_requests in the config instead.
    if cache_schema:
        lib_cove_ocds_config.config["cache_all_requests"] = True

    context = {}
    if not file_type:
        file_type = get_file_type(file)
    context = {"file_type": file_type}

    if file_type == "json":
        if not json_data:
            with open(file, encoding="utf-8") as fp:
                try:
                    json_data = json.load(fp, object_pairs_hook=OrderedDict)
                except ValueError:
                    raise APIException("The file looks like invalid json")

        schema_ocds = SchemaOCDS(
            schema_version, json_data, lib_cove_ocds_config=lib_cove_ocds_config, record_pkg=record_pkg
        )

        if schema_ocds.invalid_version_data:
            msg = "\033[1;31mThe schema version in your data is not valid. Accepted values: {}\033[1;m"
            raise APIException(msg.format(str(list(schema_ocds.version_choices.keys()))))
        if schema_ocds.extensions:
            schema_ocds.create_extended_schema_file(output_dir, "")

        url = schema_ocds.extended_schema_file or schema_ocds.schema_url

        if convert:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore")  # flattentool uses UserWarning, so we can't set a specific category

                context.update(
                    convert_json(output_dir, "", file, lib_cove_ocds_config, schema_url=url, flatten=True, cache=False)
                )

    else:
        metatab_schema_url = SchemaOCDS(select_version="1.1", lib_cove_ocds_config=lib_cove_ocds_config).pkg_schema_url
        metatab_data = get_spreadsheet_meta_data(output_dir, file, metatab_schema_url, file_type=file_type)
        schema_ocds = SchemaOCDS(schema_version, release_data=metatab_data, lib_cove_ocds_config=lib_cove_ocds_config)

        if schema_ocds.invalid_version_data:
            msg = "\033[1;31mThe schema version in your data is not valid. Accepted values: {}\033[1;m"
            raise APIException(msg.format(str(list(schema_ocds.version_choices.keys()))))
        if schema_ocds.extensions:
            schema_ocds.create_extended_schema_file(output_dir, "")

        url = schema_ocds.extended_schema_file or schema_ocds.schema_url
        pkg_url = schema_ocds.pkg_schema_url

        context.update(
            convert_spreadsheet(
                output_dir,
                "",
                file,
                file_type,
                lib_cove_ocds_config,
                schema_url=url,
                pkg_schema_url=pkg_url,
                cache=False,
            )
        )

        with open(context["converted_path"], encoding="utf-8") as fp:
            json_data = json.load(fp, object_pairs_hook=OrderedDict)

    context = context_api_transform(
        common_checks_ocds(context, output_dir, json_data, schema_ocds, api=True, cache=False)
    )

    if file_type == "xlsx":
        # Remove unwanted files in the output
        # TODO: can we do this by no writing the files in the first place?
        os.remove(os.path.join(output_dir, "heading_source_map.json"))
        os.remove(os.path.join(output_dir, "cell_source_map.json"))

    return context