Esempio n. 1
0
def test_patched_release_schema_with_release_schema_patch_url():
    """An extension given as a release-schema.json URL is applied to the standard schema."""
    patch_url = 'https://raw.githubusercontent.com/open-contracting-extensions/ocds_coveredBy_extension/master/release-schema.json'  # noqa: E501
    patched = ProfileBuilder('1__1__4', [patch_url]).patched_release_schema()

    assert '$schema' in patched
    assert 'coveredBy' in patched['definitions']['Tender']['properties']
Esempio n. 2
0
def test_extensions():
    """extensions() yields one Extension, in order, per configured extension."""
    builder = ProfileBuilder('1__1__4', {'charges': 'master', 'location': 'v1.1.4'})

    extensions = list(builder.extensions())

    assert len(extensions) == 2
    assert extensions[0].as_dict() == {
        'id': 'charges',
        'date': '',
        'version': 'master',
        'base_url': 'https://raw.githubusercontent.com/open-contracting-extensions/ocds_charges_extension/master/',
        'download_url': 'https://github.com/open-contracting-extensions/ocds_charges_extension/archive/master.zip',
    }
    assert extensions[1].as_dict() == {
        'id': 'location',
        'date': '2019-02-25',
        'version': 'v1.1.4',
        'base_url': 'https://raw.githubusercontent.com/open-contracting-extensions/ocds_location_extension/v1.1.4/',
        'download_url': 'https://api.github.com/repos/open-contracting-extensions/ocds_location_extension/zipball/v1.1.4',  # noqa: E501
    }
Esempio n. 3
0
def test_patched_release_schema_with_download_url():
    """An extension given as an archive download URL is applied to the standard schema."""
    download_url = 'https://github.com/open-contracting-extensions/ocds_coveredBy_extension/archive/master.zip'
    patched = ProfileBuilder('1__1__4', [download_url]).patched_release_schema()

    assert '$schema' in patched
    assert 'coveredBy' in patched['definitions']['Tender']['properties']
Esempio n. 4
0
def test_patched_release_schema_with_absolute_path():
    """An extension given as a file:// URI is applied to the standard schema."""
    file_uri = Path(path('ocds_coveredBy_extension')).resolve().as_uri()
    patched = ProfileBuilder('1__1__4', [file_uri]).patched_release_schema()

    assert '$schema' in patched
    assert 'coveredBy' in patched['definitions']['Tender']['properties']
Esempio n. 5
0
def test_get_standard_file_contents():
    """get_standard_file_contents() returns parseable JSON, and repeat requests return the same result."""
    builder = ProfileBuilder('1__1__4', {})
    first = builder.get_standard_file_contents('release-schema.json')
    second = builder.get_standard_file_contents('release-schema.json')

    assert json.loads(first)
    # The original test's comment promised this invariant, but discarded the
    # first result without asserting it.
    assert first == second
Esempio n. 6
0
def test_patched_release_schema_with_schema_base_url():
    """schema_base_url rewrites the release schema's `id`."""
    base = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/'
    patched = ProfileBuilder('1__1__4', {}, schema_base_url=base).patched_release_schema()

    # Changes `id`.
    assert patched['id'] == base + 'release-schema.json'
Esempio n. 7
0
def test_merge_with_schema():
    """merge() accepts an explicitly patched release schema."""
    builder = ProfileBuilder('1__1__4', {'additionalContactPoint': 'master'})
    patched = builder.patched_release_schema()

    package = json.loads(read('release-package_additional-contact-points.json'))
    compiled_releases = list(merge(package['releases'], schema=patched))

    assert compiled_releases[0] == json.loads(read('compile_extensions.json'))
Esempio n. 8
0
def test_patched_release_schema_with_extension_field():
    """extension_field annotates patched definitions and fields with the extension's name."""
    builder = ProfileBuilder('1__1__4', {'location': 'v1.1.4'})
    patched = builder.patched_release_schema(extension_field='extension')

    location = patched['definitions']['Location']
    geometry = location['properties']['geometry']

    assert location['extension'] == 'Location'
    assert geometry['extension'] == 'Location'
    assert geometry['properties']['type']['extension'] == 'Location'
Esempio n. 9
0
def test_release_package_schema_with_schema_base_url():
    """schema_base_url rewrites the package schema's `id` and its `$ref` to the release schema."""
    base = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/'
    package_schema = ProfileBuilder('1__1__4', {}, schema_base_url=base).release_package_schema()

    # Changes `id` and `$ref`.
    assert package_schema['id'] == base + 'release-package-schema.json'
    assert package_schema['properties']['releases']['items']['$ref'] == base + 'release-schema.json'
Esempio n. 10
0
    def get_patched_schema(self):
        """
        Fetch the release schema from ``self.schema_url``, apply the configured extensions, dereference all JSON
        references, and write the result to ``release-schema.json`` in the current directory.

        :returns: the patched, dereferenced release schema as a dict
        :raises requests.HTTPError: if the schema can't be downloaded
        """
        schema_response = requests.get(self.schema_url)
        # Fail fast on HTTP errors, instead of trying to parse an error page as JSON.
        schema_response.raise_for_status()
        schema = schema_response.json()

        builder = ProfileBuilder(None, self.extensions_info.extension_urls)
        schema = builder.patched_release_schema(
            schema=schema, extension_field=self.extension_field)
        # Inline all $ref's, so the dumped schema is self-contained.
        schema = jsonref.JsonRef.replace_refs(schema)
        with open('release-schema.json', 'w') as f:
            jsonref.dump(schema, f)
        return schema
Esempio n. 11
0
def test_patched_release_schema_with_extension_field_and_language():
    """With a language, extension_field uses the translated extension name."""
    extension_url = 'https://extensions.open-contracting.org/en/extensions/location/master/'
    builder = ProfileBuilder('1__1__4', [extension_url])
    patched = builder.patched_release_schema(extension_field='extension', language='es')

    location = patched['definitions']['Location']
    geometry = location['properties']['geometry']

    assert location['extension'] == 'Ubicación'
    assert geometry['extension'] == 'Ubicación'
    assert geometry['properties']['type']['extension'] == 'Ubicación'
Esempio n. 12
0
def test_extension_codelists(caplog):
    """extension_codelists() collects, combines and de-duplicates the extensions' codelists."""
    caplog.set_level(logging.INFO, logger='ocdsextensionregistry')

    # Note: We can't yet test, using real data, whether an error is raised if a codelist replacement either doesn't
    # contain added codes, or contains removed codes. If we were to use test data, we could create a test registry
    # and test extensions, or mock HTTP requests…. For now, additions were tested manually. We also can't yet test
    # whether an error is raised if two codelist replacements differ.

    # charges and tariffs both have chargePaidBy.csv, but the content is identical, so should not error. ppp has
    # documentType.csv and tariffs has +documentType.csv, but documentType.csv contains the codes added by
    # +documentType.csv, so should not error. ppp and enquiries both have +partyRole.csv.
    extension_urls = {
        'https://raw.githubusercontent.com/open-contracting-extensions/ocds_ppp_extension/70c5cb759d4739d1eca5db832e723afb69bbdae0/',  # noqa: E501
        'https://github.com/open-contracting-extensions/ocds_enquiry_extension/archive/v1.1.4.zip',
        'https://github.com/open-contracting-extensions/ocds_charges_extension/archive/master.zip',
        'https://github.com/open-contracting-extensions/ocds_tariffs_extension/archive/1.1.zip',
    }
    codelists = sorted(ProfileBuilder('1__1__4', extension_urls).extension_codelists())
    plus_party_role = next(c for c in codelists if c.name == '+partyRole.csv')

    # Collects codelists.
    expected_names = sorted([
        '+milestoneType.csv',
        '+partyRole.csv',
        '+releaseTag.csv',
        '-partyRole.csv',
        'documentType.csv',
        'initiationType.csv',
    ] + new_extension_codelists)
    assert len(codelists) == 9
    assert [c.name for c in codelists] == expected_names

    # Preserves content.
    milestone_type = codelists[0]
    first_row = milestone_type[0]
    assert milestone_type.name == '+milestoneType.csv'
    assert len(milestone_type) == 2
    assert len(first_row) == 4
    assert first_row['Code'] == 'procurement'
    assert first_row['Title'] == 'Procurement'
    assert first_row['Description'].startswith('Events taking place during the procurement which are not ')
    assert first_row['Source'] == ''

    # Combines codelist additions and removals.
    assert len(plus_party_role) == 13
    assert sorted(plus_party_role)[-1]['Code'] == 'socialWitness'

    # Logs ignored codelists.
    assert len(caplog.records) == 1
    assert caplog.records[-1].levelname == 'INFO'
    assert caplog.records[-1].message == (
        'documentType.csv has the codes added by +documentType.csv - ignoring +documentType.csv')
Esempio n. 13
0
def test_patched_release_schema():
    """Extensions are merged into core, and null'ed fields are removed."""
    # The ppp extension sets some fields to null.
    extension_urls = {
        'https://raw.githubusercontent.com/open-contracting-extensions/ocds_ppp_extension/70c5cb759d4739d1eca5db832e723afb69bbdae0/',  # noqa: E501
        'https://github.com/open-contracting-extensions/ocds_location_extension/archive/v1.1.5.zip',
    }
    patched = ProfileBuilder('1__1__5', extension_urls).patched_release_schema()

    # Patches core.
    assert '$schema' in patched
    assert 'Location' in patched['definitions']

    # Removes null'ed fields.
    assert 'buyer' not in patched['properties']
Esempio n. 14
0
def test_release_schema_patch():
    """release_schema_patch() merges the extensions' patches, preserving null values."""
    # The ppp extension sets some fields to null.
    extension_urls = {
        'https://raw.githubusercontent.com/open-contracting-extensions/ocds_ppp_extension/70c5cb759d4739d1eca5db832e723afb69bbdae0/',  # noqa: E501
        'https://github.com/open-contracting-extensions/ocds_location_extension/archive/v1.1.4.zip',
    }
    patch = ProfileBuilder('1__1__4', extension_urls).release_schema_patch()

    # Merges patches.
    assert 'Location' in patch['definitions']

    # Preserves null values.
    assert patch['properties']['buyer'] is None
    assert 'REPLACE_WITH_NULL' not in json.dumps(patch)
Esempio n. 15
0
    def parse_schema(self, input_format, schema=None):
        """Resolve (or download) the package schema, reduce it to the item schema, and store both on self."""
        if schema:
            schema = resolve_file_uri(schema)

        if "release" in input_format:
            pkg_type, getter = "releases", attrgetter("release_package_schema")
        else:
            pkg_type, getter = "records", attrgetter("record_package_schema")

        # Fall back to the English schema if the configured language has no URL.
        default_urls = DEFAULT_SCHEMA_URL[pkg_type]
        url = default_urls.get(self.language[:2], default_urls["en"])

        if not schema:
            LOGGER.info(
                _("No schema provided, using version {}").format(
                    CURRENT_SCHEMA_TAG))
            profile = ProfileBuilder(CURRENT_SCHEMA_TAG, {},
                                     schema_base_url=url)
            schema = getter(profile)()

        title = schema.get("title", "").lower()
        if not title:
            raise ValueError(
                _("Incomplete schema, please make sure your data is correct"))
        if "package" in title:
            # TODO: is this a good way to get the release/record schema?
            schema = jsonref.JsonRef.replace_refs(schema)
            schema = schema["properties"][pkg_type]["items"]

        self.schema = schema
        self.pkg_type = pkg_type
Esempio n. 16
0
def test_record_package_schema_with_schema_base_url_and_embed():
    """With embed=True, `$ref`'s to the release schema are replaced by embedded copies with rewritten `id`'s."""
    base = 'https://standard.open-contracting.org/profiles/ppp/schema/1__0__0__beta/'
    package_schema = ProfileBuilder('1__1__4', {}, schema_base_url=base).record_package_schema(embed=True)

    record_properties = package_schema['definitions']['record']['properties']
    compiled_release = record_properties['compiledRelease']
    versioned_items = record_properties['releases']['oneOf'][1]['items']

    # Changes `id` and `$ref`.
    assert package_schema['id'] == base + 'record-package-schema.json'
    assert compiled_release['id'] == base + 'release-schema.json'
    assert versioned_items['id'] == base + 'release-schema.json'
    assert '$ref' not in compiled_release
    assert '$ref' not in versioned_items
Esempio n. 17
0
def test_standard_codelists():
    """standard_codelists() returns the standard's codelists with their content intact."""
    codelists = ProfileBuilder('1__1__4', {}).standard_codelists()

    # Collects codelists.
    assert len(codelists) == 19
    assert [c.name for c in codelists] == standard_codelists

    # Preserves content.
    award_criteria = codelists[0]
    first_row = award_criteria[0]
    assert award_criteria.name == 'awardCriteria.csv'
    assert len(award_criteria) == 8
    assert len(first_row) == 4
    assert first_row['Code'] == 'priceOnly'
    assert first_row['Title'] == 'Price only'
    assert first_row['Description'].startswith('The award will be made to the qualified bid with the lowest ')
    assert first_row['Deprecated'] == ''
Esempio n. 18
0
def test_patched_codelists(caplog):
    """patched_codelists() merges extension codelists into the standard's codelists."""
    caplog.set_level(logging.INFO, logger='ocdsextensionregistry')

    extension_urls = [
        'https://raw.githubusercontent.com/open-contracting-extensions/ocds_ppp_extension/70c5cb759d4739d1eca5db832e723afb69bbdae0/',  # noqa: E501
        'https://github.com/open-contracting-extensions/ocds_charges_extension/archive/master.zip',
        'https://github.com/open-contracting-extensions/ocds_tariffs_extension/archive/1.1.zip',
    ]
    codelists = ProfileBuilder('1__1__4', extension_urls).patched_codelists()
    party_role = next(c for c in codelists if c.name == 'partyRole.csv')
    initiation_type = next(c for c in codelists if c.name == 'initiationType.csv')

    # Collects codelists.
    assert len(codelists) == 22
    assert [c.name for c in codelists] == standard_codelists + new_extension_codelists

    # Preserves content.
    award_criteria = codelists[0]
    first_row = award_criteria[0]
    assert award_criteria.name == 'awardCriteria.csv'
    assert len(award_criteria) == 8
    assert len(first_row) == 4
    assert first_row['Code'] == 'priceOnly'
    assert first_row['Title'] == 'Price only'
    assert first_row['Description'].startswith('The award will be made to the qualified bid with the lowest ')
    assert first_row['Deprecated'] == ''

    # Adds codes.
    assert any(row['Code'] == 'publicAuthority' for row in party_role)

    # Removes codes.
    assert not any(row['Code'] == 'buyer' for row in party_role)

    # Replaces list.
    assert all(row['Code'] == 'ppp' for row in initiation_type)

    # Logs ignored codelists.
    assert len(caplog.records) == 1
    assert caplog.records[-1].levelname == 'INFO'
    assert caplog.records[-1].message == (
        'documentType.csv has the codes added by +documentType.csv - ignoring +documentType.csv')
Esempio n. 19
0
def get_schema(language, pkg_type):
    """
    Build and return the release or record item schema for ``language`` and ``pkg_type``.

    :param language: the language code to look up in DEFAULT_SCHEMA_URL
    :param pkg_type: either "releases" or "records"
    :returns: the dereferenced item schema as a dict
    """
    # Fall back to the English schema URL for unknown languages, for consistency
    # with parse_schema(), instead of raising KeyError.
    url = DEFAULT_SCHEMA_URL[pkg_type].get(language, DEFAULT_SCHEMA_URL[pkg_type]["en"])
    getter = attrgetter(
        "release_package_schema") if "releases" in pkg_type else attrgetter(
            "record_package_schema")
    profile = ProfileBuilder(CURRENT_SCHEMA_TAG, {}, schema_base_url=url)
    schema = getter(profile)()
    title = schema.get("title", "").lower()
    if "package" in title:
        # Reduce the package schema to the item (release/record) schema.
        schema = jsonref.JsonRef.replace_refs(schema)
        schema = schema["properties"][pkg_type]["items"]
    return schema
Esempio n. 20
0
    def handle(self):
        """Load the schema file, optionally patch it with extensions, dereference it, and write a mapping sheet."""
        with open(self.args.file) as f:
            schema = json.load(f)

        if self.args.extension:
            builder = ProfileBuilder(None, self.args.extension)
            schema = builder.patched_release_schema(
                schema=schema, extension_field=self.args.extension_field)

        # Resolve $ref's relative to the schema file's real location.
        base_uri = pathlib.Path(os.path.realpath(self.args.file)).as_uri()
        if not self.args.no_replace_refs:
            schema = jsonref.JsonRef.replace_refs(schema, base_uri=base_uri)

        try:
            mapping_sheet(
                schema,
                sys.stdout,
                order_by=self.args.order_by,
                infer_required=self.args.infer_required,
                extension_field=self.args.extension_field,
                include_deprecated=not self.args.no_deprecated,
                include_definitions=self.args.no_replace_refs,
            )
        except MissingColumnError as e:
            raise CommandError(str(e)) from e
Esempio n. 21
0
def merge(data,
          uri='',
          publisher=None,
          published_date='',
          version=DEFAULT_VERSION,
          schema=None,
          return_versioned_release=False,
          return_package=False,
          use_linked_releases=False,
          streaming=False):
    """
    Merges release packages and individual releases.

    Yields compiled releases by default. If ``return_versioned_release`` is ``True``, yields versioned releases
    instead. If ``return_package`` is ``True``, wraps the compiled releases (and versioned releases if
    ``return_versioned_release`` is ``True``) in a record package.

    If ``return_package`` is set and ``publisher`` isn't set, the output record package will have the same publisher as
    the last input release package.

    :param data: an iterable of release packages and individual releases
    :param str uri: if ``return_package`` is ``True``, the record package's ``uri``
    :param dict publisher: if ``return_package`` is ``True``, the record package's ``publisher``
    :param str published_date: if ``return_package`` is ``True``, the record package's ``publishedDate``
    :param str version: if ``return_package`` is ``True``, the record package's ``version``
    :param dict schema: the URL, path or dict of the patched release schema to use
    :param bool return_package: wrap the compiled releases in a record package
    :param bool use_linked_releases: if ``return_package`` is ``True``, use linked releases instead of full releases,
        if the input is a release package
    :param bool return_versioned_release: if ``return_package`` is ``True``, include versioned releases in the record
        package; otherwise, yield versioned releases instead of compiled releases
    :param bool streaming: if ``return_package`` is ``True``, set the package's records to a generator (this only works
        if the calling code exhausts the generator before ``merge`` returns)
    :raises InconsistentVersionError: if the versions are inconsistent across packages to merge
    :raises MissingOcidKeyError: if the release is missing an ``ocid`` field
    """
    with Packager() as packager:
        packager.add(data)

        # If no schema was given, pick the release schema matching the input packages' version, patched with any
        # extensions the packages declare.
        if not schema and packager.version:
            prefix = packager.version.replace('.', '__') + '__'
            tag = next(t for t in reversed(get_tags()) if t.startswith(prefix))
            schema = get_release_schema_url(tag)

            extensions = packager.package['extensions']
            if extensions:
                builder = ProfileBuilder(tag, list(extensions))
                schema = builder.patched_release_schema()

        merger = Merger(schema)

        if not return_package:
            yield from packager.output_releases(
                merger, return_versioned_release=return_versioned_release)
            return

        package = packager.package
        package['uri'] = uri
        package['publishedDate'] = published_date
        package['version'] = version
        if publisher:
            package['publisher'] = publisher

        yield from packager.output_package(
            merger,
            return_versioned_release=return_versioned_release,
            use_linked_releases=use_linked_releases,
            streaming=streaming)
Esempio n. 22
0
def get_extended_mapping_sheet(extensions, version):
    """Return the mapping sheet for the standard's release schema patched with ``extensions``."""
    profile = ProfileBuilder(version, extensions)
    patched = profile.patched_release_schema()
    return _get_mapping_sheet(jsonref.JsonRef.replace_refs(patched))
import json
import sys

import requests
from ocdsextensionregistry import ProfileBuilder

# Extension versions used by the European Union profile.
EXTENSION_VERSIONS_URL = 'https://raw.githubusercontent.com/open-contracting-extensions/european-union/latest/docs/extension_versions.json'  # noqa: E501

# Patch the 1.1.5 release schema with the profile's extensions, annotating each
# patched field with the name of the extension that introduced it.
profile = ProfileBuilder('1__1__5', requests.get(EXTENSION_VERSIONS_URL).json())
patched_schema = profile.patched_release_schema(extension_field='extension')

json.dump(patched_schema, sys.stdout, ensure_ascii=False, indent=2)
Esempio n. 24
0
def update(ppp_base_url):
    """
    Aligns OC4IDS with OCDS. It uses OCDS for PPPs as a basis, as it includes most definitions and codelists needed in
    OC4IDS. It copies definitions and codelists across, making modifications as required.

    Run this command for every release of OCDS for PPPs, review any changes to schemas or codelists, and update the
    command as needed.

    Some OC4IDS-specific definitions have fields with the same names as in OCDS-specific definitions, notably:

    - procurementMethod
    - procurementMethodDetails
    - tenderers

    The descriptions of most other such fields have diverged. As such, the command makes no effort to copy the
    descriptions of such fields, and instead leaves this up to the editor.

    :param ppp_base_url: the base URL of the OCDS for PPPs schema and codelists, ending with a slash
    """
    def copy_def(definition, replacements=None):
        """
        Copy ``definition`` from the OCDS for PPPs release schema into the OC4IDS project schema.

        :param definition: the key under ``definitions`` to copy
        :param replacements: a dict mapping a tuple of keys (a path within the copied definition) to a function that
                             receives the value at that path and returns its replacement
        """
        value = deepcopy(ppp_schema['definitions'][definition])
        schema['definitions'][definition] = value
        if replacements:
            for keys, replacement in replacements.items():
                leaf = keys[-1]
                # Walk a fresh cursor from the definition's root for each replacement; previously the root variable
                # itself was rebound while walking, which would break a second replacement path.
                target = value
                for key in keys[:-1]:
                    target = target[key]
                target[leaf] = replacement(target[leaf])

    ocds_base_url = 'https://standard.open-contracting.org/1.1/en/'

    builder = ProfileBuilder('1__1__5', {'budget': 'master'})
    ppp_schema = get(f'{ppp_base_url}release-schema.json').json(
        object_pairs_hook=OrderedDict)
    ppp_schema = builder.patched_release_schema(schema=ppp_schema)

    schema_dir = basedir / 'schema' / 'project-level'
    codelists_dir = schema_dir / 'codelists'

    with (schema_dir / 'project-schema.json').open() as f:
        schema = json.load(f, object_pairs_hook=OrderedDict)

    infra_codelists = {
        'contractingProcessStatus.csv',
        'contractNature.csv',
        'metricID.csv',
        'modificationType.csv',
        'projectSector.csv',
        'projectStatus.csv',
        'projectType.csv',
        'relatedProjectScheme.csv',
        'relatedProject.csv',
    }
    ocds_codelists = {
        'currency.csv',
        'documentType.csv',
        'geometryType.csv',
        'locationGazetteers.csv',
        'method.csv',
        'partyRole.csv',
        'releaseTag.csv',
        'unitClassificationScheme.csv',
    }
    compare([path.name for path in codelists_dir.iterdir()], infra_codelists,
            ocds_codelists, 'schema/project-level/codelists', 'codelists')

    infra_definitions = {
        'ContractingProcess',
        'ContractingProcessSummary',  # Similar to individual release in OCDS
        'LinkedRelease',  # Similar to linked release in OCDS
        'Modification',
        'RelatedProject',  # Similar to relatedProcess in OCDS
        'Person',
    }
    ocds_definitions = {
        'Period',
        'Classification',
        'Location',
        'Value',
        'Organization',
        'OrganizationReference',
        'Address',
        'ContactPoint',
        'BudgetBreakdown',
        'Document',
        'Identifier',
        'Metric',
        'Observation',
        'Transaction',
    }
    compare(schema['definitions'], infra_definitions, ocds_definitions,
            'schema/project-level/project-schema.json#/definitions',
            'definitions')

    # Originally from https://docs.google.com/spreadsheets/d/1ttXgMmmLvqBlPRi_4jAJhIobjnCiwMv13YwGfFOnoJk/edit#gid=0
    ignore = {
        # https://github.com/open-contracting/infrastructure/issues/269
        'finalAudit',
        # https://github.com/open-contracting/standard/issues/870
        'contractSchedule',
        # PPP-specific code or description
        'needsAssessment',
        'projectAdditionality',
        'financeAdditionality',
        'pppModeRationale',
        'riskComparison',
        'discountRate',
        'equityTransferCaps',
        'financeArrangements',
        'guaranteeReports',
        'grants',
        'servicePayments',
        'landTransfer',
        'assetTransfer',
        'revenueShare',
        'otherGovernmentSupport',
        'tariffMethod',
        'tariffReview',
        'tariffs',
        'tariffIllustration',
        'handover',
        'financialStatement',
    }

    # Copy the OCDS codelists.
    for basename in ocds_codelists:
        path = schema_dir / 'codelists' / basename

        # For the two merged codelists, first collect OC4IDS's own rows so they can be preserved.
        if basename in ('documentType.csv', 'partyRole.csv'):
            with open(path) as f:
                reader = csv.DictReader(f)
                fieldnames = reader.fieldnames

                oc4ids_rows = []
                oc4ids_codes = []
                for row in reader:
                    if row['Source'] == 'OC4IDS':
                        oc4ids_rows.append(row)
                        oc4ids_codes.append(row['Code'])

        with open(path, 'w') as f:
            if basename == 'documentType.csv':
                io = StringIO()
                writer = csv.DictWriter(io,
                                        fieldnames,
                                        lineterminator='\n',
                                        extrasaction='ignore')
                writer.writeheader()
                seen = []

                # Add codes from OCDS for PPPs.
                reader = csv_reader(f'{ppp_base_url}codelists/{basename}')
                for row in reader:
                    if row['Code'] not in ignore:
                        seen.append(row['Code'])
                        # These codes' descriptions are entirely new.
                        if row['Code'] in ('environmentalImpact', ):
                            row = next(oc4ids_row for oc4ids_row in oc4ids_rows
                                       if oc4ids_row['Code'] == row['Code'])
                        else:
                            edit_code(row, oc4ids_codes, 'OCDS for PPPs')
                        writer.writerow(row)

                # Add codes from OCDS.
                reader = csv_reader(
                    f'{ocds_base_url}codelists/documentType.csv')
                for row in reader:
                    if row['Code'] not in seen and row['Code'] not in ignore:
                        seen.append(row['Code'])
                        edit_code(row, oc4ids_codes, 'OCDS')
                        writer.writerow(row)

                # Add pre-existing codes from OC4IDS.
                writer.writerows(row for row in oc4ids_rows
                                 if row['Code'] not in seen)

                text = io.getvalue()
            elif basename == 'partyRole.csv':
                io = StringIO()
                writer = csv.DictWriter(io,
                                        fieldnames,
                                        lineterminator='\n',
                                        extrasaction='ignore')
                writer.writeheader()
                seen = []

                # Add codes from OCDS.
                reader = csv_reader(f'{ocds_base_url}codelists/partyRole.csv')
                for row in reader:
                    if row['Code'] not in seen:
                        seen.append(row['Code'])
                        edit_code(row, oc4ids_codes, 'OCDS')
                        writer.writerow(row)

                # Add pre-existing codes from OC4IDS.
                writer.writerows(row for row in oc4ids_rows
                                 if row['Code'] not in seen)

                text = io.getvalue()
            else:
                # All other codelists are copied verbatim from OCDS for PPPs.
                text = get(f'{ppp_base_url}codelists/{basename}').text

            f.write(text)

    # The following definitions follow the same order as in project-schema.json.

    copy_def(
        'Period',
        {
            # Refer to project.
            (
                'description', ):
            lambda s: s.replace('contracting process',
                                'project or contracting process'),
        })

    copy_def(
        'Classification',
        {
            # Remove line item classifications from the definition.
            ('properties', 'scheme', 'description'):
            lambda s: s[:s.index(' For line item classifications,')],
        })
    # Remove the `itemClassificationScheme.csv` codelist.
    del (schema['definitions']['Classification']['properties']['scheme']
         ['codelist'])
    del (schema['definitions']['Classification']['properties']['scheme']
         ['openCodelist'])

    copy_def('Location')
    # noqa: Original from ocds_location_extension:     "The location where activity related to this tender, contract or license will be delivered, or will take place. A location can be described by either a geometry (point location, line or polygon), or a gazetteer entry, or both."
    schema['definitions']['Location'][
        'description'] = "The location where activity related to this project will be delivered, or will take place. A location may be described using a geometry (point location, line or polygon), a gazetteer entry, an address, or a combination of these."  # noqa: E501
    # Add id to Location.
    schema['definitions']['Location']['properties']['id'] = {
        'title': 'Identifier',
        'description':
        'A local identifier for this location, unique within the array this location appears in.',
        'type': 'string',
        'minLength': 1,
    }
    # Add address to Location.
    schema['definitions']['Location']['properties']['address'] = {
        'title': 'Address',
        'description': 'A physical address where works will take place.',
        '$ref': '#/definitions/Address',
    }
    schema['definitions']['Location']['properties'].move_to_end('id',
                                                                last=False)
    schema['definitions']['Location']['required'] = ['id']

    # Set stricter validation on gazetteer identifiers
    schema['definitions']['Location']['properties']['gazetteer']['properties'][
        'identifiers']['uniqueItems'] = True

    copy_def('Value')

    copy_def(
        'Organization',
        {
            # Refer to project instead of contracting process, link to infrastructure codelist instead of PPP codelist.
            ('properties', 'roles', 'description'):
            lambda s: s.replace('contracting process', 'project').replace(
                'profiles/ppp/latest/en/',
                'infrastructure/{{version}}/{{lang}}/')  # noqa: E501
        })
    # Remove unneeded extensions and details from Organization.
    del (schema['definitions']['Organization']['properties']['shareholders'])
    del (schema['definitions']['Organization']['properties']
         ['beneficialOwnership'])
    del (schema['definitions']['Organization']['properties']['details'])

    # Set stricter validation on party roles
    schema['definitions']['Organization']['properties']['roles'][
        'uniqueItems'] = True

    # Add `people` property to OrganizationReference
    schema['definitions']['Organization']['properties']['people'] = {
        "title": "People",
        "description":
        "People associated with, representing, or working on behalf of this organization in respect of this project.",  # noqa: E501
        "type": "array",
        "items": {
            "$ref": "#/definitions/Person"
        },
        "uniqueItems": True
    }

    copy_def('OrganizationReference')

    copy_def('Address')

    copy_def(
        'ContactPoint',
        {
            # Refer to project instead of contracting process.
            ('properties', 'name', 'description'):
            lambda s: s.replace('contracting process', 'project'),
        })

    copy_def('BudgetBreakdown')

    copy_def(
        'Document',
        {
            # Link to infrastructure codelist instead of PPP codelist
            ('properties', 'documentType', 'description'):
            lambda s: s.
            replace('profiles/ppp/latest/en/',
                    'infrastructure/{{version}}/{{lang}}/'),  # noqa: E501
        })
    # noqa: Original from standard:                                                 "A short description of the document. We recommend descriptions do not exceed 250 words. In the event the document is not accessible online, the description field can be used to describe arrangements for obtaining a copy of the document.",
    schema['definitions']['Document']['properties']['description'][
        'description'] = "Where a link to a full document is provided, the description should provide a 1 - 3 paragraph summary of the information the document contains, and the `pageStart` field should be used to make sure readers can find the correct section of the document containing more information. Where there is no linked document available, the description field may contain all the information required by the current `documentType`. \n\nLine breaks in text (represented in JSON using `\\n\\n`) must be respected by systems displaying this information, and systems may also support basic HTML tags (H1-H6, B, I, U, strong, A and optionally IMG) or [markdown syntax](https://github.com/adam-p/markdown-here/wiki/Markdown-Cheatsheet) for formatting. "  # noqa: E501
    # noqa: Original from standard:                                         " direct link to the document or attachment. The server providing access to this document should be configured to correctly report the document mime type."
    schema['definitions']['Document']['properties']['url'][
        'description'] = "This should be a direct link to the document or web page where the information described by the current documentType exists."  # noqa: E501

    copy_def('Identifier')

    # Note: a stray trailing comma previously made this statement build a throwaway one-element tuple.
    copy_def(
        'Metric', {
            ('properties', 'id', 'description'):
            lambda s: s.replace('contracting process',
                                'contracting process or project')
        })  # noqa: E501

    schema['definitions']['Metric'][
        'description'] = "Metrics are used to set out forecast and actual metrics targets for a project: for example, planned and actual physical and financial progress over time."  # noqa: E501
    # noqa: Original from standard: "Metrics are used to set out targets and results from a contracting process. During the planning and tender sections, a metric indicates the anticipated results. In award and contract sections it indicates the awarded/contracted results. In the implementation section it is used to provide updates on actually delivered results, also known as outputs."

    copy_def('Observation')
    # Remove the `relatedImplementationMilestone` property
    del (schema['definitions']['Observation']['properties']
         ['relatedImplementationMilestone'])

    copy_def('Transaction')

    remove_null_and_pattern_properties(schema)
    remove_integer_identifier_types(schema)
    remove_deprecated_properties(schema)
    add_validation_properties(schema)

    with (schema_dir / 'project-schema.json').open('w') as f:
        json.dump(schema, f, ensure_ascii=False, indent=2)
        f.write('\n')
Esempio n. 25
0
File: cli.py Progetto: lttga/test2
def cli(
    filename,
    schema,
    selection,
    split,
    threshold,
    state_file,
    xlsx,
    csv,
    combine,
    unnest,
    unnest_file,
    only,
    only_file,
    repeat,
    repeat_file,
    count,
    human,
    language,
):
    """Spoonbill cli entry point.

    Analyzes an OCDS release/record package (optionally restoring a previous
    analysis from ``state_file``) and flattens it into CSV and/or XLSX tables.

    The function is driven entirely by click options (decorators are defined
    at module level); it echoes progress to the terminal and writes its
    output next to the input file or to the locations given by ``csv``/``xlsx``.
    """
    click.echo(_("Detecting input file format"))
    # TODO: handle line separated json
    # TODO: handle single release/record
    (
        input_format,
        _is_concatenated,
        _is_array,
    ) = detect_format(filename)

    # Validate output destinations up front so we fail before any heavy work.
    if csv:
        csv = pathlib.Path(csv).resolve()
        if not csv.exists():
            raise click.BadParameter(
                _("Desired location {} does not exists").format(csv))
    if xlsx:
        xlsx = pathlib.Path(xlsx).resolve()
        if not xlsx.parent.exists():
            raise click.BadParameter(
                _("Desired location {} does not exists").format(xlsx.parent))

    click.echo(
        _("Input file is {}").format(click.style(input_format, fg="green")))
    is_package = "package" in input_format
    combine_choice = combine if combine else ""
    if not is_package:
        # TODO: fix this
        click.echo("Single releases are not supported by now")
        return

    if schema:
        schema = resolve_file_uri(schema)
    # Fall back to the current standard schema when none is supplied.
    if "release" in input_format:
        root_key = "releases"
        if not schema:
            click.echo(
                _("No schema provided, using version {}").format(
                    click.style(CURRENT_SCHEMA_TAG, fg="cyan")))
            profile = ProfileBuilder(CURRENT_SCHEMA_TAG, {})
            schema = profile.release_package_schema()
    else:
        root_key = "records"
        if not schema:
            click.echo(
                _("No schema provided, using version {}").format(
                    click.style(CURRENT_SCHEMA_TAG, fg="cyan")))
            profile = ProfileBuilder(CURRENT_SCHEMA_TAG, {})
            schema = profile.record_package_schema()

    title = schema.get("title", "").lower()
    if not title:
        raise ValueError(
            _("Incomplete schema, please make sure your data is correct"))
    if "package" in title:
        # A *package* schema wraps the release/record schema; unwrap it.
        # TODO: is is a good way to get release/record schema
        schema = schema["properties"][root_key]["items"]

    path = pathlib.Path(filename)
    workdir = path.parent
    filename = path.name
    selection = selection or ROOT_TABLES.keys()
    combine = combine or COMBINED_TABLES.keys()
    root_tables = get_selected_tables(ROOT_TABLES, selection)
    combined_tables = get_selected_tables(COMBINED_TABLES, combine)

    if state_file:
        click.secho(_("Restoring from provided state file"), bold=True)
        analyzer = FileAnalyzer(workdir, state_file=state_file)
    else:
        click.secho(
            _("State file not supplied, going to analyze input file first"),
            bold=True)
        analyzer = FileAnalyzer(
            workdir,
            schema=schema,
            root_key=root_key,
            root_tables=root_tables,
            combined_tables=combined_tables,
            language=language,
            table_threshold=threshold,
        )
        click.echo(_("Analyze options:"))
        click.echo(
            _(" - table threshold => {}").format(
                click.style(str(threshold), fg="cyan")))
        click.echo(
            _(" - language        => {}").format(
                click.style(language, fg="cyan")))
        click.echo(
            _("Processing file: {}").format(click.style(str(path), fg="cyan")))
        total = path.stat().st_size
        progress = 0
        # Progress bar not showing with small files
        # https://github.com/pallets/click/pull/1296/files
        with click.progressbar(width=0,
                               show_percent=True,
                               show_pos=True,
                               length=total) as bar:
            for read, number in analyzer.analyze_file(filename,
                                                      with_preview=True):
                bar.label = ANALYZED_LABEL.format(
                    click.style(str(number), fg="cyan"))
                # analyze_file reports absolute bytes read; the bar wants a delta.
                bar.update(read - progress)
                progress = read
        click.secho(_("Done processing. Analyzed objects: {}").format(
            click.style(str(number + 1), fg="red")),
                    fg="green")
        # BUG FIX: the original used an f-string with no placeholder
        # (f"(unknown).state"), hardcoding a bogus state-file name.
        # Derive the state-file name from the input file instead.
        state_file = pathlib.Path(f"{filename}.state")
        state_file_path = workdir / state_file
        click.echo(
            _("Dumping analyzed data to '{}'").format(
                click.style(str(state_file_path.absolute()), fg="cyan")))
        analyzer.dump_to_file(state_file)

    click.echo(
        _("Flattening file: {}").format(click.style(str(path), fg="cyan")))

    # Mutually exclusive option pairs: inline value vs. file-provided value.
    if unnest and unnest_file:
        raise click.UsageError(
            _("Conflicting options: unnest and unnest-file"))
    if repeat and repeat_file:
        raise click.UsageError(
            _("Conflicting options: repeat and repeat-file"))
    if only and only_file:
        raise click.UsageError(_("Conflicting options: only and only-file"))

    options = {"selection": {}, "count": count}
    unnest = read_option_file(unnest, unnest_file)
    repeat = read_option_file(repeat, repeat_file)
    only = read_option_file(only, only_file)

    for name in selection:
        table = analyzer.spec[name]
        if table.total_rows == 0:
            click.echo(
                _("Ignoring empty table {}").format(click.style(name,
                                                                fg="red")))
            continue

        # BUG FIX: filter into per-table locals. The original reassigned
        # `unnest`/`only`/`repeat` inside this loop, so each table's filter
        # destroyed the option lists for every subsequent table.
        table_unnest = [col for col in unnest if col in table.combined_columns]
        if table_unnest:
            click.echo(
                _("Unnesting columns {} for table {}").format(
                    click.style(",".join(table_unnest), fg="cyan"),
                    click.style(name, fg="cyan")))

        table_only = [col for col in only if col in table]
        if table_only:
            click.echo(
                _("Using only columns {} for table {}").format(
                    click.style(",".join(table_only), fg="cyan"),
                    click.style(name, fg="cyan")))

        table_repeat = [col for col in repeat if col in table]
        if table_repeat:
            click.echo(
                _("Repeating columns {} in all child table of {}").format(
                    click.style(",".join(table_repeat), fg="cyan"),
                    click.style(name, fg="cyan")))

        options["selection"][name] = {
            "split": split or analyzer.spec[name].should_split,
            "pretty_headers": human,
            "unnest": table_unnest,
            "only": table_only,
            "repeat": table_repeat,
        }
    options = FlattenOptions(**options)
    flattener = FileFlattener(
        workdir,
        options,
        analyzer.spec.tables,
        root_key=root_key,
        csv=csv,
        xlsx=xlsx,
        language=language,
    )

    # Chaining a string iterates its characters; combine_choice is "" (no-op)
    # or the user-supplied combine value — behavior kept from the original.
    all_tables = chain(flattener.flattener.tables, combine_choice)

    click.echo(
        _("Going to export tables: {}").format(
            click.style(",".join(all_tables), fg="magenta")))

    click.echo(_("Processed tables:"))
    for table_name, table_obj in flattener.flattener.tables.items():
        message = _("{}: {} rows").format(table_name, table_obj.total_rows)
        if not table_obj.is_root:
            # Child tables are shown indented under their root table.
            message = "└-----" + message
        click.echo(message)

    click.echo(_("Flattening input file"))
    # `flat_count` (was `count`) renamed so it no longer shadows the
    # `count` parameter already consumed into `options` above.
    with click.progressbar(
            flattener.flatten_file(filename),
            length=analyzer.spec.total_items + 1,
            width=0,
            show_percent=True,
            show_pos=True,
    ) as bar:
        for flat_count in bar:
            bar.label = FLATTENED_LABEL.format(
                click.style(str(flat_count + 1), fg="cyan"))

    click.secho(_("Done flattening. Flattened objects: {}").format(
        click.style(str(flat_count + 1), fg="red")),
                fg="green")