def get_removed_fields(extension_version, lang):
    """
    Returns a dictionary of deprecation status and field tables. Each table is a list of fields. Each field is a
    dictionary with "definition_path", "path" and "url" (if available) keys. All values are translated.

    The dictionary's keys are "deprecated" (the original field is marked as deprecated) and "active" (it is not). A
    key is present only if at least one removed field belongs to that group.

    If the extension version has no release schema, an empty dictionary is returned, as in ``get_schema_tables``.
    """
    tables = defaultdict(list)

    # Without a release schema there are no fields to inspect, and indexing the schema by language would fail.
    if not extension_version['schemas']['release-schema.json']:
        return tables

    schema = _patch_schema(extension_version, 'en', include_test_dependencies=True)
    sources = _get_sources(schema, lang)

    for field in get_schema_fields(extension_version['schemas']['release-schema.json'][lang]):
        # A removed field is one whose schema is null; skip all others.
        if field.schema is not None:
            continue

        original_field = _add_link_to_original_field(field, schema, sources)

        group = 'deprecated' if original_field.get('deprecated') else 'active'

        tables[group].append(
            field.asdict(exclude=('definition_pointer', 'pointer', 'schema', 'required', 'deprecated', 'multilingual'))
        )

    return tables
def get_schema_tables(extension_version, lang):
    """
    Returns a dictionary of definition names and field tables. Each table is a list of fields. Each field is a
    dictionary with "definition_path", "path", "schema", "multilingual", "title", "description", "types" and "source"
    (if available) keys. All values are translated.

    Fields that are not inside a definition are grouped under the "Release" key. Removed fields (those whose schema is
    null) are omitted. If the extension version has no release schema, an empty dictionary is returned.

    The "description" (rendered from Markdown) and "types" values may contain HTML. The "description" includes any
    deprecation information.
    """
    result = {}

    if not extension_version['schemas']['release-schema.json']:
        return result

    patched = _patch_schema(extension_version, 'en', include_test_dependencies=True)
    sources = _get_sources(patched, lang)

    omitted = ('definition_pointer', 'pointer', 'required', 'deprecated')

    for field in get_schema_fields(extension_version['schemas']['release-schema.json'][lang]):
        # Removed fields have no schema to describe.
        if field.schema is None:
            continue

        name = field.definition_path or 'Release'

        table = result.get(name)
        if table is None:
            table = result[name] = {'fields': []}
            if field.definition_path in sources:
                table['source'] = sources[field.definition_path]

        # Linking to the original field is best-effort: a pointer that can't be resolved is ignored.
        try:
            _add_link_to_original_field(field, patched, sources)
        except jsonpointer.JsonPointerException:
            pass

        entry = field.asdict(sep='.', exclude=omitted)
        entry['title'] = field.schema.get('title', '')
        entry['description'] = markdown(field.schema.get('description', ''))
        separator = gettext(' or ')
        entry['types'] = separator.join(_get_types(field.schema, sources, extension_version, lang))
        table['fields'].append(entry)

    return result
Example #3
0
def mapping_sheet(schema, io, order_by=None, infer_required=False, extension_field=None, include_deprecated=True,
                  include_definitions=False):
    """
    Writes a CSV file with one or more rows describing each field path in a JSON Schema.

    :param dict schema: a JSON schema
    :param io: a file-like object to which to write the rows
    :param str order_by: the column by which to sort the rows
    :param bool infer_required: whether to infer that a field is required if "null" is not in its ``type``
    :param str extension_field: the property in the JSON schema containing the name of the extension in which each
                                field was defined
    :param bool include_deprecated: whether to include any deprecated fields
    :param bool include_definitions: whether to traverse the "definitions" property

    The CSV's columns are:

    :``section``: The first part of the JSON path to the field in the data, e.g. ``tender``
    :``path``: The JSON path to the field in the data, e.g. ``tender/id``
    :``title``: The field's ``title`` in the JSON schema.  If the field has no ``title``, defaults to the field's name
      followed by "*".
    :``description``: The field's ``description`` in the JSON schema. URLs are removed (see the ``links`` column).
    :``type``: A comma-separated list of the field's ``type`` in the JSON schema, excluding "null". If the field has no
      ``type``, defaults to "unknown".
    :``range``: The field's allowed number of occurrences.

      * "0..1" if the field defines an optional literal value.
      * "0..n" if the field defines an optional array.
      * "1..1" if the field defines a required literal value.
      * "1..n" if the field defines a required array.
    :``values``: If the field's schema sets:

      * ``format``: the ``format``
      * ``pattern``: the ``pattern``
      * ``enum``: "Enum: " followed by the ``enum`` as a comma-separated list, excluding ``null``
      * ``items/enum``: "Enum: " followed by the ``items/enum`` as a comma-separated list, excluding ``null``
    :``links``: The URLs extracted from the field's ``description``
    :``deprecated``: The OCDS minor version in which the field (or its parent) was deprecated
    :``deprecationNotes``: The explanation for the deprecation of the field
    :``extension``: The name of the extension that introduced the JSON path (see the ``extension_field`` parameter)

    :raises MissingColumnError: if the column by which to order is missing
    """
    rows, rows_by_path = [], {}
    # Keyword arguments shared by the rows added for the referrer and for the field itself.
    common = {'infer_required': infer_required, 'include_deprecated': include_deprecated}

    for field in get_schema_fields(schema):
        if not include_definitions and field.definition_pointer_components:
            continue

        prop = field.schema
        field.sep = '/'

        # A field that uses `$ref` gets an extra row for the referrer, which is easier to use as a header for the
        # object, and which preserves the referrer's own title and description alongside the referee's.
        if hasattr(prop, '__reference__'):
            referrer = dict(prop.__reference__)
            prop = dict(prop)
            if extension_field in referrer:
                prop[extension_field] = referrer[extension_field]
            if 'type' in prop and 'type' not in referrer:
                referrer['type'] = prop['type']
            _add_row(rows, rows_by_path, field, referrer, extension_field, **common)

        _add_row(rows, rows_by_path, field, prop, extension_field, **common)

        # An array of titled objects gets an extra row, which is easier to use as a header for the object.
        if 'items' in prop:
            items = prop['items']
            if 'properties' in items and 'title' in items:
                header_row = {
                    'path': field.path,
                    'title': items['title'],
                    'description': items.get('description', ''),
                    'type': items['type'],
                }
                _add_row(rows, rows_by_path, field, items, extension_field, row=header_row,
                         include_deprecated=include_deprecated)

    if order_by:
        def sort_key(row):
            return row[order_by]

        try:
            rows = sorted(rows, key=sort_key)
        except KeyError as e:
            raise MissingColumnError("the column '{}' doesn't exist – did you make a typo?".format(order_by)) from e

    fieldnames = ['section', 'path', 'title', 'description', 'type', 'range', 'values', 'links', 'deprecated',
                  'deprecationNotes']
    if extension_field:
        fieldnames += [extension_field]

    writer = csv.DictWriter(io, fieldnames)
    writer.writeheader()
    writer.writerows(rows)
Example #4
0
    def generate_mapping_sheets(self):
        """
        Builds the rows of every sheet of the mapping template and saves them.

        The "schema" and "schema_extensions" sheets are loaded from the CSV file at ``self.mapping_sheet_file``.
        The "general", "planning", "tender", "awards", "contracts" and "implementation" sheets get one row per
        non-deprecated field of the patched schema outside its definitions, followed by a section for fields
        from extensions (if any) and a section of blank rows for additional fields.

        Each row starts with a formatting key and a grouping depth, which are read by the script that renders
        the rows in Google Sheets. All rows of the six mapping sheets are padded to the same number of columns.

        :returns: the return value of ``self._save_sheets``, called with a dictionary of sheet names to rows
        """
        schema = self.get_patched_schema()

        mapping_sheetnames = ('general', 'planning', 'tender', 'awards',
                              'contracts', 'implementation')
        sheetnames = mapping_sheetnames + ('schema', 'schema_extensions')

        # create list for each mapping sheet
        sheets = {name: [] for name in sheetnames}
        sheet_headers = {name: [] for name in mapping_sheetnames}

        # Rows for fields from extensions, by sheet name then extension name. The "parties" entry receives
        # extension fields under "parties"; it is not read back when the sheets are assembled below.
        extension_rows = {
            name: OrderedDict()
            for name in mapping_sheetnames + ('parties', )
        }

        # use the mapping sheet to load the schema and schema_extensions tabs
        # (the csv module requires files to be opened with newline='')
        with open(self.mapping_sheet_file, newline='') as csvfile:
            reader = csv.reader(csvfile, dialect='excel')
            header = next(reader)

            sheets['schema'].append(header[:-1])

            for row in reader:
                # NOTE(review): assumes 11 columns, with URLs in column 7 and the extension
                # name in column 10 (the last column) - confirm against the mapping sheet.
                row[7] = row[7].replace('1.1-dev', '1.1.5')
                if row[10]:
                    sheets['schema_extensions'].append(row)
                else:
                    sheets['schema'].append(row[:-1])

        # move the extension column to the beginning (this drops the original first column)
        sheets['schema_extensions'] = [
            row[-1:] + row[1:-1] for row in sheets['schema_extensions']
        ]

        # sort the extension rows by their first two columns, i.e. by extension,
        # then by the column that followed the dropped first column
        sheets['schema_extensions'].sort(key=itemgetter(0, 1))

        # add header
        sheets['schema_extensions'].insert(0, header[-1:] + header[1:-1])

        # create list for fields to repeat on parties sheet
        parties_rows = []

        # create list for organization references to add to parties sheet
        org_refs = []
        org_refs_extensions = OrderedDict()

        # set default depth for row grouping in Google Sheets
        depth = 0

        # regular expression to find links in schema descriptions
        INLINE_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')

        # remove links from top-level schema description, keeping the link text.
        # A single substitution also handles the same link text used with different URLs.
        schema['description'] = INLINE_LINK_RE.sub(r'\1', schema['description'])

        # add header rows to each sheet
        headers = [
            'column_headers', depth,
            self.get_string('path_header'),
            self.get_string('title_header'),
            self.get_string('description_header'),
            self.get_string('mapping_header'),
            self.get_string('example_header'),
            self.get_string('notes_header')
        ]

        stage_paths = ('planning', 'tender', 'awards', 'contracts',
                       'contracts/implementation')

        # add row to mapping sheet for each field in the schema
        for field in get_schema_fields(schema):

            # skip definitions section of schema, and deprecated fields
            if field.definition_pointer_components or field.deprecated:
                continue

            # set separator to use in field paths in output
            field.sep = '/'

            # is this field from an extension? (a path without an entry is not)
            try:
                field_extension = self.field_extensions[field.path]
            except KeyError:
                field_extension = ''

            # is this field a top-level stage?
            field_is_stage = field.path in stage_paths

            # set formatting keys for use in Google Sheets script
            if field_is_stage:
                format_key = 'title'
            elif field.schema['type'] in ('object', 'array'):
                format_key = 'span'
            else:
                format_key = 'field'

            if field_extension:
                format_prefix = 'extension_'
            elif field.required:
                format_prefix = 'required_'
            else:
                format_prefix = ''

            # add organization references to list for use in parties mapping sheet:
            # either the field is a $ref to the organization reference definition,
            # or it is an array whose items carry the organization reference title
            is_org_reference = (
                hasattr(field.schema, '__reference__')
                and field.schema.__reference__['$ref'] ==
                '#/definitions/' + self.get_string('organization_reference_code')
            ) or (
                'items' in field.schema
                and 'title' in field.schema['items']
                and field.schema['items']['title'] ==
                self.get_string('organization_reference_title')
            )

            if is_org_reference:
                row = [format_prefix + format_key, 1, field.path]

                if field_extension:
                    # if the org reference belongs to an extension, save it in a separate dict
                    # with the name of the extension
                    org_refs_extensions.setdefault(field_extension, []).append(row)
                else:
                    org_refs.append(row)

            # the first component of the path decides which sheet the field belongs to
            path = field.path.partition('/')[0]

            if path in ('planning', 'tender', 'awards'):
                sheet = sheets[path]
                sheetname = path
            elif path == 'contracts':
                if 'contracts/implementation' in field.path:
                    sheet = sheets['implementation']
                    sheetname = 'implementation'
                else:
                    sheet = sheets['contracts']
                    sheetname = 'contracts'
            elif path == 'parties':
                sheet = parties_rows
                sheetname = 'parties'
            else:
                sheet = sheets['general']
                sheetname = 'general'

            # a stage contributes a title and subtitle to its sheet's header instead of a row
            if format_key == 'title':
                sheet_headers[sheetname].append([
                    format_key, depth,
                    '{}: {}'.format(self.get_string('standard_name'),
                                    field.schema['title'])
                ])
                sheet_headers[sheetname].append(
                    ['subtitle', depth, field.schema['description']])
                continue

            row = [format_prefix + format_key, depth, field.path]

            if field_extension:
                extension_rows[sheetname].setdefault(field_extension, []).append(row)
            else:
                # add row to mapping sheet
                sheet.append(row)

        # add a static header for the General sheet
        sheet_headers['general'].append([
            'title', depth, '{}: {}'.format(self.get_string('standard_name'),
                                            self.get_string('general_title'))
        ])
        sheet_headers['general'].append(
            ['subtitle', depth,
             self.get_string('general_help_text')])

        # add headers for each sheet
        for name in mapping_sheetnames:
            sheets[name] = sheet_headers[name] + [headers] + sheets[name]

        # repeat fields from parties section for each organization reference
        sheets['general'].append([
            'subtitle', depth,
            self.get_string('parties_description')
        ])  # description of the parties section

        # NOTE(review): parties_rows[1:] skips the first collected row - presumably the
        # row for the "parties" array itself; confirm.
        for ref in org_refs:
            ref[0] = 'ref_span'
            sheets['general'].append(ref)
            sheets['general'].extend(parties_rows[1:])

        # add organizations from extensions
        extension_parties_rows = [['extension_' + x[0], x[1], x[2]]
                                  for x in parties_rows[1:]]

        for extension_name, orgs in org_refs_extensions.items():
            # insert extension name
            general_rows = extension_rows['general'].setdefault(extension_name, [])

            # insert organizations
            for org in orgs:
                general_rows.append(org)
                general_rows.extend(extension_parties_rows)

        for name in mapping_sheetnames:

            if extension_rows[name]:
                # add extension section title
                sheets[name].append(
                    ['section', 0,
                     self.get_string('extension_section')])

                for extension_name, rows in extension_rows[name].items():
                    text = extension_name + ': ' + self.extensions_info.get_description(
                        extension_name)

                    sheets[name].append(['extension', 0, text])
                    sheets[name].extend(rows)

            # add additional fields section to each sheet
            sheets[name].append(
                ['section', 0,
                 self.get_string('additional_fields_note')])

            for _ in range(4):
                sheets[name].append(['additional_field', 0])

            # make all rows have the same number of columns
            # (required for CSV parsing script in Google Sheets);
            # a row that is already long enough is extended by an empty list
            for row in sheets[name]:
                row.extend([''] * (len(headers) - len(row)))

        return self._save_sheets(sheets)
Example #5
0
def test_example_backticks():
    """
    Checks that every term in backticks in the README is recognized.

    A term is recognized if it is a known literal (a JSON keyword, a JSON Schema property, a codelist column or
    name, or a JSON path, field name or definition name from the patched schemas), if it matches a pattern
    property, if it starts with a double quote, or if it is listed as an exception for this repository. A warning
    is issued for each unrecognized term, and the test fails if there are any.
    """
    exceptions = {
        'ocds_pagination_extension': {
            # Example query string parameters.
            'offset',
            'offset=NUMBER',
            'page',
            'page=1',
            'page=NUMBER',
            'since',
            'since=TIMESTAMP',
            # Changelog entries for non-existent or removed fields.
            'links.all',
            'packageMetadata',
        },

        # Substring of pattern property.
        'ocds_exchangeRate_extension': {
            'CODE',
        },

        # Cross-references to other extensions.
        'ocds_contract_signatories_extension': {
            'preferredBidders',
            'publicAuthority',
        },

        # Changelog entries for non-existent or removed fields or codelists.
        'ocds_bid_extension': {
            'BidsStatistic.requirementResponses',
        },
        'ocds_eu_extension': {
            'minimumValue',
            'Lot.minimumValue',
        },
        'ocds_lots_extension': {
            'LotDetails',
            'Bid.relatedLots',
            'Finance.relatedLots',
        },
        'ocds_ppp_extension': {
            'initiationType.csv',
        },
        'ocds_project_extension': {
            'Project.source',
            'Project.project',
        },
        'ocds_qualification_extension': {
            'PreQualification.procurementMethodRationale',
            'PreQualification.awardCriteriaDetails',
        },
        'ocds_shareholders_extension': {
            'Organization.beneficialOwnership',
        },
        'ocds_submissionTerms_extension': {
            'requiresGuarantees',
        },
    }

    # Start with JSON null, JSON booleans, and a jsonmerge field from OCDS 1.0.
    literals = {'null', 'true', 'false', 'mergeStrategy'}
    patterns = set()

    # JSON Schema properties.
    meta_schema_url = 'https://raw.githubusercontent.com/open-contracting/standard/1.1/schema/meta-schema.json'
    literals.update(http_get(meta_schema_url).json()['properties'])

    # Codelist columns.
    codelist_schema_url = 'https://raw.githubusercontent.com/open-contracting/standard-maintenance-scripts/main/schema/codelist-schema.json'  # noqa: E501
    literals.update(http_get(codelist_schema_url).json()['items']['properties'])

    # Codelist names.
    literals.update(read_metadata().get('codelists', []))

    # JSON paths, field names and definition names, from whichever schema files exist.
    for basename in ('release-schema.json', 'release-package-schema.json',
                     'record-package-schema.json'):
        if not os.path.isfile(os.path.join(cwd, basename)):
            continue

        dereferenced = jsonref.JsonRef.replace_refs(patch_schema(basename))
        for field in get_schema_fields(dereferenced):
            # A pattern property's path ends with its regular expression in parentheses:
            # keep the literal prefix escaped, and the expression as-is.
            if 'patternProperties' in field.pointer_components:
                match = re.search(r'^(.*)\(\^?(.+)\$?\)$', field.path)
                prefix, expression = match.groups()
                patterns.add(
                    re.compile(r'^' + re.escape(prefix) + expression + r'$'))
                continue

            literals.add(field.path)  # e.g. tender.id
            if len(field.path_components) > 1:
                literals.add(field.path_components[-1])  # e.g. scale
            if field.definition_path_components:
                literals.add(field.definition_path)  # e.g. Lot
                literals.add(
                    f'{field.definition_path}.{field.path}')  # e.g. Lot.id
            if 'codelist' in field.schema:
                codelist = field.schema['codelist']
                literals.add(codelist)
                literals.add(f"+{codelist}")
                literals.add(f"-{codelist}")

    errors = 0

    for text in re.findall(r'`([^`\n]+)`', read_readme(), re.DOTALL):
        if text in literals:
            continue
        if any(pattern.search(text) for pattern in patterns):
            continue
        # e.g. `"uniqueItems": true`
        if text.startswith('"'):
            continue
        if text in exceptions.get(repo_name, []):
            continue

        errors += 1
        warnings.warn(f'README.md: "{text}" term is not in schema')

    assert errors == 0, 'README.md: Backtick terms are invalid. See warnings below.'