def get_removed_fields(extension_version, lang):
    """
    Returns a dictionary of deprecation status ("active" or "deprecated") and field tables. Each table is a list of
    fields. Each field is a dictionary with "definition_path", "path" and "url" (if available) keys. All values are
    translated.
    """
    tables = defaultdict(list)
    # Guard against an extension with no release schema patch, consistent with get_schema_tables (otherwise the
    # [lang] lookup below would raise).
    if not extension_version['schemas']['release-schema.json']:
        return tables
    schema = _patch_schema(extension_version, 'en', include_test_dependencies=True)
    sources = _get_sources(schema, lang)
    for field in get_schema_fields(extension_version['schemas']['release-schema.json'][lang]):
        # Only removed fields (those whose schema is None) are reported.
        if field.schema is not None:
            continue
        original_field = _add_link_to_original_field(field, schema, sources)
        group = 'deprecated' if original_field.get('deprecated') else 'active'
        tables[group].append(
            field.asdict(exclude=('definition_pointer', 'pointer', 'schema', 'required', 'deprecated', 'multilingual'))
        )
    return tables
def get_schema_tables(extension_version, lang):
    """
    Returns a dictionary mapping definition names to field tables.

    Each table is a list of fields. Each field is a dictionary with "definition_path", "path", "schema",
    "multilingual", "title", "description", "types" and "source" (if available) keys. All values are translated.

    The "description" (rendered from Markdown) and "types" values may contain HTML. The "description" includes any
    deprecation information.
    """
    tables = {}
    if not extension_version['schemas']['release-schema.json']:
        return tables

    schema = _patch_schema(extension_version, 'en', include_test_dependencies=True)
    sources = _get_sources(schema, lang)

    for field in get_schema_fields(extension_version['schemas']['release-schema.json'][lang]):
        # Removed fields have no schema; skip them.
        if field.schema is None:
            continue

        # Fields at the top level of the schema belong to the "Release" table.
        key = field.definition_path or 'Release'
        table = tables.setdefault(key, {'fields': []})
        if field.definition_path in sources:
            table['source'] = sources[field.definition_path]

        try:
            _add_link_to_original_field(field, schema, sources)
        except jsonpointer.JsonPointerException:
            pass

        row = field.asdict(sep='.', exclude=('definition_pointer', 'pointer', 'required', 'deprecated'))
        row['title'] = field.schema.get('title', '')
        row['description'] = markdown(field.schema.get('description', ''))
        row['types'] = gettext(' or ').join(_get_types(field.schema, sources, extension_version, lang))
        table['fields'].append(row)

    return tables
def mapping_sheet(schema, io, order_by=None, infer_required=False, extension_field=None, include_deprecated=True,
                  include_definitions=False):
    """
    Writes information about all field paths in a JSON Schema to a CSV file.

    :param dict schema: a JSON schema
    :param io: a file-like object to which to write the rows
    :param str order_by: the column by which to sort the rows
    :param bool infer_required: whether to infer that a field is required if "null" is not in its ``type``
    :param str extension_field: the property in the JSON schema containing the name of the extension in which each
                                field was defined
    :param bool include_deprecated: whether to include any deprecated fields
    :param bool include_definitions: whether to traverse the "definitions" property
    :raises MissingColumnError: if the column by which to order is missing

    The CSV's columns are:

    :``section``: The first part of the JSON path to the field in the data, e.g. ``tender``
    :``path``: The JSON path to the field in the data, e.g. ``tender/id``
    :``title``: The field's ``title`` in the JSON schema. If the field has no ``title``, defaults to the field's
                name followed by "*".
    :``description``: The field's ``description`` in the JSON schema. URLs are removed (see the ``links`` column).
    :``type``: A comma-separated list of the field's ``type`` in the JSON schema, excluding "null". If the field has
               no ``type``, defaults to "unknown".
    :``range``: The field's allowed number of occurrences: "0..1" (optional literal), "0..n" (optional array),
                "1..1" (required literal) or "1..n" (required array).
    :``values``: The field's ``format`` or ``pattern`` if set; otherwise, "Enum: " followed by its ``enum`` or
                 ``items/enum`` as a comma-separated list, excluding ``null``.
    :``links``: The URLs extracted from the field's ``description``
    :``deprecated``: The OCDS minor version in which the field (or its parent) was deprecated
    :``deprecationNotes``: The explanation for the deprecation of the field
    :``extension``: The name of the extension that introduced the JSON path (see the ``extension_field`` parameter)
    """
    rows = []
    rows_by_path = {}

    for field in get_schema_fields(schema):
        # Unless requested, don't descend into the "definitions" section.
        if field.definition_pointer_components and not include_definitions:
            continue

        field.sep = '/'
        subschema = field.schema

        # A `$ref` yields an extra row for the referrer, which is easier to use as a header for the object, and
        # which preserves the referrer's own title and description alongside the referee's.
        if hasattr(subschema, '__reference__'):
            referrer = dict(subschema.__reference__)
            subschema = dict(subschema)
            if extension_field in referrer:
                subschema[extension_field] = referrer[extension_field]
            if 'type' not in referrer and 'type' in subschema:
                referrer['type'] = subschema['type']
            _add_row(rows, rows_by_path, field, referrer, extension_field, infer_required=infer_required,
                     include_deprecated=include_deprecated)

        _add_row(rows, rows_by_path, field, subschema, extension_field, infer_required=infer_required,
                 include_deprecated=include_deprecated)

        # An array of objects yields an extra row, which is easier to use as a header for the object.
        if 'items' in subschema and 'properties' in subschema['items'] and 'title' in subschema['items']:
            items = subschema['items']
            extra = {
                'path': field.path,
                'title': items['title'],
                'description': items.get('description', ''),
                'type': items['type'],
            }
            _add_row(rows, rows_by_path, field, items, extension_field, row=extra,
                     include_deprecated=include_deprecated)

    if order_by:
        try:
            rows.sort(key=lambda entry: entry[order_by])
        except KeyError as e:
            raise MissingColumnError(f"the column '{order_by}' doesn't exist – did you make a typo?") from e

    fieldnames = ['section', 'path', 'title', 'description', 'type', 'range', 'values', 'links', 'deprecated',
                  'deprecationNotes']
    if extension_field:
        fieldnames.append(extension_field)

    writer = csv.DictWriter(io, fieldnames)
    writer.writeheader()
    writer.writerows(rows)
def generate_mapping_sheets(self):
    """
    Builds the mapping sheets ('general', 'planning', 'tender', 'awards', 'contracts', 'implementation') plus the
    'schema' and 'schema_extensions' tabs from the patched schema and the mapping sheet CSV, and saves them.

    :returns: the result of ``self._save_sheets`` for the assembled sheets
    """
    schema = self.get_patched_schema()

    mapping_sheetnames = ('general', 'planning', 'tender', 'awards', 'contracts', 'implementation')
    sheetnames = mapping_sheetnames + ('schema', 'schema_extensions')

    # create a list of rows for each sheet
    sheets = {x: [] for x in sheetnames}
    sheet_headers = {x: [] for x in mapping_sheetnames}
    extension_rows = {
        x: OrderedDict()
        for x in ('general', 'planning', 'tender', 'awards', 'contracts', 'implementation', 'parties')
    }

    # use the mapping sheet to load the schema and schema_extensions tabs
    header = []
    with open(self.mapping_sheet_file) as csvfile:
        reader = csv.reader(csvfile, dialect='excel')
        header = next(reader)
        sheets['schema'].append(header[:-1])
        for row in reader:
            # pin documentation links to a released version
            row[7] = row[7].replace('1.1-dev', '1.1.5')
            if row[10]:  # the extension column is set
                sheets['schema_extensions'].append(row)
            else:
                sheets['schema'].append(row[:-1])

    # move the extension column to the beginning
    sheets['schema_extensions'] = [row[-1:] + row[1:-1] for row in sheets['schema_extensions']]
    # sort the Extension Schemas by extension, stage and path
    sheets['schema_extensions'].sort(key=itemgetter(0, 1))
    # add header
    sheets['schema_extensions'] = [header[-1:] + header[1:-1]] + sheets['schema_extensions']

    # fields to repeat on the parties sheet
    parties_rows = []
    # organization references to add to the parties sheet
    org_refs = []
    org_refs_extensions = OrderedDict()

    # default depth for row grouping in Google Sheets
    depth = 0

    # regular expression to find links in schema descriptions
    INLINE_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')

    # remove links from the top-level schema description
    links = dict(INLINE_LINK_RE.findall(schema['description']))
    for key, link in links.items():
        schema['description'] = schema['description'].replace('[' + key + '](' + link + ')', key)

    # header row shared by each mapping sheet
    headers = [
        'column_headers',
        depth,
        self.get_string('path_header'),
        self.get_string('title_header'),
        self.get_string('description_header'),
        self.get_string('mapping_header'),
        self.get_string('example_header'),
        self.get_string('notes_header'),
    ]

    # add a row to a mapping sheet for each field in the schema
    for field in get_schema_fields(schema):
        # skip the definitions section of the schema, and deprecated fields
        if field.definition_pointer_components or field.deprecated:
            continue

        # set the separator to use in field paths in the output
        field.sep = '/'

        # is this field from an extension?
        try:
            field_extension = self.field_extensions[field.path]
        except KeyError:  # was a bare except; only a missing path is expected here
            field_extension = ''

        # is this field a top-level stage?
        field_is_stage = field.path in ('planning', 'tender', 'awards', 'contracts', 'contracts/implementation')

        # set formatting keys for use in the Google Sheets script
        if field_is_stage:
            format_key = 'title'
        elif field.schema['type'] in ('object', 'array'):
            format_key = 'span'
        else:
            format_key = 'field'

        if field_extension:
            format_prefix = 'extension_'
        elif field.required:
            format_prefix = 'required_'
        else:
            format_prefix = ''

        # collect organization references, for use in the parties mapping sheet
        is_org_reference = (
            hasattr(field.schema, '__reference__')
            and field.schema.__reference__['$ref'] == '#/definitions/' + self.get_string('organization_reference_code')
        ) or (
            'items' in field.schema
            and 'title' in field.schema['items']
            and field.schema['items']['title'] == self.get_string('organization_reference_title')
        )
        if is_org_reference:
            row = [format_prefix + format_key, 1, field.path]
            if field_extension:
                # if the org reference belongs to an extension, save it in a separate dict
                # with the name of the extension
                org_refs_extensions.setdefault(field_extension, []).append(row)
            else:
                org_refs.append(row)

        # the first path component decides the target sheet, e.g. "tender/id" -> "tender"
        path = field.path.split('/', 1)[0]  # replaces try/except around str.index with an equivalent expression
        if path in ('planning', 'tender', 'awards'):
            sheet = sheets[path]
            sheetname = path
        elif path == 'contracts':
            if 'contracts/implementation' in field.path:
                sheet = sheets['implementation']
                sheetname = 'implementation'
            else:
                sheet = sheets['contracts']
                sheetname = 'contracts'
        elif path == 'parties':
            sheet = parties_rows
            sheetname = 'parties'
        else:
            sheet = sheets['general']
            sheetname = 'general'

        if format_key == 'title':
            # a stage field becomes the sheet's header rather than a row
            sheet_headers[sheetname].append([
                format_key,
                depth,
                '{}: {}'.format(self.get_string('standard_name'), field.schema['title']),
            ])
            sheet_headers[sheetname].append(['subtitle', depth, field.schema['description']])
            continue

        row = [format_prefix + format_key, depth, field.path]
        if field_extension:
            extension_rows[sheetname].setdefault(field_extension, []).append(row)
        else:
            # add the row to the mapping sheet
            sheet.append(row)

    # add a static header for the General sheet
    sheet_headers['general'].append([
        'title',
        depth,
        '{}: {}'.format(self.get_string('standard_name'), self.get_string('general_title')),
    ])
    sheet_headers['general'].append(['subtitle', depth, self.get_string('general_help_text')])

    # add headers to each sheet
    for name in mapping_sheetnames:
        sheets[name] = sheet_headers[name] + [headers] + sheets[name]

    # repeat the fields from the parties section for each organization reference
    sheets['general'].append(['subtitle', depth, self.get_string('parties_description')])  # description of the parties section
    for ref in org_refs:
        ref[0] = 'ref_span'
        sheets['general'].append(ref)
        sheets['general'].extend(parties_rows[1:])

    # add organizations from extensions, repeating the parties fields after each reference
    extension_parties_rows = [['extension_' + x[0], x[1], x[2]] for x in parties_rows[1:]]
    for extension_name, orgs in org_refs_extensions.items():
        rows = extension_rows['general'].setdefault(extension_name, [])
        for org in orgs:
            rows.append(org)
            rows.extend(extension_parties_rows)

    for name in mapping_sheetnames:
        if extension_rows[name]:  # was len(...keys())
            # add the extension section title
            sheets[name].append(['section', 0, self.get_string('extension_section')])
            for extension_name, rows in extension_rows[name].items():
                text = extension_name + ': ' + self.extensions_info.get_description(extension_name)
                sheets[name].append(['extension', 0, text])
                sheets[name].extend(rows)

        # add the additional fields section to each sheet
        sheets[name].append(['section', 0, self.get_string('additional_fields_note')])
        for i in range(4):
            sheets[name].append(['additional_field', 0])  # was 1

        # make all rows have the same number of columns
        # (required for the CSV parsing script in Google Sheets)
        for row in sheets[name]:
            if len(row) < len(headers):
                row.extend([''] * (len(headers) - len(row)))

    return self._save_sheets(sheets)
def test_example_backticks():
    """
    Asserts that every backtick-quoted term in README.md is a known schema term (a field path, field name,
    definition name, codelist name, codelist column or JSON Schema property), a JSON literal, or a listed exception.
    """
    # Per-repository terms that are knowingly not in the schema.
    exceptions = {
        'ocds_pagination_extension': {
            # Example query string parameters.
            'offset', 'offset=NUMBER', 'page', 'page=1', 'page=NUMBER', 'since', 'since=TIMESTAMP',
            # Changelog entries for non-existent or removed fields.
            'links.all', 'packageMetadata',
        },
        # Substring of pattern property.
        'ocds_exchangeRate_extension': {
            'CODE',
        },
        # Cross-references to other extensions.
        'ocds_contract_signatories_extension': {
            'preferredBidders', 'publicAuthority',
        },
        # Changelog entries for non-existent or removed fields or codelists.
        'ocds_bid_extension': {
            'BidsStatistic.requirementResponses',
        },
        'ocds_eu_extension': {
            'minimumValue', 'Lot.minimumValue',
        },
        'ocds_lots_extension': {
            'LotDetails', 'Bid.relatedLots', 'Finance.relatedLots',
        },
        'ocds_ppp_extension': {
            'initiationType.csv',
        },
        'ocds_project_extension': {
            'Project.source', 'Project.project',
        },
        'ocds_qualification_extension': {
            'PreQualification.procurementMethodRationale', 'PreQualification.awardCriteriaDetails',
        },
        'ocds_shareholders_extension': {
            'Organization.beneficialOwnership',
        },
        'ocds_submissionTerms_extension': {
            'requiresGuarantees',
        },
    }

    # Add JSON null, JSON booleans, and a jsonmerge field from OCDS 1.0.
    literals = {'null', 'true', 'false', 'mergeStrategy'}
    # Compiled regular expressions derived from the schema's patternProperties.
    patterns = set()

    # Add JSON Schema properties.
    url = 'https://raw.githubusercontent.com/open-contracting/standard/1.1/schema/meta-schema.json'
    literals.update(http_get(url).json()['properties'])

    # Add codelist columns.
    url = 'https://raw.githubusercontent.com/open-contracting/standard-maintenance-scripts/main/schema/codelist-schema.json'  # noqa: E501
    literals.update(http_get(url).json()['items']['properties'])

    # Add codelist names.
    metadata = read_metadata()
    literals.update(metadata.get('codelists', []))

    # Add JSON paths, field names and definition names.
    for basename in ('release-schema.json', 'release-package-schema.json', 'record-package-schema.json'):
        if not os.path.isfile(os.path.join(cwd, basename)):
            continue
        schema = jsonref.JsonRef.replace_refs(patch_schema(basename))
        for field in get_schema_fields(schema):
            if 'patternProperties' in field.pointer_components:
                # Turn a pattern path like "prefix(^pattern$)" into a regex matching the whole backticked term.
                # NOTE(review): `\$?` follows a greedy `(.+)`, so a trailing "$" stays inside the pattern group;
                # this looks harmless since "$" is zero-width before the appended `$` — confirm intent.
                literal, pattern = re.search(r'^(.*)\(\^?(.+)\$?\)$', field.path).groups()
                patterns.add(
                    re.compile(r'^' + re.escape(literal) + pattern + r'$'))
            else:
                literals.add(field.path)  # e.g. tender.id
                if len(field.path_components) > 1:
                    literals.add(field.path_components[-1])  # e.g. scale
                if field.definition_path_components:
                    literals.add(field.definition_path)  # e.g. Lot
                    literals.add(
                        f'{field.definition_path}.{field.path}')  # e.g. Lot.id
            if 'codelist' in field.schema:
                # A codelist can be referenced as-is, or with a "+"/"-" prefix when an extension adds/removes codes.
                literals.add(field.schema['codelist'])
                literals.add(f"+{field.schema['codelist']}")
                literals.add(f"-{field.schema['codelist']}")

    # Warn for every unknown backticked term, then fail once at the end so all offenders are reported together.
    errors = 0
    for text in re.findall(r'`([^`\n]+)`', read_readme(), re.DOTALL):
        if (text not in literals
                and not any(re.search(pattern, text) for pattern in patterns)
                # e.g. `"uniqueItems": true`
                and not text.startswith('"')
                and text not in exceptions.get(repo_name, [])):
            errors += 1
            warnings.warn(f'README.md: "{text}" term is not in schema')

    assert errors == 0, 'README.md: Backtick terms are invalid. See warnings below.'