def explore_360(request, pk, template='cove_360/explore.html'):
    """Django view: run 360Giving checks on previously-uploaded data and render the results.

    Loads the stored file for ``pk``, converts JSON <-> spreadsheet as needed,
    runs ``common_checks_360`` and renders ``template`` with the results.
    Raises CoveInputDataError (rendered as a friendly error page) for malformed
    JSON or a non-object top level.
    """
    schema_360 = Schema360()
    # explore_data_context returns an error response (not None) when the stored
    # data is unavailable/expired; pass it straight through.
    context, db_data, error = explore_data_context(request, pk)
    if error:
        return error
    upload_dir = db_data.upload_dir()
    upload_url = db_data.upload_url()
    file_name = db_data.original_file.file.name
    file_type = context['file_type']
    if file_type == 'json':
        # open the data first so we can inspect for record package
        with open(file_name, encoding='utf-8') as fp:
            try:
                # parse_float=Decimal preserves exact decimal values (e.g. amounts)
                json_data = json.load(fp, parse_float=Decimal)
            except ValueError as err:
                raise CoveInputDataError(context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _(format_html('We think you tried to upload a JSON file, but it is not well formed JSON.'
                                         '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                                         '</span> <strong>Error message:</strong> {}', err)),
                    'error': format(err)
                })
        # 360Giving data must be a JSON object (a grants package) at the top level.
        if not isinstance(json_data, dict):
            raise CoveInputDataError(context={
                'sub_title': _("Sorry, we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _('360Giving JSON should have an object as the top level, the JSON you supplied does not.'),
            })
        # Offer/perform JSON -> spreadsheet conversion; 'flatten' POST param
        # triggers the actual conversion rather than just offering it.
        context.update(convert_json(upload_dir, upload_url, file_name,
                                    schema_url=schema_360.release_schema_url,
                                    request=request, flatten=request.POST.get('flatten')))
    else:
        # Spreadsheet input: convert to JSON first, then load the converted file.
        context.update(convert_spreadsheet(upload_dir, upload_url, file_name, file_type,
                                           schema_360.release_schema_url,
                                           schema_360.release_pkg_schema_url))
        with open(context['converted_path'], encoding='utf-8') as fp:
            json_data = json.load(fp, parse_float=Decimal)
    context = common_checks_360(context, upload_dir, json_data, schema_360)
    # Only expose 'grants' when it is actually an iterable inside a mapping;
    # anything else degrades to an empty list rather than crashing the template.
    if hasattr(json_data, 'get') and hasattr(json_data.get('grants'), '__iter__'):
        context['grants'] = json_data['grants']
    else:
        context['grants'] = []
    # Track first render so the UI can behave differently on the initial view.
    context['first_render'] = not db_data.rendered
    if not db_data.rendered:
        db_data.rendered = True
        db_data.save()
    return render(request, template, context)
def raise_invalid_version_argument(version):
    """Raise CoveInputDataError reporting that *version* is not a recognised schema version.

    Used when the POSTed version choice does not match any known schema
    version (normally only possible with hand-crafted POST data).
    """
    message = format_html(
        'We think you tried to run your data against an unrecognised version of '
        'the schema.\n\n<span class="glyphicon glyphicon-exclamation-sign" '
        'aria-hidden="true"></span> <strong>Error message:</strong> <em>{}</em> is '
        'not a recognised choice for the schema version',
        version,
    )
    error_context = {
        'sub_title': _('Unrecognised version of the schema'),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(message),
        'error': _('{} is not a valid schema version'.format(version)),
    }
    raise CoveInputDataError(context=error_context)
def raise_json_deref_error(error):
    """Raise CoveInputDataError for a JSON reference error found in the schema.

    Typically triggered by an extension referencing a non-existent definition.
    """
    message = format_html(
        'We have detected a JSON reference error in the schema. This <em> may be '
        '</em> due to some extension trying to resolve non-existing references. '
        '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
        '</span> <strong>Error message:</strong> <em>{}</em>',
        error,
    )
    error_context = {
        'sub_title': _('JSON reference error'),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(message),
        'error': _('{}'.format(error)),
    }
    raise CoveInputDataError(context=error_context)
def raise_invalid_version_data_with_patch(version):
    """Raise CoveInputDataError when the data's "version" field includes a patch digit.

    The schema expects major.minor (e.g. "1.1"); a major.minor.patch value
    (e.g. "1.1.0") is rejected with an explanation.
    """
    message = format_html(
        'The value for the <em>"version"</em> field in your data follows the '
        '<em>major.minor.patch</em> pattern but according to the schema the patch digit '
        'shouldn\'t be included (e.g. <em>"1.1.0"</em> should appear as <em>"1.1"</em> in '
        'your data as the validator always uses the latest patch release for a major.minor '
        'version).\n\nPlease get rid of the patch digit and try again.\n\n<span class="glyphicon '
        'glyphicon-exclamation-sign" aria-hidden="true"></span> <strong>Error message: '
        '</strong> <em>{}</em> format does not comply with the schema',
        version,
    )
    error_context = {
        'sub_title': _('Version format does not comply with the schema'),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(message),
        'error': _('{} is not a valid schema version'.format(version)),
    }
    raise CoveInputDataError(context=error_context)
def common_checks_context_iati(db_data, data_file, file_type):
    """Validate an IATI XML/converted file against the activity schema.

    Parses ``data_file``, validates it with lxml against the IATI activity
    schema, caches the formatted validation errors under the upload dir, and
    returns a template context dict with the (sorted) errors, their count and
    the cell source map (for spreadsheet inputs).

    Raises CoveInputDataError if the file is not well-formed XML.
    """
    # Fix: local was misspelled `schema_aiti`; renamed to `schema_iati` for
    # consistency with the other common_checks_context_iati variant in this file.
    schema_iati = SchemaIATI()
    lxml_errors = {}
    cell_source_map = {}
    validation_errors_path = os.path.join(db_data.upload_dir(), 'validation_errors-2.json')
    with open(data_file) as fp, open(schema_iati.activity_schema) as schema_fp:
        try:
            tree = etree.parse(fp)
        except lxml.etree.XMLSyntaxError as err:
            raise CoveInputDataError(context={
                'sub_title': _("Sorry we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _('We think you tried to upload a XML file, but it is not well formed XML.'
                         '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                         '</span> <strong>Error message:</strong> {}'.format(err)),
                'error': format(err)
            })
        schema_tree = etree.parse(schema_fp)
        schema = lxml.etree.XMLSchema(schema_tree)
        # validate() returns a bool; the interesting detail is the error_log.
        schema.validate(tree)
        lxml_errors = lxml_errors_generator(schema.error_log)
    errors_all = format_lxml_errors(lxml_errors)
    # Spreadsheet inputs have a cell source map so errors can point at cells.
    if file_type != 'xml':
        with open(os.path.join(db_data.upload_dir(), 'cell_source_map.json')) as cell_source_map_fp:
            cell_source_map = json.load(cell_source_map_fp)
    # Use cached validation errors if a previous run wrote them, otherwise
    # compute and cache them.
    if os.path.exists(validation_errors_path):
        with open(validation_errors_path) as validation_error_fp:
            validation_errors = json.load(validation_error_fp)
    else:
        validation_errors = get_xml_validation_errors(errors_all, file_type, cell_source_map)
        with open(validation_errors_path, 'w+') as validation_error_fp:
            validation_error_fp.write(json.dumps(validation_errors))
    # NOTE(review): flag is set but db_data.save() is not called here —
    # presumably the caller persists it; confirm against call sites.
    db_data.rendered = True
    return {
        'validation_errors': sorted(validation_errors.items()),
        'validation_errors_count': sum(len(value) for value in validation_errors.values()),
        'cell_source_map': cell_source_map,
        'first_render': False
    }
def raise_missing_package_error():
    """Raise CoveInputDataError when no OCDS package structure is found at the top level."""
    message = mark_safe(
        'We could not detect a package structure at the top-level of your data. '
        'OCDS releases and records should be published within a <a href="http://'
        'standard.open-contracting.org/latest/en/schema/release_package/">release '
        'package </a> or <a href="http://standard.open-contracting.org/latest/en'
        '/schema/record_package/"> record package</a> to provide important meta-'
        'data. For more information, please refer to the <a href="http://standard.'
        'open-contracting.org/latest/en/getting_started/releases_and_records/"> '
        'Releases and Records section </a> in the OCDS documentation.\n\n<span '
        'class="glyphicon glyphicon-exclamation-sign" aria-hidden="true"></span> '
        '<strong>Error message:</strong> <em>Missing OCDS package</em>'
    )
    error_context = {
        'sub_title': _('Missing OCDS package'),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(message),
        'error': _('Missing OCDS package'),
    }
    raise CoveInputDataError(context=error_context)
def explore_360(request, pk, template='cove_360/explore.html'):
    """Django view: run 360Giving checks on previously-uploaded data and render results.

    NOTE(review): this file contains another explore_360 definition with the
    same name; if both live in one module, the later definition shadows the
    earlier one — confirm which version is intended to be active.
    """
    schema_360 = Schema360()
    # explore_data_context returns a ready error response when data is unavailable.
    context, db_data, error = explore_data_context(request, pk)
    if error:
        return error
    file_type = context['file_type']
    if file_type == 'json':
        # open the data first so we can inspect for record package
        with open(db_data.original_file.file.name, encoding='utf-8') as fp:
            try:
                json_data = json.load(fp)
            except ValueError as err:
                raise CoveInputDataError(
                    context={
                        'sub_title': _("Sorry we can't process that data"),
                        'link': 'index',
                        'link_text': _('Try Again'),
                        'msg': _('We think you tried to upload a JSON file, but it is not well formed JSON.'
                                 '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                                 '</span> <strong>Error message:</strong> {}'.format(
                                     err)),
                        'error': format(err)
                    })
        # This variant passes (request, db_data, ...) to convert_json — a
        # different signature from the convert_json defined later in this file;
        # presumably a different helper version. TODO confirm which is imported.
        context.update(
            convert_json(request, db_data, schema_360.release_schema_url))
    else:
        # Spreadsheet input: convert to JSON, then load the converted output.
        context.update(
            convert_spreadsheet(request, db_data, file_type,
                                schema_360.release_schema_url))
        with open(context['converted_path'], encoding='utf-8') as fp:
            json_data = json.load(fp)
    context = common_checks_360(context, db_data, json_data, schema_360)
    return render(request, template, context)
def convert_json(upload_dir, upload_url, file_name, schema_url=None, replace=False,
                 request=None, flatten=False, cache=True, xml=False):
    """Convert a JSON (or XML, if ``xml=True``) file to spreadsheet form via flattentool.

    Returns a context dict describing the conversion state:
      - ``{'conversion': 'flattenable'}`` when conversion is possible but was
        not requested (``flatten`` falsy and no replace of an existing file);
      - ``{'conversion': 'flatten', 'conversion_error': ...}`` on unexpected
        failure (logged, not raised);
      - otherwise conversion metadata (paths, sizes, warning messages).

    Conversion warnings are cached as JSON files in ``upload_dir`` when
    ``cache`` is true, and re-read on later calls instead of reconverting.
    Raises CoveInputDataError for badly formed JSON input.
    """
    context = {}
    converted_path = os.path.join(upload_dir, 'flattened')
    flatten_kwargs = dict(output_name=converted_path,
                          main_sheet_name=config['root_list_path'],
                          root_list_path=config['root_list_path'],
                          root_id=config['root_id'],
                          schema=schema_url)
    if xml:
        flatten_kwargs['xml'] = True
        flatten_kwargs['id_name'] = config.get('id_name', 'id')
    try:
        conversion_warning_cache_path = os.path.join(
            upload_dir, 'conversion_warning_messages.json')
        conversion_exists = os.path.exists(converted_path + '.xlsx')
        if not conversion_exists or replace:
            # Capture flattentool warnings so they can be shown to the user.
            with warnings.catch_warnings(record=True) as conversion_warnings:
                if flatten or (replace and conversion_exists):
                    flattentool.flatten(file_name, **flatten_kwargs)
                else:
                    # Conversion possible but not requested yet — let the UI offer it.
                    return {'conversion': 'flattenable'}
                context['conversion_warning_messages'] = filter_conversion_warnings(
                    conversion_warnings)
            if cache:
                with open(conversion_warning_cache_path, 'w+') as fp:
                    json.dump(context['conversion_warning_messages'], fp)
        elif os.path.exists(conversion_warning_cache_path):
            # Conversion already done earlier: reuse its cached warnings.
            with open(conversion_warning_cache_path) as fp:
                context['conversion_warning_messages'] = json.load(fp)
        context['converted_file_size'] = os.path.getsize(converted_path + '.xlsx')
        conversion_warning_cache_path_titles = os.path.join(
            upload_dir, 'conversion_warning_messages_titles.json')
        if config['convert_titles']:
            # Second pass: a '-titles' spreadsheet using human-readable titles.
            with warnings.catch_warnings(record=True) as conversion_warnings_titles:
                flatten_kwargs.update(
                    dict(output_name=converted_path + '-titles',
                         use_titles=True))
                if not os.path.exists(converted_path + '-titles.xlsx') or replace:
                    flattentool.flatten(file_name, **flatten_kwargs)
                    context['conversion_warning_messages_titles'] = filter_conversion_warnings(
                        conversion_warnings_titles)
                    with open(conversion_warning_cache_path_titles, 'w+') as fp:
                        json.dump(
                            context['conversion_warning_messages_titles'], fp)
                elif os.path.exists(conversion_warning_cache_path_titles):
                    with open(conversion_warning_cache_path_titles) as fp:
                        context['conversion_warning_messages_titles'] = json.load(
                            fp)
            context['converted_file_size_titles'] = os.path.getsize(
                converted_path + '-titles.xlsx')
    except BadlyFormedJSONError as err:
        raise CoveInputDataError(
            context={
                'sub_title': _("Sorry, we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _('We think you tried to upload a JSON file, but it is not well formed JSON.\n\nError message: {}'
                         .format(err))
            })
    except Exception as err:
        # Deliberate best-effort: any other conversion failure is logged and
        # surfaced in the context rather than crashing the request.
        logger.exception(err, extra={
            'request': request,
        })
        return {'conversion': 'flatten', 'conversion_error': repr(err)}
    context.update({
        'conversion': 'flatten',
        'converted_path': converted_path,
        'converted_url': '{}{}flattened'.format(upload_url, '' if upload_url.endswith('/') else '/')
    })
    return context
def explore_ocds(request, pk):
    """Django view: run OCDS checks on previously-uploaded data and render results.

    Handles both JSON input (release or record packages) and spreadsheet
    input, schema version selection/override via POST, extension handling,
    and conversion in both directions. Raises CoveInputDataError (via the
    ``exceptions`` helpers) on malformed or unrecognised input.

    Fix: the version pattern was written as a non-raw string
    (``'^\\d+...'``), which is an invalid escape sequence — a
    DeprecationWarning that became a SyntaxWarning in Python 3.12. Both
    occurrences now use a raw string, and the redundant
    ``re.compile(...).match`` is simplified to ``re.match``.
    """
    # major.minor.patch pattern, used to give a tailored error for versions
    # like "1.1.0" that should be "1.1".
    patch_version_pattern = r'^\d+\.\d+\.\d+$'
    context, db_data, error = explore_data_context(request, pk)
    if error:
        return error
    upload_dir = db_data.upload_dir()
    upload_url = db_data.upload_url()
    file_name = db_data.original_file.file.name
    file_type = context['file_type']
    post_version_choice = request.POST.get('version')
    replace = False
    validation_errors_path = os.path.join(upload_dir, 'validation_errors-3.json')
    if file_type == 'json':
        # open the data first so we can inspect for record package
        with open(file_name, encoding='utf-8') as fp:
            try:
                # parse_float=Decimal preserves exact monetary values.
                json_data = json.load(fp, parse_float=Decimal)
            except ValueError as err:
                raise CoveInputDataError(
                    context={
                        'sub_title': _("Sorry, we can't process that data"),
                        'link': 'index',
                        'link_text': _('Try Again'),
                        'msg': _('We think you tried to upload a JSON file, but it is not well formed JSON.'
                                 '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                                 '</span> <strong>Error message:</strong> {}'.format(
                                     err)),
                        'error': format(err)
                    })
        if not isinstance(json_data, dict):
            raise CoveInputDataError(
                context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _('OCDS JSON should have an object as the top level, the JSON you supplied does not.'
                             ),
                })
        version_in_data = json_data.get('version', '')
        db_data.data_schema_version = version_in_data
        # POSTed version choice (user override) wins over the stored version.
        select_version = post_version_choice or db_data.schema_version
        schema_ocds = SchemaOCDS(select_version=select_version,
                                 release_data=json_data)
        if schema_ocds.missing_package:
            exceptions.raise_missing_package_error()
        if schema_ocds.invalid_version_argument:
            # This shouldn't happen unless the user sends random POST data.
            exceptions.raise_invalid_version_argument(post_version_choice)
        if schema_ocds.invalid_version_data:
            if isinstance(version_in_data, str) and re.match(
                    patch_version_pattern, version_in_data):
                exceptions.raise_invalid_version_data_with_patch(
                    version_in_data)
            else:
                if not isinstance(version_in_data, str):
                    version_in_data = '{} (it must be a string)'.format(
                        str(version_in_data))
                context['unrecognized_version_data'] = version_in_data
        if schema_ocds.version != db_data.schema_version:
            replace = True
        if schema_ocds.extensions:
            schema_ocds.create_extended_release_schema_file(
                upload_dir, upload_url)
        url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url
        if 'records' in json_data:
            # Record packages are not converted to spreadsheet form.
            context['conversion'] = None
        else:
            # Replace the spreadsheet conversion only if it exists already.
            converted_path = os.path.join(upload_dir, 'flattened')
            replace_converted = replace and os.path.exists(converted_path +
                                                           '.xlsx')
            context.update(
                convert_json(upload_dir, upload_url, file_name,
                             schema_url=url, replace=replace_converted,
                             request=request,
                             flatten=request.POST.get('flatten')))
    else:
        # Use the lowest release pkg schema version accepting 'version' field
        metatab_schema_url = SchemaOCDS(
            select_version='1.1').release_pkg_schema_url
        metatab_data = get_spreadsheet_meta_data(upload_dir, file_name,
                                                 metatab_schema_url, file_type)
        if 'version' not in metatab_data:
            # No version declared in the metatab: assume the earliest, 1.0.
            metatab_data['version'] = '1.0'
        else:
            db_data.data_schema_version = metatab_data['version']
        select_version = post_version_choice or db_data.schema_version
        schema_ocds = SchemaOCDS(select_version=select_version,
                                 release_data=metatab_data)
        # Unlike for JSON data case above, do not check for missing data package
        if schema_ocds.invalid_version_argument:
            # This shouldn't happen unless the user sends random POST data.
            exceptions.raise_invalid_version_argument(post_version_choice)
        if schema_ocds.invalid_version_data:
            version_in_data = metatab_data.get('version')
            if re.match(patch_version_pattern, version_in_data):
                exceptions.raise_invalid_version_data_with_patch(
                    version_in_data)
            else:
                context['unrecognized_version_data'] = version_in_data
        # Replace json conversion when user chooses a different schema version.
        if db_data.schema_version and schema_ocds.version != db_data.schema_version:
            replace = True
        if schema_ocds.extensions:
            schema_ocds.create_extended_release_schema_file(
                upload_dir, upload_url)
        url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url
        pkg_url = schema_ocds.release_pkg_schema_url
        context.update(
            convert_spreadsheet(upload_dir, upload_url, file_name, file_type,
                                schema_url=url, pkg_schema_url=pkg_url,
                                replace=replace))
        with open(context['converted_path'], encoding='utf-8') as fp:
            json_data = json.load(fp, parse_float=Decimal)
    if replace:
        # Schema version changed: cached validation errors are stale.
        if os.path.exists(validation_errors_path):
            os.remove(validation_errors_path)
    context = common_checks_ocds(context, upload_dir, json_data, schema_ocds)
    if schema_ocds.json_deref_error:
        exceptions.raise_json_deref_error(schema_ocds.json_deref_error)
    context.update({
        'data_schema_version': db_data.data_schema_version,
        'first_render': not db_data.rendered
    })
    schema_version = getattr(schema_ocds, 'version', None)
    if schema_version:
        db_data.schema_version = schema_version
    if not db_data.rendered:
        db_data.rendered = True
    db_data.save()
    # Pick the template and expose the record/release list (empty list when
    # the data is not a mapping or the list is not iterable).
    if 'records' in json_data:
        template = 'cove_ocds/explore_record.html'
        if hasattr(json_data, 'get') and hasattr(json_data.get('records'),
                                                 '__iter__'):
            context['records'] = json_data['records']
        else:
            context['records'] = []
    else:
        template = 'cove_ocds/explore_release.html'
        if hasattr(json_data, 'get') and hasattr(json_data.get('releases'),
                                                 '__iter__'):
            context['releases'] = json_data['releases']
        else:
            context['releases'] = []
    return render(request, template, context)
def common_checks_context_iati(context, upload_dir, data_file, file_type,
                               api=False, openag=False, orgids=False):
    '''Run IATI schema validation and ruleset checks on ``data_file``, updating ``context``.

    Picks the organisation or activity schema by inspecting the XML root tag,
    gathers validation errors (cached under ``upload_dir`` unless ``api``),
    and optionally runs OpenAg / org-ids rulesets. Raises CoveInputDataError
    for malformed XML or a bad encoding.

    TODO: this function is trying to do too many things. Separate some of its
    logic into smaller functions doing one single thing each.
    '''
    schema_iati = SchemaIATI()
    cell_source_map = {}
    validation_errors_path = os.path.join(upload_dir,
                                          'validation_errors-3.json')
    with open(data_file, 'rb') as fp:
        try:
            tree = etree.parse(fp)
        except lxml.etree.XMLSyntaxError as err:
            raise CoveInputDataError(
                context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _(
                        format_html(
                            'We think you tried to upload a XML file, but it is not well formed XML.'
                            '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                            '</span> <strong>Error message:</strong> {}',
                            err)),
                    'error': format(err)
                })
        except UnicodeDecodeError as err:
            raise CoveInputDataError(
                context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _(
                        format_html(
                            'We think you tried to upload a XML file, but the encoding is incorrect.'
                            '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                            '</span> <strong>Error message:</strong> {}',
                            err)),
                    'error': format(err)
                })
    # Choose the schema by root element: organisation files vs activity files.
    if tree.getroot().tag == 'iati-organisations':
        schema_path = schema_iati.organisation_schema
        schema_name = 'Organisation'
        # rulesets don't support organisation files properly yet
        # so disable rather than give partial information
        ruleset_disabled = True
    else:
        schema_path = schema_iati.activity_schema
        schema_name = 'Activity'
        ruleset_disabled = False
    errors_all, invalid_data = validate_against_schema(schema_path, tree)
    # Fallback payload shown if a ruleset check itself blows up.
    return_on_error = [{
        'message': 'There was a problem running ruleset checks',
        'exception': True
    }]
    # Validation errors
    if file_type != 'xml':
        # Spreadsheet inputs carry a cell source map for error locations.
        with open(os.path.join(upload_dir,
                               'cell_source_map.json')) as cell_source_map_fp:
            cell_source_map = json.load(cell_source_map_fp)
    if os.path.exists(validation_errors_path):
        # Reuse cached validation errors from a previous run.
        with open(validation_errors_path) as validation_error_fp:
            validation_errors = json.load(validation_error_fp)
    else:
        validation_errors = get_xml_validation_errors(errors_all, file_type,
                                                      cell_source_map)
        if not api:
            # Only web (non-API) runs persist the cache file.
            with open(validation_errors_path, 'w+') as validation_error_fp:
                validation_error_fp.write(json.dumps(validation_errors))
    # Ruleset errors
    if ruleset_disabled:
        ruleset_errors = None
    else:
        ruleset_errors = get_iati_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset'),
            api=api,
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
    if openag:
        ruleset_errors_ag = get_openag_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset_openang'),
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
        context.update({'ruleset_errors_openag': ruleset_errors_ag})
    if orgids:
        ruleset_errors_orgids = get_orgids_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset_orgids'),
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
        context.update({'ruleset_errors_orgids': ruleset_errors_orgids})
    context.update({
        'validation_errors': sorted(validation_errors.items()),
        'ruleset_errors': ruleset_errors
    })
    if not api:
        context.update({
            'validation_errors_count':
            sum(len(value) for value in validation_errors.values()),
            'cell_source_map': cell_source_map,
            'first_render': False,
            'schema_name': schema_name,
            'ruleset_disabled': ruleset_disabled
        })
        if ruleset_errors:
            # Second grouping (by activity) for the web UI presentation.
            ruleset_errors_by_activity = get_iati_ruleset_errors(
                tree,
                os.path.join(upload_dir, 'ruleset'),
                group_by='activity',
                ignore_errors=invalid_data,
                return_on_error=return_on_error)
            context['ruleset_errors'] = [
                ruleset_errors, ruleset_errors_by_activity
            ]
        # Count errors across the nested {rule: {path: [errors]}} structure;
        # only a dict-shaped result is countable (list fallback counts as 0).
        count_ruleset_errors = 0
        if isinstance(ruleset_errors, dict):
            for rules in ruleset_errors.values():
                for errors in rules.values():
                    count_ruleset_errors += len(errors)
        context['ruleset_errors_count'] = count_ruleset_errors
    return context