Example #1
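# Imports assumed by this snippet (the cove-specific helpers such as Schema360,
# CoveInputDataError, explore_data_context, convert_json, convert_spreadsheet
# and common_checks_360 come from the surrounding cove packages and are not
# shown here; the translation alias in the original module may differ):
import json
from decimal import Decimal

from django.shortcuts import render
from django.utils.html import format_html
from django.utils.translation import ugettext as _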
def explore_360(request, pk, template='cove_360/explore.html'):
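    """Explore view for 360Giving data.

    Load the uploaded JSON (or convert an uploaded spreadsheet to JSON),
    reject malformed or non-object input, run the common 360Giving checks
    and render the explore template.
    """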
    schema_360 = Schema360()
    context, db_data, error = explore_data_context(request, pk)
    if error:
        return error

    upload_dir = db_data.upload_dir()
    upload_url = db_data.upload_url()
    file_name = db_data.original_file.file.name
    file_type = context['file_type']

    if file_type == 'json':
        # open the data first so we can inspect for record package
        with open(file_name, encoding='utf-8') as fp:
            try:
                json_data = json.load(fp, parse_float=Decimal)
            except ValueError as err:
                raise CoveInputDataError(context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _(format_html('We think you tried to upload a JSON file, but it is not well formed JSON.'
                             '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                             '</span> <strong>Error message:</strong> {}', err)),
                    'error': format(err)
                })
            if not isinstance(json_data, dict):
                raise CoveInputDataError(context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _('360Giving JSON should have an object as the top level, the JSON you supplied does not.'),
                })

            context.update(convert_json(upload_dir, upload_url, file_name, schema_url=schema_360.release_schema_url,
                                        request=request, flatten=request.POST.get('flatten')))

    else:
        context.update(convert_spreadsheet(upload_dir, upload_url, file_name, file_type, schema_360.release_schema_url, schema_360.release_pkg_schema_url))
        with open(context['converted_path'], encoding='utf-8') as fp:
            json_data = json.load(fp, parse_float=Decimal)

    context = common_checks_360(context, upload_dir, json_data, schema_360)

    if hasattr(json_data, 'get') and hasattr(json_data.get('grants'), '__iter__'):
        context['grants'] = json_data['grants']
    else:
        context['grants'] = []

    context['first_render'] = not db_data.rendered
    if not db_data.rendered:
        db_data.rendered = True
    db_data.save()

    return render(request, template, context)
Example #2
def raise_invalid_version_argument(version):
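    """Raise a CoveInputDataError telling the user that the requested schema
    version is not a recognised choice."""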
    raise CoveInputDataError(context={
        'sub_title': _('Unrecognised version of the schema'),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(format_html('We think you tried to run your data against an unrecognised version of '
                 'the schema.\n\n<span class="glyphicon glyphicon-exclamation-sign" '
                 'aria-hidden="true"></span> <strong>Error message:</strong> <em>{}</em> is '
                 'not a recognised choice for the schema version', version)),
        'error': _('{} is not a valid schema version'.format(version))
    })
Example #3
def raise_json_deref_error(error):
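    """Raise a CoveInputDataError for a JSON reference error in the schema,
    e.g. an extension referring to a definition that does not exist."""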
    raise CoveInputDataError(context={
        'sub_title': _('JSON reference error'),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(format_html('We have detected a JSON reference error in the schema. This <em> may be '
                 '</em> due to some extension trying to resolve non-existing references. '
                 '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                 '</span> <strong>Error message:</strong> <em>{}</em>', error)),
        'error': _('{}'.format(error))
    })
Example #4
def raise_invalid_version_data_with_patch(version):
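    """Raise a CoveInputDataError when the data's "version" field uses the
    major.minor.patch pattern; the schema expects major.minor only."""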
    raise CoveInputDataError(context={
        'sub_title': _('Version format does not comply with the schema'),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(format_html('The value for the <em>"version"</em> field in your data follows the '
                 '<em>major.minor.patch</em> pattern but according to the schema the patch digit '
                 'shouldn\'t be included (e.g. <em>"1.1.0"</em> should appear as <em>"1.1"</em> in '
                 'your data as the validator always uses the latest patch release for a major.minor '
                 'version).\n\nPlease get rid of the patch digit and try again.\n\n<span class="glyphicon '
                 'glyphicon-exclamation-sign" aria-hidden="true"></span> <strong>Error message: '
                 '</strong> <em>{}</em> format does not comply with the schema', version)),
        'error': _('{} is not a valid schema version'.format(version))
    })
Example #5
def common_checks_context_iati(db_data, data_file, file_type):
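    """Parse an IATI XML file, validate it against the activity schema and
    return validation errors (cached on disk) plus the cell source map for
    spreadsheet-derived data."""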
    schema_iati = SchemaIATI()
    lxml_errors = {}
    cell_source_map = {}
    validation_errors_path = os.path.join(db_data.upload_dir(), 'validation_errors-2.json')

    with open(data_file) as fp, open(schema_iati.activity_schema) as schema_fp:
        try:
            tree = etree.parse(fp)
        except lxml.etree.XMLSyntaxError as err:
            raise CoveInputDataError(context={
                'sub_title': _("Sorry we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _('We think you tried to upload a XML file, but it is not well formed XML.'
                         '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                         '</span> <strong>Error message:</strong> {}'.format(err)),
                'error': format(err)
            })
        schema_tree = etree.parse(schema_fp)
        schema = lxml.etree.XMLSchema(schema_tree)
        schema.validate(tree)
        lxml_errors = lxml_errors_generator(schema.error_log)

    errors_all = format_lxml_errors(lxml_errors)

    if file_type != 'xml':
        with open(os.path.join(db_data.upload_dir(), 'cell_source_map.json')) as cell_source_map_fp:
            cell_source_map = json.load(cell_source_map_fp)

    if os.path.exists(validation_errors_path):
        with open(validation_errors_path) as validation_error_fp:
            validation_errors = json.load(validation_error_fp)
    else:
        validation_errors = get_xml_validation_errors(errors_all, file_type, cell_source_map)

        with open(validation_errors_path, 'w+') as validation_error_fp:
            validation_error_fp.write(json.dumps(validation_errors))

    db_data.rendered = True

    return {
        'validation_errors': sorted(validation_errors.items()),
        'validation_errors_count': sum(len(value) for value in validation_errors.values()),
        'cell_source_map': cell_source_map,
        'first_render': False
    }
Example #6
def raise_missing_package_error():
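    """Raise a CoveInputDataError when no OCDS release or record package is
    found at the top level of the data."""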
    raise CoveInputDataError(context={
        'sub_title': _('Missing OCDS package'),
        'link': 'index',
        'link_text': _('Try Again'),
        'msg': _(mark_safe('We could not detect a package structure at the top-level of your data. '
                 'OCDS releases and records should be published within a <a href="http://'
                 'standard.open-contracting.org/latest/en/schema/release_package/">release '
                 'package </a> or <a href="http://standard.open-contracting.org/latest/en'
                 '/schema/record_package/"> record package</a> to provide important meta-'
                 'data. For more information, please refer to the <a href="http://standard.'
                 'open-contracting.org/latest/en/getting_started/releases_and_records/"> '
                 'Releases and Records section </a> in the OCDS documentation.\n\n<span '
                 'class="glyphicon glyphicon-exclamation-sign" aria-hidden="true"></span> '
                 '<strong>Error message:</strong> <em>Missing OCDS package</em>')),
        'error': _('Missing OCDS package')
    })
Example #7
File: views.py  Project: tobybatch/cove
def explore_360(request, pk, template='cove_360/explore.html'):
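    """Explore view for 360Giving data: load the uploaded JSON or convert a
    spreadsheet, run the common 360Giving checks and render the template."""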
    schema_360 = Schema360()
    context, db_data, error = explore_data_context(request, pk)
    if error:
        return error
    file_type = context['file_type']

    if file_type == 'json':
        # open the data first so we can inspect for record package
        with open(db_data.original_file.file.name, encoding='utf-8') as fp:
            try:
                json_data = json.load(fp)
            except ValueError as err:
                raise CoveInputDataError(context={
                    'sub_title': _("Sorry we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _('We think you tried to upload a JSON file, but it is not well formed JSON.'
                             '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                             '</span> <strong>Error message:</strong> {}'.format(err)),
                    'error': format(err)
                })
            context.update(
                convert_json(request, db_data, schema_360.release_schema_url))
    else:
        context.update(
            convert_spreadsheet(request, db_data, file_type,
                                schema_360.release_schema_url))
        with open(context['converted_path'], encoding='utf-8') as fp:
            json_data = json.load(fp)

    context = common_checks_360(context, db_data, json_data, schema_360)
    return render(request, template, context)
Example #8
def convert_json(upload_dir,
                 upload_url,
                 file_name,
                 schema_url=None,
                 replace=False,
                 request=None,
                 flatten=False,
                 cache=True,
                 xml=False):
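    """Flatten JSON data into a spreadsheet with flattentool, caching any
    conversion warnings (and an optional titles-based conversion), and return
    context entries describing the converted files."""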
    context = {}
    converted_path = os.path.join(upload_dir, 'flattened')

    flatten_kwargs = dict(output_name=converted_path,
                          main_sheet_name=config['root_list_path'],
                          root_list_path=config['root_list_path'],
                          root_id=config['root_id'],
                          schema=schema_url)

    if xml:
        flatten_kwargs['xml'] = True
        flatten_kwargs['id_name'] = config.get('id_name', 'id')

    try:
        conversion_warning_cache_path = os.path.join(
            upload_dir, 'conversion_warning_messages.json')
        conversion_exists = os.path.exists(converted_path + '.xlsx')
        if not conversion_exists or replace:
            with warnings.catch_warnings(record=True) as conversion_warnings:
                if flatten or (replace and conversion_exists):
                    flattentool.flatten(file_name, **flatten_kwargs)
                else:
                    return {'conversion': 'flattenable'}
                context['conversion_warning_messages'] = filter_conversion_warnings(
                    conversion_warnings)

            if cache:
                with open(conversion_warning_cache_path, 'w+') as fp:
                    json.dump(context['conversion_warning_messages'], fp)

        elif os.path.exists(conversion_warning_cache_path):
            with open(conversion_warning_cache_path) as fp:
                context['conversion_warning_messages'] = json.load(fp)

        context['converted_file_size'] = os.path.getsize(converted_path +
                                                         '.xlsx')
        conversion_warning_cache_path_titles = os.path.join(
            upload_dir, 'conversion_warning_messages_titles.json')

        if config['convert_titles']:
            with warnings.catch_warnings(
                    record=True) as conversion_warnings_titles:
                flatten_kwargs.update(
                    dict(output_name=converted_path + '-titles',
                         use_titles=True))
                if not os.path.exists(converted_path +
                                      '-titles.xlsx') or replace:
                    flattentool.flatten(file_name, **flatten_kwargs)
                    context['conversion_warning_messages_titles'] = filter_conversion_warnings(
                        conversion_warnings_titles)
                    with open(conversion_warning_cache_path_titles, 'w+') as fp:
                        json.dump(context['conversion_warning_messages_titles'], fp)
                elif os.path.exists(conversion_warning_cache_path_titles):
                    with open(conversion_warning_cache_path_titles) as fp:
                        context['conversion_warning_messages_titles'] = json.load(fp)

            context['converted_file_size_titles'] = os.path.getsize(
                converted_path + '-titles.xlsx')

    except BadlyFormedJSONError as err:
        raise CoveInputDataError(context={
            'sub_title': _("Sorry, we can't process that data"),
            'link': 'index',
            'link_text': _('Try Again'),
            'msg': _('We think you tried to upload a JSON file, but it is not well formed JSON.\n\nError message: {}'
                     .format(err))
        })
    except Exception as err:
        logger.exception(err, extra={
            'request': request,
        })
        return {'conversion': 'flatten', 'conversion_error': repr(err)}
    context.update({
        'conversion': 'flatten',
        'converted_path': converted_path,
        'converted_url': '{}{}flattened'.format(upload_url, '' if upload_url.endswith('/') else '/')
    })
    return context
Example #9
def explore_ocds(request, pk):
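    """Explore view for OCDS data.

    Load the uploaded JSON or convert a spreadsheet, work out the schema
    version (and any extensions), run the common OCDS checks and render the
    record or release template.
    """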
    context, db_data, error = explore_data_context(request, pk)
    if error:
        return error

    upload_dir = db_data.upload_dir()
    upload_url = db_data.upload_url()
    file_name = db_data.original_file.file.name
    file_type = context['file_type']

    post_version_choice = request.POST.get('version')
    replace = False
    validation_errors_path = os.path.join(upload_dir,
                                          'validation_errors-3.json')

    if file_type == 'json':
        # open the data first so we can inspect for record package
        with open(file_name, encoding='utf-8') as fp:
            try:
                json_data = json.load(fp, parse_float=Decimal)
            except ValueError as err:
                raise CoveInputDataError(context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _('We think you tried to upload a JSON file, but it is not well formed JSON.'
                             '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                             '</span> <strong>Error message:</strong> {}'.format(err)),
                    'error': format(err)
                })

            if not isinstance(json_data, dict):
                raise CoveInputDataError(context={
                    'sub_title': _("Sorry, we can't process that data"),
                    'link': 'index',
                    'link_text': _('Try Again'),
                    'msg': _('OCDS JSON should have an object as the top level, the JSON you supplied does not.'),
                })

            version_in_data = json_data.get('version', '')
            db_data.data_schema_version = version_in_data
            select_version = post_version_choice or db_data.schema_version
            schema_ocds = SchemaOCDS(select_version=select_version,
                                     release_data=json_data)

            if schema_ocds.missing_package:
                exceptions.raise_missing_package_error()
            if schema_ocds.invalid_version_argument:
                # This shouldn't happen unless the user sends random POST data.
                exceptions.raise_invalid_version_argument(post_version_choice)
            if schema_ocds.invalid_version_data:
                if isinstance(version_in_data, str) and re.compile(
                        r'^\d+\.\d+\.\d+$').match(version_in_data):
                    exceptions.raise_invalid_version_data_with_patch(
                        version_in_data)
                else:
                    if not isinstance(version_in_data, str):
                        version_in_data = '{} (it must be a string)'.format(
                            str(version_in_data))
                    context['unrecognized_version_data'] = version_in_data

            if schema_ocds.version != db_data.schema_version:
                replace = True
            if schema_ocds.extensions:
                schema_ocds.create_extended_release_schema_file(
                    upload_dir, upload_url)
            url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url

            if 'records' in json_data:
                context['conversion'] = None
            else:

                # Replace the spreadsheet conversion only if it exists already.
                converted_path = os.path.join(upload_dir, 'flattened')
                replace_converted = replace and os.path.exists(converted_path +
                                                               '.xlsx')
                context.update(
                    convert_json(upload_dir,
                                 upload_url,
                                 file_name,
                                 schema_url=url,
                                 replace=replace_converted,
                                 request=request,
                                 flatten=request.POST.get('flatten')))

    else:
        # Use the lowest release pkg schema version accepting 'version' field
        metatab_schema_url = SchemaOCDS(
            select_version='1.1').release_pkg_schema_url
        metatab_data = get_spreadsheet_meta_data(upload_dir, file_name,
                                                 metatab_schema_url, file_type)
        if 'version' not in metatab_data:
            metatab_data['version'] = '1.0'
        else:
            db_data.data_schema_version = metatab_data['version']

        select_version = post_version_choice or db_data.schema_version
        schema_ocds = SchemaOCDS(select_version=select_version,
                                 release_data=metatab_data)

        # Unlike for JSON data case above, do not check for missing data package
        if schema_ocds.invalid_version_argument:
            # This shouldn't happen unless the user sends random POST data.
            exceptions.raise_invalid_version_argument(post_version_choice)
        if schema_ocds.invalid_version_data:
            version_in_data = metatab_data.get('version')
            if re.compile(r'^\d+\.\d+\.\d+$').match(version_in_data):
                exceptions.raise_invalid_version_data_with_patch(
                    version_in_data)
            else:
                context['unrecognized_version_data'] = version_in_data

        # Replace json conversion when user chooses a different schema version.
        if db_data.schema_version and schema_ocds.version != db_data.schema_version:
            replace = True

        if schema_ocds.extensions:
            schema_ocds.create_extended_release_schema_file(
                upload_dir, upload_url)
        url = schema_ocds.extended_schema_file or schema_ocds.release_schema_url
        pkg_url = schema_ocds.release_pkg_schema_url

        context.update(
            convert_spreadsheet(upload_dir,
                                upload_url,
                                file_name,
                                file_type,
                                schema_url=url,
                                pkg_schema_url=pkg_url,
                                replace=replace))

        with open(context['converted_path'], encoding='utf-8') as fp:
            json_data = json.load(fp, parse_float=Decimal)

    if replace:
        if os.path.exists(validation_errors_path):
            os.remove(validation_errors_path)

    context = common_checks_ocds(context, upload_dir, json_data, schema_ocds)

    if schema_ocds.json_deref_error:
        exceptions.raise_json_deref_error(schema_ocds.json_deref_error)

    context.update({
        'data_schema_version': db_data.data_schema_version,
        'first_render': not db_data.rendered
    })

    schema_version = getattr(schema_ocds, 'version', None)
    if schema_version:
        db_data.schema_version = schema_version
    if not db_data.rendered:
        db_data.rendered = True

    db_data.save()

    if 'records' in json_data:
        template = 'cove_ocds/explore_record.html'
        if hasattr(json_data, 'get') and hasattr(json_data.get('records'),
                                                 '__iter__'):
            context['records'] = json_data['records']
        else:
            context['records'] = []
    else:
        template = 'cove_ocds/explore_release.html'
        if hasattr(json_data, 'get') and hasattr(json_data.get('releases'),
                                                 '__iter__'):
            context['releases'] = json_data['releases']
        else:
            context['releases'] = []

    return render(request, template, context)
Example #10
def common_checks_context_iati(context,
                               upload_dir,
                               data_file,
                               file_type,
                               api=False,
                               openag=False,
                               orgids=False):
    '''TODO: this function is trying to do too many things. Separate some
    of its logic into smaller functions doing one single thing each.
    '''
    schema_iati = SchemaIATI()
    cell_source_map = {}
    validation_errors_path = os.path.join(upload_dir,
                                          'validation_errors-3.json')

    with open(data_file, 'rb') as fp:
        try:
            tree = etree.parse(fp)
        except lxml.etree.XMLSyntaxError as err:
            raise CoveInputDataError(context={
                'sub_title': _("Sorry, we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _(format_html('We think you tried to upload a XML file, but it is not well formed XML.'
                                     '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                                     '</span> <strong>Error message:</strong> {}', err)),
                'error': format(err)
            })
        except UnicodeDecodeError as err:
            raise CoveInputDataError(context={
                'sub_title': _("Sorry, we can't process that data"),
                'link': 'index',
                'link_text': _('Try Again'),
                'msg': _(format_html('We think you tried to upload a XML file, but the encoding is incorrect.'
                                     '\n\n<span class="glyphicon glyphicon-exclamation-sign" aria-hidden="true">'
                                     '</span> <strong>Error message:</strong> {}', err)),
                'error': format(err)
            })

    if tree.getroot().tag == 'iati-organisations':
        schema_path = schema_iati.organisation_schema
        schema_name = 'Organisation'
        # rulesets don't support organisation files properly yet
        # so disable rather than give partial information
        ruleset_disabled = True
    else:
        schema_path = schema_iati.activity_schema
        schema_name = 'Activity'
        ruleset_disabled = False
    errors_all, invalid_data = validate_against_schema(schema_path, tree)

    return_on_error = [{
        'message': 'There was a problem running ruleset checks',
        'exception': True
    }]

    # Validation errors
    if file_type != 'xml':
        with open(os.path.join(upload_dir,
                               'cell_source_map.json')) as cell_source_map_fp:
            cell_source_map = json.load(cell_source_map_fp)
    if os.path.exists(validation_errors_path):
        with open(validation_errors_path) as validation_error_fp:
            validation_errors = json.load(validation_error_fp)
    else:
        validation_errors = get_xml_validation_errors(errors_all, file_type,
                                                      cell_source_map)
        if not api:
            with open(validation_errors_path, 'w+') as validation_error_fp:
                validation_error_fp.write(json.dumps(validation_errors))

    # Ruleset errors
    if ruleset_disabled:
        ruleset_errors = None
    else:
        ruleset_errors = get_iati_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset'),
            api=api,
            ignore_errors=invalid_data,
            return_on_error=return_on_error)

    if openag:
        ruleset_errors_ag = get_openag_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset_openang'),
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
        context.update({'ruleset_errors_openag': ruleset_errors_ag})
    if orgids:
        ruleset_errors_orgids = get_orgids_ruleset_errors(
            tree,
            os.path.join(upload_dir, 'ruleset_orgids'),
            ignore_errors=invalid_data,
            return_on_error=return_on_error)
        context.update({'ruleset_errors_orgids': ruleset_errors_orgids})

    context.update({
        'validation_errors': sorted(validation_errors.items()),
        'ruleset_errors': ruleset_errors
    })

    if not api:
        context.update({
            'validation_errors_count': sum(len(value) for value in validation_errors.values()),
            'cell_source_map': cell_source_map,
            'first_render': False,
            'schema_name': schema_name,
            'ruleset_disabled': ruleset_disabled
        })
        if ruleset_errors:
            ruleset_errors_by_activity = get_iati_ruleset_errors(
                tree,
                os.path.join(upload_dir, 'ruleset'),
                group_by='activity',
                ignore_errors=invalid_data,
                return_on_error=return_on_error)
            context['ruleset_errors'] = [
                ruleset_errors, ruleset_errors_by_activity
            ]

        count_ruleset_errors = 0
        if isinstance(ruleset_errors, dict):
            for rules in ruleset_errors.values():
                for errors in rules.values():
                    count_ruleset_errors += len(errors)

        context['ruleset_errors_count'] = count_ruleset_errors
    return context