Example 1
import base64
import json

from compliance_checker.suite import CheckSuite
from rq.connections import get_current_connection

# check_redirect and ncdump are application-local helpers (not shown here)


def compliance_check(job_id, dataset, checker):
    # Grab the Redis connection up front so the except block can still
    # report the failure if anything below raises
    redis = get_current_connection()
    try:
        cs = CheckSuite()
        if dataset.startswith('http'):
            dataset = check_redirect(dataset)
        ds = cs.load_dataset(dataset)
        score_groups = cs.run(ds, [], checker)

        rpair = score_groups[checker]
        groups, errors = rpair

        aggregates = cs.build_structure(checker, groups, dataset)
        aggregates = cs.serialize(aggregates)
        aggregates['all_priorities'] = sorted(aggregates['all_priorities'],
                                              key=lambda x: x['weight'],
                                              reverse=True)
        # We use b64 to keep the filenames safe but it's helpful to the user to see
        # the filename they uploaded
        if not aggregates['source_name'].startswith('http'):
            decoded = base64.b64decode(aggregates['source_name'].split('/')[-1])
            if isinstance(decoded, str):
                aggregates['source_name'] = decoded
            else:
                aggregates['source_name'] = decoded.decode('utf-8')
        aggregates['ncdump'] = ncdump(dataset)
        buf = json.dumps(aggregates)

        # Cache the serialized report for an hour
        redis.set('processing:job:%s' % job_id, buf, ex=3600)
        return True
    except Exception as e:
        # Exception.message was removed in Python 3; report str(e) instead
        error_message = {"error": type(e).__name__, "message": str(e)}
        redis.set('processing:job:%s' % job_id, json.dumps(error_message), ex=3600)
        return False
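
The comment about base64 filenames glosses the decode branch above: the upload path presumably stores files under base64-encoded names, and this code recovers the original name for display. A minimal sketch of that round trip; the encode_filename/decode_filename helpers are hypothetical, not part of the app:

import base64

def encode_filename(name):
    # Hypothetical helper mirroring the decode branch above: store the
    # uploaded filename base64-encoded so odd characters can't break paths.
    # Note that standard base64 output can itself contain '/', so a real
    # implementation might prefer base64.urlsafe_b64encode instead.
    return base64.b64encode(name.encode('utf-8')).decode('ascii')

def decode_filename(stored):
    # Inverse transform, matching the b64decode call in compliance_check
    return base64.b64decode(stored).decode('utf-8')

print(decode_filename(encode_filename('my dataset (v2).nc')))  # my dataset (v2).nc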
Example 2
async def cc_report(args, url):
    # NOTE: `url` was a free variable in the original snippet; it is taken
    # here as a parameter. `report_fmt` (used below) is likewise assumed to
    # be defined at module level, as in Example 4.

    if args.verbose > 1:
        print(f'Checking OPeNDAP URL: {url}')

    if args.format == 'summary':
        cs = CheckSuite()
        if args.criteria == 'normal':
            limit = 2
        elif args.criteria == 'strict':
            limit = 1
        elif args.criteria == 'lenient':
            limit = 3
        else:
            # Guard against an unexpected value leaving `limit` unbound
            raise ValueError(f'Unknown criteria: {args.criteria}')

        ds = cs.load_dataset(url)
        skip_checks = ()
        score_groups = cs.run(ds, skip_checks, *args.test)

        # Always use sorted test (groups) so they print in correct order
        reports = {}
        for checker, rpair in sorted(score_groups.items()):
            groups, _ = rpair
            _, points, out_of = cs.get_points(groups, limit)
            reports[checker] = (100 * float(points) / float(out_of))

        print(report_fmt.format(url, *[reports[t] for t in sorted(args.test)]))
        sys.stdout.flush()
    else:
        # Send the compliance report to stdout
        ComplianceChecker.run_checker(url, args.test, args.verbose, args.criteria,
                                        args.output, args.format)
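
The criteria-to-limit mapping above is duplicated verbatim in Examples 2 and 4; a table lookup is one way to keep the two in sync and to fail loudly on an unexpected value. A sketch, not from the original source:

CRITERIA_LIMITS = {'strict': 1, 'normal': 2, 'lenient': 3}

def criteria_limit(criteria):
    # Fail loudly instead of leaving `limit` unbound on an unknown value
    try:
        return CRITERIA_LIMITS[criteria]
    except KeyError:
        raise ValueError(f'Unknown criteria: {criteria}')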
Example 3
def compliance_check(job_id, dataset, checker, path=None):
    '''
    Performs the Check Suite for the specified checker and sets the result in a
    redis result for the job_id

    :param str job_id: ID for the rq job
    :param dataset: Dataset handle
    :param str checker: Check Suite ID for a checker
    :param str path: Full path to dataset directory (OPeNDAP only)
    '''
    # Grab the Redis connection up front so the except block can still
    # report the failure if anything below raises
    redis = get_current_connection()
    try:
        cs = CheckSuite()
        if dataset.startswith('http'):
            dataset = check_redirect(dataset)
        ds = cs.load_dataset(dataset)
        score_groups = cs.run(ds, [], checker)

        rpair = score_groups[checker]
        groups, errors = rpair

        aggregates = cs.build_structure(checker, groups, dataset)
        aggregates = cs.serialize(aggregates)
        aggregates['all_priorities'] = sorted(aggregates['all_priorities'],
                                              key=lambda x: x['weight'],
                                              reverse=True)
        # We use b64 to keep the filenames safe but it's helpful to the user to see
        # the filename they uploaded
        if not aggregates['source_name'].startswith('http'):
            decoded = base64.b64decode(
                aggregates['source_name'].split('/')[-1])
            if isinstance(decoded, str):
                aggregates['source_name'] = decoded
            else:
                aggregates['source_name'] = decoded.decode('utf-8')
        aggregates['ncdump'] = ncdump(dataset)
        buf = json.dumps(aggregates)

        # Write the report to a text file for download
        if path is None:
            # Must be a local file, get the path from the dataset
            path = os.path.dirname(dataset)
        fname = 'compliance_{}.txt'.format(job_id)
        output_filename = os.path.join(path, fname)
        with io.open(output_filename, 'w', encoding='utf-8') as f:
            with stdout_redirector(f):
                stdout_output(cs, score_groups, aggregates['source_name'])

        redis.set('processing:job:%s' % job_id, buf, ex=3600)

        return True

    except Exception as e:
        logger.exception("Failed to process job")
        # Exception.message was removed in Python 3; report str(e) instead
        error_message = {"error": type(e).__name__, "message": str(e)}
        redis.set('processing:job:%s' % job_id, json.dumps(error_message),
                  ex=3600)
        return False
        return False
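
compliance_check is written as an rq task: it stores its result under processing:job:<job_id> with a one-hour TTL rather than returning it to the caller. A sketch of how a web handler might enqueue it and poll for the report, assuming rq and a hypothetical myapp.tasks module path:

import json

from redis import Redis
from rq import Queue

redis_conn = Redis()
queue = Queue(connection=redis_conn)

# 'cf' is one example checker id; the dotted module path is hypothetical
queue.enqueue('myapp.tasks.compliance_check',
              'job-1234', '/data/uploads/abc.nc', 'cf')

# Poll for the serialized report the worker writes to Redis
raw = redis_conn.get('processing:job:job-1234')
if raw is not None:
    report = json.loads(raw)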
Example 4
def main(args):
    if args.format == 'summary':
        hdr_fmt = '{},' * len(args.test)
        rpt_fmt = '{:.1f},' * len(args.test)
        report_fmt = '{},' + rpt_fmt[:-1]
        print(('{},' + hdr_fmt[:-1]).format('url', *sorted(args.test)))

    for cat in args.catalog_urls:
        if args.verbose > 1:
            print(f'Opening catalog_url: {cat}')
        for url in get_opendap_urls(cat):

            if args.verbose > 1:
                print(f'Checking OPeNDAP URL: {url}')

            if args.format == 'summary':
                cs = CheckSuite()
                if args.criteria == 'normal':
                    limit = 2
                elif args.criteria == 'strict':
                    limit = 1
                elif args.criteria == 'lenient':
                    limit = 3
                else:
                    # Guard against an unexpected value leaving `limit` unbound
                    raise ValueError(f'Unknown criteria: {args.criteria}')

                try:
                    ds = cs.load_dataset(url)
                except ValueError as e:
                    print(f'Failed to get report for {url}')
                    print(str(e))
                    continue

                skip_checks = ()
                score_groups = cs.run(ds, skip_checks, *args.test)

                # Always use sorted test (groups) so they print in correct order
                reports = {}
                for checker, rpair in sorted(score_groups.items()):
                    groups, _ = rpair
                    _, points, out_of = cs.get_points(groups, limit)
                    reports[checker] = (100 * float(points) / float(out_of))

                print(report_fmt.format(url, *[reports[t] for t in sorted(args.test)]))
                sys.stdout.flush()

            else:
                # Send the compliance report to stdout
                ComplianceChecker.run_checker(url, args.test, args.verbose, args.criteria,
                                              args.output, args.format)
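
Both summary paths compute `100 * float(points) / float(out_of)`, which raises ZeroDivisionError if a checker produced no scored checks. A small helper makes that edge case explicit; a sketch, not part of the original:

def score_percentage(points, out_of):
    # Same arithmetic as the summary loops above, but an empty result
    # scores 0.0 instead of raising ZeroDivisionError
    if not out_of:
        return 0.0
    return 100.0 * points / out_of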
Example 5
def compliance_check(job_id, dataset, checker):
    # Grab the Redis connection up front so the except block can still
    # report the failure if anything below raises
    redis = get_current_connection()
    try:
        cs = CheckSuite()
        if dataset.startswith('http'):
            dataset = check_redirect(dataset)
        ds = cs.load_dataset(dataset)
        # Older CheckSuite versions ran as run(ds, *checker_names); newer
        # releases take a skip_checks argument first, as in Examples 1 and 3
        score_groups = cs.run(ds, checker)

        rpair = score_groups[checker]
        groups, errors = rpair

        aggregates = cs.build_structure(checker, groups, dataset)
        aggregates = cs.serialize(aggregates)
        aggregates['all_priorities'] = sorted(aggregates['all_priorities'],
                                              key=lambda x: x['weight'],
                                              reverse=True)
        # We use b64 to keep the filenames safe but it's helpful to the user to see
        # the filename they uploaded
        if not aggregates['source_name'].startswith('http'):
            decoded = base64.b64decode(
                aggregates['source_name'].split('/')[-1])
            if isinstance(decoded, str):
                aggregates['source_name'] = decoded
            else:
                aggregates['source_name'] = decoded.decode('utf-8')
        aggregates['ncdump'] = ncdump(dataset)
        buf = json.dumps(aggregates)

        redis.set('processing:job:%s' % job_id, buf, ex=3600)
        return True
    except Exception as e:
        # Exception.message was removed in Python 3; report str(e) instead
        error_message = {"error": type(e).__name__, "message": str(e)}
        redis.set('processing:job:%s' % job_id, json.dumps(error_message),
                  ex=3600)
        return False
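
check_redirect is an application helper that every compliance_check variant calls before loading an 'http' dataset; presumably it resolves HTTP redirects to the final URL. A minimal stand-in using requests, under that assumption:

import requests

def check_redirect(url, timeout=10):
    # Hypothetical stand-in: follow redirects with a HEAD request and
    # return the final URL, falling back to the original on any failure
    try:
        response = requests.head(url, allow_redirects=True, timeout=timeout)
        return response.url
    except requests.RequestException:
        return url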