import warnings

# COMPLIANCE_CHECKER_NORMAL_LIMIT is assumed to be a module-level constant.

def check_cf_compliance(dataset):
    try:
        from compliance_checker.runner import CheckSuite, ComplianceChecker
        import compliance_checker
    except ImportError:
        warnings.warn('compliance_checker unavailable, skipping NetCDF-CF'
                      ' Compliance Checks')
        return

    # Note: this is a lexicographic string comparison, kept as in the
    # original; it misorders versions such as '10.0.0'.
    if compliance_checker.__version__ < '4.0.0':
        warnings.warn('Please upgrade compliance-checker to version 4 or higher')
        warnings.warn('compliance_checker version is too old, skipping'
                      ' NetCDF-CF Compliance Checks')
        return

    # Checks that are deliberately skipped for these files.
    skip = ['check_dimension_order',
            'check_all_features_are_same_type',
            'check_conventions_version',
            'check_appendix_a']
    cs = CheckSuite()
    cs.load_all_available_checkers()
    score_groups = cs.run(dataset, skip, 'cf')
    score_dict = {dataset.filepath(): score_groups}
    groups = ComplianceChecker.stdout_output(cs, score_dict, verbose=1,
                                             limit=COMPLIANCE_CHECKER_NORMAL_LIMIT)
    assert cs.passtree(groups, limit=COMPLIANCE_CHECKER_NORMAL_LIMIT)
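# Usage sketch (an assumption, not from the original source): the function
# above expects an open netCDF4.Dataset, since it calls dataset.filepath().
# 'sample.nc' is a placeholder filename.
import netCDF4

with netCDF4.Dataset('sample.nc') as nc:
    check_cf_compliance(nc)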
import base64
import json

from compliance_checker.runner import CheckSuite

# get_current_connection, check_redirect and ncdump are project-local helpers
# assumed to be available at module scope.

def compliance_check(job_id, dataset, checker):
    # Grab the connection outside the try block so the except clause can
    # still report errors if anything below fails.
    redis = get_current_connection()
    try:
        cs = CheckSuite()
        if dataset.startswith('http'):
            dataset = check_redirect(dataset)
        ds = cs.load_dataset(dataset)
        score_groups = cs.run(ds, [], checker)
        groups, errors = score_groups[checker]
        aggregates = cs.build_structure(checker, groups, dataset)
        aggregates = cs.serialize(aggregates)
        aggregates['all_priorities'] = sorted(aggregates['all_priorities'],
                                              key=lambda x: x['weight'],
                                              reverse=True)
        # We use b64 to keep the filenames safe, but it's helpful to the user
        # to see the filename they uploaded.
        if not aggregates['source_name'].startswith('http'):
            decoded = base64.b64decode(aggregates['source_name'].split('/')[-1])
            if isinstance(decoded, str):
                aggregates['source_name'] = decoded
            else:
                aggregates['source_name'] = decoded.decode('utf-8')
        aggregates['ncdump'] = ncdump(dataset)
        buf = json.dumps(aggregates)
        # Cache the report under the job key with a one-hour expiry.
        redis.set('processing:job:%s' % job_id, buf, 3600)
        return True
    except Exception as e:
        # Python 3 exceptions have no .message attribute; use str(e) instead.
        redis.set('processing:job:%s' % job_id,
                  json.dumps({"error": type(e).__name__, "message": str(e)}),
                  3600)
        return False
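# Sketch of the consumer side (an assumption, not part of the original code):
# a web handler could poll the same 'processing:job:<job_id>' key until the
# worker publishes the JSON report. Assumes redis-py with default connection
# settings.
import json
import redis

def fetch_report(job_id):
    r = redis.Redis()
    raw = r.get('processing:job:%s' % job_id)
    return json.loads(raw) if raw is not None else None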
import sys

from compliance_checker.runner import CheckSuite, ComplianceChecker

# `url` and `report_fmt` are assumed to be provided by the enclosing module
# scope (see the synchronous `main` variant below for how they are built).

async def cc_report(args):
    if args.verbose > 1:
        print(f'Checking OPeNDAP URL: {url}')
    if args.format == 'summary':
        cs = CheckSuite()
        # Map the CLI criteria to the checker's priority limit.
        if args.criteria == 'normal':
            limit = 2
        elif args.criteria == 'strict':
            limit = 1
        elif args.criteria == 'lenient':
            limit = 3
        ds = cs.load_dataset(url)
        skip_checks = ()
        score_groups = cs.run(ds, skip_checks, *args.test)
        # Always use sorted tests (groups) so they print in the correct order.
        reports = {}
        for checker, rpair in sorted(score_groups.items()):
            groups, _ = rpair
            _, points, out_of = cs.get_points(groups, limit)
            reports[checker] = 100 * float(points) / float(out_of)
        print(report_fmt.format(url, *[reports[t] for t in sorted(args.test)]))
        sys.stdout.flush()
    else:
        # Send the compliance report to stdout.
        ComplianceChecker.run_checker(url, args.test, args.verbose,
                                      args.criteria, args.output, args.format)
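# Hypothetical driver for the coroutine above (an assumption, not part of the
# original source); asyncio.run requires Python 3.7+. `args` would come from
# an argparse namespace like the one sketched after `main` below.
import asyncio

def run_cc_report(args):
    asyncio.run(cc_report(args))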
import base64
import io
import json
import os

from compliance_checker.runner import CheckSuite

# get_current_connection, check_redirect, ncdump, stdout_redirector,
# stdout_output and logger are project-local names assumed to be available
# at module scope.

def compliance_check(job_id, dataset, checker, path=None):
    '''
    Performs the Check Suite for the specified checker and sets the result
    in a redis result for the job_id

    :param str job_id: ID for the rq job
    :param dataset: Dataset handle
    :param str checker: Check Suite ID for a checker
    :param str path: Full path to dataset directory (OPeNDAP only)
    '''
    # Grab the connection outside the try block so the except clause can
    # still report errors if anything below fails.
    redis = get_current_connection()
    try:
        cs = CheckSuite()
        if dataset.startswith('http'):
            dataset = check_redirect(dataset)
        ds = cs.load_dataset(dataset)
        score_groups = cs.run(ds, [], checker)
        groups, errors = score_groups[checker]
        aggregates = cs.build_structure(checker, groups, dataset)
        aggregates = cs.serialize(aggregates)
        aggregates['all_priorities'] = sorted(aggregates['all_priorities'],
                                              key=lambda x: x['weight'],
                                              reverse=True)
        # We use b64 to keep the filenames safe, but it's helpful to the user
        # to see the filename they uploaded.
        if not aggregates['source_name'].startswith('http'):
            decoded = base64.b64decode(aggregates['source_name'].split('/')[-1])
            if isinstance(decoded, str):
                aggregates['source_name'] = decoded
            else:
                aggregates['source_name'] = decoded.decode('utf-8')
        aggregates['ncdump'] = ncdump(dataset)
        buf = json.dumps(aggregates)

        # Write the report to a text file for download.
        if path is None:
            # Must be a local file; derive the directory from the dataset path.
            path = os.path.dirname(dataset)
        fname = 'compliance_{}.txt'.format(job_id)
        output_filename = os.path.join(path, fname)
        with io.open(output_filename, 'w', encoding='utf-8') as f:
            with stdout_redirector(f):
                stdout_output(cs, score_groups, aggregates['source_name'])

        redis.set('processing:job:%s' % job_id, buf, 3600)
        return True
    except Exception as e:
        logger.exception("Failed to process job")
        # Python 3 exceptions have no .message attribute; use str(e) instead.
        error_message = {"error": type(e).__name__, "message": str(e)}
        redis.set('processing:job:%s' % job_id, json.dumps(error_message), 3600)
        return False
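# Sketch of how the producer side might enqueue this worker with rq (the
# docstring's "rq job"); the queue name, Redis connection and arguments are
# assumptions for illustration.
from redis import Redis
from rq import Queue

queue = Queue('default', connection=Redis())
job = queue.enqueue(compliance_check, 'job-1234', '/data/uploads/example.nc', 'cf')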
import warnings

def check_cf_compliance(dataset):
    try:
        from compliance_checker.runner import CheckSuite, ComplianceChecker
        import compliance_checker
    except ImportError:
        warnings.warn('compliance_checker unavailable, skipping NetCDF-CF'
                      ' Compliance Checks')
        return

    cs = CheckSuite()
    cs.load_all_available_checkers()
    # Note: this compares version strings lexicographically.
    if compliance_checker.__version__ >= '2.3.0':
        # This skips a failing compliance check. Our files don't contain all
        # the lats/lons as an auxiliary coordinate var, as that is unnecessary
        # for any software we've tried. It may be added at some point in the
        # future, and this check should then be re-enabled.
        score_groups = cs.run(dataset, ['check_dimension_order'], 'cf')
    else:
        warnings.warn('Please upgrade to compliance-checker 2.3.0 or higher.')
        # Older releases take no skip list.
        score_groups = cs.run(dataset, 'cf')

    groups = ComplianceChecker.stdout_output(cs, score_groups, verbose=1,
                                             limit=COMPLIANCE_CHECKER_NORMAL_LIMIT)
    assert cs.passtree(groups, limit=COMPLIANCE_CHECKER_NORMAL_LIMIT)
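# The string comparisons above are lexicographic, so e.g. '10.0.0' < '2.3.0'.
# A more robust guard, assuming the third-party `packaging` distribution is
# installed (a suggestion, not what the original code does):
from packaging.version import Version

def supports_skip_list(version_string):
    # True for compliance-checker releases whose run() accepts a skip list.
    return Version(version_string) >= Version('2.3.0')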
def check_cf_compliance(dataset):
    try:
        from compliance_checker.runner import CheckSuite, ComplianceChecker
    except ImportError:
        warnings.warn('compliance_checker unavailable, skipping NetCDF-CF'
                      ' Compliance Checks')
        return

    cs = CheckSuite()
    cs.load_all_available_checkers()
    score_groups = cs.run(dataset, 'cf')
    groups = ComplianceChecker.stdout_output(cs, score_groups, verbose=1,
                                             limit=COMPLIANCE_CHECKER_NORMAL_LIMIT)
    assert cs.passtree(groups, limit=COMPLIANCE_CHECKER_NORMAL_LIMIT)
import sys

from compliance_checker.runner import CheckSuite, ComplianceChecker

# get_opendap_urls is a project-local helper assumed to be available at
# module scope.

def main(args):
    if args.format == 'summary':
        # Build one comma-separated header/row format per requested test.
        hdr_fmt = '{},' * len(args.test)
        rpt_fmt = '{:.1f},' * len(args.test)
        report_fmt = '{},' + rpt_fmt[:-1]
        print(('{},' + hdr_fmt[:-1]).format('url', *sorted(args.test)))

    for cat in args.catalog_urls:
        if args.verbose > 1:
            print(f'Opening catalog_url: {cat}')
        for url in get_opendap_urls(cat):
            if args.verbose > 1:
                print(f'Checking OPeNDAP URL: {url}')
            if args.format == 'summary':
                cs = CheckSuite()
                # Map the CLI criteria to the checker's priority limit.
                if args.criteria == 'normal':
                    limit = 2
                elif args.criteria == 'strict':
                    limit = 1
                elif args.criteria == 'lenient':
                    limit = 3
                try:
                    ds = cs.load_dataset(url)
                except ValueError as e:
                    print(f'Failed to get report for {url}')
                    print(str(e))
                    continue
                skip_checks = ()
                score_groups = cs.run(ds, skip_checks, *args.test)
                # Always use sorted tests (groups) so they print in the correct order.
                reports = {}
                for checker, rpair in sorted(score_groups.items()):
                    groups, _ = rpair
                    _, points, out_of = cs.get_points(groups, limit)
                    reports[checker] = 100 * float(points) / float(out_of)
                print(report_fmt.format(url, *[reports[t] for t in sorted(args.test)]))
                sys.stdout.flush()
            else:
                # Send the compliance report to stdout.
                ComplianceChecker.run_checker(url, args.test, args.verbose,
                                              args.criteria, args.output,
                                              args.format)
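# Hypothetical argparse wiring producing the attributes `main` reads
# (catalog_urls, test, criteria, verbose, format, output); the flag names and
# defaults are assumptions, not a documented CLI.
import argparse

parser = argparse.ArgumentParser(description='Run compliance checks against'
                                             ' every OPeNDAP URL in a catalog')
parser.add_argument('catalog_urls', nargs='+')
parser.add_argument('-t', '--test', action='append', default=None)
parser.add_argument('-c', '--criteria', choices=['lenient', 'normal', 'strict'],
                    default='normal')
parser.add_argument('-v', '--verbose', action='count', default=0)
parser.add_argument('-f', '--format', default='summary')
parser.add_argument('-o', '--output', default='-')
args = parser.parse_args()
if args.test is None:
    args.test = ['cf']
main(args)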
import base64
import json

from compliance_checker.runner import CheckSuite

# get_current_connection, check_redirect and ncdump are project-local helpers
# assumed to be available at module scope.

def compliance_check(job_id, dataset, checker):
    # Grab the connection outside the try block so the except clause can
    # still report errors if anything below fails.
    redis = get_current_connection()
    try:
        cs = CheckSuite()
        if dataset.startswith('http'):
            dataset = check_redirect(dataset)
        ds = cs.load_dataset(dataset)
        # Older CheckSuite.run() signature with no skip list, as in the original.
        score_groups = cs.run(ds, checker)
        groups, errors = score_groups[checker]
        aggregates = cs.build_structure(checker, groups, dataset)
        aggregates = cs.serialize(aggregates)
        aggregates['all_priorities'] = sorted(aggregates['all_priorities'],
                                              key=lambda x: x['weight'],
                                              reverse=True)
        # We use b64 to keep the filenames safe, but it's helpful to the user
        # to see the filename they uploaded.
        if not aggregates['source_name'].startswith('http'):
            decoded = base64.b64decode(aggregates['source_name'].split('/')[-1])
            if isinstance(decoded, str):
                aggregates['source_name'] = decoded
            else:
                aggregates['source_name'] = decoded.decode('utf-8')
        aggregates['ncdump'] = ncdump(dataset)
        buf = json.dumps(aggregates)
        # Cache the report under the job key with a one-hour expiry.
        redis.set('processing:job:%s' % job_id, buf, 3600)
        return True
    except Exception as e:
        # Python 3 exceptions have no .message attribute; use str(e) instead.
        redis.set('processing:job:%s' % job_id,
                  json.dumps({"error": type(e).__name__, "message": str(e)}),
                  3600)
        return False