def main():
    """Run a batch FAIR assessment over the module-level ``testpids`` list.

    Reads service settings and external endpoints from ``config/server.ini``,
    primes the shared :class:`Preprocessor` caches (metrics, SPDX licenses,
    re3data/DataCite repositories, metadata standards, file formats), then
    evaluates every test PID — skipping ahead to the module-level ``startpid``
    if one is set — printing each result as JSON together with tracemalloc
    memory statistics.
    """
    config = ConfigParser.ConfigParser()
    my_path = Path(__file__).parent.parent
    ini_path = os.path.join(my_path, 'config', 'server.ini')
    config.read(ini_path)
    YAML_DIR = config['SERVICE']['yaml_directory']
    METRIC_YAML = config['SERVICE']['metrics_yaml']
    METRIC_YML_PATH = os.path.join(my_path, YAML_DIR, METRIC_YAML)
    SPDX_URL = config['EXTERNAL']['spdx_license_github']
    DATACITE_API_REPO = config['EXTERNAL']['datacite_api_repo']
    RE3DATA_API = config['EXTERNAL']['re3data_api']
    METADATACATALOG_API = config['EXTERNAL']['metadata_catalog']
    isDebug = config.getboolean('SERVICE', 'debug_mode')
    data_files_limit = int(config['SERVICE']['data_files_limit'])
    metric_specification = config['SERVICE']['metric_specification']

    # Prime the shared Preprocessor caches used by every FAIRCheck run.
    preproc = Preprocessor()
    preproc.retrieve_metrics_yaml(METRIC_YML_PATH, data_files_limit, metric_specification)
    print('Total metrics defined: {}'.format(preproc.get_total_metrics()))
    # NOTE: debug_mode was previously re-read from config here a second time;
    # the value is identical to ``isDebug`` above, so the duplicate was removed.
    preproc.retrieve_licenses(SPDX_URL, isDebug)
    preproc.retrieve_datacite_re3repos(RE3DATA_API, DATACITE_API_REPO, isDebug)
    preproc.retrieve_metadata_standards(METADATACATALOG_API, isDebug)
    preproc.retrieve_science_file_formats(isDebug)
    preproc.retrieve_long_term_file_formats(isDebug)
    print('Total SPDX licenses : {}'.format(preproc.get_total_licenses()))
    print('Total re3repositories found from datacite api : {}'.format(
        len(preproc.getRE3repositories())))
    print('Total subjects area of imported metadata standards : {}'.format(
        len(preproc.metadata_standards)))

    start = False
    usedatacite = True
    tracemalloc.start()
    for n, identifier in enumerate(testpids, 1):
        print(identifier)
        print(n)
        # Skip PIDs until ``startpid`` is reached; an empty/None startpid
        # means every PID is assessed from the beginning.
        if identifier == startpid or not startpid:
            start = True
        if not start:
            continue
        ft = FAIRCheck(uid=identifier,
                       test_debug=debug,
                       metadata_service_url=metadata_service_endpoint,
                       metadata_service_type=metadata_service_type,
                       use_datacite=usedatacite)
        uid_result, pid_result = ft.check_unique_persistent()
        ft.retrieve_metadata_embedded(ft.extruct_result)
        # The embedded-metadata harvest may discover a better PID; re-check.
        if ft.repeat_pid_check:
            uid_result, pid_result = ft.check_unique_persistent()
        ft.retrieve_metadata_external()
        core_metadata_result = ft.check_minimal_metatadata()
        content_identifier_included_result = ft.check_content_identifier_included()
        access_level_result = ft.check_data_access_level()
        license_result = ft.check_license()
        relatedresources_result = ft.check_relatedresources()
        check_searchable_result = ft.check_searchable()
        data_content_metadata = ft.check_data_content_metadata()
        data_file_format_result = ft.check_data_file_format()
        community_standards_result = ft.check_community_metadatastandards()
        data_provenance_result = ft.check_data_provenance()
        formal_representation_result = ft.check_formal_metadata()
        semantic_vocabulary_result = ft.check_semantic_vocabulary()
        metadata_preserved_result = ft.check_metadata_preservation()
        standard_protocol_metadata_result = ft.check_standardised_protocol_metadata()
        standard_protocol_data_result = ft.check_standardised_protocol_data()
        results = [
            uid_result, pid_result, core_metadata_result,
            content_identifier_included_result, check_searchable_result,
            access_level_result, formal_representation_result,
            semantic_vocabulary_result, license_result,
            data_file_format_result, data_provenance_result,
            relatedresources_result, community_standards_result,
            data_content_metadata, metadata_preserved_result,
            standard_protocol_data_result, standard_protocol_metadata_result
        ]
        debug_messages = ft.get_log_messages_dict()
        ft.logger_message_stream.flush()
        ft.get_assessment_summary(results)
        # Attach the collected per-metric log messages to each result.
        for res_k, res_v in enumerate(results):
            if ft.isDebug:
                debug_list = debug_messages.get(res_v['metric_identifier'])
                if debug_list is not None:
                    results[res_k]['test_debug'] = debug_list
                else:
                    results[res_k]['test_debug'] = ['INFO: No debug messages received']
            else:
                results[res_k]['test_debug'] = ['INFO: Debugging disabled']
        print(json.dumps(results, indent=4, sort_keys=True))
        # Remove per-run logger handlers/filters to avoid memory leaks across PIDs.
        ft.logger.handlers = [ft.logger.handlers[-1]]
        current, peak = tracemalloc.get_traced_memory()
        print(f"Current memory usage is {current / 10 ** 6}MB; Peak was {peak / 10 ** 6}MB")
        snapshot = tracemalloc.take_snapshot()
        top_stats = snapshot.statistics('traceback')
        # Report the biggest single memory block and the top allocating files.
        stat = top_stats[0]
        print("%s memory blocks: %.1f KiB" % (stat.count, stat.size / 1024))
        for line in stat.traceback.format():
            print(line)
        for i, stat in enumerate(snapshot.statistics('filename')[:5], 1):
            print(i, str(stat))
        gc.collect()
    tracemalloc.stop()
def assess_by_id(body):  # noqa: E501
    """assess_by_id

    Evaluate FAIRness of a data object based on its identifier # noqa: E501

    :param body: assessment request (object identifier, debug flag, optional
        metadata service / OAI-PMH endpoints, DataCite toggle)
    :type body: dict | bytes

    :rtype: FAIRResults
    """
    # Only JSON requests are handled; a non-JSON request falls through and
    # returns None (connexion reports an error upstream).
    if connexion.request.is_json:
        results = []
        body = Body.from_dict(connexion.request.get_json())
        identifier = body.object_identifier
        debug = body.test_debug
        metadata_service_endpoint = body.metadata_service_endpoint
        oaipmh_endpoint = body.oaipmh_endpoint
        metadata_service_type = body.metadata_service_type
        usedatacite = body.use_datacite
        logger = Preprocessor.logger
        logger.info('Assessment target: ' + identifier)
        print('Assessment target: ', identifier, flush=True)
        ft = FAIRCheck(uid=identifier,
                       test_debug=debug,
                       metadata_service_url=metadata_service_endpoint,
                       metadata_service_type=metadata_service_type,
                       use_datacite=usedatacite,
                       oaipmh_endpoint=oaipmh_endpoint)
        # Set target for remote logging, if configured on the Preprocessor.
        remote_log_host, remote_log_path = Preprocessor.remote_log_host, Preprocessor.remote_log_path
        if remote_log_host and remote_log_path:
            ft.set_remote_logging_target(remote_log_host, remote_log_path)
        uid_result, pid_result = ft.check_unique_persistent()
        ft.retrieve_metadata_embedded(ft.extruct_result)
        # Each harvest step may discover a better PID; re-run the check then.
        if ft.repeat_pid_check:
            uid_result, pid_result = ft.check_unique_persistent()
        ft.retrieve_metadata_external()
        if ft.repeat_pid_check:
            uid_result, pid_result = ft.check_unique_persistent()
        core_metadata_result = ft.check_minimal_metatadata()
        content_identifier_included_result = ft.check_content_identifier_included()
        access_level_result = ft.check_data_access_level()
        license_result = ft.check_license()
        related_resources_result = ft.check_relatedresources()
        check_searchable_result = ft.check_searchable()
        data_content_result = ft.check_data_content_metadata()
        data_file_format_result = ft.check_data_file_format()
        community_standards_result = ft.check_community_metadatastandards()
        data_provenance_result = ft.check_data_provenance()
        formal_metadata_result = ft.check_formal_metadata()
        semantic_vocab_result = ft.check_semantic_vocabulary()
        metadata_preserved_result = ft.check_metadata_preservation()
        standard_protocol_data_result = ft.check_standardised_protocol_data()
        standard_protocol_metadata_result = ft.check_standardised_protocol_metadata()
        # NOTE(review): metadata_preserved_result is computed but not appended
        # below, unlike in main() — confirm whether its exclusion from the API
        # response is intentional.
        results.append(uid_result)
        results.append(pid_result)
        results.append(core_metadata_result)
        results.append(content_identifier_included_result)
        results.append(check_searchable_result)
        results.append(access_level_result)
        results.append(formal_metadata_result)
        results.append(semantic_vocab_result)
        results.append(related_resources_result)
        results.append(data_content_result)
        results.append(license_result)
        results.append(data_provenance_result)
        results.append(community_standards_result)
        results.append(data_file_format_result)
        results.append(standard_protocol_data_result)
        results.append(standard_protocol_metadata_result)
        debug_messages = ft.get_log_messages_dict()
        ft.logger_message_stream.flush()
        summary = ft.get_assessment_summary(results)
        # Attach the collected per-metric log messages to each result.
        for res_k, res_v in enumerate(results):
            if ft.isDebug:
                debug_list = debug_messages.get(res_v['metric_identifier'])
                if debug_list is not None:
                    results[res_k]['test_debug'] = debug_list
                else:
                    results[res_k]['test_debug'] = ['INFO: No debug messages received']
            else:
                results[res_k]['test_debug'] = ['INFO: Debugging disabled']
        # Remove per-run logger handlers to avoid memory leaks across requests.
        ft.logger.handlers = [ft.logger.handlers[-1]]
        # RFC 3339 timestamp as specified in openapi3. Use an aware UTC time
        # so the trailing "Z" designator is accurate (the previous naive
        # datetime.now() reported local time mislabelled as UTC).
        timestmp = datetime.datetime.now(datetime.timezone.utc).replace(
            microsecond=0).isoformat().replace('+00:00', 'Z')
        metric_spec = Preprocessor.metric_specification
        metric_version = os.path.basename(Preprocessor.METRIC_YML_PATH)
        totalmetrics = len(results)
        request = body.to_dict()
        if ft.pid_url:
            idhelper = IdentifierHelper(ft.pid_url)
            request['normalized_object_identifier'] = idhelper.get_normalized_id()
        final_response = FAIRResults(request=request,
                                     timestamp=timestmp,
                                     software_version=ft.FUJI_VERSION,
                                     test_id=ft.test_id,
                                     metric_version=metric_version,
                                     metric_specification=metric_spec,
                                     total_metrics=totalmetrics,
                                     results=results,
                                     summary=summary)
        return final_response