Example #1
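The snippets on this page appear to come from the F-UJI FAIR assessment service. Examples #1 to #4 are CLI-style test drivers; a plausible common preamble is sketched below. The module paths and the fixture values are assumptions for illustration, not part of the original snippets:

import configparser as ConfigParser  # assumed alias so ConfigParser.ConfigParser() works on Python 3
import gc
import json
import os
import tracemalloc
from pathlib import Path

from fuji_server.controllers.fair_check import FAIRCheck  # assumed module path
from fuji_server.helper.preprocessor import Preprocessor  # assumed module path

# Hypothetical module-level fixtures the snippets reference without defining:
debug = True
testpids = ['https://doi.org/10.5281/zenodo.3693620']  # placeholder PID list
startpid = ''  # empty: start with the first PID
identifier = testpids[0]  # Example #1 checks a single identifier
oai_pmh = None  # Example #1; optional OAI-PMH endpoint
metadata_service_endpoint = None  # Example #3; optional metadata service
metadata_service_type = None  # Example #3
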
def main():
    config = ConfigParser.ConfigParser()
    my_path = Path(__file__).parent.parent
    ini_path = os.path.join(my_path, 'config', 'server.ini')
    config.read(ini_path)
    YAML_DIR = config['SERVICE']['yaml_directory']
    METRIC_YAML = config['SERVICE']['metrics_yaml']
    METRIC_YML_PATH = os.path.join(my_path, YAML_DIR, METRIC_YAML)
    SPDX_URL = config['EXTERNAL']['spdx_license_github']
    DATACITE_API_REPO = config['EXTERNAL']['datacite_api_repo']
    RE3DATA_API = config['EXTERNAL']['re3data_api']
    METADATACATALOG_API = config['EXTERNAL']['metadata_catalog']
    isDebug = config.getboolean('SERVICE', 'debug_mode')

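    # Warm up the shared Preprocessor caches used by the checks below.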
    preproc = Preprocessor()
    preproc.retrieve_metrics_yaml(METRIC_YML_PATH)
    print('Total metrics defined: {}'.format(preproc.get_total_metrics()))

    preproc.retrieve_licenses(SPDX_URL, isDebug)
    preproc.retrieve_datacite_re3repos(RE3DATA_API, DATACITE_API_REPO, isDebug)
    preproc.retrieve_metadata_standards(METADATACATALOG_API, isDebug)

    print('Total SPDX licenses : {}'.format(preproc.get_total_licenses()))
    print('Total re3repositories found from datacite api : {}'.format(
        len(preproc.getRE3repositories())))
    print('Total subjects area of imported metadata standards : {}'.format(
        len(preproc.metadata_standards)))

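    # identifier, oai_pmh and debug are not defined in this snippet (see the assumed preamble above).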
    ft = FAIRCheck(uid=identifier, oai=oai_pmh, test_debug=debug)
    uid_result, pid_result = ft.check_unique_persistent()
    core_metadata_result = ft.check_minimal_metatadata()
    content_identifier_included_result = ft.check_content_identifier_included()
    check_searchable_result = ft.check_searchable()
    license_result = ft.check_license()
    relatedresources_result = ft.check_relatedresources()
    results = [
        uid_result, pid_result, core_metadata_result,
        content_identifier_included_result, license_result
    ]
    # put the debug messages at the right place...
    for result_index, result in enumerate(results):
        results[result_index]['test_debug'] = ft.msg_filter.getMessage(
            result.get('metric_identifier'))

    print(json.dumps(results, indent=4, sort_keys=True))
Example #2
def main():
    config = ConfigParser.ConfigParser()
    my_path = Path(__file__).parent.parent
    ini_path = os.path.join(my_path, 'config', 'server.ini')
    config.read(ini_path)
    YAML_DIR = config['SERVICE']['yaml_directory']
    METRIC_YAML = config['SERVICE']['metrics_yaml']
    METRIC_YML_PATH = os.path.join(my_path, YAML_DIR, METRIC_YAML)
    SPDX_URL = config['EXTERNAL']['spdx_license_github']
    DATACITE_API_REPO = config['EXTERNAL']['datacite_api_repo']
    RE3DATA_API = config['EXTERNAL']['re3data_api']
    METADATACATALOG_API = config['EXTERNAL']['metadata_catalog']
    isDebug = config.getboolean('SERVICE', 'debug_mode')

    preproc = Preprocessor()
    preproc.retrieve_metrics_yaml(METRIC_YML_PATH)
    print('Total metrics defined: {}'.format(preproc.get_total_metrics()))

    preproc.retrieve_licenses(SPDX_URL, isDebug)
    preproc.retrieve_datacite_re3repos(RE3DATA_API, DATACITE_API_REPO, isDebug)
    preproc.retrieve_metadata_standards(METADATACATALOG_API, isDebug)
    preproc.retrieve_science_file_formats(isDebug)
    preproc.retrieve_long_term_file_formats(isDebug)

    print('Total SPDX licenses : {}'.format(preproc.get_total_licenses()))
    print('Total re3repositories found from datacite api : {}'.format(
        len(preproc.getRE3repositories())))
    print('Total subjects area of imported metadata standards : {}'.format(
        len(preproc.metadata_standards)))
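    # testpids and startpid are fixtures defined outside the snippet; PIDs are skipped until startpid is reached.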
    start = False
    for identifier in testpids:
        print(identifier)
        if identifier == startpid or not startpid:
            start = True
        if start:
            ft = FAIRCheck(uid=identifier, test_debug=debug)
            uid_result, pid_result = ft.check_unique_persistent()
            core_metadata_result = ft.check_minimal_metatadata()
            content_identifier_included_result = ft.check_content_identifier_included()
            check_searchable_result = ft.check_searchable()
            license_result = ft.check_license()
            relatedresources_result = ft.check_relatedresources()
            access_level_result = ft.check_data_access_level()
            data_file_format_result = ft.check_data_file_format()
            data_provenance_result = ft.check_data_provenance()
            community_standards_result = ft.check_community_metadatastandards()
            data_content_metadata = ft.check_data_content_metadata()
            results = [
                uid_result, pid_result, core_metadata_result,
                content_identifier_included_result, check_searchable_result,
                access_level_result, license_result, data_file_format_result,
                data_provenance_result, community_standards_result,
                data_content_metadata
            ]
            #results=[data_file_format_result]
            print(json.dumps(results, indent=4, sort_keys=True))
Example #3
def main():
    config = ConfigParser.ConfigParser()
    my_path = Path(__file__).parent.parent
    ini_path = os.path.join(my_path, 'config', 'server.ini')
    config.read(ini_path)
    YAML_DIR = config['SERVICE']['yaml_directory']
    METRIC_YAML = config['SERVICE']['metrics_yaml']
    METRIC_YML_PATH = os.path.join(my_path, YAML_DIR, METRIC_YAML)
    SPDX_URL = config['EXTERNAL']['spdx_license_github']
    DATACITE_API_REPO = config['EXTERNAL']['datacite_api_repo']
    RE3DATA_API = config['EXTERNAL']['re3data_api']
    METADATACATALOG_API = config['EXTERNAL']['metadata_catalog']
    isDebug = config.getboolean('SERVICE', 'debug_mode')
    data_files_limit = int(config['SERVICE']['data_files_limit'])
    metric_specification = config['SERVICE']['metric_specification']

    preproc = Preprocessor()
    preproc.retrieve_metrics_yaml(METRIC_YML_PATH, data_files_limit,
                                  metric_specification)
    print('Total metrics defined: {}'.format(preproc.get_total_metrics()))

    preproc.retrieve_licenses(SPDX_URL, isDebug)
    preproc.retrieve_datacite_re3repos(RE3DATA_API, DATACITE_API_REPO, isDebug)
    preproc.retrieve_metadata_standards(METADATACATALOG_API, isDebug)
    preproc.retrieve_science_file_formats(isDebug)
    preproc.retrieve_long_term_file_formats(isDebug)

    print('Total SPDX licenses : {}'.format(preproc.get_total_licenses()))
    print('Total re3repositories found from datacite api : {}'.format(
        len(preproc.getRE3repositories())))
    print('Total subjects area of imported metadata standards : {}'.format(
        len(preproc.metadata_standards)))
    start = False
    usedatacite = True
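    # Trace memory allocations across assessment runs to spot leaks.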
    tracemalloc.start()
    for n, identifier in enumerate(testpids, 1):
        print(identifier)
        print(n)
        if identifier == startpid or not startpid:
            start = True
        if start:
            ft = FAIRCheck(uid=identifier,
                           test_debug=debug,
                           metadata_service_url=metadata_service_endpoint,
                           metadata_service_type=metadata_service_type,
                           use_datacite=usedatacite)

            #ft = FAIRCheck(uid=identifier,  test_debug=True, use_datacite=usedatacite)

            uid_result, pid_result = ft.check_unique_persistent()
            ft.retrieve_metadata_embedded(ft.extruct_result)
            include_embedded = True
            if ft.repeat_pid_check:
                uid_result, pid_result = ft.check_unique_persistent()
            ft.retrieve_metadata_external()

            core_metadata_result = ft.check_minimal_metatadata()
            content_identifier_included_result = ft.check_content_identifier_included()
            access_level_result = ft.check_data_access_level()
            license_result = ft.check_license()
            relatedresources_result = ft.check_relatedresources()
            check_searchable_result = ft.check_searchable()
            data_content_metadata = ft.check_data_content_metadata()
            data_file_format_result = ft.check_data_file_format()
            community_standards_result = ft.check_community_metadatastandards()
            data_provenance_result = ft.check_data_provenance()
            formal_representation_result = ft.check_formal_metadata()
            semantic_vocabulary_result = ft.check_semantic_vocabulary()
            metadata_preserved_result = ft.check_metadata_preservation()
            standard_protocol_metadata_result = ft.check_standardised_protocol_metadata()
            standard_protocol_data_result = ft.check_standardised_protocol_data()

            results = [
                uid_result, pid_result, core_metadata_result,
                content_identifier_included_result, check_searchable_result,
                access_level_result, formal_representation_result,
                semantic_vocabulary_result, license_result,
                data_file_format_result, data_provenance_result,
                relatedresources_result, community_standards_result,
                data_content_metadata, metadata_preserved_result,
                standard_protocol_data_result,
                standard_protocol_metadata_result
            ]
            #results=[core_metadata_result,uid_result, pid_result]
            #print(ft.metadata_merged)
            debug_messages = ft.get_log_messages_dict()
            ft.logger_message_stream.flush()
            ft.get_assessment_summary(results)
            for res_k, res_v in enumerate(results):
                if ft.isDebug:
                    debug_list = debug_messages.get(res_v['metric_identifier'])
                    #debug_list= ft.msg_filter.getMessage(res_v['metric_identifier'])
                    if debug_list is not None:
                        results[res_k]['test_debug'] = debug_list
                    else:
                        results[res_k]['test_debug'] = [
                            'INFO: No debug messages received'
                        ]
                else:
                    results[res_k]['test_debug'] = ['INFO: Debugging disabled']
                    debug_messages = {}
            print(json.dumps(results, indent=4, sort_keys=True))
            #remove unused logger handlers and filters to avoid memory leaks
            ft.logger.handlers = [ft.logger.handlers[-1]]
            #ft.logger.filters = [ft.logger.filters]
            current, peak = tracemalloc.get_traced_memory()
            print(
                f"Current memory usage is {current / 10 ** 6}MB; Peak was {peak / 10 ** 6}MB"
            )
            snapshot = tracemalloc.take_snapshot()
            top_stats = snapshot.statistics('traceback')

            # pick the biggest memory block
            stat = top_stats[0]
            print("%s memory blocks: %.1f KiB" %
                  (stat.count, stat.size / 1024))
            for line in stat.traceback.format():
                print(line)

            for i, stat in enumerate(snapshot.statistics('filename')[:5], 1):
                print(i, str(stat))

            #preproc.logger.
            gc.collect()
    tracemalloc.stop()
Example #4
def main():
    config = ConfigParser.ConfigParser()
    my_path = Path(__file__).parent.parent
    ini_path = os.path.join(my_path, 'config', 'server.ini')
    config.read(ini_path)
    YAML_DIR = config['SERVICE']['yaml_directory']
    METRIC_YAML = config['SERVICE']['metrics_yaml']
    METRIC_YML_PATH = os.path.join(my_path, YAML_DIR, METRIC_YAML)
    SPDX_URL = config['EXTERNAL']['spdx_license_github']
    DATACITE_API_REPO = config['EXTERNAL']['datacite_api_repo']
    RE3DATA_API = config['EXTERNAL']['re3data_api']
    METADATACATALOG_API = config['EXTERNAL']['metadata_catalog']
    isDebug = config.getboolean('SERVICE', 'debug_mode')
    data_files_limit = int(config['SERVICE']['data_files_limit'])
    metric_specification = config['SERVICE']['metric_specification']

    preproc = Preprocessor()
    preproc.retrieve_metrics_yaml(METRIC_YML_PATH, data_files_limit, metric_specification)
    print('Total metrics defined: {}'.format(preproc.get_total_metrics()))

    preproc.retrieve_licenses(SPDX_URL, isDebug)
    preproc.retrieve_datacite_re3repos(RE3DATA_API, DATACITE_API_REPO, isDebug)
    preproc.retrieve_metadata_standards(METADATACATALOG_API, isDebug)
    preproc.retrieve_science_file_formats(isDebug)
    preproc.retrieve_long_term_file_formats(isDebug)

    print('Total SPDX licenses : {}'.format(preproc.get_total_licenses()))
    print('Total re3repositories found from datacite api : {}'.format(len(preproc.getRE3repositories())))
    print('Total subjects area of imported metadata standards : {}'.format(len(preproc.metadata_standards)))
    start = False
    for identifier in testpids:
        print(identifier)
        if identifier == startpid or not startpid:
            start = True
        if start:
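            # This variant forces debug on and disables DataCite lookups.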
            ft = FAIRCheck(uid=identifier,  test_debug=True, use_datacite=False)
            uid_result, pid_result = ft.check_unique_persistent()
            core_metadata_result = ft.check_minimal_metatadata()
            content_identifier_included_result = ft.check_content_identifier_included()
            access_level_result = ft.check_data_access_level()
            license_result = ft.check_license()
            relatedresources_result = ft.check_relatedresources()
            check_searchable_result = ft.check_searchable()
            data_file_format_result = ft.check_data_file_format()
            community_standards_result = ft.check_community_metadatastandards()
            data_provenance_result = ft.check_data_provenance()
            data_content_metadata = ft.check_data_content_metadata()
            formal_representation_result = ft.check_formal_metadata()
            semantic_vocabulary_result = ft.check_semantic_vocabulary()
            metadata_preserved_result = ft.check_metadata_preservation()
            standard_protocol_data_result = ft.check_standardised_protocol_data()
            standard_protocol_metadata_result = ft.check_standardised_protocol_metadata()
            results = [
                uid_result, pid_result, core_metadata_result,
                content_identifier_included_result, check_searchable_result,
                access_level_result, formal_representation_result,
                semantic_vocabulary_result, license_result,
                data_file_format_result, data_provenance_result,
                relatedresources_result, community_standards_result,
                data_content_metadata, metadata_preserved_result,
                standard_protocol_data_result, standard_protocol_metadata_result
            ]
            #results=[core_metadata_result,uid_result, pid_result]
            #print(ft.metadata_merged)
            for res_k, res_v in enumerate(results):
                if ft.isDebug:
                    debug_list = ft.msg_filter.getMessage(res_v['metric_identifier'])
                    if debug_list is not None:
                        results[res_k]['test_debug'] = debug_list
                    else:
                        results[res_k]['test_debug'] = ['INFO: No debug messages received']
                else:
                    results[res_k]['test_debug'] = ['INFO: Debugging disabled']

            print(json.dumps(results, indent=4, sort_keys=True))
Example #5
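Examples #5 to #7 are connexion controller variants of the same assessment flow, taken from the service's HTTP API. A plausible preamble, again assuming the fuji project layout (all paths here are assumptions):

import datetime
import os

import connexion

from fuji_server.controllers.fair_check import FAIRCheck  # assumed module path
from fuji_server.helper.identifier_helper import IdentifierHelper  # Example #6 only; assumed path
from fuji_server.helper.preprocessor import Preprocessor  # assumed module path
from fuji_server.models.body import Body  # assumed path (connexion-generated model)
from fuji_server.models.fair_results import FAIRResults  # assumed path
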
def assess_by_id(body):  # noqa: E501
    """assess_by_id

    Evaluate FAIRness of a data object based on its identifier # noqa: E501

    :param body: assessment request body (object identifier plus options)
    :type body: dict | bytes

    :rtype: FAIRResults
    """

    if connexion.request.is_json:
        results = []
        body = Body.from_dict(connexion.request.get_json())
        identifier = body.object_identifier
        debug = body.test_debug
        oai = body.oaipmh_endpoint
        ft = FAIRCheck(uid=identifier, test_debug=debug, oaipmh=oai)

        uid_result, pid_result = ft.check_unique_persistent()
        core_metadata_result = ft.check_minimal_metatadata()
        content_identifier_included_result = ft.check_content_identifier_included()
        access_level_result = ft.check_data_access_level()
        license_result = ft.check_license()
        related_resources_result = ft.check_relatedresources()
        check_searchable_result = ft.check_searchable()
        data_file_format_result = ft.check_data_file_format()
        community_standards_result = ft.check_community_metadatastandards()
        data_provenance_result = ft.check_data_provenance()
        data_content_result = ft.check_data_content_metadata()
        formal_metadata_result = ft.check_formal_metadata()
        semantic_vocab_result = ft.check_semantic_vocabulary()
        metadata_preserved_result = ft.check_metadata_preservation()
        standard_protocol_data_result = ft.check_standardised_protocol_data()
        standard_protocol_metadata_result = ft.check_standardised_protocol_metadata()

        results.append(uid_result)
        results.append(pid_result)
        results.append(core_metadata_result)
        results.append(content_identifier_included_result)
        results.append(check_searchable_result)
        results.append(access_level_result)
        results.append(formal_metadata_result)
        results.append(semantic_vocab_result)
        results.append(related_resources_result)
        results.append(data_content_result)
        results.append(license_result)
        results.append(data_provenance_result)
        results.append(community_standards_result)
        results.append(data_file_format_result)
        results.append(standard_protocol_data_result)
        results.append(standard_protocol_metadata_result)
        for res_k, res_v in enumerate(results):
            if ft.isDebug:
                debug_list = ft.msg_filter.getMessage(res_v['metric_identifier'])
                if debug_list is not None:
                    results[res_k]['test_debug'] = debug_list
                else:
                    results[res_k]['test_debug'] = ['INFO: No debug messages received']

        timestmp = datetime.datetime.now().replace(microsecond=0).isoformat()
        metric_spec = Preprocessor.metric_specification
        metric_version = os.path.basename(Preprocessor.METRIC_YML_PATH)
        totalmetrics = len(results)
        final_response = FAIRResults(timestamp=timestmp,
                                     test_id=ft.test_id,
                                     metric_version=metric_version,
                                     metric_specification=metric_spec,
                                     total_metrics=totalmetrics,
                                     results=results)
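    # NOTE: assumes a JSON request (connexion.request.is_json); for non-JSON bodies final_response would be unbound here.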
    return final_response
Example #6
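This variant reads several extra options from the request body; a hypothetical payload matching the attributes accessed below (all values are illustrative placeholders):

request_json = {
    'object_identifier': 'https://doi.org/10.5281/zenodo.3693620',  # placeholder PID
    'test_debug': True,
    'metadata_service_endpoint': 'http://ws.pangaea.de/oai/provider',  # placeholder endpoint
    'metadata_service_type': 'oai_pmh',
    'use_datacite': True,
    'oaipmh_endpoint': 'http://ws.pangaea.de/oai/provider'  # placeholder endpoint
}
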
def assess_by_id(body):  # noqa: E501
    """assess_by_id

    Evaluate FAIRness of a data object based on its identifier # noqa: E501

    :param body: assessment request body (object identifier plus options)
    :type body: dict | bytes

    :rtype: FAIRResults
    """

    if connexion.request.is_json:
        results = []
        body = Body.from_dict(connexion.request.get_json())
        identifier = body.object_identifier
        debug = body.test_debug
        metadata_service_endpoint = body.metadata_service_endpoint
        oaipmh_endpoint = body.oaipmh_endpoint
        metadata_service_type = body.metadata_service_type
        usedatacite = body.use_datacite
        logger = Preprocessor.logger
        logger.info('Assessment target: ' + identifier)
        print('Assessment target: ', identifier, flush=True)
        ft = FAIRCheck(uid=identifier,
                       test_debug=debug,
                       metadata_service_url=metadata_service_endpoint,
                       metadata_service_type=metadata_service_type,
                       use_datacite=usedatacite,
                       oaipmh_endpoint=oaipmh_endpoint)
        # set target for remote logging
        remote_log_host, remote_log_path = Preprocessor.remote_log_host, Preprocessor.remote_log_path
        #print(remote_log_host, remote_log_path)
        if remote_log_host and remote_log_path:
            ft.set_remote_logging_target(remote_log_host, remote_log_path)
        uid_result, pid_result = ft.check_unique_persistent()
        ft.retrieve_metadata_embedded(ft.extruct_result)
        if ft.repeat_pid_check:
            uid_result, pid_result = ft.check_unique_persistent()
        include_embedded = True
        ft.retrieve_metadata_external()
        if ft.repeat_pid_check:
            uid_result, pid_result = ft.check_unique_persistent()

        core_metadata_result = ft.check_minimal_metatadata()
        content_identifier_included_result = ft.check_content_identifier_included()
        access_level_result = ft.check_data_access_level()
        license_result = ft.check_license()
        related_resources_result = ft.check_relatedresources()
        check_searchable_result = ft.check_searchable()
        data_content_result = ft.check_data_content_metadata()
        data_file_format_result = ft.check_data_file_format()
        community_standards_result = ft.check_community_metadatastandards()
        data_provenance_result = ft.check_data_provenance()
        formal_metadata_result = ft.check_formal_metadata()
        semantic_vocab_result = ft.check_semantic_vocabulary()
        metadata_preserved_result = ft.check_metadata_preservation()
        standard_protocol_data_result = ft.check_standardised_protocol_data()
        standard_protocol_metadata_result = ft.check_standardised_protocol_metadata()

        results.append(uid_result)
        results.append(pid_result)
        results.append(core_metadata_result)
        results.append(content_identifier_included_result)
        results.append(check_searchable_result)
        results.append(access_level_result)
        results.append(formal_metadata_result)
        results.append(semantic_vocab_result)
        results.append(related_resources_result)
        results.append(data_content_result)
        results.append(license_result)
        results.append(data_provenance_result)
        results.append(community_standards_result)
        results.append(data_file_format_result)
        results.append(standard_protocol_data_result)
        results.append(standard_protocol_metadata_result)
        debug_messages = ft.get_log_messages_dict()
        ft.logger_message_stream.flush()
        summary = ft.get_assessment_summary(results)
        for res_k, res_v in enumerate(results):
            if ft.isDebug:
                debug_list = debug_messages.get(res_v['metric_identifier'])
                # debug_list= ft.msg_filter.getMessage(res_v['metric_identifier'])
                if debug_list is not None:
                    results[res_k]['test_debug'] = debug_list
                else:
                    results[res_k]['test_debug'] = [
                        'INFO: No debug messages received'
                    ]
            else:
                results[res_k]['test_debug'] = ['INFO: Debugging disabled']
                debug_messages = {}
        ft.logger.handlers = [ft.logger.handlers[-1]]
        # use timestamp format from RFC 3339 as specified in openapi3
        timestmp = datetime.datetime.now().replace(microsecond=0).isoformat() + 'Z'
        metric_spec = Preprocessor.metric_specification
        metric_version = os.path.basename(Preprocessor.METRIC_YML_PATH)
        totalmetrics = len(results)
        request = body.to_dict()
        if ft.pid_url:
            idhelper = IdentifierHelper(ft.pid_url)
            request['normalized_object_identifier'] = idhelper.get_normalized_id()
        final_response = FAIRResults(request=request,
                                     timestamp=timestmp,
                                     software_version=ft.FUJI_VERSION,
                                     test_id=ft.test_id,
                                     metric_version=metric_version,
                                     metric_specification=metric_spec,
                                     total_metrics=totalmetrics,
                                     results=results,
                                     summary=summary)
    return final_response
Example #7
def assess_by_id(body):  # noqa: E501
    """assess_by_id

    Evaluate FAIRness of a data object based on its identifier # noqa: E501

    :param body: assessment request body (object identifier plus options)
    :type body: dict | bytes

    :rtype: FAIRResults
    """

    if connexion.request.is_json:
        results = []
        body = Body.from_dict(connexion.request.get_json())
        identifier = body.object_identifier
        debug = body.test_debug
        oai = body.oaipmh_endpoint
        ft = FAIRCheck(uid=identifier, test_debug=debug, oaipmh=oai)

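        # This variant runs a reduced set of checks and returns a slimmer FAIRResults
        # (no test_id, metric_version, or summary).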
        uid_result, pid_result = ft.check_unique_persistent()
        core_metadata_result = ft.check_minimal_metatadata()
        content_identifier_included_result = ft.check_content_identifier_included()
        check_searchable_result = ft.check_searchable()
        access_result = ft.check_data_access_level()
        formal_metadata_result = ft.check_formal_metadata()
        semantic_vocab_result = ft.check_semantic_vocabulary()
        relatedresources_result = ft.check_relatedresources()
        datacontent_result = ft.check_data_content_metadata()
        license_result = ft.check_license()
        provenance_result = ft.check_data_provenance()
        community_standards_result = ft.check_community_metadatastandards()
        fileformat_result = ft.check_data_file_format()

        results.append(uid_result)
        results.append(pid_result)
        results.append(core_metadata_result)
        results.append(content_identifier_included_result)
        results.append(check_searchable_result)
        results.append(access_result)
        results.append(formal_metadata_result)
        results.append(semantic_vocab_result)
        results.append(relatedresources_result)
        results.append(datacontent_result)
        results.append(license_result)
        results.append(provenance_result)
        results.append(community_standards_result)
        results.append(fileformat_result)

        # generate RFC 3339 timestamp as specified in openapi3
        timestmp = datetime.datetime.now().replace(microsecond=0).isoformat() + 'Z'
        metric_spec = Preprocessor.metric_specification
        totalmetrics = len(results)
        final_response = FAIRResults(timestamp=timestmp,
                                     metric_specification=metric_spec,
                                     total_metrics=totalmetrics,
                                     results=results)
    return final_response