Example #1
0
def metric_138_institution(doc):
    """Score whether an institution is associated with the dataset.

    ``doc`` is framed twice with ``jsonld_frame``: first for ``Dataset``
    nodes whose ``creators`` are ``Organization`` entries with a ``name``,
    then for ``Person`` creators that carry ``Organization`` affiliations.

    Yields exactly one dict with ``value`` and ``comment`` keys: ``1`` when
    organization creators were found, ``0.75`` when only affiliated
    organizations of person creators were found, and ``0`` otherwise.
    """
    organization_frame = {
        '@type': 'Dataset',
        'creators': {
            '@type': 'Organization',
            'name': {},
        }
    }
    institutions = set()
    for dataset in jsonld_frame(doc, organization_frame)['@graph']:
        if not dataset['creators']:
            continue
        for creator in force_list(dataset['creators']):
            if creator['name']:
                institutions.add(creator['name'])

    affiliation_frame = {
        '@type': 'Dataset',
        'creators': {
            '@type': 'Person',
            'fullName': {},
            'affiliations': {
                '@type': 'Organization',
                'name': {}
            }
        }
    }
    # Each (person, organization) pair is serialized to a JSON string so
    # that duplicates collapse inside the set; dicts are not hashable.
    serialized_pairs = set()
    for dataset in jsonld_frame(doc, affiliation_frame)['@graph']:
        if not dataset['creators']:
            continue
        for creator in force_list(dataset['creators']):
            if not (creator['fullName'] and creator['affiliations']):
                continue
            for affiliation in force_list(creator['affiliations']):
                if affiliation['name']:
                    serialized_pairs.add(
                        json.dumps({
                            'person': creator['fullName'],
                            'organization': affiliation['name']
                        }))
    affiliated_institutions = [json.loads(pair) for pair in serialized_pairs]

    if institutions:
        score = 1
        comment = 'Found institution(s): {}'.format(', '.join(institutions))
    elif affiliated_institutions:
        score = 0.75
        described = [
            '{} <{}>'.format(pair['person'], pair['organization'])
            for pair in affiliated_institutions
        ]
        comment = 'Found affiliated institution(s): {}'.format(
            ', '.join(described))
    else:
        score = 0
        comment = 'No institution was identified'
    yield {
        'value': score,
        'comment': comment,
    }
def metric_145_landing_page(doc):
  """Score whether the dataset distribution exposes a reachable landing page.

  Frames ``doc`` for ``DatasetDistribution`` nodes and collects every
  distinct ``access.landingPage`` value, then issues an HTTP GET for each.

  Yields one dict with ``value`` and ``comment`` keys per landing page:
  ``1`` when the GET returns a status code below 400, ``0.75`` when it
  returns an error status or the request itself fails. When no landing page
  is found a single ``0`` result is yielded.
  """
  landingPages = set()
  for node in jsonld_frame(doc, {
    '@type': 'DatasetDistribution',
    'access': {
      'landingPage': {},
    }
  })['@graph']:
    accesses = node['access']
    if not accesses:
      continue
    # ``access`` may hold one object or several; normalise to a list so that
    # multi-valued access entries no longer raise on ['landingPage'].
    if not isinstance(accesses, list):
      accesses = [accesses]
    for access in accesses:
      if access['landingPage']:
        landingPages.add(access['landingPage'])

  if landingPages:
    for landingPage in landingPages:
      try:
        # Bounded wait: without a timeout a stalled server blocks the whole
        # assessment. NOTE(review): 10 seconds is an arbitrary bound.
        accessible = requests.get(landingPage, timeout=10).status_code < 400
      except requests.exceptions.RequestException:
        # Connection errors, invalid URLs and timeouts count as "reports a
        # problem" instead of aborting the remaining landing pages.
        accessible = False
      if accessible:
        yield {
          'value': 1,
          'comment': 'Landing page found {} and seems to be accessible'.format(landingPage)
        }
      else:
        yield {
          'value': 0.75,
          'comment': 'Landing page found {} but seems to report a problem'.format(landingPage)
        }
  else:
    yield {
      'value': 0,
      'comment': 'Could not identify any landing pages'
    }
def metric_136_program(doc):
  """Score whether the dataset names the program it belongs to.

  Frames ``doc`` for ``Dataset`` nodes with a ``program`` carrying a ``name``
  and collects the distinct program names.

  Yields exactly one dict with ``value`` and ``comment`` keys: ``1`` with the
  joined program names when any were found, ``0`` otherwise.
  """
  programs = set(
    # Take the name from the program being iterated; indexing
    # node['program'] directly fails when it is a list of programs.
    program['name']
    for node in jsonld_frame(doc, {
      '@type': 'Dataset',
      'program': {
        'name': {}
      }
    })['@graph']
    if node['program']
    for program in force_list(node['program'])
    if program['name']
  )
  if programs:
    yield {
      'value': 1,
      'comment': 'Identified program(s): {}'.format(
        ', '.join(programs)
      )
    }
  else:
    yield {
      'value': 0,
      'comment': 'Could not identify any programs',
    }
def metric_104_doi(doc):
    """Score whether the document carries a resolvable DOI.

    Frames ``doc`` for identifier-like nodes whose ``description`` is
    ``doi``/``DOI`` and collects the distinct ``identifier`` values. Each one
    is opened as a URL, prefixed with ``https://doi.org/`` unless it already
    starts with ``http``.

    Yields one dict with ``value`` and ``comment`` keys per DOI: ``1`` when
    the URL opens, ``0.25`` when opening it fails. When no DOI is found a
    single ``0`` result is yielded.
    """
    dois = set(node['identifier'] for node in jsonld_frame(
        doc, {
            '@type':
            ['Identifier', 'relatedIdentifier', 'alternateIdentifiers'],
            'description': ['doi', 'DOI'],
            'identifier': {}
        })['@graph'] if node['identifier'] and node['description'])
    if not dois:
        yield {
            'value': 0,
            'comment': 'No DOIs could be identified',
        }
        return

    for doi in dois:
        url = doi if doi.startswith('http') else 'https://doi.org/{}'.format(
            doi)
        try:
            # Only reachability matters; the context manager closes the
            # response so the connection is not leaked.
            with urllib.request.urlopen(url):
                pass
        except urllib.error.URLError:
            # URLError is the base class of HTTPError, so this also covers
            # DNS and connection failures that previously escaped.
            verified = False
        else:
            verified = True

        if verified:
            yield {
                'value': 1,
                'comment': 'DOI {} was identified and verified'.format(doi)
            }
        else:
            yield {
                'value':
                0.25,
                'comment':
                'DOI {} was identified but could not be verified'.format(doi)
            }
Example #5
0
def metric_27_contact_pi(doc):
  """Score whether a principal investigator can be identified.

  Frames ``doc`` for ``Dataset`` nodes whose ``creators`` are ``Person``
  entries and collects each distinct (fullName, roles) combination.

  Yields exactly one dict with ``value`` and ``comment`` keys: ``1`` when a
  person has the ``Principal Investigator`` role, ``0.5`` when people were
  found but none is a PI, and ``0`` when nobody was found.
  """
  people = list(map(json.loads, set(
    json.dumps({
      'fullName': creators['fullName'],
      'roles': creators.get('roles', []),
    })
    for node in jsonld_frame(doc, {
      '@type': 'Dataset',
      'creators': {
        '@type': 'Person',
        'fullName': {},
        'roles': {
          '@default': []
        },
      }
    })['@graph']
    if node['creators']
    for creators in force_list(node['creators'])
    if creators['fullName']
  )))
  PIs = [
    person
    for person in people
    if 'Principal Investigator' in person['roles']
  ]
  if PIs:
    yield {
      'value': 1,
      'comment': 'Found PI(s): {}'.format(
        ', '.join([person['fullName'] for person in PIs])
      ),
    }
  elif people:
    descriptions = []
    for person in people:
      roles = person['roles']
      # A single role may arrive as a bare string; wrap it so the join
      # below does not split it into characters.
      if isinstance(roles, str):
        roles = [roles]
      # The role suffix is optional, the name is not: the conditional must
      # apply to the suffix only, otherwise role-less people become ''.
      suffix = ('(' + ', '.join(roles) + ')') if roles else ''
      descriptions.append(person['fullName'] + suffix)
    yield {
      'value': 0.5,
      # Join the names as in the PI branch instead of formatting a list repr.
      'comment': 'Found {}, but cannot determine a PI'.format(
        ', '.join(descriptions)
      ),
    }
  else:
    yield {
      'value': 0,
      'comment': 'No PI or people could be identified'
    }
Example #6
0
def metric_137_project(doc):
    """Score whether the dataset names the project (repository) it is stored in.

    Frames ``doc`` for ``Dataset`` nodes whose ``storedIn`` entries are
    ``DataRepository`` objects and collects their distinct ``name`` values.

    Yields exactly one dict with ``value`` and ``comment`` keys: ``1`` with
    the joined names when any were found, ``0`` otherwise.
    """
    repository_frame = {
        '@type': 'Dataset',
        'storedIn': {
            '@type': 'DataRepository',
            'name': {}
        }
    }
    projects = set()
    for dataset in jsonld_frame(doc, repository_frame)['@graph']:
        if not dataset['storedIn']:
            continue
        for repository in force_list(dataset['storedIn']):
            if repository['name']:
                projects.add(repository['name'])

    if not projects:
        yield {
            'value': 0,
            'comment': 'Could not identify any projects',
        }
        return
    yield {
        'value': 1,
        'comment': 'Identified project(s): {}'.format(', '.join(projects)),
    }
def metric_110_access_protocol(doc):
    """Score whether the dataset distribution declares an access URL.

    Frames ``doc`` for ``DatasetDistribution`` nodes and collects the
    distinct ``accessURL`` values of their ``access`` entries.

    Yields exactly one dict with ``value`` and ``comment`` keys: ``1`` with
    the joined access URLs when any were found, ``0`` otherwise.
    """
    distribution_frame = {
        '@type': 'DatasetDistribution',
        'access': {
            'accessURL': {},
        }
    }
    access_protocols = set()
    for distribution in jsonld_frame(doc, distribution_frame)['@graph']:
        if not distribution['access']:
            continue
        for entry in force_list(distribution['access']):
            if entry['accessURL']:
                access_protocols.add(entry['accessURL'])

    if not access_protocols:
        yield {
            'value': 0,
            'comment': 'Could not identify any access protocols'
        }
        return
    found = ', '.join(access_protocols)
    yield {
        'value': 1,
        'comment': 'Access protocol(s) found: {}'.format(found)
    }
def metric_108_resource_identifier(doc):
    """Score whether the dataset has a resolvable resource identifier.

    Frames ``doc`` for ``Dataset`` nodes with an ``identifier`` object and
    builds each distinct identifier string as ``identifierSource`` followed
    by ``identifier``. Strings containing ``://`` are fetched with an HTTP
    GET.

    Yields one dict with ``value`` and ``comment`` keys per identifier:
    ``1`` when it looks like a URL and the GET returns a status code below
    400, ``0.75`` otherwise (not a URL, error status, or failed request).
    When no identifier is found a single ``0`` result is yielded.
    """
    identifiers = set(
        node['identifier'].get('identifierSource', '') +
        node['identifier']['identifier'] for node in jsonld_frame(
            doc, {
                '@type': 'Dataset',
                'identifier': {
                    'identifier': {},
                    'identifierSource': {
                        '@default': ''
                    },
                }
            })['@graph']
        if node['identifier'] and node['identifier']['identifier'])
    if not identifiers:
        yield {
            'value': 0,
            'comment': 'No resource identifier was found',
        }
        return

    for identifier in identifiers:
        verified = False
        if '://' in identifier:
            try:
                # Bounded wait so one stalled server cannot hang the run.
                # NOTE(review): 10 seconds is an arbitrary bound.
                verified = requests.get(identifier,
                                        timeout=10).status_code < 400
            except requests.exceptions.RequestException:
                # An unreachable or malformed URL is "not verified", not a
                # reason to abort the remaining identifiers.
                verified = False

        if verified:
            yield {
                'value':
                1,
                'comment':
                'Resource identifier {} was identified and verified'.format(
                    identifier)
            }
        else:
            yield {
                'value':
                0.75,
                'comment':
                'Resource identifier {} was identified but could not be verified'
                .format(identifier)
            }
def metric_143_ncbitaxon(doc):
    """Score the dataset's taxonomy annotations against NCBITaxon.

    Frames ``doc`` for ``Dataset`` nodes whose ``isAbout`` entries are
    ``TaxonomicInformation`` and collects each distinct (name, IRI) pair,
    where the IRI is ``identifierSource`` concatenated with ``identifier``.

    Yields one dict with ``value`` and ``comment`` keys per taxonomy:
    ``1`` when a ``pronto.Term`` built from the namespaced IRI and the name
    is in ``NCBITaxon``; ``0.75`` when only the namespaced IRI or only the
    name is found; ``0.5`` when the name is a known synonym; ``0.25``
    otherwise. When no taxonomy is found a single ``0.0`` result is yielded.

    ``IRI_to_NS`` and the ``NCBITaxon*`` lookups are defined elsewhere.
    """
    serialized = set()
    for node in jsonld_frame(
            doc, {
                '@type': 'Dataset',
                'isAbout': {
                    '@type': 'TaxonomicInformation',
                    'name': {},
                    'identifier': {
                        'identifier': {},
                        'identifierSource': {
                            '@default': ''
                        },
                    }
                }
            })['@graph']:
        if not node['isAbout']:
            continue
        for isAbout in force_list(node['isAbout']):
            if not isAbout['name']:
                continue
            # The identifier may be absent or null; ``.get(key, {})`` only
            # covers the absent case, so fall back explicitly.
            identifier = isAbout.get('identifier') or {}
            serialized.add(
                json.dumps({
                    'value':
                    isAbout['name'],
                    'valueIRI':
                    (identifier.get('identifierSource') or '') +
                    (identifier.get('identifier') or '')
                }))
    taxonomies = [json.loads(entry) for entry in serialized]

    if taxonomies:
        for taxonomy in taxonomies:
            value_ns = IRI_to_NS(taxonomy.get('valueIRI'))
            if taxonomy.get(
                    'value') and taxonomy.get('valueIRI') and pronto.Term(
                        value_ns, taxonomy['value']) in NCBITaxon:
                yield {
                    'value':
                    1,
                    'comment':
                    'Ontological IRI for taxonomy {} and term match what is found in NCBITaxon.'
                    .format(value_ns),
                }
            elif taxonomy.get('valueIRI') and value_ns in NCBITaxon:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Ontological IRI for taxonomy {} found in NCBITaxon.'.
                    format(value_ns),
                }
            elif taxonomy.get(
                    'value') and taxonomy['value'] in NCBITaxon_reversed:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Taxonomy `{}` found in NCBITaxon.'.format(
                        taxonomy['value']),
                }
            elif taxonomy.get('value') and taxonomy[
                    'value'] in NCBITaxon_reversed_synonyms:
                yield {
                    'value':
                    0.5,
                    'comment':
                    'Taxonomy `{}` found in NCBITaxon synonyms.'.format(
                        taxonomy['value']),
                }
            else:
                yield {
                    'value':
                    0.25,
                    'comment':
                    'Taxonomy `{}` found but not in NCBITaxon.'.format(
                        taxonomy.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Taxonomy could not be identified',
        }
Example #10
0
def metric_142_edam(doc):
  """Score the dataset's file-type annotations against EDAM.

  Frames ``doc`` for ``Dataset`` nodes and collects each distinct
  (value, valueIRI) pair from ``types.information`` where both parts are
  non-empty.

  Yields one dict with ``value`` and ``comment`` keys per file type:
  ``1`` when a ``pronto.Term`` built from the namespaced IRI and the value is
  in ``EDAM``; ``0.75`` when only the namespaced IRI or only the value is
  found; ``0.5`` when the value is a known synonym; ``0.25`` otherwise.
  When no file type is found a single ``0.0`` result is yielded.
  """
  information_frame = {
    '@type': 'Dataset',
    'types': {
      'information': {
        'value': { '@default': '' },
        'valueIRI': { '@default': '' }
      }
    }
  }
  # Pairs are stored as JSON strings so the set can de-duplicate them.
  encoded = set()
  for dataset in jsonld_frame(doc, information_frame)['@graph']:
    if not dataset['types']:
      continue
    for entry in force_list(dataset['types']):
      if not entry['information']:
        continue
      for information in force_list(entry['information']):
        if information['value'] and information['valueIRI']:
          encoded.add(json.dumps({
            'value': information['value'],
            'valueIRI': information['valueIRI'],
          }))
  filetypes = [json.loads(item) for item in encoded]

  if not filetypes:
    yield {
      'value': 0.0,
      'comment': 'filetype could not be identified',
    }
    return

  for filetype in filetypes:
    value_ns = IRI_to_NS(filetype.get('valueIRI'))
    label = filetype.get('value')
    iri = filetype.get('valueIRI')
    if label and iri and pronto.Term(value_ns, filetype['value']) in EDAM:
      score = 1
      comment = 'Ontological IRI for file type {} and term match what is found in EDAM.'.format(
        value_ns
      )
    elif iri and value_ns in EDAM:
      score = 0.75
      comment = 'Ontological IRI for filetype {} found in EDAM.'.format(
        value_ns
      )
    elif label and filetype['value'] in EDAM_reversed:
      score = 0.75
      comment = 'Filetype `{}` found in EDAM.'.format(
        filetype['value']
      )
    elif label and filetype['value'] in EDAM_reversed_synonyms:
      score = 0.5
      comment = 'Filetype `{}` found in EDAM synonyms.'.format(
        filetype['value']
      )
    else:
      score = 0.25
      suffix = ('<' + value_ns + '>') if value_ns else ''
      comment = 'Filetype `{}` found but not in EDAM.'.format(
        filetype.get('value', '') + suffix
      )
    yield {
      'value': score,
      'comment': comment,
    }
Example #11
0
def metric_144_cellosaurus(doc):
    """Score the dataset's cell-line annotations against Cellosaurus.

    Frames ``doc`` for ``Dataset`` nodes whose ``isAbout`` entries are
    ``BiologicalEntity`` and collects each distinct (name, IRI) pair, where
    the IRI is ``identifierSource`` concatenated with ``identifier``.

    Yields one dict with ``value`` and ``comment`` keys per cell line:
    ``1`` when the ``Cellosaurus`` entry for the namespaced IRI has the same
    name; ``0.75`` when only the namespaced IRI or only the name is found;
    ``0.5`` when the name is a known synonym; ``0.25`` otherwise. When no
    cell line is found a single ``0.0`` result is yielded.

    ``IRI_to_NS`` and the ``Cellosaurus*`` lookups are defined elsewhere.
    """
    serialized = set()
    for node in jsonld_frame(
            doc, {
                '@type': 'Dataset',
                'isAbout': {
                    '@type': 'BiologicalEntity',
                    'name': {},
                    'identifier': {
                        'identifier': {},
                        'identifierSource': {
                            '@default': ''
                        },
                    }
                }
            })['@graph']:
        if not node['isAbout']:
            continue
        for isAbout in force_list(node['isAbout']):
            if not isAbout['name']:
                continue
            # The identifier may be absent or null; ``.get(key, {})`` only
            # covers the absent case, so fall back explicitly.
            identifier = isAbout.get('identifier') or {}
            serialized.add(
                json.dumps({
                    'value':
                    isAbout['name'],
                    'valueIRI':
                    (identifier.get('identifierSource') or '') +
                    (identifier.get('identifier') or '')
                }))
    cell_lines = [json.loads(entry) for entry in serialized]

    if cell_lines:
        for cell_line in cell_lines:
            value_ns = IRI_to_NS(cell_line.get('valueIRI'))
            if cell_line.get(
                    'value') and cell_line.get('valueIRI') and Cellosaurus.get(
                        value_ns, {}).get('name') == cell_line['value']:
                yield {
                    'value':
                    1,
                    'comment':
                    'Ontological IRI for cell line {} and term match what is found in Cellosaurus.'
                    .format(value_ns),
                }
            elif cell_line.get('valueIRI') and value_ns in Cellosaurus:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Ontological IRI for cell line {} found in Cellosaurus.'.
                    format(value_ns),
                }
            # Name lookup: test the cell line's name, not the namespaced
            # IRI, against the reversed (name-keyed) table.
            elif cell_line.get(
                    'value') and cell_line['value'] in Cellosaurus_reversed:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Cell line `{}` found in Cellosaurus.'.format(
                        cell_line['value']),
                }
            elif cell_line.get('value') and cell_line[
                    'value'] in Cellosaurus_reversed_synonyms:
                yield {
                    'value':
                    0.5,
                    'comment':
                    'Cell line `{}` found in Cellosaurus synonyms.'.format(
                        cell_line['value']),
                }
            else:
                yield {
                    'value':
                    0.25,
                    'comment':
                    'Cell line `{}` found but not in Cellosaurus.'.format(
                        cell_line.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Cell line could not be identified',
        }
Example #12
0
def metric_140_uberon(doc):
    """Score the dataset's anatomical-part annotations against UBERON.

    Frames ``doc`` for ``Dataset`` nodes whose ``isAbout`` entries are
    ``AnatomicalPart`` and collects each distinct (name, IRI) pair, where the
    IRI is ``identifierSource`` concatenated with ``identifier``.

    Yields one dict with ``value`` and ``comment`` keys per anatomical part:
    ``1`` when a ``pronto.Term`` built from the namespaced IRI and the name
    is in ``UBERON``; ``0.75`` when only the namespaced IRI or only the name
    is found; ``0.5`` when the name is a known synonym; ``0.25`` otherwise.
    When no anatomical part is found a single ``0.0`` result is yielded.

    ``IRI_to_NS`` and the ``UBERON*`` lookups are defined elsewhere.
    """
    serialized = set()
    for node in jsonld_frame(
            doc, {
                '@type': 'Dataset',
                'isAbout': {
                    '@type': 'AnatomicalPart',
                    'name': {},
                    'identifier': {
                        'identifier': {},
                        'identifierSource': {
                            '@default': ''
                        },
                    }
                }
            })['@graph']:
        if not node['isAbout']:
            continue
        for isAbout in force_list(node['isAbout']):
            if not isAbout['name']:
                continue
            # The identifier may be absent or null; ``.get(key, {})`` only
            # covers the absent case, so fall back explicitly.
            identifier = isAbout.get('identifier') or {}
            serialized.add(
                json.dumps({
                    'value':
                    isAbout['name'],
                    'valueIRI':
                    (identifier.get('identifierSource') or '') +
                    (identifier.get('identifier') or '')
                }))
    anatomical_parts = [json.loads(entry) for entry in serialized]

    if anatomical_parts:
        for anatomical_part in anatomical_parts:
            value_ns = IRI_to_NS(anatomical_part.get('valueIRI'))
            if anatomical_part.get('value') and anatomical_part.get(
                    'valueIRI') and pronto.Term(
                        value_ns, anatomical_part['value']) in UBERON:
                yield {
                    'value':
                    1,
                    'comment':
                    'Ontological IRI for anatomical part {} and term match what is found in UBERON.'
                    .format(value_ns),
                }
            elif anatomical_part.get('valueIRI') and value_ns in UBERON:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Ontological IRI for anatomical part {} found in UBERON.'.
                    format(value_ns),
                }
            elif anatomical_part.get(
                    'value') and anatomical_part['value'] in UBERON_reversed:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Anatomical part `{}` found in UBERON.'.format(
                        anatomical_part['value']),
                }
            elif anatomical_part.get('value') and anatomical_part[
                    'value'] in UBERON_reversed_synonyms:
                yield {
                    'value':
                    0.5,
                    'comment':
                    'Anatomical part `{}` found in UBERON synonyms.'.format(
                        anatomical_part['value']),
                }
            else:
                yield {
                    'value':
                    0.25,
                    'comment':
                    'Anatomical part `{}` found but not in UBERON.'.format(
                        anatomical_part.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Anatomical part could not be identified',
        }
def metric_141_mondo(doc):
    """Score the dataset's disease annotations against MONDO.

    Frames ``doc`` for ``Dataset`` nodes whose ``isAbout`` entries are
    ``Disease`` and collects each distinct (name, IRI) pair, where the IRI is
    ``identifierSource`` concatenated with ``identifier`` (empty when the
    entry has no identifier).

    Yields one dict with ``value`` and ``comment`` keys per disease:
    ``1`` when a ``pronto.Term`` built from the namespaced IRI and the name
    is in ``MONDO``; ``0.75`` when only the namespaced IRI or only the name
    is found; ``0.5`` when the name is a known synonym; ``0.25`` otherwise.
    When no disease is found a single ``0.0`` result is yielded.

    ``IRI_to_NS`` and the ``MONDO*`` lookups are defined elsewhere.
    """
    diseases = list(
        map(
            json.loads,
            set(
                json.dumps({
                    'value':
                    isAbout['name'],
                    'valueIRI': (
                        isAbout['identifier'].get('identifierSource', '') +
                        isAbout['identifier'].get('identifier', '')
                    ) if isAbout['identifier'] else ''
                }) for node in jsonld_frame(
                    doc, {
                        '@type': 'Dataset',
                        'isAbout': {
                            '@type': 'Disease',
                            'name': {},
                            'identifier': {
                                'identifier': {},
                                'identifierSource': {
                                    '@default': ''
                                },
                            }
                        }
                    })['@graph'] if node['isAbout']
                for isAbout in force_list(node['isAbout'])
                if isAbout['name'])))
    if diseases:
        for disease in diseases:
            value_ns = IRI_to_NS(disease.get('valueIRI'))
            if disease.get(
                    'value') and disease.get('valueIRI') and pronto.Term(
                        value_ns, disease['value']) in MONDO:
                yield {
                    'value':
                    1,
                    'comment':
                    'Ontological IRI for disease {} and term match what is found in MONDO.'
                    .format(value_ns),
                }
            # Look the ontology up by the namespaced id, the same key the
            # branch above uses, rather than by the raw IRI.
            elif disease.get('valueIRI') and value_ns in MONDO:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Ontological IRI for disease {} found in MONDO.'.format(
                        value_ns),
                }
            elif disease.get('value') and disease['value'] in MONDO_reversed:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Disease `{}` found in MONDO.'.format(disease['value']),
                }
            elif disease.get(
                    'value') and disease['value'] in MONDO_reversed_synonyms:
                yield {
                    'value':
                    0.5,
                    'comment':
                    'Disease `{}` found in MONDO synonyms.'.format(
                        disease['value']),
                }
            else:
                yield {
                    # An unmatched term must rank below a synonym match
                    # (0.5); 0.25 is what the other ontology metrics use.
                    'value':
                    0.25,
                    'comment':
                    'Disease `{}` found but not in MONDO.'.format(
                        disease.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Disease could not be identified',
        }
def metric_139_bao(doc):
    """Score the dataset's assay (method) annotations against BAO.

    Frames ``doc`` for ``Dataset`` nodes and collects each distinct
    (value, valueIRI) pair from ``types.method``. A method given as a plain
    string is taken as the value with an empty IRI; a method given as an
    object is kept when it has a value or an IRI.

    Yields one dict with ``value`` and ``comment`` keys per assay:
    ``1`` when a ``pronto.Term`` built from the namespaced IRI and the value
    is in ``BAO``; ``0.75`` when only the namespaced IRI or only the value is
    found; ``0.5`` when the value is a known synonym; ``0.25`` otherwise.
    When no assay is found a single ``0.0`` result is yielded.

    ``IRI_to_NS`` and the ``BAO*`` lookups are defined elsewhere.
    """
    serialized = set()
    for node in jsonld_frame(
            doc, {
                '@type': 'Dataset',
                'types': {
                    'method': {
                        'value': {
                            '@default': ''
                        },
                        'valueIRI': {
                            '@default': ''
                        }
                    }
                }
            })['@graph']:
        if not node['types']:
            continue
        for types in force_list(node['types']):
            if not types['method']:
                continue
            for method in force_list(types['method']):
                if isinstance(method, str):
                    value, value_iri = method, ''
                # The dict check must guard BOTH key lookups; previously
                # ``and`` bound tighter than ``or`` and any non-dict,
                # non-str method was indexed with ['valueIRI'].
                elif isinstance(method, dict) and (method['value']
                                                   or method['valueIRI']):
                    value, value_iri = method['value'], method['valueIRI']
                else:
                    continue
                serialized.add(
                    json.dumps({
                        'value': value,
                        'valueIRI': value_iri,
                    }))
    assays = [json.loads(entry) for entry in serialized]

    if assays:
        for assay in assays:
            value_ns = IRI_to_NS(assay.get('valueIRI'))
            if assay.get('value') and assay.get('valueIRI') and pronto.Term(
                    value_ns, assay.get('value')) in BAO:
                yield {
                    'value':
                    1,
                    'comment':
                    'Ontological IRI for Assay {} and term match what is found in BAO.'
                    .format(assay['valueIRI']),
                }
            # Look the ontology up by the namespaced id, the same key the
            # branch above uses, rather than by the raw IRI.
            elif value_ns and value_ns in BAO:
                yield {
                    'value':
                    0.75,
                    'comment':
                    'Ontological IRI for Assay {} found in BAO.'.format(
                        assay['valueIRI']),
                }
            elif assay.get('value') and assay['value'] in BAO_reversed:
                yield {
                    'value': 0.75,
                    'comment': 'Assay {} found in BAO.'.format(assay['value']),
                }
            elif assay.get(
                    'value') and assay['value'] in BAO_reversed_synonyms:
                yield {
                    'value':
                    0.5,
                    'comment':
                    'Assay `{}` found in BAO synonyms.'.format(assay['value']),
                }
            else:
                yield {
                    'value':
                    0.25,
                    'comment':
                    'Assay {} found but not in BAO.'.format(
                        assay.get('value', '') +
                        (('<' + value_ns + '>') if value_ns else '')),
                }
    else:
        yield {
            'value': 0.0,
            'comment': 'Assay could not be identified',
        }