Esempio n. 1
0
def resource(prefix: str):
    """Serve the page describing a single Bioregistry entry."""
    # Anything other than a string coming back from the helper is returned
    # unchanged (presumably a redirect or 404 response - confirm against
    # ``_normalize_prefix_or_404``).
    normalized = _normalize_prefix_or_404(prefix, '.' + resource.__name__)
    if not isinstance(normalized, str):
        return normalized

    example = bioregistry.get_example(normalized)
    context = {
        'prefix': normalized,
        'name': bioregistry.get_name(normalized),
        'example': example,
        'mappings': _get_resource_mapping_rows(normalized),
        'synonyms': bioregistry.get_synonyms(normalized),
        'homepage': bioregistry.get_homepage(normalized),
        'pattern': bioregistry.get_pattern(normalized),
        'version': bioregistry.get_version(normalized),
        'has_terms': bioregistry.has_terms(normalized),
        'obo_download': bioregistry.get_obo_download(normalized),
        'owl_download': bioregistry.get_owl_download(normalized),
        'namespace_in_lui': bioregistry.namespace_in_lui(normalized),
        'deprecated': bioregistry.is_deprecated(normalized),
        'contact': bioregistry.get_email(normalized),
        'banana': bioregistry.get_banana(normalized),
        'description': bioregistry.get_description(normalized),
    }
    # Providers are looked up with the example identifier, so there is
    # nothing to look up when the entry has no example.
    if example is None:
        context['providers'] = None
    else:
        context['providers'] = _get_resource_providers(normalized, example)
    return render_template('resource.html', **context)
def warnings():
    """Make warnings list."""
    # ``items`` is not defined in this function; it is read from the enclosing
    # scope and is iterated three times as (prefix, entry) pairs.

    # Entries whose own pattern disagrees with the pattern in their MIRIAM record.
    wrong_patterns = []
    for prefix, entry in items:
        if 'miriam' not in entry or 'pattern' not in entry:
            continue
        if entry['pattern'] == entry['miriam']['pattern']:
            continue
        wrong_patterns.append({
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'correct': entry['pattern'],
            'miriam': entry['miriam']['pattern'],
        })

    # Entries that carry an explicit 'namespace.embedded' value.
    # NOTE(review): this reads entry['miriam'] without checking that the key
    # exists - assumes every such entry also has a MIRIAM record; confirm.
    embedding_rewrites = []
    for prefix, entry in items:
        if 'namespace.embedded' not in entry:
            continue
        embedding_rewrites.append({
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'pattern': bioregistry.get_pattern(prefix),
            'correct': entry['namespace.embedded'],
            'miriam': entry['miriam']['namespaceEmbeddedInLui'],
        })

    # When are namespace rewrites required?
    prefix_rewrites = []
    for prefix, entry in items:
        if 'namespace.rewrite' not in entry:
            continue
        prefix_rewrites.append({
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'pattern': bioregistry.get_pattern(prefix),
            'correct': entry['namespace.rewrite'],
        })

    report = {
        'wrong_patterns': wrong_patterns,
        'embedding_rewrites': embedding_rewrites,
        'prefix_rewrites': prefix_rewrites,
    }
    output_path = os.path.join(DOCS_DATA, 'warnings.yml')
    with open(output_path, 'w') as file:
        yaml.safe_dump(report, file)
Esempio n. 3
0
 def check_valid_prefix_id(self, prefix, identifier):
     """Check the prefix/identifier pair is valid.

     A prefix present in ``self.entries`` is checked against that entry's
     ``pattern``. Any other prefix is looked up in the Bioregistry and, when
     the Bioregistry has a pattern for it, validated there.

     :param prefix: The prefix of the pair to check
     :param identifier: The identifier of the pair to check
     :raises InvalidPrefix: If the prefix is neither in ``self.entries`` nor
         known to the Bioregistry
     :raises InvalidIdentifier: If the identifier does not match the pattern
         available for the prefix
     """
     if prefix in self.entries:
         entry = self.entries[prefix]
         if not re.match(entry["pattern"], identifier):
             raise InvalidIdentifier(prefix, identifier)
     elif bioregistry.get_resource(prefix) is None:
         raise InvalidPrefix(prefix)
     elif bioregistry.get_pattern(prefix) is not None:
         # Only prefixes that have a pattern can be validated. The previous
         # condition was inverted: it ran when no pattern existed and raised
         # when validation succeeded, so bad identifiers were never rejected.
         if not bioregistry.validate(prefix, identifier):
             raise InvalidIdentifier(prefix, identifier)
Esempio n. 4
0
def resources():
    """Serve the listing page with one table row per registry prefix."""
    rows = []
    for prefix in bioregistry.read_registry():
        row = {
            'prefix': prefix,
            'name': bioregistry.get_name(prefix),
            'example': bioregistry.get_example(prefix),
            'homepage': bioregistry.get_homepage(prefix),
            'pattern': bioregistry.get_pattern(prefix),
            'namespace_in_lui': bioregistry.namespace_in_lui(prefix),
            'banana': bioregistry.get_banana(prefix),
            'description': bioregistry.get_description(prefix),
        }
        rows.append(row)
    return render_template('resources.html', rows=rows)
Esempio n. 5
0
def make_resource_node(cx: NiceCXBuilder, prefix: str) -> int:
    """Add a node for the given resource to the CX builder.

    The node is named after the resource and represents
    ``bioregistry.resource:<prefix>``. The homepage, description and pattern
    are attached as node attributes, each only when it has a truthy value.

    :param cx: The builder the node is added to
    :param prefix: The Bioregistry prefix of the resource
    :returns: Whatever ``cx.add_node`` returned for the new node
    """
    node_id = cx.add_node(
        name=bioregistry.get_name(prefix),
        represents=f'bioregistry.resource:{prefix}',
    )
    optional_attributes = (
        ('homepage', bioregistry.get_homepage),
        ('description', bioregistry.get_description),
        ('pattern', bioregistry.get_pattern),
    )
    for attribute, getter in optional_attributes:
        value = getter(prefix)
        if value:
            cx.add_node_attribute(node_id, attribute, value)
    # TODO add more
    return node_id
Esempio n. 6
0
def curation():
    """Make curation list."""
    # Each predicate takes (prefix, entry) and is true when the entry lacks
    # the piece of metadata in question. ``_g`` is defined elsewhere; it
    # presumably collects the entries for which the predicate holds - confirm.

    def lacks_wikidata_database(prefix, entry):
        return entry.get('wikidata', {}).get('database') is None

    def lacks_pattern(prefix, entry):
        return bioregistry.get_pattern(prefix) is None

    def lacks_format_url(prefix, entry):
        return bioregistry.get_format(prefix) is None

    def lacks_example(prefix, entry):
        return bioregistry.get_example(prefix) is None

    # Everything is computed before the output file is opened, so a failure
    # here does not leave a truncated file behind.
    report = {
        'wikidata': _g(lacks_wikidata_database),
        'pattern': _g(lacks_pattern),
        'formatter': _g(lacks_format_url),
        'example': _g(lacks_example),
    }

    output_path = os.path.join(DOCS_DATA, 'curation.yml')
    with open(output_path, 'w') as file:
        yaml.safe_dump(report, file)
Esempio n. 7
0
 def test_banana(self):
     """Check that the example of every banana-curated entry redirects on identifiers.org."""
     registry = bioregistry.read_registry()
     for prefix, entry in registry.items():
         banana = entry.get('banana')
         # Skip entries without a banana, and the prefixes for which
         # identifiers.org is known to be broken.
         if banana is None or prefix in IDOT_BROKEN:
             continue
         subtest_params = {
             'prefix': prefix,
             'banana': banana,
             'pattern': bioregistry.get_pattern(prefix),
         }
         with self.subTest(**subtest_params):
             example = bioregistry.get_example(prefix)
             self.assertIsNotNone(example)
             url = bioregistry.resolve_identifier.get_identifiers_org_url(
                 prefix, example)
             # Redirects are not followed; a 302 is the expected answer.
             response = self.session.get(url, allow_redirects=False)
             self.assertEqual(
                 302,
                 response.status_code,
                 msg=f'failed with URL: {url}',
             )
Esempio n. 8
0
def main(url: str, local: bool):
    """Test the API."""
    # Requests ``<url>/<prefix>:<example>`` for every registry prefix that has
    # an example identifier. A 302 counts as success, as does a 404 for a
    # prefix with no providers. Every other outcome is logged and makes the
    # process exit with status 1. ``local`` overrides ``url`` with the local
    # development server address.
    url = 'http://localhost:5000' if local else url.rstrip('/')
    click.echo(f'Testing resolution API on {url}')
    failure = False
    prefixes = tqdm(bioregistry.read_registry())

    for prefix in prefixes:
        identifier = bioregistry.get_example(prefix)
        if identifier is None:
            continue
        prefixes.set_postfix({'prefix': prefix})
        req_url = f'{url}/{prefix}:{identifier}'
        res = requests.get(req_url, allow_redirects=False)
        if res.status_code == 302:  # redirect
            continue

        log = partial(_log, req_url=req_url)
        if res.status_code != 404:
            # The fourth line of the body is expected to be a ``<p>...</p>``
            # element (presumably the server's default error page - confirm).
            # Shorter bodies are tolerated instead of raising IndexError and
            # aborting the whole run.
            body_lines = res.text.splitlines()
            if len(body_lines) > 3:
                text = body_lines[3][len('<p>'):-len('</p>')]
            else:
                text = ''
            log(f'HTTP {res.status_code}: {res.reason} {text}', fg='red')
        elif not bioregistry.get_providers(prefix, identifier):
            # A 404 is the expected answer when nothing can resolve the CURIE.
            continue
        elif '/' in identifier or SLASH_URL_ENCODED in identifier:
            log('contains slash 🎩 🎸', fg='red')
        elif not bioregistry.validate(prefix, identifier):
            pattern = bioregistry.get_pattern(prefix)
            if bioregistry.get_banana(prefix):
                log(f'banana {pattern} 🍌', fg='red')
            else:
                log(f'invalid example does not match pattern {pattern}',
                    fg='red')
        else:
            log('404 unknown issue', fg='red')

        # Only reached when one of the branches above logged a problem.
        failure = True

    sys.exit(1 if failure else 0)
Esempio n. 9
0
    def test_lui(self):
        """Check that patterns with an embedded namespace start with the MIRIAM prefix.

        The upstream discussion about these patterns is ongoing at:

        - https://github.com/identifiers-org/identifiers-org.github.io/issues/151
        """
        for prefix in bioregistry.read_registry():
            if (
                not bioregistry.namespace_in_lui(prefix)
                # rewrite rules are applied to prefixes with bananas
                or bioregistry.get_banana(prefix)
                # the identifiers.org patterns for these two are garbage
                or prefix in {'ark', 'obi'}
            ):
                continue
            with self.subTest(prefix=prefix):
                pattern = bioregistry.get_pattern(prefix)
                expected_start = bioregistry.get_identifiers_org_prefix(prefix).upper()
                # Accept the upper-cased prefix with or without a leading anchor.
                self.assertTrue(
                    pattern.startswith((f'^{expected_start}', expected_start)),
                    msg=f'{prefix} pattern: {pattern}',
                )
Esempio n. 10
0
def resolve(prefix: str, identifier: Optional[str] = None):
    """Resolve a CURIE.

    The following things can make a CURIE unable to resolve:

    1. The prefix is not registered with the Bioregistry
    2. The prefix has a validation pattern and the identifier does not match it
    3. There are no providers available for the URL

    :param prefix: The prefix as given in the request. It is normalized
        before any registry lookup; the raw value is only shown in templates.
    :param identifier: The local identifier. If omitted, the request is
        redirected to the resource page for the prefix.
    :returns: A redirect, or a ``(rendered page, 404)`` pair when the CURIE
        can not be resolved.
    """  # noqa:DAR101,DAR201
    norm_prefix = bioregistry.normalize_prefix(prefix)
    if norm_prefix is None:
        return render_template('resolve_missing_prefix.html',
                               prefix=prefix,
                               identifier=identifier), 404
    if identifier is None:
        return redirect(url_for('.' + resource.__name__, prefix=norm_prefix))

    # All registry lookups below use the normalized prefix rather than the
    # raw request value, so synonyms and case variants behave like the
    # canonical prefix.
    pattern = bioregistry.get_pattern(norm_prefix)
    if pattern and not bioregistry.validate(norm_prefix, identifier):
        return render_template(
            'resolve_invalid_identifier.html',
            prefix=prefix,
            identifier=identifier,
            pattern=pattern,
        ), 404

    url = bioregistry.get_link(norm_prefix, identifier, use_bioregistry_io=False)
    if not url:
        return render_template('resolve_missing_providers.html',
                               prefix=prefix,
                               identifier=identifier), 404
    try:
        # TODO remove any garbage characters?
        return redirect(url)
    except ValueError:  # headers could not be constructed
        return render_template('resolve_disallowed_identifier.html',
                               prefix=prefix,
                               identifier=identifier), 404
Esempio n. 11
0
 def test_collections(self):
     """Verify each collection's key, required metadata and resource list."""
     collections = bioregistry.read_collections()
     for key in sorted(collections):
         collection = collections[key]
         with self.subTest(key=key):
             # Keys are exactly seven digits.
             self.assertRegex(key, '^\\d{7}$')
             for field in ('name', 'author'):
                 self.assertIn(field, collection)
             author = collection['author']
             for field in ('name', 'orcid'):
                 self.assertIn(field, author)
             self.assertRegex(author['orcid'], bioregistry.get_pattern('orcid'))
             self.assertIn('description', collection)

             members = collection['resources']
             # Every listed prefix has to be present in the registry.
             unknown = set()
             for prefix in members:
                 if prefix not in self.registry:
                     unknown.add(prefix)
             self.assertEqual(set(), unknown)

             # No prefix may be listed more than once.
             repeated = {
                 prefix
                 for prefix, occurrences in Counter(members).items()
                 if occurrences > 1
             }
             self.assertEqual(set(), repeated, msg='Duplicates found')
Esempio n. 12
0
def load(
    load_all: bool,
    load_resources: bool = False,
    load_names: bool = False,
    load_alts: bool = False,
    load_xrefs: bool = True,
    load_synonyms: bool = False,
    reset: bool = False,
) -> None:
    """Load the database.

    :param load_all: Load every table, regardless of the individual flags
    :param load_resources: Add a ``Resource`` row for each non-deprecated
        registry prefix that is not already in the table
    :param load_names: Bulk-copy the names file into the ``Reference`` table
    :param load_alts: Append the alternative identifiers file to the ``Alt`` table
    :param load_xrefs: Bulk-copy the xrefs file into the ``Xref`` table
    :param load_synonyms: Bulk-copy the synonyms file into the ``Synonym`` table
    :param reset: Drop all tables before creating them
    """
    if reset:
        drop_all()
    create_all()

    if load_resources or load_all:
        existing_prefixes = {resource.prefix for resource in Resource.query.all()}
        for prefix, entry in tqdm(bioregistry.read_registry().items(),
                                  desc="loading resources"):
            if bioregistry.is_deprecated(prefix) or prefix in existing_prefixes:
                continue
            session.add(Resource(
                prefix=prefix,
                name=entry["name"],
                pattern=bioregistry.get_pattern(prefix),
            ))
        session.commit()

    # These paths are resolved unconditionally, as before, even when the
    # corresponding table is not going to be loaded.
    ooh_na_na_path = ensure_ooh_na_na()
    synonyms_path = ensure_synonyms()
    xrefs_path = ensure_inspector_javert()

    if load_alts or load_all:
        alts_path = ensure_alts()
        alts_df = pd.read_csv(alts_path, sep="\t",
                              dtype=str)  # prefix, alt, identifier
        logger.info("inserting %d alt identifiers", len(alts_df.index))
        alts_df.to_sql(name=Alt.__tablename__,
                       con=engine,
                       if_exists="append",
                       index=False)
        logger.info("committing alt identifier")
        session.commit()
        logger.info("done committing alt identifiers")

    # (label, gzipped file, target table, explicit column list or None, enabled)
    bulk_loads = [
        ("names", ooh_na_na_path, Reference, None, load_names),
        ("synonyms", synonyms_path, Synonym,
         ["prefix", "identifier", "name"], load_synonyms),
        ("xrefs", xrefs_path, Xref,
         ["prefix", "identifier", "xref_prefix", "xref_identifier", "source"],
         load_xrefs),
    ]
    for label, path, table, columns, enabled in bulk_loads:
        if not enabled and not load_all:
            continue
        logger.info("beginning insertion of %s", label)
        conn = engine.raw_connection()
        # The raw connection is not managed by the session, so it is closed
        # explicitly - also when the copy fails - instead of being leaked.
        try:
            logger.info("inserting with low-level copy of %s from: %s", label,
                        path)
            if columns:
                joined_columns = ", ".join(columns)
                logger.info("corresponding to columns: %s", joined_columns)
                column_clause = f" ({joined_columns})"
            else:
                column_clause = ""

            with conn.cursor() as cursor, gzip.open(path) as file:
                # The HEADER option makes COPY skip the first line of the file.
                sql = f"""COPY {table.__tablename__}{column_clause} FROM STDIN WITH CSV HEADER DELIMITER E'\\t' QUOTE E'\\b';"""
                logger.info("running SQL: %s", sql)
                cursor.copy_expert(sql=sql, file=file)

            logger.info("committing %s", label)
            conn.commit()
            logger.info("done committing %s", label)
        finally:
            conn.close()

    for label, model in [
        ("resources", Resource),
        ("references", Reference),
        ("alts", Alt),
        ("synonyms", Synonym),
        ("xrefs", Xref),
    ]:
        logger.info("number %s loaded: %s", label, format(model.query.count(), ","))