Example 1
 def __init__(self, url, **options):
     """Create a loader bound to *url* and initialise the DB layer.

     :param url: SQLAlchemy database URL.
     :param options: overrides for ``_default_options``.
     """
     self.db_url = url
     # Copy the defaults: updating the class-level dict in place would
     # leak options from one loader instance into every later instance.
     self.options = dict(self._default_options)
     self.options.update(options)
     self.client = OlsClient(page_size=self.options.get('page_size'))
     self.retry = 0
     self.db_init = False
     dal.db_init(self.db_url, **self.options)
     logger.info('Loaded with options %s ', self.options)
     logger.info('DB url %s ', self.db_url)
     self.current_ontology = None
Example 2
 def setUp(self):
     """Reset the test schema and build a fresh loader/client pair."""
     warnings.simplefilter("ignore", ResourceWarning)
     try:
         dal.wipe_schema(self.db_url)
     except sqlalchemy.exc.InternalError as err:
         logger.info("Unable to wipe schema %s", err)
     loader_options = dict(echo=False,
                           output_dir=log_dir,
                           verbosity=logging.DEBUG,
                           allowed_ontologies=self.test_ontologies,
                           ols_api_url=self.ols_api_url)
     self.loader = OlsLoader(self.db_url, **loader_options)
     self.client = OlsClient(base_site=self.ols_api_url)
Example 3
 def setUp(self):
     """Wipe the DB, then configure a loader restricted to a fixed ontology list."""
     warnings.simplefilter("ignore", ResourceWarning)
     try:
         dal.wipe_schema(self.db_url)
     except sqlalchemy.exc.InternalError as err:
         logger.info("Unable to wipe schema %s", err)
     self.loader = OlsLoader(self.db_url, echo=False, output_dir=log_dir)
     # Restrict relation-following to this explicit set of ontologies.
     self.loader.allowed_ontologies = [
         'GO', 'SO', 'PATO', 'HP', 'VT', 'EFO', 'PO', 'EO', 'TO',
         'CHEBI', 'PR', 'FYPO', 'PECO', 'BFO', 'BTO', 'CL', 'CMO',
         'ECO', 'MOD', 'MP', 'OGMS', 'UO', 'MONDO', 'PHI', 'DUO'
     ]
     self.client = OlsClient(base_site=self.ols_api_url)
Example 4
 def __init__(self, url, **options):
     """Create a loader bound to *url*, configure the OLS client and the DB.

     :param url: SQLAlchemy database URL.
     :param options: overrides for ``_default_options``; may include
         ``ols_api_url``, ``allowed_ontologies`` and ``verbosity``.
     """
     self.db_url = url
     # Copy the defaults: updating the class-level dict in place would
     # leak options from one loader instance into every later instance.
     self.options = dict(self._default_options)
     self.options.update(options)
     self.client = OlsClient(page_size=self.options.get('page_size'),
                             base_site=self.options.get('ols_api_url'))
     self.retry = 0
     if self.options.get('allowed_ontologies', None):
         self.allowed_ontologies = self.options.get('allowed_ontologies')
     self.db_init = False
     dal.db_init(self.db_url, **self.options)
     dal.create_schema()
     # Fall back to WARNING so a missing 'verbosity' option cannot raise
     # KeyError ('verbosity' is not part of _default_options).
     logging.basicConfig(level=self.options.get('verbosity', logging.WARNING))
     self.current_ontology = None
     self.report_log = None
     self.terms_log = None
Example 5
 def __init__(self) -> None:
     """Initialise the instance with a fresh OLS client."""
     super().__init__()
     ols_client = OlsClient()
     self._ols_client = ols_client
Example 6
class OlsLoader(object):
    """Map DTOs retrieved from the OLS client onto the expected database fields.

    The loader pulls ontologies, terms, relations, synonyms, alt ids and
    subsets from the OLS REST API and persists them through the ``dal``
    session layer.
    """

    # OLS relation names -> relation type names stored in the DB.
    __relation_map = {
        'parents': 'is_a',
        'children': 'is_a',
        'derives_from/develops_from': 'develops_from'
    }

    # OLS relation links that are navigation helpers, not real relations.
    __ignored_relations = [
        'graph', 'jstree', 'descendants', 'ancestors', 'hierarchicalParents',
        'children', 'parents', 'hierarchicalAncestors', 'hierarchicalChildren',
        'hierarchicalDescendants'
    ]

    # OBO synonym scopes -> synonym types stored in the DB.
    __synonym_map = {
        'hasExactSynonym': 'EXACT',
        'hasBroadSynonym': 'BROAD',
        'hasNarrowSynonym': 'NARROW',
        'hasRelatedSynonym': 'RELATED'
    }

    _default_options = dict(echo=False,
                            wipe=False,
                            db_version=getenv('ENS_VERSION', 99),
                            max_retry=5,
                            timeout=720,
                            process_relations=True,
                            process_parents=True,
                            page_size=1000,
                            output_dir=getenv("HOME"))

    # Ontologies whose relations are followed when loading terms.
    allowed_ontologies = [
        'GO', 'SO', 'PATO', 'HP', 'VT', 'EFO', 'PO', 'EO', 'TO', 'CHEBI', 'PR',
        'FYPO', 'PECO', 'BFO', 'BTO', 'CL', 'CMO', 'ECO', 'MOD', 'MP', 'OGMS',
        'UO', 'MONDO', 'PHI'
    ]

    def __init__(self, url, **options):
        """Create a loader bound to *url* and initialise the DB layer.

        :param url: SQLAlchemy database URL.
        :param options: overrides for ``_default_options``.
        """
        self.db_url = url
        # Copy the defaults: updating the class-level dict in place would
        # leak options from one loader instance into every later instance.
        self.options = dict(self._default_options)
        self.options.update(options)
        self.client = OlsClient(page_size=self.options.get('page_size'))
        self.retry = 0
        self.db_init = False
        dal.db_init(self.db_url, **self.options)
        logger.info('Loaded with options %s ', self.options)
        logger.info('DB url %s ', self.db_url)
        self.current_ontology = None

    def get_report_logger(self):
        """Return a per-ontology file logger writing ``<ONTOLOGY>_report.log``
        under the configured ``output_dir``."""
        report_logger = logging.getLogger(self.current_ontology + '_report')
        report_logger.setLevel(logging.INFO)
        # Attach the file handler only once per ontology logger.
        if not len(report_logger.handlers):
            log_file = '{}_report.log'
            ols_report_handler = logging.FileHandler(
                join(self.options.get('output_dir'),
                     log_file.format(self.current_ontology)))
            formatter = logging.Formatter('%(asctime)s:  - \t%(message)s')
            ols_report_handler.setFormatter(formatter)
            report_logger.addHandler(ols_report_handler)
        return report_logger

    def report(self, *messages):
        """Write *messages* to the current ontology report file.

        The first element is a %-style format string; any remaining
        elements are interpolated into it.
        """
        report = self.get_report_logger()
        if len(messages) > 1:
            report.info(messages[0] % messages[1:])
        else:
            report.info(messages[0])

    def init_meta(self):
        """Insert the schema version / type / patch Meta rows for this release."""
        with dal.session_scope() as session:
            prev_version = int(self.options.get('db_version')) - 1
            metas = {
                'schema_version':
                self.options.get('db_version'),
                'schema_type':
                'ontology',
                'patch':
                'patch_{}_{}_a.sql|schema version'.format(
                    prev_version, self.options.get('db_version'))
            }
            for meta_key, meta_value in metas.items():
                get_one_or_create(
                    Meta,
                    session,
                    meta_key=meta_key,
                    create_method_kwargs=dict(meta_value=meta_value))
        self.db_init = True

    def load_ontology(self, ontology, session, namespace=''):
        """
        Load single ontology data from OLS API and upsert its Meta rows.

        :param session: active DB session.
        :param ontology: ontology short name or a ``helpers.Ontology`` DTO.
        :param namespace: override namespace (defaults to the ontology id).
        :return: an Ontology model object.
        :raises RuntimeError: when *ontology* is neither a str nor a DTO.
        """
        if isinstance(ontology, str):
            ontology = self.client.ontology(identifier=ontology)
        elif not isinstance(ontology, helpers.Ontology):
            raise RuntimeError('Wrong parameter')

        ontology_name = ontology.ontology_id.upper()
        self.current_ontology = ontology_name
        namespace = namespace if namespace != '' else ontology.ontology_id
        m_ontology, created = get_one_or_create(
            Ontology,
            session,
            name=ontology_name,
            namespace=namespace,
            create_method_kwargs={'helper': ontology})
        if created:
            self.report('----------------------------------')
            self.report(
                'Ontology [%s][%s] - %s:' %
                (ontology_name, ontology.namespace, ontology.config.title))
            self.report('- Number of terms: %s' % ontology.number_of_terms)
            self.report('- Number of individuals: %s' %
                        ontology.number_of_individuals)
            self.report('- Number of properties: %s' %
                        ontology.number_of_properties)
        start = datetime.datetime.now()
        logger.debug('Updating meta for ontology %s', ontology_name)
        get_one_or_create(
            Meta,
            session,
            meta_key=ontology_name + '_load_date',
            create_method_kwargs=dict(meta_value=ontology_name + '/' +
                                      start.strftime('%c')))
        try:
            updated_at = datetime.datetime.strptime(ontology.updated,
                                                    '%Y-%m-%dT%H:%M:%S.%f%z')
        except ValueError:
            # Default update to current date time
            updated_at = datetime.datetime.now()
        get_one_or_create(
            Meta,
            session,
            meta_key=ontology_name + '_file_date',
            create_method_kwargs=dict(meta_value=ontology_name + '/' +
                                      updated_at.strftime('%c')))

        logger.info('Loaded [%s/%s] %s', m_ontology.name, m_ontology.namespace,
                    m_ontology.title)
        return m_ontology

    @staticmethod
    def wipe_ontology(ontology_name):
        """
        Completely remove all ontology related data from DBs
        :param ontology_name: specified ontology short name
        :return: boolean whether or not Ontology has been successfully deleted
        """
        with dal.session_scope() as session:
            logger.info('Wipe ontology %s', ontology_name)
            try:
                metas = session.query(Meta).filter(
                    Meta.meta_key.like("%" + ontology_name + "%")).all()
                for meta in metas:
                    logger.debug('Deleted meta %s', meta)
                    session.delete(meta)
                ontologies = session.query(Ontology).filter_by(
                    name=ontology_name.upper()).all()
                for ontology in ontologies:
                    logger.info('Deleting namespaced ontology %s - %s',
                                ontology.name, ontology.namespace)
                    # Bulk-delete dependents first; synchronize_session=False
                    # is safe because nothing here is re-read afterwards.
                    res = session.query(Synonym).filter(
                        Synonym.term_id == Term.term_id,
                        Term.ontology_id == ontology.id).delete(
                            synchronize_session=False)
                    logger.info('Wiped %s synonyms', res)
                    rel = session.query(Relation).filter(
                        Relation.child_term_id == Term.term_id,
                        Term.ontology_id == ontology.id).delete(
                            synchronize_session=False)
                    rel2 = session.query(Relation).filter(
                        Relation.parent_term_id == Term.term_id,
                        Term.ontology_id == ontology.id).delete(
                            synchronize_session=False)
                    logger.info('Wiped %s Relations', rel + rel2)

                    clo = session.query(Closure).filter(
                        Closure.child_term_id == Term.term_id,
                        Term.ontology_id == ontology.id).delete(
                            synchronize_session=False)
                    clo1 = session.query(Closure).filter(
                        Closure.parent_term_id == Term.term_id,
                        Term.ontology_id == ontology.id).delete(
                            synchronize_session=False)
                    clo2 = session.query(Closure).filter(
                        Closure.subparent_term_id == Term.term_id,
                        Term.ontology_id == ontology.id).delete(
                            synchronize_session=False)
                    logger.info('Wiped %s Closure', clo + clo1 + clo2)

                    res = session.query(AltId).filter(
                        AltId.term_id == Term.term_id,
                        Term.ontology_id == ontology.id).delete(
                            synchronize_session=False)
                    logger.info('Wiped %s AltIds', res)
                    res = session.query(Term).filter(
                        Term.ontology_id == ontology.id).delete(
                            synchronize_session=False)
                    logger.info('Wiped %s Terms', res)
                    session.delete(ontology)
                    logger.debug('...Done')
                return True
            except NoResultFound:
                logger.error('Ontology %s not found !', ontology_name)
        return False

    def load_ontology_terms(self, ontology, start=None, end=None):
        """Load terms for *ontology*, optionally restricted to a slice.

        :param ontology: ontology short name.
        :param start: optional slice start index.
        :param end: optional slice end index.
        :return: tuple (number of loaded terms, number of ignored terms).
        """
        nb_terms = 0
        nb_terms_ignored = 0
        o_ontology = self.client.ontology(identifier=ontology)
        if o_ontology:
            self.current_ontology = o_ontology.ontology_id.upper()
            if start is not None and end is not None:
                logger.info('Loading terms slice [%s, %s]', start, end)
                logger.info('-----------------------------------------')
                terms = o_ontology.terms()[start:end]
                logger.info('Slice len %s', len(terms))
                report_msg = ('- Loading %s terms slice [%s:%s]', ontology,
                              start, end)
            else:
                terms = o_ontology.terms()
                logger.info('Loading %s terms for %s', len(terms),
                            o_ontology.ontology_id.upper())
                report_msg = ('- Loading all terms (%s)', len(terms))
            with dal.session_scope() as session:
                for o_term in terms:
                    logger.info('Term namespace %s', o_term.namespace)
                    if o_term.is_defining_ontology and has_accession(o_term):
                        m_ontology, created = get_one_or_create(
                            Ontology,
                            session,
                            name=self.current_ontology,
                            namespace=o_term.namespace,
                            create_method_kwargs=dict(
                                version=o_ontology.version,
                                title=o_ontology.title))
                        logger.debug('Loaded term (from OLS) %s', o_term)
                        logger.debug(
                            'Adding/Retrieving namespaced ontology %s',
                            o_term.namespace)
                        logger.debug('Ontology namespace %s %s',
                                     m_ontology.name, m_ontology.namespace)
                        if m_ontology.namespace != o_term.namespace:
                            logger.warning(
                                'discrepancy term/ontology namespace')
                            # Include the objects in the message: extra args
                            # without a placeholder are silently dropped.
                            logger.warning('term: %s', o_term)
                            logger.warning('ontology: %s', o_ontology)
                        term = self.load_term(o_term, m_ontology, session)
                        if term:
                            session.add(term)
                            nb_terms += 1
                    else:
                        logger.info('Ignored term [%s:%s]',
                                    o_term.is_defining_ontology,
                                    o_term.short_form)
                        nb_terms_ignored += 1
                self.report(*report_msg)
                self.report('- Expected %s terms (defined in ontology)',
                            nb_terms)
                self.report('- Ignored %s terms (not defined in ontology)',
                            nb_terms_ignored)
                return nb_terms, nb_terms_ignored
        else:
            # logger.warn is deprecated in favour of logger.warning
            logger.warning('Ontology not found %s', ontology)
            return 0, 0

    def load_term(self, o_term, ontology, session, process_relation=True):
        """Load a single term DTO, with its subsets, alt ids, synonyms,
        relations and ancestors, into the DB.

        :param o_term: term DTO from OLS.
        :param ontology: ontology short name, Ontology model or Ontology DTO.
        :param session: active DB session.
        :param process_relation: whether to follow term relations.
        :return: Term model object, or None when the term has no accession.
        :raises RuntimeError: when *ontology* has an unsupported type.
        """
        if isinstance(ontology, str):
            m_ontology = self.load_ontology(ontology, session,
                                            o_term.namespace)
            # session.merge(m_ontology)
        elif isinstance(ontology, Ontology):
            m_ontology = ontology
        elif isinstance(ontology, helpers.Ontology):
            m_ontology, created = get_one_or_create(
                Ontology,
                session,
                name=ontology.ontology_id.upper(),
                namespace=o_term.namespace)
        else:
            raise RuntimeError('Wrong parameter')
        session.merge(m_ontology)

        if has_accession(o_term):
            if not o_term.description:
                # Fall back to a humanized label when OLS has no description.
                o_term.description = [inflection.humanize(o_term.label)]
            m_term, created = get_one_or_create(Term,
                                                session,
                                                accession=o_term.accession,
                                                create_method_kwargs=dict(
                                                    helper=o_term,
                                                    ontology=m_ontology))

            logger.info('Loaded Term [%s][%s][%s]', m_term.accession,
                        o_term.namespace, m_term.iri)
            if created:
                self.load_term_subsets(m_term, session)
                self.load_alt_ids(m_term, o_term, session)
                self.load_term_synonyms(m_term, o_term, session)
                if o_term.ontology_name.upper() in self.allowed_ontologies \
                        and self.options.get('process_relations', True) \
                        and process_relation:
                    self.load_term_relations(m_term, o_term, session)
                if not m_term.is_root and self.options.get(
                        'process_parents', True):
                    self.load_term_ancestors(m_term, o_term, session)
            return m_term
        else:
            return None

    def load_alt_ids(self, m_term, o_term, session):
        """Replace the stored alternative ids of *m_term* with the ones
        carried by the OLS DTO annotation."""
        session.query(AltId).filter(AltId.term == m_term).delete()
        if o_term.annotation.has_alternative_id:
            logger.info('Loaded AltId %s',
                        o_term.annotation.has_alternative_id)
            # Plain loop instead of a side-effect-only list comprehension.
            for alt_id in o_term.annotation.has_alternative_id:
                m_term.alt_ids.append(AltId(accession=alt_id, term=m_term))
            logger.debug('...Done')
        else:
            logger.info('...No AltIds')
        return m_term

    def load_term_subsets(self, term, session):
        """Create Subset rows for every subset name attached to *term*.

        :return: list of subset names processed for the term.
        """
        subsets = []
        if term.subsets:
            s_subsets = self.client.search(query=term.subsets,
                                           filters={
                                               'type': 'property',
                                               'exact': 'false'
                                           })
            seen = set()
            # Deduplicate on the lowercased short form, preserving order.
            unique_subsets = [
                x for x in s_subsets if x.short_form.lower() not in seen
                and not seen.add(x.short_form.lower())
            ]
            logger.debug("Loading unique subsets %s", unique_subsets)

            for subset in unique_subsets:
                subset_def = inflection.humanize(subset.label)
                m_subset, created = get_one_or_create(
                    Subset,
                    session,
                    name=inflection.underscore(subset.label),
                    create_method_kwargs=dict(definition=subset_def))
                if created:
                    # avoid call to API if already exists
                    logger.info("Created new subset %s", m_subset.name)
                    try:
                        details = self.client.property(identifier=subset.iri)
                        if not details:
                            logger.warning(
                                'Unable to retrieve subset details %s for ontology %s',
                                subset.label, term.ontology.name)
                        else:
                            m_subset.definition = details.definition
                            session.merge(m_subset)
                            session.commit()
                    except ebi.ols.api.exceptions.ObjectNotRetrievedError:
                        logger.error('Too Many errors from API %s %s',
                                     subset.label, term.ontology.name)
                # Record the subset so the summary log and the return value
                # actually reflect what was processed (was always empty).
                subsets.append(m_subset.name)
            logger.info('Loaded subsets: %s ', subsets)
        else:
            logger.info('...No Subset')
        return subsets

    def load_term_relations(self, m_term, o_term, session):
        """Load every non-ignored relation of *o_term* and link the related
        terms to *m_term*.

        :return: number of relations created.
        """
        relation_types = [
            rel for rel in o_term.relations_types
            if rel not in self.__ignored_relations
        ]
        logger.info('Terms relations %s', relation_types)
        n_relations = 0
        for rel_name in relation_types:
            # updates relation types
            o_relatives = o_term.load_relation(rel_name)

            logger.info('Loading %s relation %s (%s)...', m_term.accession,
                        rel_name, rel_name)
            logger.info('%s related terms ', len(o_relatives))
            for o_related in o_relatives:
                if has_accession(o_related):
                    # o_related.ontology_name in self.allowed_ontologies
                    relation_type, created = get_one_or_create(
                        RelationType,
                        session,
                        name=self.__relation_map.get(rel_name, rel_name))

                    m_related, relation = self.load_term_relation(
                        m_term, o_related, relation_type, session)
                    n_relations += 1
                    logger.debug('Loading related %s', m_related)
            logger.info('... Done (%s)', n_relations)
        return n_relations

    def rel_dest_ontology(self, m_term, o_term, session):
        """Resolve the ontology a related term should be attached to.

        :return: tuple (term DTO with details, Ontology model) or (None, None)
            when the destination cannot be resolved.
        """
        if o_term.is_defining_ontology:
            logger.debug('Related term is defined in SAME ontology')
            o_term_details = o_term
            r_ontology = m_term.ontology
            return o_term_details, r_ontology
        else:
            if has_accession(o_term):
                guessed_ontology = o_term.accession.split(':')[0]
                logger.debug('Term ontology: %s', guessed_ontology)
                if guessed_ontology not in self.allowed_ontologies:
                    logger.debug(
                        'Related term is defined in EXTERNAL ontology')
                    r_ontology = m_term.ontology
                    o_term_details = o_term
                    return o_term_details, r_ontology
                else:
                    logger.debug(
                        'Related term is defined in EXPECTED ontology')
                    o_term_details = self.client.term(identifier=o_term.iri,
                                                      silent=True,
                                                      unique=True)
                    if o_term_details:
                        o_onto_details = self.client.ontology(
                            identifier=o_term_details.ontology_name)
                        if o_onto_details:
                            namespace = o_term_details.namespace if o_term_details.namespace else o_term_details.ontology_name
                            r_ontology, created = get_one_or_create(
                                Ontology,
                                session,
                                name=o_onto_details.ontology_id.upper(),
                                namespace=namespace,
                                create_method_kwargs=dict(
                                    version=o_onto_details.version,
                                    title=o_onto_details.title))
                            return o_term_details, r_ontology
        return None, None

    def load_term_relation(self, m_term, o_term, relation_type, session):
        """Create (or fetch) the related term and add a parent relation.

        :return: tuple (related Term, Relation) or (None, None) when the
            related term cannot be resolved.
        """
        if has_accession(o_term):
            try:
                m_related = session.query(Term).filter_by(
                    accession=o_term.accession).one()
                logger.info('Exists %s', m_related)
            except NoResultFound:
                o_term_details, r_ontology = self.rel_dest_ontology(
                    m_term, o_term, session)
                if o_term_details and has_accession(o_term_details):
                    m_related = self.load_term(
                        o_term=o_term_details,
                        ontology=o_term_details.ontology_name,
                        session=session)
                else:
                    logger.warning(
                        'Term %s (%s) relation %s with %s not found in %s ',
                        m_term.accession, m_term.ontology.name,
                        relation_type.name, o_term.iri, o_term.ontology_name)
                    return None, None
            if m_related:
                logger.info('Adding relation %s %s %s', m_term.accession,
                            relation_type.name, m_related.accession)
                m_relation = m_term.add_parent_relation(
                    m_related, relation_type, session)
                logger.debug('Loaded relation %s %s %s', m_term.accession,
                             relation_type.name, m_related.accession)
                return m_related, m_relation
            else:
                return None, None
        # Explicit fall-through: callers always unpack two values, so never
        # return a bare None when the term has no accession.
        return None, None

    def load_term_ancestors(self, m_term, o_term, session):
        """Link *m_term* to all its 'parents' via is_a relations.

        :return: number of ancestors linked.
        """
        # delete old ancestors
        try:
            ancestors = o_term.load_relation('parents')
            r_ancestors = 0
            relation_type, created = get_one_or_create(RelationType,
                                                       session,
                                                       name='is_a')
            for ancestor in ancestors:
                logger.debug('Parent %s ', ancestor.accession)
                if has_accession(ancestor):
                    parent, relation = self.load_term_relation(
                        m_term, ancestor, relation_type, session)
                    if parent:
                        r_ancestors = r_ancestors + 1
            return r_ancestors
        except CoreAPIException as e:
            # Supply the argument for the %s placeholder (was missing).
            logger.info('...No parent %s', e)
            return 0

    def load_term_synonyms(self, m_term, o_term, session):
        """Replace the synonyms of *m_term* with those from the OLS DTO.

        OBO xref synonyms take precedence; plain synonyms are stored as
        EXACT and related-synonym annotations as RELATED.
        :return: list of synonyms created.
        """
        logger.debug('Loading term synonyms...')

        session.query(Synonym).filter(Synonym.term == m_term).delete()
        n_synonyms = []

        obo_synonyms = o_term.obo_synonym or []
        for synonym in obo_synonyms:
            if isinstance(synonym, itypes.Dict):
                try:
                    # Build "<database>:<id>" from the first xref, if any.
                    # (The previous one-liner parsed as
                    # ``database or (':' + id)`` due to operator precedence,
                    # dropping the id whenever a database name was present.)
                    if 'xrefs' in synonym and len(synonym['xrefs']) > 0:
                        xref = synonym['xrefs'][0]
                        db_xref = (xref['database'] or '') + ':' + xref['id']
                    else:
                        db_xref = ''
                    logger.info('Term synonym [%s - %s (%s)]', synonym['name'],
                                self.__synonym_map[synonym['scope']], db_xref)
                    m_syno, created = get_one_or_create(
                        Synonym,
                        session,
                        term=m_term,
                        name=synonym['name'],
                        create_method_kwargs=dict(
                            db_xref=db_xref,
                            type=self.__synonym_map[synonym['scope']]))
                    if created:
                        n_synonyms.append(synonym)
                except KeyError as e:
                    logging.error('Parse Synonym error %s: %s', synonym,
                                  str(e))
            else:
                logging.error('obo_synonym type error: %s', synonym)
        # OBO Xref are winning against standard synonymz
        synonyms = o_term.synonyms or []
        for synonym in synonyms:
            logger.info('Term synonym [%s - EXACT (No dbXref)]', synonym)
            m_syno, created = get_one_or_create(
                Synonym,
                session,
                term=m_term,
                name=synonym,
                create_method_kwargs=dict(type='EXACT'))
            if created:
                n_synonyms.append(synonym)
        if hasattr(o_term.annotation, 'has_related_synonym'):
            other_synonyms = o_term.annotation.has_related_synonym or []
            for synonym in other_synonyms:
                logger.info('Term synonym [%s - EXACT (No dbXref)]', synonym)
                m_syno, created = get_one_or_create(
                    Synonym,
                    session,
                    term=m_term,
                    name=synonym,
                    create_method_kwargs=dict(type='RELATED'))
                if created:
                    n_synonyms.append(synonym)
        if len(n_synonyms) == 0:
            logger.info('...No Synonym')
        logger.debug('...Done')
        return n_synonyms

    def final_report(self, ontology_name):
        """ Create a report from actual inserted data for ontology """
        session = dal.get_session()
        ontologies = session.query(Ontology).filter_by(
            name=ontology_name.upper()).all()
        self.current_ontology = ontology_name
        for ontology in ontologies:
            synonyms = session.query(Synonym).filter(
                Synonym.term_id == Term.term_id,
                Term.ontology_id == ontology.id).count()
            relations = session.query(Relation).filter(
                Relation.ontology == ontology).count()
            closures = session.query(Closure).filter(
                Closure.ontology == ontology).count()
            alt_ids = session.query(AltId).filter(
                AltId.term_id == Term.term_id,
                Term.ontology_id == ontology.id).count()
            terms = session.query(Term).filter(
                Term.ontology == ontology).count()
            repeat = len('Ontology %s / Namespace %s' %
                         (ontology.name, ontology.namespace))
            self.report('-' * repeat)
            self.report('Ontology %s / Namespace %s', ontology.name,
                        ontology.namespace)
            self.report('-' * repeat)
            self.report('- Imported Terms %s', terms)
            self.report('- Imported Relations %s', relations)
            self.report('- Imported Alt Ids %s', alt_ids)
            self.report('- Imported Synonyms %s', synonyms)
            self.report('- Generated Closure %s', closures)
Example 7
import logging
import typing
from typing import Any

import pandas as pd
from ebi.ols.api.client import OlsClient
from pandas_schema import Column, Schema
from pandas_schema.validation import LeadingWhitespaceValidation, TrailingWhitespaceValidation, _SeriesValidation

from sdrfcheck.sdrf import sdrf
from sdrfcheck.sdrf.exceptions import LogicError

# Module-wide OLS client shared by the SDRF validation helpers below.
client = OlsClient()

# Identifiers of the supported SDRF checklist templates.
HUMAN_TEMPLATE = 'human'
DEFAULT_TEMPLATE = 'default'
VERTEBRATES_TEMPLATE = 'vertebrates'
NON_VERTEBRATES_TEMPLATE = 'nonvertebrates'
PLANTS_TEMPLATE = 'plants'
CELL_LINES_TEMPLATE = 'cell_lines'
MASS_SPECTROMETRY = 'mass_spectrometry'
# Templates selectable by the checker; MASS_SPECTROMETRY is deliberately
# excluded here — presumably handled separately (TODO confirm with callers).
ALL_TEMPLATES = [
    DEFAULT_TEMPLATE, HUMAN_TEMPLATE, VERTEBRATES_TEMPLATE,
    NON_VERTEBRATES_TEMPLATE, PLANTS_TEMPLATE, CELL_LINES_TEMPLATE
]

TERM_NAME = 'NM'


def check_minimum_columns(panda_sdrf=None, minimun_columns: int = 0):
    """Return True when the SDRF table has fewer columns than required.

    Note: the keyword spelling ``minimun_columns`` is kept for
    backward compatibility with existing callers.
    """
    column_count = len(panda_sdrf.get_sdrf_columns())
    return column_count < minimun_columns
Example 8
class TestOLSLoaderBasic(unittest.TestCase):
    """Integration tests for OlsLoader against a configurable OLS API.

    Each test starts from a wiped schema (see setUp).  The database and
    OLS endpoints are taken from the DB_TEST_URL / OLS_API_URL environment
    variables, defaulting to local services.
    """

    # Tell multiprocess-aware runners not to share state between processes.
    _multiprocess_shared_ = False
    db_url = os.getenv(
        'DB_TEST_URL',
        'mysql+pymysql://root@localhost:3306/ols_test_ontology?charset=utf8&autocommit=true'
    )
    ols_api_url = os.getenv('OLS_API_URL', 'http://localhost:8080/api')
    # Ontologies the loader is allowed to process during these tests.
    test_ontologies = ['AERO', 'DUO', 'BFO', 'EO', 'SO', 'ECO', 'PHI', 'OGMS']

    @classmethod
    def setUpClass(cls):
        """Wipe the target schema once before any test in this class runs."""
        logger.info('Using %s connexion string', cls.db_url)
        warnings.simplefilter("ignore", ResourceWarning)
        try:
            dal.wipe_schema(cls.db_url)
        except sqlalchemy.exc.InternalError as e:
            logger.info("Unable to wipe schema %s", e)

    def setUp(self):
        """Wipe the schema and build a fresh loader + client for each test."""
        warnings.simplefilter("ignore", ResourceWarning)
        try:
            dal.wipe_schema(self.db_url)
        except sqlalchemy.exc.InternalError as e:
            logger.info("Unable to wipe schema %s", e)
        self.loader = OlsLoader(self.db_url,
                                echo=False,
                                output_dir=log_dir,
                                verbosity=logging.DEBUG,
                                allowed_ontologies=self.test_ontologies,
                                ols_api_url=self.ols_api_url)
        self.client = OlsClient(base_site=self.ols_api_url)

    def testCascadeDelete(self):
        """Wiping an ontology by name must cascade-delete its terms,
        synonyms, alt ids, relations and closures while leaving other
        ontologies untouched (MySQL only)."""
        if 'mysql' not in self.db_url:
            self.skipTest('Only with mysql')
        with dal.session_scope() as session:
            m_ontology = Ontology(id=1000,
                                  name='GO',
                                  _namespace='namespace',
                                  version='1',
                                  title='Ontology test')
            m_ontology_2 = Ontology(id=1001,
                                    name='GO',
                                    _namespace='namespace 2',
                                    version='1',
                                    title='Ontology test 2')
            m_ontology_3 = Ontology(id=1002,
                                    name='FPO',
                                    _namespace='namespace 3',
                                    version='1',
                                    title='Ontology test 2')
            session.add(m_ontology)
            session.add(m_ontology_2)
            session.add(m_ontology_3)
            rel_type, created = get_one_or_create(RelationType,
                                                  session,
                                                  name='is_a')
            # Build 5 terms per ontology, cross-wired with synonyms,
            # alt ids, parent/child relations and closure rows.
            for i in range(0, 5):
                m_term = Term(term_id=100 + i,
                              accession='GO:0000%s' % i,
                              name='Term %s' % i,
                              ontology=m_ontology)
                m_term_2 = Term(term_id=1010 + i,
                                accession='GO:1000%s' % i,
                                name='Term %s' % i,
                                ontology=m_ontology_2)
                m_term_3 = Term(term_id=1020 + i,
                                accession='T3:0000%s' % i,
                                name='Term %s' % i,
                                ontology=m_ontology_3)
                syn_1 = Synonym(synonym_id=1000 + i,
                                name='TS:000%s' % i,
                                type=SynonymTypeEnum.EXACT,
                                db_xref='REF:000%s' % i)
                m_term.synonyms.append(syn_1)
                syn_2 = Synonym(synonym_id=1010 + i,
                                name='TS2:000%s' % i,
                                type=SynonymTypeEnum.EXACT,
                                db_xref='REF:000%s' % i)
                m_term_2.synonyms.append(syn_2)
                session.add_all([syn_1, syn_2])
                alt_id = AltId(alt_id=1000 + i, accession='ATL:000%s' % i)
                m_term.alt_ids.append(alt_id)
                session.add(alt_id)
                m_term.add_child_relation(session=session,
                                          rel_type=rel_type,
                                          child_term=m_term_3)
                m_term.add_parent_relation(session=session,
                                           rel_type=rel_type,
                                           parent_term=m_term_2)
                closure_1 = Closure(closure_id=1000 + i,
                                    child_term=m_term,
                                    parent_term=m_term_2,
                                    distance=1,
                                    ontology=m_ontology)
                closure_2 = Closure(closure_id=1010 + i,
                                    parent_term=m_term,
                                    child_term=m_term_3,
                                    distance=3,
                                    ontology=m_ontology_2)
                closure_3 = Closure(closure_id=1020 + i,
                                    parent_term=m_term_2,
                                    child_term=m_term_3,
                                    subparent_term=m_term,
                                    distance=2,
                                    ontology=m_ontology_3)
                session.add_all([closure_1, closure_2, closure_3])

            self.assertEqual(session.query(Synonym).count(), 10)
            self.assertEqual(session.query(AltId).count(), 5)
            self.assertEqual(session.query(Relation).count(), 10)
            self.assertEqual(session.query(Closure).count(), 12)

        with dal.session_scope() as session:
            # Both 'GO' ontologies are wiped; only FPO ('T3') terms survive.
            self.loader.wipe_ontology('GO')
            for term in session.query(Term).all():
                self.assertTrue(term.accession.startswith('T3'))
            self.assertEqual(
                0,
                session.query(Term).filter(Term.ontology_id == 1).count())
            self.assertEqual(session.query(Term).count(), 5)
            self.assertEqual(session.query(Synonym).count(), 0)
            self.assertEqual(session.query(AltId).count(), 0)
            self.assertEqual(session.query(Relation).count(), 0)
            self.assertEqual(session.query(Closure).count(), 0)

    def testLoadOntologyTerms(self):
        """Every term reported by OLS for PHI is inserted; an unknown
        ontology name raises NotFoundException."""
        session = dal.get_session()
        ontology_name = 'PHI'
        self.loader.load_ontology(ontology_name, session)
        expected, ignored = self.loader.load_ontology_terms(ontology_name)
        logger.info('Expected terms %s', expected)
        inserted = session.query(Term).count()
        logger.info('Inserted terms %s', inserted)
        self.assertEqual(expected, inserted)
        logger.info('Testing unknown ontology')
        with self.assertRaises(NotFoundException):
            expected, ignored = self.loader.load_ontology_terms(
                'unknownontology')
            self.assertEqual(0, expected)

    def testLoadOntology(self):
        """Loading the same ontology name under a second namespace creates
        a second Ontology row, and the final report log file is produced."""
        # test retrieve
        # test try to create duplicated
        ontology_name = 'ogms'

        with dal.session_scope() as session:
            m_ontology = self.loader.load_ontology(ontology_name, session)
            logger.info('Loaded ontology %s', m_ontology)
            logger.info('number of Terms %s', m_ontology.number_of_terms)
            r_ontology = session.query(Ontology).filter_by(
                name=ontology_name, namespace='OGMS').one()
            ontology_id = r_ontology.id
            logger.info('(RE) Loaded ontology %s', r_ontology)
            self.assertEqual(m_ontology.name, r_ontology.name)
            self.assertEqual(m_ontology.version, r_ontology.version)
            assert isinstance(r_ontology, Ontology)
            # automatically create another one with another namespace
            new_ontology, created = get_one_or_create(
                Ontology,
                session,
                name=r_ontology.name,
                namespace='another_namespace')

            self.assertTrue(created)
            for i in range(0, 5):
                session.add(
                    Term(accession='CCC_00000{}'.format(i),
                         name='Term {}'.format(i),
                         ontology=r_ontology,
                         is_root=False,
                         is_obsolete=False))
            self.assertTrue(new_ontology.name == r_ontology.name)

        session = dal.get_session()
        self.assertEqual(
            5,
            session.query(Term).filter_by(ontology_id=ontology_id).count())
        ontologies = session.query(Ontology).filter_by(name=ontology_name)
        self.assertEqual(ontologies.count(), 2)
        self.loader.final_report(ontology_name)
        self.assertTrue(
            os.path.isfile(join(log_dir, ontology_name + '.ontology.log')))

    def testUpperCase(self):
        """Ontology name is stored upper-cased and loaded terms attach to
        the existing upper-cased ontology row."""
        ontology_name = 'OGMS'
        self.loader.options['process_relations'] = False
        with dal.session_scope() as session:
            m_ontology = self.loader.load_ontology(ontology_name, session)
            session.add(m_ontology)
            self.assertEqual(m_ontology.name, 'OGMS')
            onto_id = m_ontology.id
            logger.info("Ontololgy name in DB %s", m_ontology.name)
            # NOTE(review): loads 'aero' terms while the ontology under test
            # is OGMS -- confirm this mismatch is intentional.
            self.loader.load_ontology_terms('aero', 0, 50)
            terms = session.query(Term).all()
            for term in terms:
                if term.ontology.name == 'OGMS':
                    self.assertTrue(term.ontology_id == onto_id)

    def testRelatedNonExpected(self):
        """With relation processing on, related terms beyond the requested
        slice may also be inserted, so inserted >= expected."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = True
        with dal.session_scope() as session:
            ontology_name = 'ECO'
            expected, _ignored = self.loader.load_ontology_terms(ontology_name,
                                                                 start=0,
                                                                 end=50)
            logger.info('Expected terms %s', expected)
            # NOTE(review): Term query filtered on Ontology.name without an
            # explicit join -- verify the implicit join is intended.
            s_terms = session.query(Term).filter(
                Ontology.name == ontology_name)
            inserted = s_terms.count()
            logger.info('Inserted terms %s', inserted)
            self.assertGreaterEqual(inserted, expected)

    def testLoadTimeMeta(self):
        """Schema init + ontology load writes the expected Meta rows
        (file date, load date, schema patch) in the '%c'-based format."""
        ontology_name = 'BFO'
        self.loader.options['wipe'] = True
        self.loader.options['ens_version'] = 99

        init_schema(self.db_url, **self.loader.options)
        with dal.session_scope() as session:
            m_ontology = self.loader.load_ontology(ontology_name, session)
            session.add(m_ontology)
            self.assertIsInstance(m_ontology, Ontology)
        session = dal.get_session()
        meta_file_date = session.query(Meta).filter_by(meta_key=ontology_name +
                                                       '_file_date').one()
        meta_start = session.query(Meta).filter_by(meta_key=ontology_name +
                                                   '_load_date').one()
        meta_schema = session.query(Meta).filter_by(meta_key='patch').one()
        self.assertEqual('patch_98_99_a.sql|schema version',
                         meta_schema.meta_value)
        # Load date must parse and lie in the past.
        self.assertTrue(
            datetime.datetime.strptime(meta_start.meta_value,
                                       ontology_name.upper() +
                                       "/%c") < datetime.datetime.now())
        logger.debug('meta load_all date: %s', meta_start)
        logger.debug('meta file date: %s', meta_file_date)
        try:
            datetime.datetime.strptime(meta_file_date.meta_value,
                                       ontology_name.upper() + "/%c")
            datetime.datetime.strptime(meta_start.meta_value,
                                       ontology_name.upper() + "/%c")
        except ValueError:
            self.fail('Wrong date format')

    def testLogger(self):
        """Per-ontology and per-term-slice log files are created."""
        self.loader = OlsLoader(self.db_url,
                                echo=False,
                                output_dir=log_dir,
                                verbosity='DEBUG')

        with dal.session_scope() as session:
            self.loader.load_ontology('bfo', session)
            self.assertTrue(os.path.isfile(join(log_dir, 'bfo.ontology.log')))
            self.loader.load_ontology_terms('bfo', 0, 15)
            self.assertTrue(os.path.isfile(join(log_dir,
                                                'bfo.terms.0.15.log')))

    def testHiveLoader(self):
        """OLSHiveLoader runnable writes schema_type/schema_version/patch
        Meta rows for the requested Ensembl version."""
        class RunnableWithParams(OLSHiveLoader):
            # Minimal eHive runnable: inject params without a live hive DB.
            def __init__(self, d):
                self._BaseRunnable__params = eHive.Params.ParamContainer(d)
                self.input_job = Job()
                self.input_job.transient_error = True
                self.debug = 1

        hive_loader = RunnableWithParams({
            'ontology_name': 'duo',
            'ens_version': 100,
            'db_url': self.db_url,
            'output_dir': log_dir
        })
        hive_loader.run()
        with dal.session_scope() as session:
            metas = session.query(Meta).all()
            self.assertGreaterEqual(len(metas), 2)
            schema_type = session.query(Meta).filter_by(
                meta_key='schema_type').one()
            self.assertEqual(schema_type.meta_value, 'ontology')
            schema_type = session.query(Meta).filter_by(
                meta_key='schema_version').one()
            self.assertEqual(schema_type.meta_value, '100')
            schema_patch = session.query(Meta).filter_by(
                meta_key='patch').one()
            self.assertEqual(schema_patch.meta_value,
                             'patch_99_100_a.sql|schema version')

    def testOntologyLoader(self):
        """OLSOntologyLoader runnable records the AERO load/file date
        Meta rows."""
        class OntologyLoader(OLSOntologyLoader):
            # Minimal eHive runnable wired to local pipe files for worker IO.
            def __init__(self, d):
                self._BaseRunnable__params = eHive.Params.ParamContainer(d)
                self._BaseRunnable__read_pipe = open(join(base_dir, 'hive.in'),
                                                     mode='rb',
                                                     buffering=0)
                self._BaseRunnable__write_pipe = open(join(
                    base_dir, 'hive.out'),
                                                      mode='wb',
                                                      buffering=0)
                self.input_job = Job()
                self.input_job.transient_error = True
                self.debug = 1

        hive_loader = OntologyLoader({
            'ontology_name': 'aero',
            'ens_version': 100,
            'db_url': self.db_url,
            'output_dir': log_dir,
            'verbosity': '4',
            'wipe_one': 0,
            'allowed_ontologies': self.test_ontologies,
            'ols_api_url': self.ols_api_url
        })

        hive_loader.run()
        with dal.session_scope() as session:
            self.assertIsNotNone(
                session.query(Meta).filter_by(meta_key='AERO_load_date').one())
            self.assertIsNotNone(
                session.query(Meta).filter_by(meta_key='AERO_file_date').one())

    def testTermHiveLoader(self):
        """OLSTermsLoader runnable loads BFO term slices; a second slice
        only adds new rows and no duplicate relation types."""
        class TermLoader(OLSTermsLoader):
            # Minimal eHive runnable: inject params without a live hive DB.
            def __init__(self, d):
                self._BaseRunnable__params = eHive.Params.ParamContainer(d)
                self.input_job = Job()
                self.input_job.transient_error = True
                self.debug = 1

        params_set = {
            'ontology_name': 'bfo',
            'db_url': self.db_url,
            'output_dir': log_dir,
            'verbosity': '4',
            '_start_term_index': 0,
            '_end_term_index': 19,
            'ols_api_url': self.ols_api_url,
            'allowed_ontologies': self.test_ontologies,
            'page_size': 20
        }

        term_loader = TermLoader(params_set)
        term_loader.run()
        with dal.session_scope() as session:
            self.assertIsNotNone(
                session.query(Ontology).filter_by(name='BFO').one())
            self.assertGreaterEqual(session.query(Term).count(), 17)
            self.assertGreaterEqual(session.query(Relation).count(), 17)
            self.assertGreaterEqual(session.query(RelationType).count(), 1)

        # Load the remaining slice with the same runnable class.
        params_set['_start_term_index'] = 20
        params_set['_end_term_index'] = 100
        term_loader = TermLoader(params_set)
        term_loader.run()
        with dal.session_scope() as session:
            self.assertIsNotNone(
                session.query(Ontology).filter_by(name='BFO').one())
            self.assertGreaterEqual(session.query(Term).count(), 18)
            self.assertGreaterEqual(session.query(Relation).count(), 18)
            self.assertEqual(session.query(RelationType).count(), 1)
        self.assertTrue(os.path.isfile(join(log_dir, 'bfo.ontology.log')))
        self.assertTrue(os.path.isfile(join(log_dir, 'bfo.terms.0.15.log')))

    def testPHIHiveLoader(self):
        """OLSLoadPhiBaseIdentifier runnable loads PHI terms."""
        class PhiTermLoader(OLSLoadPhiBaseIdentifier):
            # Minimal eHive runnable: inject params without a live hive DB.
            def __init__(self, d):
                self._BaseRunnable__params = eHive.Params.ParamContainer(d)
                self.input_job = Job()
                self.input_job.transient_error = True
                self.debug = 1

        params_set = {
            'ontology_name': 'PHI',
            'db_url': self.db_url,
            'output_dir': log_dir,
            'verbosity': '4',
            '_start_term_index': 0,
            '_end_term_index': 150,
            'ols_api_url': self.ols_api_url,
            'allowed_ontologies': self.test_ontologies,
            'page_size': 20
        }

        term_loader = PhiTermLoader(params_set)
        term_loader.run()
        with dal.session_scope() as session:
            self.assertIsNotNone(
                session.query(Ontology).filter_by(name='PHI').one())
            self.assertGreaterEqual(session.query(Term).count(), 80)
            self.assertEqual(session.query(RelationType).count(), 1)

    def testRelationSingleTerm(self):
        """A single ECO term with relations can be loaded and committed."""
        with dal.session_scope() as session:
            o_term = self.client.term(
                identifier='http://purl.obolibrary.org/obo/ECO_0007571')
            m_term = self.loader.load_term(o_term, 'ECO', session)
            session.add(m_term)
            session.commit()

    def testAltIds(self):
        """Alternate ids attached to a term in OLS are persisted."""
        self.loader.options['process_relations'] = False
        self.loader.options['process_parents'] = False

        with dal.session_scope() as session:
            o_term = self.client.term(
                identifier='http://purl.obolibrary.org/obo/SO_0000569')
            # was http://purl.obolibrary.org/obo/GO_0005261
            m_term = self.loader.load_term(o_term, 'SO', session)
            session.add(m_term)
            session.commit()
            term = session.query(Term).filter_by(accession='SO:0000569').one()
            logger.debug("Retrieved alt Ids: %s", term.alt_ids)
            self.assertGreaterEqual(len(term.alt_ids), 1)

    def testSubsetEco(self):
        """Subsets referenced by a term are created with definitions and
        match the term's comma-separated subsets attribute."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = True
        with dal.session_scope() as session:
            o_term = self.client.detail(
                iri="http://purl.obolibrary.org/obo/ECO_0000305",
                ontology_name='ECO',
                type=helpers.Term)
            m_term = self.loader.load_term(o_term, 'ECO', session)
            session.commit()
            subsets = session.query(Subset).all()
            subsets_name = [sub.name for sub in subsets]
            term_subsets = m_term.subsets.split(',')
            self.assertEqual(set(subsets_name), set(term_subsets))
            for definition in subsets:
                self.assertIsNotNone(definition)
# Ejemplo n.º 9
# 0
class TestOLSLoaderRemote(unittest.TestCase):
    """Integration tests for OlsLoader against the public EBI OLS API.

    Requires network access; the database is wiped before and after
    every test.
    """

    # Tell multiprocess-aware runners not to share state between processes.
    _multiprocess_shared_ = False
    db_url = os.getenv(
        'DB_TEST_URL',
        'mysql+pymysql://root@localhost:3306/ols_test_ontology?charset=utf8&autocommit=true'
    )
    ols_api_url = 'https://www.ebi.ac.uk/ols/api'

    @classmethod
    def setUpClass(cls):
        """Wipe the target schema once before any test in this class runs."""
        logger.info('Using %s connexion string', cls.db_url)
        try:
            dal.wipe_schema(cls.db_url)
        except sqlalchemy.exc.InternalError as e:
            logger.info("Unable to wipe schema %s", e)

    def setUp(self):
        """Wipe the schema and build a fresh loader + client per test."""
        warnings.simplefilter("ignore", ResourceWarning)
        try:
            dal.wipe_schema(self.db_url)
        except sqlalchemy.exc.InternalError as e:
            logger.info("Unable to wipe schema %s", e)
        self.loader = OlsLoader(self.db_url, echo=False, output_dir=log_dir)
        # Restrict the loader to the production set of ontologies.
        self.loader.allowed_ontologies = [
            'GO', 'SO', 'PATO', 'HP', 'VT', 'EFO', 'PO', 'EO', 'TO', 'CHEBI',
            'PR', 'FYPO', 'PECO', 'BFO', 'BTO', 'CL', 'CMO', 'ECO', 'MOD',
            'MP', 'OGMS', 'UO', 'MONDO', 'PHI', 'DUO'
        ]
        self.client = OlsClient(base_site=self.ols_api_url)

    def tearDown(self):
        """Leave a clean schema behind for the next test."""
        dal.wipe_schema(self.db_url)

    def testGoExpectedLinks(self):
        """GO root terms are filed under their respective GO namespaces."""
        # NOTE(review): go_term is unused -- kept as documentation of the
        # accessions checked below.
        go_term = [
            'GO_0005575',
            'GO_0003674',
            'GO_0008150',
        ]
        self.loader.options['process_relations'] = False
        self.loader.options['process_parents'] = True
        with dal.session_scope() as session:
            terms = self.loader.load_ontology_terms('GO', 0, 20)
            ontologies = session.query(Ontology).filter_by(name='GO')
            namespaces = [onto.namespace for onto in ontologies]
            self.assertSetEqual(
                set([
                    'go', 'biological_process', 'cellular_component',
                    'molecular_function'
                ]), set(namespaces))
            GO_0005575 = session.query(Term).filter_by(
                accession='GO:0005575').one()
            GO_0003674 = session.query(Term).filter_by(
                accession='GO:0003674').one()
            GO_0008150 = session.query(Term).filter_by(
                accession='GO:0008150').one()
            self.assertEqual('biological_process',
                             GO_0008150.ontology.namespace)
            self.assertEqual('cellular_component',
                             GO_0005575.ontology.namespace)
            self.assertEqual('molecular_function',
                             GO_0003674.ontology.namespace)

    def testPartOfRelationship(self):
        """part_of / occurs_in relation types from OLS end up on the
        loaded term's parent relations."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = False
        with dal.session_scope() as session:
            o_term = self.client.detail(
                iri="http://purl.obolibrary.org/obo/GO_0032042",
                ontology_name='GO',
                type=helpers.Term)
            m_term = self.loader.load_term(o_term, 'GO', session)
            self.assertIn('part_of', o_term.relations_types)
            self.assertIn('part_of', [
                relation.relation_type.name for relation in m_term.parent_terms
            ])
            self.assertIn('occurs_in', [
                relation.relation_type.name for relation in m_term.parent_terms
            ])

    def testChebi(self):
        """CHEBI subset definitions differ from their names."""
        self.loader.options['process_relations'] = False
        self.loader.options['process_parents'] = False
        self.loader.load_ontology_terms('CHEBI', start=1200, end=1250)
        session = dal.get_session()
        subsets = session.query(Subset).all()
        for subset in subsets:
            self.assertNotEqual(subset.definition, subset.name)

    def testTermInvalidDefinition(self):
        '''
        Term has invalid characters in the definition (e.g. "\\n")
        '''
        self.loader.options['process_relations'] = False
        self.loader.options['process_parents'] = False
        with dal.session_scope() as session:
            o_term = self.client.detail(
                iri="http://purl.obolibrary.org/obo/GO_0090481",
                ontology_name='GO',
                type=helpers.Term)
            # Only meaningful while the upstream description still embeds
            # a newline; otherwise skip rather than assert vacuously.
            if '\n' not in o_term.description:
                self.skipTest(
                    "Term Description does not contain invalid characters.")
            else:
                m_term = self.loader.load_term(o_term, 'GO', session)
                self.assertNotIn('\n', m_term.description)

    def testTermNoDefinition(self):
        '''
        Term does not declared a definition neither within annotation, label is therefore inserted
        '''
        self.loader.options['process_relations'] = False
        self.loader.options['process_parents'] = False
        with dal.session_scope() as session:
            o_term = self.client.detail(
                iri="http://purl.obolibrary.org/obo/MONDO_0020003",
                ontology_name='MONDO',
                type=helpers.Term)
            m_term = self.loader.load_term(o_term, 'MONDO', session)
            self.assertEqual(m_term.name, m_term.description.lower())

    def testLongTermDefinition(self):
        """Synonyms of terms with long definitions are never empty."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = True
        with dal.session_scope() as session:
            o_term = self.client.detail(
                iri="http://purl.obolibrary.org/obo/UBERON_0000948",
                ontology_name='UBERON',
                type=helpers.Term)
            m_term = self.loader.load_term(o_term, 'UBERON', session)
            for syn in m_term.synonyms:
                self.assertNotEqual(syn.name, '')

            o_term = self.client.detail(
                iri="http://purl.obolibrary.org/obo/MONDO_0004933",
                ontology_name='MONDO',
                type=helpers.Term)
            m_term = self.loader.load_term(o_term, 'MONDO', session)
            for syn in m_term.synonyms:
                self.assertNotEqual(syn.name, '')

    def testGoTerm(self):
        """GO parent relations carry the expected is_a / part_of types."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = True
        with dal.session_scope() as session:
            o_term = self.client.detail(
                iri="http://purl.obolibrary.org/obo/GO_0030118",
                ontology_name='GO',
                type=helpers.Term)
            m_term = self.loader.load_term(o_term, 'GO', session)
            session.add(m_term)
            self.assertIn(
                'GO:0030117',
                [rel.parent_term.accession for rel in m_term.parent_terms])
            o_term = self.client.detail(
                iri="http://purl.obolibrary.org/obo/GO_0030131",
                ontology_name='GO',
                type=helpers.Term)
            m_term = self.loader.load_term(o_term, 'GO', session)
            session.add(m_term)
            self.assertIn('GO:0030119', [
                rel.parent_term.accession for rel in m_term.parent_terms
                if rel.relation_type.name == 'is_a'
            ])
            self.assertIn('GO:0030118', [
                rel.parent_term.accession for rel in m_term.parent_terms
                if rel.relation_type.name == 'part_of'
            ])

    def testExternalRelationship(self):
        """A relation pointing to another ontology (OBI) is recorded
        without inserting that ontology itself."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = True
        with dal.session_scope() as session:
            o_term = self.client.term(
                identifier='http://www.ebi.ac.uk/efo/EFO_0002911',
                unique=True,
                silent=True)
            m_term = self.loader.load_term(o_term, 'EFO', session)
            session.add(m_term)
            found = False
            for relation in m_term.parent_terms:
                found = found or (relation.parent_term.accession
                                  == 'OBI:0000245')
        self.assertTrue(found)
        session = dal.get_session()
        ontologies = session.query(Ontology).filter_by(name='OBI').count()
        # assert that OBI has not been inserted
        self.assertEqual(0, ontologies)

    def testMissingOboId(self):
        """A term without an obo_id gets an accession derived from its
        short form (PR_P68993 -> PR:P68993)."""
        self.loader.options['process_relations'] = False
        self.loader.options['process_parents'] = False
        with dal.session_scope() as session:
            o_term = self.client.term(
                identifier='http://purl.obolibrary.org/obo/PR_P68993',
                unique=True,
                silent=True)
            m_term = self.loader.load_term(o_term, 'PR', session)
            self.assertEqual(m_term.accession, 'PR:P68993')

    def testSubsetErrors(self):
        """The efo_slim subset is created when loading a term using it."""
        with dal.session_scope() as session:
            o_term = self.client.term(
                identifier='http://www.ebi.ac.uk/efo/EFO_0003503')
            m_term = self.loader.load_term(o_term, 'EFO', session)
            session.add(m_term)
            self.assertIsInstance(
                session.query(Subset).filter_by(name='efo_slim').one(), Subset)

    def testTrickTerm(self):
        """A historically problematic FYPO term resolves its expected
        parent relation."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = True

        with dal.session_scope() as session:
            # o_term = helpers.Term(ontology_name='fypo', iri='http://purl.obolibrary.org/obo/FYPO_0001330')
            o_term = self.client.term(
                identifier='http://purl.obolibrary.org/obo/FYPO_0001330',
                unique=True,
                silent=True)
            m_term = self.loader.load_term(o_term, 'fypo', session)
            session.add(m_term)
            found = False
            print(m_term.parent_terms)
            for relation in m_term.parent_terms:
                found = found or (relation.parent_term.accession
                                  == 'FYPO:0001323')
        self.assertTrue(found)

    def testSubsets(self):
        """Subsets created for a term carry non-empty definitions."""
        self.loader.options['process_relations'] = False
        self.loader.options['process_parents'] = False

        with dal.session_scope() as session:
            term = helpers.Term(
                ontology_name='go',
                iri='http://purl.obolibrary.org/obo/GO_0099565')
            o_term = self.client.detail(term)
            m_term = self.loader.load_term(o_term, 'go', session)
            session.add(m_term)
            subsets = session.query(Subset).all()
            for subset in subsets:
                self.assertIsNotNone(subset.definition)

            subset = helpers.Property(
                ontology_name='go',
                iri=
                'http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym')
            details = self.client.detail(subset)
            self.assertNotEqual(details.definition, '')

    def testRelationOtherOntology(self):
        """A relation to a BTO term inserts the BTO term and ontology."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = True
        with dal.session_scope() as session:
            m_ontology = self.loader.load_ontology('efo', session)
            session.add(m_ontology)
            term = helpers.Term(ontology_name='efo',
                                iri='http://www.ebi.ac.uk/efo/EFO_0002215')
            o_term = self.client.detail(term)
            m_term = self.loader.load_term(o_term, m_ontology, session)
            session.add(m_term)
            self.assertGreaterEqual(session.query(Ontology).count(), 2)
            term = session.query(Term).filter_by(accession='BTO:0000164')
            self.assertEqual(1, term.count())

    def testRelationsShips(self):
        """Loading a BTO term with default options yields parent terms."""
        with dal.session_scope() as session:
            m_ontology = self.loader.load_ontology('bto', session)
            session.add(m_ontology)
            term = helpers.Term(
                ontology_name='bto',
                iri='http://purl.obolibrary.org/obo/BTO_0000005')
            o_term = self.client.detail(term)
            m_term = self.loader.load_term(o_term, m_ontology, session)
            session.add(m_term)
            self.assertGreaterEqual(len(m_term.parent_terms), 0)

    def testSingleTerm(self):
        """load_term accepts an Ontology model, a helper ontology or a
        name -- and rejects anything else with RuntimeError."""
        self.loader.options['process_relations'] = True
        self.loader.options['process_parents'] = True

        with dal.session_scope() as session:
            m_ontology = self.loader.load_ontology('fypo', session)
            session.add(m_ontology)
            term = helpers.Term(
                ontology_name='fypo',
                iri='http://purl.obolibrary.org/obo/FYPO_0000257')
            o_term = self.client.detail(term)
            m_term = self.loader.load_term(o_term, m_ontology, session)
            session.commit()
            self.assertGreaterEqual(len(m_term.parent_terms), 1)

            self.loader.options['process_relations'] = False
            self.loader.options['process_parents'] = False
            o_ontology = self.client.ontology('GO')
            term = helpers.Term(
                ontology_name='GO',
                iri='http://purl.obolibrary.org/obo/GO_0000002')
            o_term = self.client.detail(term)
            m_term = self.loader.load_term(o_term, o_ontology, session)
            self.assertEqual(m_term.ontology.name, 'GO')
            # An invalid ontology argument type must raise.
            with self.assertRaises(RuntimeError):
                self.loader.load_term(o_term, 33, session)

    def testPRErrors(self):
        """A PR slice known to contain problematic terms still loads and
        registers the PR ontology."""
        class TermLoader(OLSTermsLoader):
            # Minimal eHive runnable: inject params without a live hive DB.
            def __init__(self, d):
                self._BaseRunnable__params = eHive.Params.ParamContainer(d)
                self.input_job = Job()
                self.input_job.transient_error = True
                self.debug = 1

        params_set = {
            'ontology_name': 'pr',
            'db_url': self.db_url,
            'output_dir': log_dir,
            'verbosity': '4',
            '_start_term_index': 1000,
            '_end_term_index': 1999,
            'ols_api_url': self.ols_api_url,
            'allowed_ontologies': ['PR'],
            'page_size': 100
        }

        term_loader = TermLoader(params_set)
        term_loader.run()
        with dal.session_scope() as session:
            self.assertIsNotNone(
                session.query(Ontology).filter_by(name='PR').one())
# Ejemplo n.º 10
# 0
 def setUp(self):
     """Create a fresh OLS client (page size 100) before each test."""
     warnings.simplefilter("ignore", ResourceWarning)
     self.client = OlsClient(base_site=self.ols_api_url, page_size=100)
# Ejemplo n.º 11
# 0
 def testSlicingWithPageSize(self):
     """A terms() collection built by a client configured with an
     explicit page_size must expose that same page size."""
     self.client = OlsClient(base_site=self.ols_api_url, page_size=10)
     bfo_ontology = self.client.ontology('bfo')
     term_collection = bfo_ontology.terms()
     self.assertEqual(term_collection.page_size, 10)
# Ejemplo n.º 12
# 0
class OntologyTestBasic(unittest.TestCase):
    """Basic test cases."""

    ols_api_url = os.getenv('OLS_API_URL', 'http://localhost:8080/api')

    def _checkOntology(self, ontology):
        """Assert *ontology* and its nested config/annotations are the
        expected helper classes."""
        expectations = (
            (ontology, helpers.Ontology),
            (ontology.config, helpers.OntologyConfig),
            (ontology.config.annotations, helpers.OntologyAnnotation),
        )
        for obj, expected_cls in expectations:
            self.assertIsInstance(obj, expected_cls)

    def _checkProperty(self, prop):
        """Assert *prop* is an OLS Property helper."""
        self.assertIsInstance(prop, helpers.Property)

    def _checkProperties(self, properties):
        if type(properties) is helpers.Property:
            properties = [properties]
        [self._checkProperty(prop) for prop in properties]

    def _checkIndividual(self, individual):
        """Assert *individual* is an OLS Individual helper."""
        self.assertIsInstance(individual, helpers.Individual)

    def _checkIndividuals(self, individuals):
        """Assert every item is an Individual helper.

        Uses a plain loop instead of a side-effect list comprehension.
        """
        for individual in individuals:
            self._checkIndividual(individual)

    def _checkOntologies(self, ontologies):
        """Assert every item is a valid Ontology helper.

        Uses a plain loop instead of a side-effect list comprehension.
        """
        for ontology in ontologies:
            self._checkOntology(ontology)

    def _checkMixed(self, helper):
        """Dispatch to the _check<ClassName> method matching *helper*'s type
        (e.g. a helpers.Term instance is routed to _checkTerm)."""
        return getattr(self, '_check' + helper.__class__.__name__)(helper)

    def setUp(self):
        """Silence ResourceWarning noise and build a page-size-100 client."""
        warnings.simplefilter("ignore", ResourceWarning)
        client = OlsClient(base_site=self.ols_api_url, page_size=100)
        self.client = client

    def test_ontologies_list(self):
        """Iterate the full paginated ontologies list, cross-checking item
        access, page count and total length."""
        # standard first page
        ontologies = self.client.ontologies()
        self.assertEqual(ontologies.page_size, self.client.page_size)
        total_pages = ontologies.pages
        current = 0
        num_pages = 1
        test_item = None
        for ontology in ontologies:
            if current == 1:
                test_item = ontology
            self._checkOntology(ontology)
            current += 1
            if current % ontologies.page_size == 0:
                num_pages += 1
        # indexed access must return the same object seen during iteration
        item = ontologies[1]
        self.assertEqual(test_item, item)
        # self.assertEqual(ontologies.page, len(ontologies) // ontologies.page_size)
        # NOTE(review): num_pages starts at 1 and is bumped after every full
        # page, so it would overshoot by one if the total count were an exact
        # multiple of page_size — confirm against the live ontology count.
        self.assertEqual(total_pages, num_pages)
        self.assertEqual(current, len(ontologies))
        self.assertEqual(num_pages, ontologies.pages)

    def test_ontology(self):
        """Fetch a single ontology by short name and validate its helpers."""
        # FIXME add further testing on single ontology data
        self._checkOntology(self.client.ontology('aero'))

    def _checkTerm(self, term):
        """Assert *term* is a Term helper exposing iri and obo_xref."""
        self.assertIsInstance(term, helpers.Term)
        for attribute in ('iri', 'obo_xref'):
            self.assertTrue(hasattr(term, attribute))

    def _checkTerms(self, terms):
        """Run _checkTerm on every term.

        Uses a plain loop instead of a side-effect list comprehension.
        """
        for term in terms:
            self._checkTerm(term)

    def test_ontology_terms(self):
        """
        Test retrieving ontology terms.
        :return:
        """
        # Search for all terms in ontology, loop over and load each Term accordingly
        ontology = self.client.ontology("aero")
        terms = ontology.terms()
        # iteration cursor starts at the beginning
        self.assertEqual(terms.index, 0)
        self.assertGreaterEqual(len(terms), ontology.number_of_terms)
        self._checkTerms(terms)

    def test_ontology_individuals(self):
        """Individuals listing covers at least the advertised count and every
        entry is a valid Individual helper."""
        ontology = self.client.ontology("aero")
        individuals = ontology.individuals()
        expected_minimum = ontology.number_of_individuals
        self.assertGreaterEqual(len(individuals), expected_minimum)
        self._checkIndividuals(individuals)

    def test_ontology_properties(self):
        """Properties listing covers at least the advertised count and every
        entry is a valid Property helper."""
        ontology = self.client.ontology("aero")
        properties = ontology.properties()
        expected_minimum = ontology.number_of_properties
        self.assertGreaterEqual(len(properties), expected_minimum)
        self._checkProperties(properties)

    def test_list_range(self):
        """Slicing a paginated term list: slice length, per-item identity with
        direct indexing, and the expected IndexError/TypeError cases."""
        ontology = self.client.ontology("aero")
        terms = ontology.terms({'size': self.client.page_size})

        slice_terms = terms[23:253]
        term_3 = terms[252]

        # 253 - 23 = 230 items expected in the slice
        self.assertEqual(230, len(slice_terms))
        self.assertGreaterEqual(len(terms), 480)
        current = 23
        # every sliced item must match the directly-indexed one
        for term in slice_terms:
            self.assertEqual(term.accession, terms[current].accession)
            current += 1
        self.assertEqual(slice_terms[-1], term_3)
        # NOTE(review): unlike builtin lists, slicing past the end apparently
        # raises IndexError — confirm in the list-client implementation.
        with self.assertRaises(IndexError):
            error_slice = terms[1:550]
        with self.assertRaises(IndexError):
            current = slice_terms[12555]
        with self.assertRaises(TypeError):
            current = slice_terms['12512']

    def test_list_filters(self):
        """
        Test ontology terms api filtering options
        :return:
        """
        filters = {'short_form': 'DUO_0000024'}
        ontology = self.client.ontology('duo')
        # NOTE(review): result is unused — presumably this only checks that an
        # unknown filter name does not raise; confirm the intent.
        ontologies = self.client.ontologies(
            filters={'fake_filter': 'fake_value'})

        # filter by short_form
        terms = ontology.terms(filters=filters)
        for term in terms:
            self.assertEqual(term.short_form, 'DUO_0000024')
        # filter by obo_id
        filters = {'obo_id': 'DUO:0000024'}
        terms = ontology.terms(filters=filters)
        for term in terms:
            self.assertEqual(term.obo_id, 'DUO:0000024')

        # filter by iri: all three identifier forms must agree
        filters = {'iri': 'http://purl.obolibrary.org/obo/DUO_0000017'}
        terms = ontology.terms(filters=filters)
        for term in terms:
            self.assertEqual(term.short_form, 'DUO_0000017')
            self.assertEqual(term.obo_id, 'DUO:0000017')
            self.assertEqual(term.iri,
                             'http://purl.obolibrary.org/obo/DUO_0000017')

    def test_terms(self):
        """
        Test direct calls to terms entry point.
        Should warn that test may be long according to the number of terms involved
        :return:
        """
        term_1 = helpers.Term(ontology_name='duo',
                              iri='http://purl.obolibrary.org/obo/DUO_0000026')
        # every ancestor of the term must itself be a valid Term
        ancestors = term_1.load_relation('ancestors')
        for ancestor in ancestors:
            self._checkTerm(ancestor)
        self._checkTerm(term_1)

    def test_dynamic_links(self):
        """Every advertised relation type of a term can be loaded, and the
        expected relation names appear in relations_types."""
        current_term = helpers.Term(ontology_name='aero',
                                    iri='http://purl.obolibrary.org/obo/IAO_0000630')
        for relation_name in current_term.relations_types:
            related_terms = current_term.load_relation(relation_name)
            if related_terms:
                self._checkTerms(related_terms)
        self.assertIn('has_part', current_term.relations_types)
        current_term = helpers.Term(ontology_name='aero',
                                    iri='http://purl.obolibrary.org/obo/IAO_0000314')
        self.assertIn('part_of', current_term.relations_types)

    def test_search_simple(self):
        """
        Test Basic, simple query param
        """
        # Local Docker simple tests
        results = self.client.search(query='gene')
        self.assertGreaterEqual(len(results), 8)
        term_2 = None
        # walk (at most) the first 500 hits; capture and detail-check the 8th
        for position, term in enumerate(results[0:500]):
            if position == 7:
                term_2 = term
                term_3 = self.client.detail(term)
                self._checkMixed(term_3)

        term_1 = results[7]
        self.assertEqual(term_2, term_1)

    def test_search_filters(self):
        """
        Test Search feature :
        - with dictionary parameter
        - kwargs passed
        """
        # Test search which returns only properties
        found = self.client.search(query='goslim_chembl',
                                   filters={'type': 'property'})
        for hit in found:
            self._checkMixed(hit)
            self._checkMixed(self.client.detail(hit))

    def test_search_kwargs(self):
        """
        Test Search feature : - kwargs passed

        Covers: type filtering, mixed term/property results, obsolete lookup,
        and fieldList/queryFields passed as strings and as sets.
        """
        mixed = self.client.search(query='go', type='property')
        self.assertGreaterEqual(len(mixed), 15)

        # distinct helper classes among the results (set comprehension replaces
        # the previous append-inside-a-conditional-expression anti-pattern;
        # only the count of unique names was ever asserted)
        result_classes = {mix.__class__.__name__ for mix in mixed}
        # a property-only search must yield a single result class
        self.assertEqual(len(result_classes), 1)
        # only terms and properties
        mixed = self.client.search(query='date', ontology='aero')
        self.assertGreater(len(mixed), 1)
        result_classes = {mix.__class__.__name__ for mix in mixed}
        self.assertGreaterEqual(len(result_classes), 2)
        # test obsoletes
        mixed = self.client.search(query='BFO_0000005',
                                   ontology='aero',
                                   obsoletes='true',
                                   type='term')
        self.assertGreaterEqual(len(mixed), 1)
        for mix in mixed:
            detailed = self.client.detail(ontology_name='aero',
                                          iri=mix.iri,
                                          type=helpers.Term)
            found_obsolete = detailed.is_obsolete == 1
        # the last detailed hit must be flagged obsolete
        self.assertEqual(found_obsolete, True)

        mixed = self.client.search(query='gene',
                                   ontology='aero',
                                   fieldList='iri,label,short_form,obo_id')
        self.assertGreater(len(mixed), 0)
        mixed = self.client.search(query='involves',
                                   type='property',
                                   queryFields='label,logical_description,iri')
        self.assertGreater(len(mixed), 0)
        mixed = self.client.search(
            query='go',
            ontology='aero',
            fieldList={'iri', 'label', 'short_form', 'obo_id'})
        self.assertGreater(len(mixed), 0)
        for mix in mixed:
            # only validate the first couple of pages to keep the test short
            if mixed.page > 2:
                break
            self._checkMixed(mix)
        mixed = self.client.search(
            query='definition',
            ontology='duo',
            type='property',
            queryFields={'label', 'logical_description', 'iri'})
        self.assertGreater(len(mixed), 0)

    def test_search_wrong_filters(self):
        """Invalid search filter values must raise BadFilters naming the
        offending parameter."""
        # NOTE: the assertIn checks previously sat *inside* the `with` blocks,
        # after the raising call, so they never executed (and `ex.message`
        # does not exist on the context manager). They now run after each
        # block against the raised exception's message.
        with self.assertRaises(exceptions.BadFilters) as ex:
            self.client.search(query='go',
                               type='property,unknown',
                               ontology='aero')
        self.assertIn('type', str(ex.exception))
        with self.assertRaises(exceptions.BadFilters) as ex:
            self.client.search(query='go',
                               type='property,term',
                               ontology='duo',
                               obsoletes='totototo')
        self.assertIn('obsoletes', str(ex.exception))
        with self.assertRaises(exceptions.BadFilters) as ex:
            self.client.search(query='go', ontology='aero', local='1')
        self.assertIn('local', str(ex.exception))
        with self.assertRaises(exceptions.BadFilters) as ex:
            self.client.search(
                query='go',
                ontology='aero',
                fieldList={'iri', 'label', 'wrong_short_form', 'obo_id'})
        self.assertIn('fieldList', str(ex.exception))
        with self.assertRaises(exceptions.BadFilters) as ex:
            self.client.search(
                query='go',
                ontology='duo',
                queryFields={'label', 'logical_description_wrong', 'iri'})
        self.assertIn('queryFields', str(ex.exception))

    def test_individuals(self):
        """All individuals of an ontology are valid Individual helpers."""
        individuals = self.client.ontology('aero').individuals()
        self._checkIndividuals(individuals)

    def test_reverse_range(self):
        """Reversed slice semantics of the paginated list client."""
        ontologies = self.client.ontologies()
        # NOTE(review): unlike builtin lists (where [2:0] is empty) the list
        # client apparently returns two items in reverse order — confirm
        # against the ListClientMixin slicing implementation.
        sliced = ontologies[2:0]
        self.assertEqual(len(sliced), 2)
        i = 1
        # reversed slice items mirror indexes 1 then 0
        for ontology in sliced:
            self.assertEqual(ontology.ontology_id, ontologies[i].ontology_id)
            i -= 1

        # forward slice for comparison: indexes 0 then 1
        sliced = ontologies[0:2]
        self.assertEqual(len(sliced), 2)
        i = 0
        for ontology in sliced:
            self.assertEqual(ontology.ontology_id, ontologies[i].ontology_id)
            i += 1

    def test_error_search(self):
        """Every property found by search can be fetched in detail and the
        detail view carries a non-empty label."""
        found = self.client.search(query='goslim_yeast',
                                   filters={
                                       'ontology': 'go',
                                       'type': 'property'
                                   })
        for hit in found:
            detailed = self.client.detail(ontology_name='go',
                                          iri=hit.iri,
                                          type=helpers.Property)
            self.assertIsNotNone(detailed)
            self.assertIsNotNone(detailed.label)

    def test_accessions(self):
        """Short-form-only properties without an OBO id have no accession;
        IRI-based ones resolve to a real accession and are retrievable."""
        for short_form in ('TopObjectProperty', 'SubsetProperty',
                           'ObsoleteClass'):
            no_accession = helpers.Property(short_form=short_form)
            self.assertIsNone(no_accession.accession)
        for iri in ('http://purl.obolibrary.org/obo/go#gocheck_do_not_annotate',):
            prop = helpers.Property(iri=iri)
            self.assertIsNotNone(prop.accession)
            o_property = self.client.property(iri, unique=False)
            self.assertIsNotNone(o_property)
            self._checkProperties(o_property)

    def test_exception_retry(self):
        """Each kind of bad request maps to its dedicated OLS API exception."""
        # unknown URI
        with self.assertRaises(ebi.ols.api.exceptions.NotFoundException):
            h_term = helpers.Term(
                ontology_name='so',
                iri='http://purl.obolibrary.org/obo/SO_99999999')
            self.client.detail(h_term)
        # unknown ontology name
        with self.assertRaises(ebi.ols.api.exceptions.NotFoundException):
            self.client.ontology('unexisting_ontology')
        # unsupported filter key
        with self.assertRaises(ebi.ols.api.exceptions.BadFilters):
            filters = {'accession': 'EFO:0000405'}
            self.client.terms(filters=filters)
        # IRI that exists but cannot be loaded as a Property detail
        with self.assertRaises(ebi.ols.api.exceptions.ObjectNotRetrievedError):
            prop = helpers.Property(
                iri=
                'http://purl.obolibrary.org/obo/uberon/insect-anatomy#efo_slim',
                ontology_name='efo')
            self.client.detail(prop)

    def test_namespace(self):
        """Term namespace resolves correctly from either annotation source."""
        # retrieved from namespace annotation
        aero_term = helpers.Term(ontology_name='aero',
                                 iri='http://purl.obolibrary.org/obo/IAO_0000314')
        self.client.detail(aero_term)
        self.assertEqual(aero_term.namespace, 'aero')

        # retrieved from obo_name_space annotation
        duo_term = helpers.Term(ontology_name='duo',
                                iri='http://purl.obolibrary.org/obo/DUO_0000017')
        self.client.detail(duo_term)
        self.assertEqual(duo_term.namespace, 'duo')

    def test_term_description(self):
        """owl:Thing carries an empty description rather than None."""
        thing = self.client.detail(iri="http://www.w3.org/2002/07/owl#Thing",
                                   ontology_name='duo',
                                   type=helpers.Term)
        self.assertEqual('', thing.description)

    def test_properties_retrieval(self):
        """An exact property search yields a single hit whose detail view is
        consistent: same definition and the expected accession."""
        subsets = "is quality measurement of"

        s_subsets = self.client.search(query=subsets,
                                       ontology='aero',
                                       type='property',
                                       exact='true')
        # (removed unused local `seen = set()`)
        self.assertEqual(len(s_subsets), 1)
        subset = s_subsets[0]
        d_subset = self.client.property(identifier=subset.iri)
        self.assertEqual(d_subset.definition, subset.definition)
        self.assertEqual(d_subset.accession, 'IAO:0000221')

    def test_term_definition(self):
        """A term's description is sourced from its OBO definition citation."""
        o_term = self.client.detail(
            iri="http://purl.obolibrary.org/obo/BFO_0000015",
            ontology_name='bfo',
            type=helpers.Term)
        # description mirrors the first obo_definition_citation entry
        self.assertEqual(o_term.description,
                         o_term.obo_definition_citation[0]['definition'])
        self.assertEqual(
            o_term.description,
            'p is a process = Def. p is an occurrent that has temporal proper parts '
            'and for some time t, p s-depends_on some material entity at t. (axiom '
            'label in BFO2 Reference: [083-003])')
        self.assertEqual(o_term.label, 'process')

    def testSlicingWithPageSize(self):
        """A client-level page_size of 10 must propagate to term lists."""
        # terms_client = BfoClientMixin(self.ols_api_url)
        self.client = OlsClient(base_site=self.ols_api_url, page_size=10)
        bfo = self.client.ontology('bfo')
        bfo_terms = bfo.terms()
        self.assertEqual(bfo_terms.page_size, 10)

    def testPRPagesFailures(self):
        """Slicing across pages triggers exactly one fetch per page touched."""
        class BfoClientMixin(ListClientMixin):
            # class-level counter of fetch_document invocations
            count_call = 0

            def fetch_document(self,
                               path,
                               params=None,
                               filters=None,
                               base_document=None):
                BfoClientMixin.count_call = BfoClientMixin.count_call + 1
                return super().fetch_document(path, params, filters,
                                              base_document)

        terms_list_client = BfoClientMixin('/'.join(
            ['https://www.ebi.ac.uk/ols/api', 'ontologies', 'pr']),
                                           helpers.Term,
                                           page_size=100)
        terms = terms_list_client()
        # NOTE(review): with page_size=100, indexes 220..519 presumably span
        # four pages, hence the expected 4 fetches — confirm ListClientMixin
        # does not prefetch extra pages.
        for term in terms[220:520]:
            logger.info("Current term: %s", term)
        self.assertEqual(terms_list_client.count_call, 4)