def testPRErrors(self):
    """Run a PR term-slice load end to end and check the PR ontology row exists."""

    class TermLoader(OLSTermsLoader):
        # Minimal eHive-runnable stand-in: inject params directly instead of
        # going through the hive job machinery.
        def __init__(self, param_dict):
            self._BaseRunnable__params = eHive.Params.ParamContainer(param_dict)
            self.input_job = Job()
            self.input_job.transient_error = True
            self.debug = 1

    job_params = {
        'ontology_name': 'pr',
        'db_url': self.db_url,
        'output_dir': log_dir,
        'verbosity': '4',
        '_start_term_index': 1000,
        '_end_term_index': 1999,
        'ols_api_url': self.ols_api_url,
        'allowed_ontologies': ['PR'],
        'page_size': 100
    }
    TermLoader(job_params).run()
    with dal.session_scope() as session:
        pr_ontology = session.query(Ontology).filter_by(name='PR').one()
        self.assertIsNotNone(pr_ontology)
def testSingleTerm(self):
    """Load a FYPO term with parent processing on, then a GO term with it
    off, and verify ontology assignment plus the error raised for an
    invalid ontology argument."""
    self.loader.options['process_relations'] = True
    self.loader.options['process_parents'] = True
    with dal.session_scope() as session:
        fypo = self.loader.load_ontology('fypo', session)
        session.add(fypo)
        fypo_query = helpers.Term(
            ontology_name='fypo',
            iri='http://purl.obolibrary.org/obo/FYPO_0000257')
        o_term = self.client.detail(fypo_query)
        m_term = self.loader.load_term(o_term, fypo, session)
        session.commit()
        self.assertGreaterEqual(len(m_term.parent_terms), 1)

        # Second pass: relations/parents disabled, ontology passed as object.
        self.loader.options['process_relations'] = False
        self.loader.options['process_parents'] = False
        o_ontology = self.client.ontology('GO')
        go_query = helpers.Term(
            ontology_name='GO',
            iri='http://purl.obolibrary.org/obo/GO_0000002')
        o_term = self.client.detail(go_query)
        m_term = self.loader.load_term(o_term, o_ontology, session)
        self.assertEqual(m_term.ontology.name, 'GO')
        # An int is not an acceptable ontology argument.
        with self.assertRaises(RuntimeError):
            self.loader.load_term(o_term, 33, session)
def testGoExpectedLinks(self):
    """Loading a GO slice must create one namespaced Ontology row per GO
    namespace and attach each root term to the right namespace.

    FIX: removed the unused local list of accessions and the unused
    binding of load_ontology_terms()'s return value; set literal instead
    of set([...]).
    """
    self.loader.options['process_relations'] = False
    self.loader.options['process_parents'] = True
    with dal.session_scope() as session:
        # Load the first 20 GO terms; only the DB side effects matter here.
        self.loader.load_ontology_terms('GO', 0, 20)
        ontologies = session.query(Ontology).filter_by(name='GO')
        namespaces = [onto.namespace for onto in ontologies]
        self.assertSetEqual(
            {
                'go', 'biological_process', 'cellular_component',
                'molecular_function'
            }, set(namespaces))
        GO_0005575 = session.query(Term).filter_by(
            accession='GO:0005575').one()
        GO_0003674 = session.query(Term).filter_by(
            accession='GO:0003674').one()
        GO_0008150 = session.query(Term).filter_by(
            accession='GO:0008150').one()
        self.assertEqual('biological_process',
                         GO_0008150.ontology.namespace)
        self.assertEqual('cellular_component',
                         GO_0005575.ontology.namespace)
        self.assertEqual('molecular_function',
                         GO_0003674.ontology.namespace)
def testGoTerm(self):
    """GO terms keep both their is_a and part_of parent relations."""
    self.loader.options['process_relations'] = True
    self.loader.options['process_parents'] = True
    with dal.session_scope() as session:
        detail = self.client.detail(
            iri="http://purl.obolibrary.org/obo/GO_0030118",
            ontology_name='GO',
            type=helpers.Term)
        loaded = self.loader.load_term(detail, 'GO', session)
        session.add(loaded)
        parent_accessions = [
            rel.parent_term.accession for rel in loaded.parent_terms
        ]
        self.assertIn('GO:0030117', parent_accessions)

        detail = self.client.detail(
            iri="http://purl.obolibrary.org/obo/GO_0030131",
            ontology_name='GO',
            type=helpers.Term)
        loaded = self.loader.load_term(detail, 'GO', session)
        session.add(loaded)
        is_a_parents = [
            rel.parent_term.accession for rel in loaded.parent_terms
            if rel.relation_type.name == 'is_a'
        ]
        part_of_parents = [
            rel.parent_term.accession for rel in loaded.parent_terms
            if rel.relation_type.name == 'part_of'
        ]
        self.assertIn('GO:0030119', is_a_parents)
        self.assertIn('GO:0030118', part_of_parents)
def run(self):
    """Hive runnable entry point: load one ontology into the DB.

    Builds loader options from eHive params, optionally wipes existing
    data for the ontology, validates it is an allowed ontology, loads
    it, and dataflows the resulting term count downstream.

    :raises JobFailedException: if the ontology is not in the loader's
        allowed list.
    """
    # FIX: hoist the repeated param_required('ontology_name') lookups;
    # use `not in` instead of `not ... in` (idiomatic form).
    ontology_name = self.param_required('ontology_name')
    options = param_defaults()
    options['wipe'] = self.param('wipe_one')
    # Any error is fatal for this job: do not retry on failure.
    self.input_job.transient_error = False
    options['ols_api_url'] = self.param('ols_api_url')
    options['page_size'] = self.param('page_size')
    options['output_dir'] = self.param('output_dir')
    ols_loader = OlsLoader(self.param_required('db_url'), **options)
    # TODO update options with loader params
    logging.basicConfig(level=log_levels.get(self.param('verbosity'),
                                             logging.ERROR),
                        datefmt='%m-%d %H:%M:%S')
    logger = ols_loader.get_ontology_logger(ontology_name)
    logger.info('Loading ontology info %s', ontology_name)
    if self.param_required('wipe_one') == 1:
        logger.info("Wiping existing ontology data %s", ontology_name)
        ols_loader.wipe_ontology(ontology_name)
    if ontology_name.upper() not in ols_loader.allowed_ontologies:
        raise JobFailedException("Ontology %s not implemented" %
                                 ontology_name)
    with dal.session_scope() as session:
        m_ontology = ols_loader.load_ontology(ontology_name,
                                              session=session)
        session.add(m_ontology)
        self.dataflow({
            "ontology_name": ontology_name,
            "nb_terms": m_ontology.number_of_terms
        })
def load_ontology_terms(self, ontology, start=None, end=None):
    """Load terms for `ontology` from OLS, optionally restricted to a slice.

    :param ontology: ontology short name (OLS identifier)
    :param start: optional slice start index (inclusive)
    :param end: optional slice end index (exclusive)
    :return: tuple (nb_terms loaded, nb_terms ignored); (0, 0) when the
        ontology is not found in OLS.
    """
    nb_terms = 0
    nb_terms_ignored = 0
    o_ontology = self.client.ontology(identifier=ontology)
    if o_ontology:
        self.current_ontology = o_ontology.ontology_id.upper()
        if start is not None and end is not None:
            logger.info('Loading terms slice [%s, %s]', start, end)
            logger.info('-----------------------------------------')
            terms = o_ontology.terms()[start:end]
            logger.info('Slice len %s', len(terms))
            report_msg = ('- Loading %s terms slice [%s:%s]', ontology,
                          start, end)
        else:
            terms = o_ontology.terms()
            logger.info('Loading %s terms for %s', len(terms),
                        o_ontology.ontology_id.upper())
            report_msg = ('- Loading all terms (%s)', len(terms))
        with dal.session_scope() as session:
            for o_term in terms:
                logger.info('Term namespace %s', o_term.namespace)
                # Only terms defined in this ontology and carrying an
                # accession are loaded; everything else counts as ignored.
                if o_term.is_defining_ontology and has_accession(o_term):
                    m_ontology, created = get_one_or_create(
                        Ontology,
                        session,
                        name=self.current_ontology,
                        namespace=o_term.namespace,
                        create_method_kwargs=dict(
                            version=o_ontology.version,
                            title=o_ontology.title))
                    logger.debug('Loaded term (from OLS) %s', o_term)
                    logger.debug(
                        'Adding/Retrieving namespaced ontology %s',
                        o_term.namespace)
                    logger.debug('Ontology namespace %s %s',
                                 m_ontology.name, m_ontology.namespace)
                    if m_ontology.namespace != o_term.namespace:
                        logger.warning(
                            'discrepancy term/ontology namespace')
                        # FIX: original passed the object as a logging
                        # argument with no %s placeholder, which makes the
                        # logging module raise a formatting error.
                        logger.warning('term: %s', o_term)
                        logger.warning('ontology: %s', o_ontology)
                    term = self.load_term(o_term, m_ontology, session)
                    if term:
                        session.add(term)
                        nb_terms += 1
                else:
                    logger.info('Ignored term [%s:%s]',
                                o_term.is_defining_ontology,
                                o_term.short_form)
                    nb_terms_ignored += 1
            self.report(*report_msg)
            self.report('- Expected %s terms (defined in ontology)',
                        nb_terms)
            self.report('- Ignored %s terms (not defined in ontology)',
                        nb_terms_ignored)
            return nb_terms, nb_terms_ignored
    else:
        # FIX: logger.warn is deprecated; use warning().
        logger.warning('Ontology not found %s', ontology)
        return 0, 0
def testSubsetErrors(self):
    """Loading an EFO term registers its subset ('efo_slim')."""
    with dal.session_scope() as session:
        o_term = self.client.term(
            identifier='http://www.ebi.ac.uk/efo/EFO_0003503')
        loaded = self.loader.load_term(o_term, 'EFO', session)
        session.add(loaded)
        efo_slim = session.query(Subset).filter_by(name='efo_slim').one()
        self.assertIsInstance(efo_slim, Subset)
def testMissingOboId(self):
    """A term without an obo_id still gets an accession derived from its IRI."""
    self.loader.options['process_relations'] = False
    self.loader.options['process_parents'] = False
    with dal.session_scope() as session:
        o_term = self.client.term(
            identifier='http://purl.obolibrary.org/obo/PR_P68993',
            unique=True,
            silent=True)
        loaded = self.loader.load_term(o_term, 'PR', session)
        self.assertEqual(loaded.accession, 'PR:P68993')
def testRelationsShips(self):
    """Loading a BTO term against its ontology completes without error."""
    with dal.session_scope() as session:
        bto = self.loader.load_ontology('bto', session)
        session.add(bto)
        query = helpers.Term(
            ontology_name='bto',
            iri='http://purl.obolibrary.org/obo/BTO_0000005')
        o_term = self.client.detail(query)
        loaded = self.loader.load_term(o_term, bto, session)
        session.add(loaded)
        # NOTE(review): len() >= 0 is always true, so this only checks the
        # load did not raise — confirm whether > 0 was intended.
        self.assertGreaterEqual(len(loaded.parent_terms), 0)
def testTermNoDefinition(self):
    """A term that declares no definition (not even via annotations)
    falls back to its label as the stored description."""
    self.loader.options['process_relations'] = False
    self.loader.options['process_parents'] = False
    with dal.session_scope() as session:
        detail = self.client.detail(
            iri="http://purl.obolibrary.org/obo/MONDO_0020003",
            ontology_name='MONDO',
            type=helpers.Term)
        loaded = self.loader.load_term(detail, 'MONDO', session)
        self.assertEqual(loaded.name, loaded.description.lower())
def run(self):
    """Create PHI-base identifier terms PHI:<start>..PHI:<end>.

    For the first chunk (_start_term_index == 0) any previously loaded
    'phibase_identifier' namespaced ontology data is wiped first. Every
    generated term except the root (PHI:0) is linked to the root with an
    'is_a' relation; the transaction is committed every 100 terms.

    FIX: removed the dead trailing `else: m_related = term` (the value
    was already set and never used afterwards) and cache the root-term
    lookup instead of re-querying PHI:0 on every iteration.
    """
    # Any error is fatal for this job: do not retry on failure.
    self.input_job.transient_error = False
    logger.info('Loading PHIBASe Identifier terms')
    with dal.session_scope() as session:
        # delete phi-base-identifier namespaces ontology
        if self.param_required('_start_term_index') == 0:
            # only delete for first chunk
            ontologies = session.query(Ontology).filter_by(
                name='phi', namespace='phibase_identifier').all()
            for ontology in ontologies:
                logger.info('Deleting namespaced ontology %s - %s',
                            ontology.name, ontology.namespace)
                rel = session.query(Relation).filter_by(
                    ontology=ontology).delete()
                logger.info('Wiped %s Relations', rel)
                res = session.query(Term).filter_by(
                    ontology=ontology).delete()
                logger.info('Wiped %s Terms', res)
                logger.debug('...Done')
        m_ontology, created = get_one_or_create(
            Ontology,
            session,
            name='phi',
            namespace='phibase_identifier',
            create_method_kwargs=dict(version='1.0',
                                      title='PHI-base Identifiers'))
        relation_type, created = get_one_or_create(RelationType,
                                                   session,
                                                   name='is_a')
        # Root term cache: created in this chunk (i == 0) or fetched once
        # from the DB for later chunks.
        m_related = None
        for i in range(self.param_required('_start_term_index'),
                       self.param_required('_end_term_index') + 1):
            accession = 'PHI:{}'.format(i)
            term = Term(accession=accession, name='{}'.format(i))
            if i == 0:
                term.name = 'phibase identifier'
                term.is_root = 1
                m_related = term
            elif m_related is None:
                m_related = session.query(Term).filter_by(
                    accession='PHI:0').one()
            logger.debug('Adding Term %s', accession)
            session.add(term)
            m_ontology.terms.append(term)
            if i != 0:
                term.add_parent_relation(m_related, relation_type, session)
            if i % 100 == 0:
                logger.info('Committing transaction')
                session.commit()
def testRelationOtherOntology(self):
    """A relation pointing into another ontology (BTO) creates that
    ontology and the referenced term."""
    self.loader.options['process_relations'] = True
    self.loader.options['process_parents'] = True
    with dal.session_scope() as session:
        efo = self.loader.load_ontology('efo', session)
        session.add(efo)
        query = helpers.Term(ontology_name='efo',
                             iri='http://www.ebi.ac.uk/efo/EFO_0002215')
        o_term = self.client.detail(query)
        loaded = self.loader.load_term(o_term, efo, session)
        session.add(loaded)
        self.assertGreaterEqual(session.query(Ontology).count(), 2)
        referenced = session.query(Term).filter_by(
            accession='BTO:0000164')
        self.assertEqual(1, referenced.count())
def testPartOfRelationship(self):
    """part_of and occurs_in relations survive the load of GO_0032042."""
    self.loader.options['process_relations'] = True
    self.loader.options['process_parents'] = False
    with dal.session_scope() as session:
        o_term = self.client.detail(
            iri="http://purl.obolibrary.org/obo/GO_0032042",
            ontology_name='GO',
            type=helpers.Term)
        m_term = self.loader.load_term(o_term, 'GO', session)
        self.assertIn('part_of', o_term.relations_types)
        # Collect the parent relation type names once, check both.
        relation_names = [
            relation.relation_type.name
            for relation in m_term.parent_terms
        ]
        self.assertIn('part_of', relation_names)
        self.assertIn('occurs_in', relation_names)
def testTermInvalidDefinition(self):
    """Invalid characters (e.g. "\\n") are stripped from term definitions."""
    self.loader.options['process_relations'] = False
    self.loader.options['process_parents'] = False
    with dal.session_scope() as session:
        o_term = self.client.detail(
            iri="http://purl.obolibrary.org/obo/GO_0090481",
            ontology_name='GO',
            type=helpers.Term)
        # The check is only meaningful while the upstream description
        # actually carries a newline; otherwise skip.
        if '\n' in o_term.description:
            m_term = self.loader.load_term(o_term, 'GO', session)
            self.assertNotIn('\n', m_term.description)
        else:
            self.skipTest(
                "Term Description does not contain invalid characters.")
def testTrickTerm(self):
    """FYPO:0001330 must end up with FYPO:0001323 among its parent terms.

    FIX: removed a leftover debug print() and replaced the manual
    boolean-accumulator loop with any().
    """
    self.loader.options['process_relations'] = True
    self.loader.options['process_parents'] = True
    with dal.session_scope() as session:
        # o_term = helpers.Term(ontology_name='fypo', iri='http://purl.obolibrary.org/obo/FYPO_0001330')
        o_term = self.client.term(
            identifier='http://purl.obolibrary.org/obo/FYPO_0001330',
            unique=True,
            silent=True)
        m_term = self.loader.load_term(o_term, 'fypo', session)
        session.add(m_term)
        found = any(relation.parent_term.accession == 'FYPO:0001323'
                    for relation in m_term.parent_terms)
        self.assertTrue(found)
def init_meta(self):
    """Seed the meta table with schema version/type/patch entries and
    mark the DB as initialised."""
    with dal.session_scope() as session:
        version = self.options.get('db_version')
        prev_version = int(version) - 1
        metas = {
            'schema_version': version,
            'schema_type': 'ontology',
            'patch': 'patch_{}_{}_a.sql|schema version'.format(
                prev_version, version)
        }
        for meta_key, meta_value in metas.items():
            get_one_or_create(
                Meta,
                session,
                meta_key=meta_key,
                create_method_kwargs=dict(meta_value=meta_value))
        self.db_init = True
def init_schema(db_url, **options):
    """Create the ontology schema on `db_url` and seed its meta table.

    :param db_url: database connection URL
    :param options: passed through to dal.db_init; 'ens_version'
        selects the schema version (defaults to 99).
    """
    dal.db_init(db_url, **options)
    dal.create_schema()
    db_version = options.get('ens_version', 99)
    patch_entry = 'patch_{}_{}_a.sql|schema version'.format(
        db_version - 1, db_version)
    metas = {
        'schema_version': db_version,
        'schema_type': 'ontology',
        'patch': patch_entry
    }
    with dal.session_scope() as session:
        for meta_key, meta_value in metas.items():
            get_one_or_create(Meta,
                              session,
                              meta_key=meta_key,
                              create_method_kwargs=dict(
                                  meta_value=meta_value))
def testLongTermDefinition(self):
    """Synonyms loaded for terms with long definitions are never empty."""
    self.loader.options['process_relations'] = True
    self.loader.options['process_parents'] = True
    # Same check against two ontologies, in the original order.
    checks = (
        ('UBERON', 'http://purl.obolibrary.org/obo/UBERON_0000948'),
        ('MONDO', 'http://purl.obolibrary.org/obo/MONDO_0004933'),
    )
    with dal.session_scope() as session:
        for ontology_name, iri in checks:
            o_term = self.client.detail(iri=iri,
                                        ontology_name=ontology_name,
                                        type=helpers.Term)
            m_term = self.loader.load_term(o_term, ontology_name, session)
            for syn in m_term.synonyms:
                self.assertNotEqual(syn.name, '')
def testExternalRelationship(self):
    """A relation to a term of a non-loaded ontology (OBI) is recorded
    without inserting that ontology."""
    self.loader.options['process_relations'] = True
    self.loader.options['process_parents'] = True
    with dal.session_scope() as session:
        o_term = self.client.term(
            identifier='http://www.ebi.ac.uk/efo/EFO_0002911',
            unique=True,
            silent=True)
        m_term = self.loader.load_term(o_term, 'EFO', session)
        session.add(m_term)
        found = any(relation.parent_term.accession == 'OBI:0000245'
                    for relation in m_term.parent_terms)
        self.assertTrue(found)
        session = dal.get_session()
        ontologies = session.query(Ontology).filter_by(name='OBI').count()
        # assert that OBI has not been inserted
        self.assertEqual(0, ontologies)
def run(self):
    """Hive runnable entry point: load one ontology via the shared loader.

    Optionally wipes existing data, validates the ontology is allowed,
    loads it and dataflows the resulting term count downstream.

    :raises RuntimeError: if the ontology is not in the allowed list.
    """
    # transient_error = False marks any failure as permanent (no retry).
    self.input_job.transient_error = False
    ontology_name = self.param_required('ontology_name')
    logger.info('Loading ontology info %s', ontology_name)
    if self.param_required('wipe_one') == 1:
        self.ols_loader.wipe_ontology(ontology_name)
    # FIX: `assert` is stripped under `python -O`, silently disabling this
    # validation; raise an explicit exception instead.
    if ontology_name.upper() not in self.ols_loader.allowed_ontologies:
        raise RuntimeError("Ontology %s not implemented" % ontology_name)
    with dal.session_scope() as session:
        m_ontology = self.ols_loader.load_ontology(ontology_name,
                                                   session=session)
        session.add(m_ontology)
        self.dataflow({
            'ontology_name': ontology_name,
            'nb_terms': m_ontology.number_of_terms
        })
def testSubsets(self):
    """Subsets referenced by a loaded GO term carry a non-empty definition."""
    self.loader.options['process_relations'] = False
    self.loader.options['process_parents'] = False
    with dal.session_scope() as session:
        query = helpers.Term(
            ontology_name='go',
            iri='http://purl.obolibrary.org/obo/GO_0099565')
        o_term = self.client.detail(query)
        m_term = self.loader.load_term(o_term, 'go', session)
        session.add(m_term)
        for subset in session.query(Subset).all():
            self.assertIsNotNone(subset.definition)
        # A subset property fetched directly from OLS also has a definition.
        subset = helpers.Property(
            ontology_name='go',
            iri=
            'http://www.geneontology.org/formats/oboInOwl#hasBroadSynonym')
        details = self.client.detail(subset)
        self.assertNotEqual(details.definition, '')
'drop': not arguments.keep, 'echo': arguments.verbose, 'db_version': arguments.release } if arguments.host_url is None: db_url = 'sqlite:///' + expanduser("~") + '/' + db_name + '.sqlite' options.update({'pool_size': None}) else: db_url = rreplace( '{}/{}?charset=utf8'.format(arguments.host_url, db_name), '//', '/', 1) print('Db Url set to:', db_url) print('Loader arguments:', db_url, options) response = input("Confirm to proceed (y/N)? ") if response.upper() != 'Y': logging.info('Process cancelled') exit(0) loader = OlsLoader(db_url, **options) if not arguments.keep: logger.info('Wiping %s ontology', arguments.ontology) loader.wipe_ontology(ontology_name=arguments.ontology) logger.info('Ontology %s reset', arguments.ontology) logger.info('Loading ontology %s', arguments.ontology) with dal.session_scope() as session: n_terms, n_ignored = loader.load_ontology_terms( arguments.ontology, 0, 20) logger.info('...Done')
def load_ontology_terms(self, ontology, start=None, end=None):
    """Load terms for `ontology` from OLS, optionally restricted to a slice.

    The slice end is clamped to the actual number of terms reported by
    OLS, since the ontology and terms API calls can disagree.

    :param ontology: ontology short name (OLS identifier)
    :param start: optional slice start index (inclusive)
    :param end: optional slice end index (exclusive, clamped)
    :return: (nb_terms loaded, nb_terms ignored); (None, None) when the
        clamped slice is empty/inverted; (0, 0) when the ontology is not
        found in OLS.
    """
    nb_terms = 0
    nb_terms_ignored = 0
    o_ontology = self.client.ontology(identifier=ontology)
    terms_log = self.get_term_logger(ontology, start, end)
    report = self.get_ontology_logger(ontology)
    if o_ontology:
        self.current_ontology = o_ontology.ontology_id.upper()
        if start is not None and end is not None:
            terms_log.info('Loading terms slice [%s, %s]', start, end)
            # TODO move this slice fix into ols-client when dealing with
            # discrepancies between number of terms between ontology /
            # terms api calls
            max_terms = len(o_ontology.terms()) - 1
            min_end = min(end, max_terms)
            terms_log.debug('Which resolve to [%s, %s]', start, min_end)
            terms_log.info('-----------------------------------------')
            if min_end < start:
                terms_log.warning("Wrong slice order.min:%s max:%s ",
                                  start, min_end)
                # skip this chunk
                return None, None
            terms = o_ontology.terms()[start:min_end]
            terms_log.info('Slice len %s', len(terms))
            report.info('- Loading %s terms slice [%s:%s]', ontology,
                        start, end)
        else:
            terms = o_ontology.terms()
            terms_log.info('Loading %s terms for %s', len(terms),
                           o_ontology.ontology_id.upper())
            report.info('- Loading all terms (%s)', len(terms))
        with dal.session_scope() as session:
            for o_term in terms:
                # Only terms defined in this ontology and carrying an
                # accession are loaded; everything else counts as ignored.
                if o_term.is_defining_ontology and has_accession(o_term):
                    terms_log.debug('Term %s', o_term)
                    m_ontology, created = get_one_or_create(
                        Ontology,
                        session,
                        name=self.current_ontology,
                        namespace=o_term.namespace,
                        create_method_kwargs=dict(
                            version=o_ontology.version,
                            title=o_ontology.title))
                    terms_log.debug('Loaded term (from OLS) %s', o_term)
                    terms_log.debug(
                        'Adding/Retrieving namespaced ontology %s',
                        o_term.namespace)
                    terms_log.debug('Ontology namespace %s %s',
                                    m_ontology.name, m_ontology.namespace)
                    if m_ontology.namespace != o_term.namespace:
                        terms_log.warning(
                            'discrepancy term/ontology namespace')
                        # FIX: original passed the object as a logging
                        # argument with no %s placeholder, which makes the
                        # logging module raise a formatting error.
                        terms_log.warning('term: %s', o_term)
                        terms_log.warning('ontology: %s', o_ontology)
                    term = self.load_term(o_term, m_ontology, session)
                    if term:
                        session.add(term)
                        nb_terms += 1
                else:
                    terms_log.info('Ignored term [%s:%s]',
                                   o_term.is_defining_ontology,
                                   o_term.short_form)
                    nb_terms_ignored += 1
            terms_log.info(
                '- Expected %s terms (defined in accepted ontology)',
                nb_terms)
            terms_log.info(
                '- Ignored %s terms (not defined in accepted ontology)',
                nb_terms_ignored)
            return nb_terms, nb_terms_ignored
    else:
        report.info('Ontology not found %s', ontology)
        terms_log.warning('Ontology not found %s', ontology)
        return 0, 0
def wipe_ontology(ontology_name):
    """
    Completely remove all ontology related data from DBs

    For every namespaced Ontology row matching the (upper-cased) name,
    deletes in dependency order: synonyms, relations (as child and as
    parent), closures (child, parent and subparent sides), alt_ids,
    terms, then the ontology row itself. Meta rows whose key contains
    the ontology name are removed first.

    :param ontology_name: specified ontology short name
    :return: boolean whether or not Ontology has been successfully deleted
    """
    with dal.session_scope() as session:
        logger.info('Wipe ontology %s', ontology_name)
        try:
            # Remove meta entries referencing this ontology (LIKE match on key).
            metas = session.query(Meta).filter(
                Meta.meta_key.like("%" + ontology_name + "%")).all()
            for meta in metas:
                logger.debug('Deleted meta %s', meta)
                session.delete(meta)
            # One Ontology row may exist per namespace; wipe each of them.
            ontologies = session.query(Ontology).filter_by(
                name=ontology_name.upper()).all()
            for ontology in ontologies:
                logger.info('Deleting namespaced ontology %s - %s',
                            ontology.name, ontology.namespace)
                # Bulk deletes use synchronize_session=False: the deleted
                # rows are never touched again via this session.
                res = session.query(Synonym).filter(
                    Synonym.term_id == Term.term_id,
                    Term.ontology_id == ontology.id).delete(
                        synchronize_session=False)
                logger.info('Wiped %s synonyms', res)
                # Relations must be removed from both sides of the link.
                rel = session.query(Relation).filter(
                    Relation.child_term_id == Term.term_id,
                    Term.ontology_id == ontology.id).delete(
                        synchronize_session=False)
                rel2 = session.query(Relation).filter(
                    Relation.parent_term_id == Term.term_id,
                    Term.ontology_id == ontology.id).delete(
                        synchronize_session=False)
                logger.info('Wiped %s Relations', rel + rel2)
                # Closure rows reference terms via three columns; clear all.
                clo = session.query(Closure).filter(
                    Closure.child_term_id == Term.term_id,
                    Term.ontology_id == ontology.id).delete(
                        synchronize_session=False)
                clo1 = session.query(Closure).filter(
                    Closure.parent_term_id == Term.term_id,
                    Term.ontology_id == ontology.id).delete(
                        synchronize_session=False)
                clo2 = session.query(Closure).filter(
                    Closure.subparent_term_id == Term.term_id,
                    Term.ontology_id == ontology.id).delete(
                        synchronize_session=False)
                logger.info('Wiped %s Closure', clo + clo1 + clo2)
                res = session.query(AltId).filter(
                    AltId.term_id == Term.term_id,
                    Term.ontology_id == ontology.id).delete(
                        synchronize_session=False)
                logger.info('Wiped %s AltIds', res)
                # Terms go last among dependents, then the ontology itself.
                res = session.query(Term).filter(
                    Term.ontology_id == ontology.id).delete(
                        synchronize_session=False)
                logger.info('Wiped %s Terms', res)
                session.delete(ontology)
                logger.debug('...Done')
            return True
        except NoResultFound:
            logger.error('Ontology %s not found !', ontology_name)
            return False