def _get_mp_classes(self, mp_uri):
    """Load the mammalian phenotype ontology and index its classes.

    A fresh ``OntologyClassReader`` is stored on ``self.mp_ontology`` and
    populated from ``mp_uri``. For every current class that has an entry
    in ``classes_paths`` the following lookups are filled in:

    * ``self.mps``: short key -> ``{"label", "path", "path_codes"}``,
      where the short key is the last ``/``-separated segment of the
      class id with ``:`` replaced by ``_``;
    * ``self.mp_labels``: label -> class id;
    * ``self.mp_to_label``: class id -> label;
    * ``self.top_levels``: class id -> first element of each path.

    Classes without an entry in ``classes_paths`` are logged as a warning
    and skipped.

    Args:
        mp_uri: Location handed to
            ``load_mammalian_phenotype_ontology`` together with the reader.
    """
    # Load the ontology.
    self.mp_ontology = OntologyClassReader()
    opentargets_ontologyutils.mp.load_mammalian_phenotype_ontology(
        self.mp_ontology, mp_uri)

    # TODO: this is a moderately hideous bit of pointless munging, but
    # there was no time to fix it when it was written.
    for mp_id, label in list(self.mp_ontology.current_classes.items()):
        if mp_id not in self.mp_ontology.classes_paths:
            self._logger.warning("cannot find paths for "+mp_id)
            continue

        class_paths = self.mp_ontology.classes_paths[mp_id]
        mp_class = {
            "label": label,
            "path": class_paths['all'],
            "path_codes": class_paths['ids'],
        }

        # Short key: last URI segment with ':' swapped for '_'.
        short_key = mp_id.split("/")[-1].replace(":", "_")
        self.mps[short_key] = mp_class
        self.mp_labels[label] = mp_id
        self.mp_to_label[mp_id] = label

        # The first element of every path is kept as a top-level entry.
        self.top_levels[mp_id] = [path[0] for path in mp_class["path"]]
def _process_ontology_data(self):
    """Load the disease ontology and build one ``EFO`` entry per class.

    A fresh ``OntologyClassReader`` is stored on ``self.disease_ontology``
    and populated from ``self.efo_uri``. Disease phenotypes are fetched
    through ``DiseaseUtils.get_disease_phenotypes``. Every current class
    is then turned into an ``EFO`` object and stored in ``self.efos``
    under the last id of the class's first path.
    """
    self.disease_ontology = OntologyClassReader()
    opentargets_ontologyutils.efo.load_open_targets_disease_ontology(
        self.disease_ontology, self.efo_uri)
    ontology = self.disease_ontology

    # Get all phenotypes.
    # opentargets_ontologyutils, for legacy reasons, iterates over
    # (key, uri) pairs, hence the enumerate().
    disease_phenotype_uris_counter = enumerate(self.disease_phenotype_uris)
    disease_phenotypes = DiseaseUtils().get_disease_phenotypes(
        ontology, self.hpo_uri, self.mp_uri, disease_phenotype_uris_counter)

    definition_key = 'http://purl.obolibrary.org/obo/IAO_0000115'
    synonym_key = 'http://www.ebi.ac.uk/efo/alternative_term'

    for uri, label in ontology.current_classes.items():
        properties = ontology.parse_properties(URIRef(uri))

        # Create a text block definition/description by joining all
        # definition values together.
        definition = (
            ". ".join(properties[definition_key])
            if definition_key in properties
            else ''
        )
        synonyms = properties[synonym_key] if synonym_key in properties else []
        phenotypes = (
            disease_phenotypes[uri]['phenotypes']
            if uri in disease_phenotypes
            else []
        )

        class_paths = ontology.classes_paths[uri]

        # The first label of each path, with duplicates removed.
        therapeutic_labels = self._remove_duplicates(
            [path_labels[0] for path_labels in class_paths['labels']])

        efo = EFO(
            code=uri,
            label=label,
            synonyms=synonyms,
            phenotypes=phenotypes,
            path=class_paths['all'],
            path_codes=class_paths['ids'],
            path_labels=class_paths['labels'],
            therapeutic_labels=therapeutic_labels,
            definition=definition)

        # Key of the entry: last id of the first path.
        short_code = class_paths['ids'][0][-1]
        if uri in ontology.children:
            efo.children = ontology.children[uri]
        self.efos[short_code] = efo
def __init__(self, es_hosts, es_index, es_mappings, es_settings,
        eco_uri, so_uri,
        workers_write, queue_write):
    """Store the configuration and create empty ontology containers.

    All arguments are stored unchanged on the instance under the same
    names. NOTE(review): the ``es_*`` arguments presumably describe an
    Elasticsearch target and ``eco_uri`` / ``so_uri`` ontology
    locations; confirm against the callers.
    """
    # Index configuration.
    self.es_hosts, self.es_index = es_hosts, es_index
    self.es_mappings, self.es_settings = es_mappings, es_settings

    # Ontology locations.
    self.eco_uri, self.so_uri = eco_uri, so_uri

    # Writer configuration.
    self.workers_write, self.queue_write = workers_write, queue_write

    # Containers that start out empty.
    self.ecos = OrderedDict()
    self.evidence_ontology = OntologyClassReader()
def _process_ontology_data(self):
    """Load the disease ontology and build one ``EFO`` entry per class path.

    A fresh ``OntologyClassReader`` is stored on ``self.disease_ontology``
    and populated from ``self.efo_uri``. Disease phenotypes are fetched
    through ``DiseaseUtils.get_disease_phenotypes``. Every URI in
    ``classes_paths`` is turned into an ``EFO`` object and stored in
    ``self.efos`` under its short code (the last id of its first path).
    If a short code is already present, a warning is logged and the
    earlier entry is kept.

    Raises:
        RuntimeError: If a URI in ``classes_paths`` differs from the
            ``"uri"`` of the last node of its first ``"all"`` path.
    """
    self.disease_ontology = OntologyClassReader()
    opentargets_ontologyutils.efo.load_open_targets_disease_ontology(
        self.disease_ontology, self.efo_uri)

    # Get all phenotypes.
    # opentargets_ontologyutils, for legacy reasons, iterates over
    # (key, uri) pairs, hence the enumerate().
    disease_phenotype_uris_counter = enumerate(self.disease_phenotype_uris)
    utils = DiseaseUtils()
    disease_phenotypes = utils.get_disease_phenotypes(
        self.disease_ontology, self.hpo_uri, self.mp_uri,
        disease_phenotype_uris_counter)

    definition_property = 'http://purl.obolibrary.org/obo/IAO_0000115'
    # Exact, related (partially overlapping), generic and narrow synonyms.
    # hasBroadSynonym is left out on purpose: it is better captured by the
    # parent term.
    synonym_properties = (
        'http://www.geneontology.org/formats/oboInOwl#hasExactSynonym',
        'http://www.geneontology.org/formats/oboInOwl#hasRelatedSynonym',
        'http://www.geneontology.org/formats/oboInOwl#hasSynonym',
        'http://www.geneontology.org/formats/oboInOwl#hasNarrowSynonym',
    )

    for uri in self.disease_ontology.classes_paths:
        # Get the short code form of the uri.
        classes_path = self.disease_ontology.classes_paths[uri]
        short_code = classes_path['ids'][0][-1]
        label = classes_path['labels'][0][-1]

        if uri != classes_path["all"][0][-1]["uri"]:
            raise RuntimeError('mismatch between uri and classes_path["all"][0][-1]["uri"] %s %s' % (uri, classes_path["all"][0][-1]["uri"]))

        properties = self.disease_ontology.parse_properties(URIRef(uri))

        # Create a text block definition/description by joining all
        # definition values together.
        definition = ''
        if definition_property in properties:
            definition = ". ".join(properties[definition_property])

        # Build a set of all the relevant synonyms.
        synonyms = set()
        for synonym_property in synonym_properties:
            if synonym_property in properties:
                synonyms.update(properties[synonym_property])

        phenotypes = []
        if uri in disease_phenotypes:
            phenotypes = disease_phenotypes[uri]['phenotypes']

        # No membership re-check is needed here: uri comes from iterating
        # classes_paths and has already been used to index it above.
        therapeutic_labels = self.disease_ontology.therapeutic_labels[uri]
        therapeutic_uris = self.disease_ontology.therapeutic_uris[uri]
        therapeutic_codes = [
            self.disease_ontology.classes_paths[ta_uri]['ids'][0][-1]
            for ta_uri in therapeutic_uris
        ]

        efo = EFO(code=uri,
                  label=label,
                  synonyms=synonyms,
                  phenotypes=phenotypes,
                  path=classes_path['all'],
                  path_codes=classes_path['ids'],
                  path_labels=classes_path['labels'],
                  therapeutic_labels=therapeutic_labels,
                  therapeutic_codes=therapeutic_codes,
                  definition=definition)

        if uri in self.disease_ontology.children:
            efo.children = self.disease_ontology.children[uri]

        self.logger.debug("done %s %s %s", short_code, uri, label)

        # Keep the first entry seen for a short code; later ones are
        # reported and dropped.
        if short_code in self.efos:
            self.logger.warning("duplicate %s", short_code)
            continue
        self.efos[short_code] = efo
def __init__(self, loader, eco_uri, so_uri):
    """Store the loader and ontology locations; create empty containers.

    Args:
        loader: Stored unchanged on ``self.loader``.
        eco_uri: Stored unchanged on ``self.eco_uri``.
        so_uri: Stored unchanged on ``self.so_uri``.
    """
    # Values supplied by the caller.
    self.loader = loader
    self.eco_uri, self.so_uri = eco_uri, so_uri

    # Containers that start out empty.
    self.ecos = OrderedDict()
    self.evidence_ontology = OntologyClassReader()