def _get_phenotypicseries_parents(entry, g):
    """
    Extract the phenotypic series parent relationship out of the entry.

    Each phenotypic series number found on the entry is added to the graph
    as a class (prefixed with 'OMIM:'), and the entry itself is then added
    as a subclass of that series.  Nothing is collected unless
    ``entry['phenotypicSeriesExists'] is True``.

    :param entry: mapping for a single OMIM entry.  'mimNumber' is required;
        series numbers are read from ``entry['phenotypeMapList']`` and from
        ``entry['geneMap']['phenotypeMapList']`` when those keys are present.
    :param g: the graph object handed through to the GraphUtils calls
    :return: None

    """
    gu = GraphUtils(curie_map.get())
    omimid = 'OMIM:'+str(entry['mimNumber'])

    # the phenotypic series mappings
    serieslist = []
    if 'phenotypicSeriesExists' in entry \
            and entry['phenotypicSeriesExists'] is True:
        # gather every phenotype map list that may carry series numbers
        phenolists = []
        if 'phenotypeMapList' in entry:
            phenolists.append(entry['phenotypeMapList'])
        if 'geneMap' in entry and 'phenotypeMapList' in entry['geneMap']:
            phenolists.append(entry['geneMap']['phenotypeMapList'])

        for phenolist in phenolists:
            for p in phenolist:
                phenomap = p['phenotypeMap']
                # a map may lack a series number; skip it instead of
                # raising KeyError (both lists are now treated alike)
                if 'phenotypicSeriesNumber' in phenomap:
                    serieslist.append(phenomap['phenotypicSeriesNumber'])

    # add this entry as a subclass of the series entry
    for ser in serieslist:
        series_id = 'OMIM:'+ser
        gu.addClassToGraph(g, series_id, None)
        gu.addSubclass(g, series_id, omimid)
class Pathway():
    """
    Helper for describing pathways and the genes, gene products and other
    components that take part in them.

    All triples are written to the graph supplied at construction time
    through a GraphUtils instance.

    """

    # identifiers for the classes used when typing pathways and their parts
    pathway_parts = {
        'signal_transduction': 'GO:0007165',
        'cellular_process': 'GO:0009987',
        'pathway': 'PW:0000001',
        'gene_product': 'CHEBI:33695'  # bioinformation molecule
    }

    # relations used to link genes, gene products and pathways
    object_properties = {
        'involved_in': 'RO:0002331',
        'gene_product_of': 'RO:0002204',
        'has_gene_product': 'RO:0002205'
    }

    # independent copy, so the two dicts can diverge later
    properties = dict(object_properties)

    def __init__(self, graph, nobnodes=False):
        """
        Store the target graph and load the object properties into it.

        :param graph: graph that every method of this class writes to
        :param nobnodes: when true, generated gene-product identifiers get a
            leading ':' (see addGeneToPathway)

        """
        self.gu = GraphUtils(curie_map.get())
        self.graph = graph
        self.nobnodes = nobnodes

        self.gu.loadProperties(
            self.graph, self.object_properties, self.gu.OBJPROP)

    def addPathway(
            self, pathway_id, pathway_label, pathway_type=None,
            pathway_description=None):
        """
        Add a pathway as a class.

        When no pathway_type is given, the 'cellular_process' identifier
        from pathway_parts is used as the type passed to addClassToGraph.
        The pathway is also linked to the 'pathway' identifier through
        addSubclass.

        :param pathway_id: identifier of the pathway class
        :param pathway_label: label of the pathway class
        :param pathway_type: optional type identifier
        :param pathway_description: optional description
        :return: None

        """
        effective_type = (
            self.pathway_parts['cellular_process']
            if pathway_type is None else pathway_type)

        self.gu.addClassToGraph(
            self.graph, pathway_id, pathway_label, effective_type,
            pathway_description)

        pathway_root = self.pathway_parts['pathway']
        self.gu.addSubclass(self.graph, pathway_root, pathway_id)

    def addGeneToPathway(self, pathway_id, gene_id):
        """
        Link a gene to a pathway through an intermediate gene product.

        The gene product identifier is derived from gene_id (colons removed,
        '_' prepended, 'product' appended; an extra leading ':' when
        nobnodes is set).  The resulting triples are:

            gene_id        RO:has_gene_product  _gene_product
            _gene_product  RO:involved_in       pathway_id

        :param pathway_id: identifier of the pathway
        :param gene_id: identifier of the gene
        :return: None

        """
        flattened_gene = re.sub(r':', '', gene_id)
        prefix = ':_' if self.nobnodes else '_'
        product_id = prefix + flattened_gene + 'product'

        product_type = self.pathway_parts['gene_product']
        self.gu.addIndividualToGraph(
            self.graph, product_id, None, product_type)

        has_product = self.object_properties['has_gene_product']
        self.gu.addTriple(self.graph, gene_id, has_product, product_id)

        self.addComponentToPathway(pathway_id, product_id)

    def addComponentToPathway(self, pathway_id, component_id):
        """
        State that a component is directly involved in a pathway.

        Use addGeneToPathway instead when a transforming event is performed
        on the component first.

        :param pathway_id: identifier of the pathway
        :param component_id: identifier of the involved component
        :return: None

        """
        involved_in = self.object_properties['involved_in']
        self.gu.addTriple(
            self.graph, component_id, involved_in, pathway_id)
def _get_equivids(self, limit):
    """
    Add the GeneReviews-to-OMIM mappings from the 'idmap' file to the graph.

    The file processed here is of the format:
    #NBK_id GR_shortname    OMIM
    NBK1103 trimethylaminuria       136132
    NBK1103 trimethylaminuria       602079
    NBK1104 cdls    122470
    Where each of the rows represents a mapping between
    a gr id and an omim id. These are a 1:many relationship,
    and some of the omim ids are genes (not diseases).
    Therefore, we need to create a loose coupling here.
    We make the assumption that these NBKs are generally higher-level
    grouping classes; therefore the OMIM ids are treated as subclasses.
    (This assumption is poor for those omims that are actually genes,
    but we have no way of knowing what those are here...
    we will just have to deal with that for now.)

    Rows that do not have exactly three columns (for example blank lines),
    and rows whose OMIM number is longer than six characters, are logged
    and skipped.

    :param limit: when not None (and not in test mode), stop reading the
        file once the line counter exceeds this value; it is also passed
        on to ``OMIM.process_entries``
    :return: None

    """
    raw = '/'.join((self.rawdir, self.files['idmap']['file']))
    gu = GraphUtils(curie_map.get())
    line_counter = 0

    # we look some stuff up in OMIM, so initialize here
    omim = OMIM()
    id_map = {}
    allomimids = set()
    with open(raw, 'r', encoding="utf8") as csvfile:
        filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
        for row in filereader:
            line_counter += 1
            if line_counter == 1:  # skip header
                continue

            # csv.reader yields [] for a blank line; unpacking such a row
            # (or one with extra/missing columns) would abort the ingest
            if len(row) != 3:
                logger.warning(
                    "Expected 3 columns but found %d " +
                    "in row %d; skipping:\n%s",
                    len(row), line_counter, '\t'.join(row))
                continue

            (nbk_num, shortname, omim_num) = row
            gr_id = 'GeneReviews:'+nbk_num
            omim_id = 'OMIM:'+omim_num

            # in test mode, only keep the rows for the test ids
            if self.testMode and omim_id not in self.test_ids:
                continue

            # sometimes there's bad omim nums
            if len(omim_num) > 6:
                logger.warning(
                    "OMIM number incorrectly formatted " +
                    "in row %d; skipping:\n%s",
                    line_counter, '\t'.join(row))
                continue

            # build up a hashmap of the mappings; then process later
            id_map.setdefault(nbk_num, set()).add(omim_num)

            # add the class along with the shortname
            gu.addClassToGraph(self.graph, gr_id, None)
            gu.addSynonym(self.graph, gr_id, shortname)

            allomimids.add(omim_num)

            if not self.testMode and \
                    limit is not None and line_counter > limit:
                break

    # end looping through file

    # get the omim ids that are not genes
    entries_that_are_phenotypes = \
        omim.process_entries(
            list(allomimids), filter_keep_phenotype_entry_ids,
            None, None, limit)

    logger.info("Filtered out %d/%d entries that are genes or features",
                len(allomimids)-len(entries_that_are_phenotypes),
                len(allomimids))

    for nbk_num in self.book_ids:
        gr_id = 'GeneReviews:'+nbk_num
        # books without any mapping simply contribute no OMIM subclasses
        for omim_num in id_map.get(nbk_num, ()):
            omim_id = 'OMIM:'+omim_num
            # add the gene reviews as a superclass to the omim id,
            # but only if the omim id is not a gene
            if omim_id in entries_that_are_phenotypes:
                gu.addClassToGraph(self.graph, omim_id, None)
                gu.addSubclass(self.graph, gr_id, omim_id)
        # add this as a generic subclass of DOID:4
        gu.addSubclass(self.graph, 'DOID:4', gr_id)