# Build one Synonym per term of the NCIt OBO file: the term name plus every
# OBO synonym, followed by a per-organism lookup of the symbols stored for the
# term in NcitTermSymbolDB.
ncitObo = GeneOntology(dataDir + "miRExplore/obodir/ncit.obo")
ncitTerm2Sym = NcitTermSymbolDB.loadFromFolder()

vAllSyns = []

# The organism keys do not depend on the term, so snapshot them once instead
# of rebuilding the list for every term.
allOrgs = list(ncitTerm2Sym.org_term2symbol)

for termID in ncitObo.dTerms:
    oboNode = ncitObo.dTerms[termID]
    oboID = oboNode.id
    oboName = oboNode.name
    oboSyns = oboNode.synonym
    # Not read in the visible part of the loop; kept because the loop body
    # continues past this excerpt.
    oboRels = oboNode.is_a

    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    # Identity test: `!= None` would dispatch to a user-defined __ne__.
    if oboSyns is not None:
        for x in oboSyns:
            newSyn.addSyn(x.syn)

    # Part of the ID after the first ':'; this is the key used in the
    # per-organism tables. It is the same for every organism, so it is
    # computed once per term.
    ncitID = oboID[oboID.index(":") + 1:]

    for org in allOrgs:
        if ncitID in ncitTerm2Sym.org_term2symbol[org]:
            orgSyms = ncitTerm2Sym.org_term2symbol[org][ncitID]
# Excerpt from a synonym-building script; the enclosing loop headers and the
# original indentation are not visible here.
# Visible steps, in order:
#   1. a symbol x is skipped when it starts with "EC " or when its upper-cased
#      form is "TH1"; every other x is appended to allSyms
#   2. printID is mgiID with its first ':' replaced by '_'; when locID2sym
#      holds a non-empty entry for it, mgiID (once unchanged, once with ':'
#      replaced by '_') is appended to allSyms and printID is replaced by the
#      locID2sym entry (presumably all three statements after the test belong
#      to that `if` - confirm)
#   3. "<printID>:<syn>|<syn>|..." is built, parsed with Synonym.parseFromLine
#      and the result is appended to vAllSyns
#   4. for every entry of vAllSyns the one-character synonyms are gathered,
#      printed together with the entry id and passed to syn.removeSyn
# NOTE(review): whether syn.removeSyn(...) sits inside the
# `len(removeSyns) > 0` test cannot be determined from this collapsed line.
if x.startswith("EC "): continue if x.upper() in ["TH1"]: continue allSyms.append(x) printID = mgiID.replace(':', '_', 1) if printID in locID2sym and len(locID2sym[printID]) > 0: allSyms.append(mgiID) allSyms.append(mgiID.replace(':', '_')) printID = locID2sym[printID] synline = printID + ":" + "|".join(allSyms) synonyme = Synonym.parseFromLine(synline) vAllSyns.append(synonyme) for syn in vAllSyns: removeSyns = [] for synword in syn.syns: if len(synword) == 1: removeSyns.append(synword) if len(removeSyns) > 0: print(syn.id, removeSyns) syn.removeSyn(removeSyns)
# Build one Synonym per term of the OBO file under
# foundational_model_anatomy/: the term name, an initials acronym for short
# names ending in "cell", and every OBO synonym of the term.
bodypartsObo = GeneOntology(
    dataDir + "miRExplore/foundational_model_anatomy/fma_obo.obo")

vAllSyns = []

for cellID in bodypartsObo.dTerms:
    oboNode = bodypartsObo.dTerms[cellID]
    oboID = oboNode.id
    oboName = oboNode.name
    oboSyns = oboNode.synonym
    # Not read in the visible part of the loop; kept because the loop body
    # may continue past this excerpt.
    oboRels = oboNode.is_a

    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    # Names of 2-4 space-separated tokens whose last token is "cell"
    # (any case) additionally get their upper-cased initials as a synonym.
    # NOTE(review): the collapsed source does not show whether addSyn(acro)
    # was nested under the 'CELL' test; it is placed there so that no empty
    # acronym is registered - confirm against the original file.
    aName = oboName.split(' ')
    if 1 < len(aName) < 5 and aName[-1].upper() == 'CELL':
        # split(' ') yields empty tokens for doubled spaces; skip them so
        # that taking the first character cannot raise IndexError.
        acro = "".join(word[0].upper() for word in aName if word)
        newSyn.addSyn(acro)

    # Identity test: `!= None` would dispatch to a user-defined __ne__.
    if oboSyns is not None:
        for x in oboSyns:
            newSyn.addSyn(x.syn)
# Excerpt that turns one tab-separated record (`line`) into a Synonym; the
# code producing `line` and the original indentation are not visible here.
# Visible steps, in order:
#   - the record is split on tabs and each field stripped; field 1 is the
#     primary name, field 2 holds the alternative names
#   - field 2 is parsed with csv.reader using the 'phenotypes' dialect
#     (presumably registered elsewhere in the file) and every parsed element,
#     followed by the primary name, is collected in `names`
#   - a Synonym with id 'DISEASE<n>' is created, n being len(vAllSyns)+1, and
#     the primary name is added to it
#   - for each collected name a leading '[D]', '[X]' or '[M]' marker is cut
#     off and the name is split on ' - ' into xsyns (or kept whole)
# NOTE(review): `for line in csv.reader(...)` rebinds the name `line` that
# holds the input record; altNames is written to but not read in this excerpt.
# NOTE(review): the body of the final `for xsyn in xsyns:` loop lies outside
# this excerpt.
aline = [x.strip() for x in line.split('\t')] name = aline[1] altNames = StringIO() altNames.write(aline[2] + "\n") names = [] for line in csv.reader([aline[2]], dialect='phenotypes'): for elem in line: names.append(elem) names = names + [name] newSyn = Synonym( 'DISEASE' + str(len(vAllSyns)+1)) newSyn.addSyn(name) for x in names: if x.startswith('[D]') or x.startswith('[X]') or x.startswith('[M]'): x = x[3:] xsyns = [] if ' - ' in x: xsyns += x.split(' - ') else: xsyns.append(x) for xsyn in xsyns:
# Excerpt from the body of a loop over celloObo.dTerms (loop header and
# original indentation are not visible here).
# Visible steps, in order:
#   - terms whose id does not start with "CL" are skipped
#   - for the id 'CL:1000413' the id and the term name are printed
#     (looks like a debugging aid - confirm)
#   - a Synonym is created for the term id and the term name is added
#   - every OBO synonym is added as well, except entries that are None and
#     entries whose text is contained in allOboNames
# NOTE(review): the body of the final `for x in newSyn.syns:` loop lies
# outside this excerpt; oboRels is assigned but not read in the visible part.
oboNode = celloObo.dTerms[cellID] oboID = oboNode.id if not oboID.startswith("CL"): continue if oboID == 'CL:1000413': print(oboID) print(oboNode.name) oboName = oboNode.name oboSyns = oboNode.synonym oboRels = oboNode.is_a newSyn = Synonym(oboID) newSyn.addSyn(oboName) if oboSyns != None: for x in oboSyns: if x == None: continue if x.syn in allOboNames: continue newSyn.addSyn(x.syn) for x in newSyn.syns:
# NOTE(review): this append is presumably the last statement of a loop whose
# header precedes this excerpt - keep it at that loop's indentation.
allNodes.append(oboNode)

globalKeywordExcludes = loadExludeWords(common=False, cell_co=False, disease=False, generic=False)

# Report every key of the exclude table whose entry contains 'membrane'.
for x in globalKeywordExcludes:
    if 'membrane' in globalKeywordExcludes[x]:
        print("Membrane: " + x)

# One Synonym per collected node: the node name plus all of its synonyms.
synSet = set()
for node in allNodes:
    newSyn = Synonym(node.id)
    newSyn.addSyn(node.name)

    # Identity tests: `!= None` / `== None` would dispatch to a user-defined
    # __ne__ / __eq__ on the synonym objects.
    if node.synonym is not None:
        for x in node.synonym:
            if x is None:
                continue
            newSyn.addSyn(x.syn)

    synSet.add(newSyn)

# Filter the collected synonyms against the exclude table.
vPrintSyns = handleCommonExcludeWords(synSet, globalKeywordExcludes, mostCommonCount=66, maxCommonCount=5, minSynCount=0)