예제 #1
0
# Load the NCIt ontology and the per-organism term -> symbol lookup.
ncitObo = GeneOntology(dataDir + "miRExplore/obodir/ncit.obo")
ncitTerm2Sym = NcitTermSymbolDB.loadFromFolder()

vAllSyns = []

# The organism keys are the same for every ontology term, so they are listed
# once here rather than once per term.
allOrgs = list(ncitTerm2Sym.org_term2symbol)

for termID in ncitObo.dTerms:

    oboNode = ncitObo.dTerms[termID]

    oboID = oboNode.id
    oboName = oboNode.name

    oboSyns = oboNode.synonym
    oboRels = oboNode.is_a

    # Start a synonym entry keyed by the ontology ID, seeded with the term
    # name and every synonym attached to the node.
    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    if oboSyns is not None:
        for x in oboSyns:
            newSyn.addSyn(x.syn)

    # The lookup tables are keyed by the part of the ID after the first ':'.
    # This value depends on the term only, so it is computed once per term.
    ncitID = oboID[oboID.index(":") + 1:]

    for org in allOrgs:

        if ncitID in ncitTerm2Sym.org_term2symbol[org]:

            orgSyms = ncitTerm2Sym.org_term2symbol[org][ncitID]
예제 #2
0
            # Candidates that start with "EC " are not kept as synonyms.
            if x.startswith("EC "):
                continue

            # "TH1" is dropped as well; the comparison ignores case.
            if x.upper() in ["TH1"]:
                continue

            allSyms.append(x)

        # Replace only the first ':' of the ID to form the key looked up in
        # locID2sym.
        printID = mgiID.replace(':', '_', 1)
        if printID in locID2sym and len(locID2sym[printID]) > 0:
            # A non-empty entry exists: keep the ID itself (in both its ':' and
            # '_' spellings) as synonyms and use the looked-up value as the
            # identifier of the output line instead.
            # NOTE(review): the looked-up value is concatenated with ":" below,
            # so it is presumably a string — confirm against where locID2sym
            # is built.
            allSyms.append(mgiID)
            allSyms.append(mgiID.replace(':', '_'))
            printID = locID2sym[printID]

        # Assemble one "<id>:<syn1>|<syn2>|..." line and let Synonym parse it.
        synline = printID + ":" + "|".join(allSyms)
        synonyme = Synonym.parseFromLine(synline)

        vAllSyns.append(synonyme)

    # Strip one-character synonyms from every collected entry, printing the
    # entry ID together with the synonyms that are about to be removed.
    for syn in vAllSyns:
        removeSyns = [word for word in syn.syns if len(word) == 1]

        if removeSyns:
            print(syn.id, removeSyns)
            syn.removeSyn(removeSyns)
예제 #3
0
# Load the Foundational Model of Anatomy ontology.
bodypartsObo = GeneOntology(
    dataDir + "miRExplore/foundational_model_anatomy/fma_obo.obo")
vAllSyns = []

for cellID in bodypartsObo.dTerms:

    oboNode = bodypartsObo.dTerms[cellID]

    oboID = oboNode.id
    oboName = oboNode.name

    oboSyns = oboNode.synonym
    oboRels = oboNode.is_a

    # Start a synonym entry keyed by the ontology ID, seeded with the term name.
    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    aName = oboName.split(' ')

    # Names of two to four words whose last word is "cell" additionally get an
    # acronym made of the upper-cased word initials. The acronym is only added
    # when it was actually built, so no empty synonym is passed to addSyn for
    # other names. Empty tokens (from consecutive spaces) have no initial and
    # are left out of the acronym.
    if 1 < len(aName) < 5 and aName[-1].upper() == 'CELL':
        acro = "".join([x[0].upper() for x in aName if x])
        newSyn.addSyn(acro)

    if oboSyns is not None:
        for x in oboSyns:
            newSyn.addSyn(x.syn)
예제 #4
0
        # Split the tab-separated record and trim whitespace around each field.
        aline = [x.strip() for x in line.split('\t')]

        # The second field is used as the entry's name.
        name = aline[1]

        # NOTE(review): altNames is written to here but not read again in the
        # visible part of this block — confirm it is still needed.
        altNames = StringIO()
        altNames.write(aline[2] + "\n")

        names = []

        # The third field is parsed with the csv dialect named 'phenotypes'
        # (its registration is outside this view) and every resulting element
        # is collected as a name.
        # NOTE(review): this loop rebinds `line`, the variable the record was
        # split from above.
        for line in csv.reader([aline[2]], dialect='phenotypes'):
            for elem in line:
                names.append(elem)

        names = names + [name]

        # The numeric suffix of the ID is the count of entries in vAllSyns
        # plus one.
        newSyn = Synonym( 'DISEASE' + str(len(vAllSyns)+1))
        newSyn.addSyn(name)

        for x in names:

            # Drop a leading three-character tag ([D], [X] or [M]).
            if x.startswith('[D]') or x.startswith('[X]') or x.startswith('[M]'):
                x = x[3:]

            xsyns = []

            # Names containing " - " are split into their parts; all other
            # names are kept whole.
            if ' - ' in x:
                xsyns += x.split(' - ')
            else:
                xsyns.append(x)

            for xsyn in xsyns:
예제 #5
0
    # Look up the ontology node for the current term ID.
    oboNode = celloObo.dTerms[cellID]

    oboID = oboNode.id

    # Only terms whose ID starts with "CL" are processed.
    if not oboID.startswith("CL"):
        continue

    # Diagnostic output for one specific term.
    if oboID == 'CL:1000413':
        print(oboID)
        print(oboNode.name)

    oboName = oboNode.name
    oboSyns = oboNode.synonym
    oboRels = oboNode.is_a

    # Start a synonym entry keyed by the ontology ID, seeded with the term name.
    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    if oboSyns != None:
        for x in oboSyns:

            # Skip missing synonym entries.
            if x == None:
                continue

            # Skip synonyms that are contained in allOboNames (built outside
            # this view; presumably the names of all ontology terms — TODO
            # confirm).
            if x.syn in allOboNames:
                continue

            newSyn.addSyn(x.syn)

    for x in newSyn.syns:
예제 #6
0
    allNodes.append(oboNode)

# Load the keyword exclusion data with the common, cell_co, disease and
# generic flags all set to False.
globalKeywordExcludes = loadExludeWords(common=False,
                                        cell_co=False,
                                        disease=False,
                                        generic=False)

# Print every key whose entry contains 'membrane'.
for x in globalKeywordExcludes:
    if 'membrane' in globalKeywordExcludes[x]:
        print("Membrane: " + x)

synSet = set()

# Build one Synonym per collected node from its name and its synonyms.
for node in allNodes:
    newSyn = Synonym(node.id)
    newSyn.addSyn(node.name)

    # Identity tests are used for None so that the checks do not depend on how
    # the node or synonym classes implement equality.
    if node.synonym is not None:
        for x in node.synonym:
            if x is None:
                continue
            newSyn.addSyn(x.syn)

    synSet.add(newSyn)

vPrintSyns = handleCommonExcludeWords(synSet,
                                      globalKeywordExcludes,
                                      mostCommonCount=66,
                                      maxCommonCount=5,
                                      minSynCount=0)