Example #1
0
from collections import Counter

from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, loadExludeWords

# Root directory of the text-mining results (note: already ends with a slash,
# so the "/" joins below produce a doubled separator in the final paths).
resultBase = dataDir + "/miRExplore/textmine/results/"
# Counter for synonyms found in the index files.
# NOTE(review): where it is filled is not visible in this part of the script.
indexFoundSyns = Counter()
# Exclude words, loaded with loadExludeWords' default options.
excludedSyns = loadExludeWords()

# Name of the results sub-directory (below resultBase) that is inspected.
checkResultsFor = 'disease'
# How far below maxFiles the file-number range of the loop extends.
analyseFiles = 100
# Highest file number; the loop over the index files starts here.
maxFiles = 892

# Synonym-file map that belongs to the selected results directory.
checkSynsMap = SynfileMap(resultBase + "/" + checkResultsFor + "/synfile.map")
# NOTE(review): presumably a (stored prefix, local prefix) pair used to relocate
# the synonym files — confirm against SynfileMap.loadSynFiles.
checkSynsMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# Walk the index files from the highest file number downwards.
# NOTE(review): with this stop value the loop visits analyseFiles + 1 files
# (maxFiles down to maxFiles - analyseFiles, inclusive) — confirm that the
# extra file is intended.
for splitFileID in range(maxFiles, maxFiles - analyseFiles - 1, -1):

    # Zero-pad the file number to four digits, e.g. 892 -> "0892".
    fileID = "{:04d}".format(splitFileID)

    print(fileID)

    indexFile = resultBase + "/" + checkResultsFor + "/medline17n" + fileID + ".index"
    foundHits = SyngrepHitFile(indexFile, checkSynsMap)

    # Iterate the documents of the hit file and fetch the hits of each one.
    for doc in foundHits:

        docHits = foundHits.getHitsForDocument(doc)

        for hit in docHits:
Example #2
0
    # Add every synonym attached to the OBO term itself.
    if oboSyns is not None:
        for x in oboSyns:
            newSyn.addSyn(x.syn)

    # Snapshot of the keys of the per-organism term -> symbol tables.
    allOrgs = list(ncitTerm2Sym.org_term2symbol)

    for org in allOrgs:

        # Lookup key: the part of the OBO id after the first ':'.
        ncitID = oboID[oboID.index(":") + 1:]

        if ncitID in ncitTerm2Sym.org_term2symbol[org]:

            orgSyms = ncitTerm2Sym.org_term2symbol[org][ncitID]

            # Add the symbols recorded for this term under this organism.
            for sym in orgSyms:
                newSyn.addSyn(sym)

    vAllSyns.append(newSyn)

# Exclude words are loaded with the default options, but they are not applied
# below: None is passed in their place.
globalKeywordExcludes = loadExludeWords()

vPrintSyns = handleCommonExcludeWords(vAllSyns,
                                      None,
                                      mostCommonCount=100,
                                      maxCommonCount=5)  # None replaces globalKeywordExcludes here
# Earlier output location, kept for reference:
#printToFile(vPrintSyns, dataDir + "/miRExplore/textmine/synonyms/ncit.syn")
# Write the processed synonyms to the NCIT synonym file, using the utf8 codec.
printToFile(vPrintSyns,
            "/mnt/d/dev/data/pmid_jun2020/synonyms/ncit.syn",
            codec='utf8')
Example #3
0
    oboRels = oboNode.is_a

    # Start the synonym entry with the term id and its primary name.
    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    aName = oboName.split(' ')

    # For names of two to four words that end in "cell", additionally register
    # the acronym built from the initials of the words.
    if 1 < len(aName) < 5:

        acro = ""
        if aName[-1].upper() == 'CELL':
            # Skip empty tokens (consecutive spaces) so indexing cannot fail.
            acro = "".join(word[0].upper() for word in aName if word)

        # Only register an acronym that was actually built; otherwise an
        # empty string would be added as a synonym.
        if acro:
            newSyn.addSyn(acro)

    # Add every synonym attached to the OBO term itself.
    if oboSyns is not None:
        for x in oboSyns:
            newSyn.addSyn(x.syn)

    #print(str(taxID) + " " + str(newSyn))

    vAllSyns.append(newSyn)

# Load the exclude words with the cell_co option switched off; all other
# options keep their defaults.
globalKeywordExcludes = loadExludeWords(cell_co=False)

# Process the collected synonyms against the exclude words.
vPrintSyns = handleCommonExcludeWords(vAllSyns,
                                      globalKeywordExcludes,
                                      mostCommonCount=200,
                                      maxCommonCount=5)
# NOTE(review): no codec argument is passed here, so printToFile falls back to
# its default — confirm that encoding is acceptable for this file.
printToFile(vPrintSyns,
            "/mnt/d/dev/data/pmid_jun2020/synonyms/model_anatomy.syn")
Example #4
0
        # Gather the synonyms of this entry that are exactly one character long.
        removeSyns = [synword for synword in syn.syns if len(synword) == 1]

        # Report and drop them, but only when there is something to drop.
        if removeSyns:
            print(syn.id, removeSyns)

            syn.removeSyn(removeSyns)

    # Previous exclude-word configuration, kept for reference:
    #exWords = loadExludeWords(common=True, generic=True, disease=False, taxnames=False, cell_co=False)

    # Load the exclude words with generic switched on and cell_co, common and
    # syngrep switched off; options not named here keep their defaults.
    exWords = loadExludeWords(cell_co=False,
                              common=False,
                              generic=True,
                              syngrep=False)
    # Process the collected synonyms against exWords.  The removeSyn predicate
    # is true for entries whose id starts with 'MIR' and does not end with 'HG'.
    # NOTE(review): presumably those entries are dropped from the output —
    # confirm in handleCommonExcludeWords.
    vPrintSyns = handleCommonExcludeWords(
        vAllSyns,
        exWords,
        mostCommonCount=500,
        maxCommonCount=7,
        addAlphaBeta=True,
        addHyphenGene=True,
        removeSyn=lambda synonym: synonym.id.startswith(
            'MIR') and not synonym.id.endswith('HG'))

    # Write the processed synonyms to the MGI synonym file, using the utf8 codec.
    printToFile(vPrintSyns,
                "/mnt/d/dev/data/pmid_jun2020/synonyms/mgi.syn",
                codec="utf8")
    """
Example #5
0
# Input file path, taken from the first command-line argument.
filepath = sys.argv[1]

fileObo = GeneOntology(filepath)
namespace2syn = defaultdict(set)

# String form of every value in speciesName2TaxID.
allowedTaxIDs = {str(speciesName2TaxID[x]) for x in speciesName2TaxID}

# Every term object of the loaded file, in the iteration order of dTerms.
allNodes = [fileObo.dTerms[cellID] for cellID in fileObo.dTerms]

# Exclude words with the common, cell_co, disease and generic options off.
globalKeywordExcludes = loadExludeWords(common=False,
                                        cell_co=False,
                                        disease=False,
                                        generic=False)

# Debug aid: print the key of every exclude entry that contains 'membrane'.
for x in globalKeywordExcludes:
    if 'membrane' in globalKeywordExcludes[x]:
        print("Membrane: " + x)

synSet = set()

for node in allNodes:
    newSyn = Synonym(node.id)
    newSyn.addSyn(node.name)

    if node.synonym != None:
        for x in node.synonym:
            if x == None: