def getSynIDs(oboLocation, selOboIDs):
    """Collect the ids of the selected ontology terms and of all their children.

    Loads the ontology at ``oboLocation``. For every term whose id is
    contained in ``selOboIDs`` the term's own id is recorded, followed by the
    ids of everything its ``getAllChildren()`` returns.

    Args:
        oboLocation: Location of the OBO file, handed to ``GeneOntology``.
        selOboIDs: Container of term ids (only tested with ``in``).

    Returns:
        list of str: The collected ids in encounter order, with every ``:``
        replaced by ``_``. Duplicates are not removed.
    """
    ontology = GeneOntology(oboLocation)

    allSynIDs = []

    for termID in ontology.dTerms:
        oboNode = ontology.dTerms[termID]
        oboID = oboNode.id

        if oboID not in selOboIDs:
            continue

        # The selected term itself comes first, then its children.
        allSynIDs.append(oboID)
        allSynIDs.extend(child.term.id for child in oboNode.getAllChildren())

    return [synID.replace(":", '_') for synID in allSynIDs]
Beispiel #2
0
def printTerms(oboLocation, selOboIDs, excludeIDs=None, printDepht=3):
    """Print the selected ontology terms and hand each to ``printChildren``.

    For every term of the ontology at ``oboLocation`` whose id is contained
    in ``selOboIDs`` a ``Base <id> <name>`` line is printed, then
    ``printChildren`` is called for that term.

    Args:
        oboLocation: Location of the OBO file, handed to ``GeneOntology``.
        selOboIDs: Container of term ids (only tested with ``in``).
        excludeIDs: Optional iterable of term ids. The ids of everything
            returned by ``getAllChildren()`` for these terms are collected
            and passed on to ``printChildren``; the listed ids themselves
            are not added.
        printDepht: Not used by this function; kept so existing callers
            keep working.
    """
    ontology = GeneOntology(oboLocation)

    allExclIDs = set()

    if excludeIDs is not None:
        for elem in excludeIDs:
            exclElem = ontology.dTerms[elem]
            allExclIDs.update(child.term.id for child in exclElem.getAllChildren())

    for termID in ontology.dTerms:
        oboNode = ontology.dTerms[termID]

        if oboNode.id in selOboIDs:
            print("Base", oboNode.id, oboNode.name)
            # NOTE(review): the literal 5 is presumably a depth limit that
            # printDepht was meant to supply - confirm against printChildren
            # before wiring it up, since that would change the output.
            printChildren(oboNode, 5, 1, allExclIDs)
# Load the miR2Disease table. bConvertTextToNumber=False presumably keeps
# every cell as text - confirm against DataFrame.parseFromFile.
dbData = DataFrame.parseFromFile(
    miRExploreDir + "/miR2Disease/mirna_disease.tsv",
    bConvertTextToNumber=False,
)

# Upper-cased disease names of all rows; rows whose disease is the literal
# string 'None' are ignored.
allDiseases = set()
for row in dbData:
    disease = row['disease']
    if disease != 'None':
        allDiseases.add(disease.upper())

print(len(allDiseases))

diseaseObo = GeneOntology(miRExploreDir + "/doid.obo")

# Starts empty; unknown keys default to an empty set.
disease2obo = defaultdict(set)

"""

find perfect matches

"""
for oboID in diseaseObo.dTerms:
    oboNode = diseaseObo.dTerms[oboID]
    doidName = oboNode.name

    upperDiseaseName = doidName.upper()

    if upperDiseaseName in allDiseases:
Beispiel #4
0
    args = parser.parse_args()

    #resultBase = dataDir + "/miRExplore/textmine/results_pmc/"
    resultBase = args.resultdir

    diseaseSyns = SynfileMap(resultBase + "/go/synfile.map")
    diseaseSyns.loadSynFiles(
        ('/home/users/joppich/ownCloud/data/', args.datadir))

    allfiles = glob.glob(resultBase + "/go/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    #allfileIDs = [894]

    fmaObo = GeneOntology(args.datadir + "miRExplore/go/go.obo")

    def getTerm(synid, obo):
        """Look up the ontology term for a synonym id.

        The id is used as given when ``obo.dTerms`` contains it; otherwise
        its first ``_`` is turned into ``:`` before the lookup. Returns
        whatever ``obo.getID`` returns for the resulting id.
        """
        termID = synid if synid in obo.dTerms else synid.replace('_', ':', 1)
        return obo.getID(termID)

    def analyseFile(splitFileIDs, env):

        fileCoocs = []

        for splitFileID in splitFileIDs:
    dataDir
from database.Neo4JInterface import neo4jInterface
from utils.parallel import MapReduce
from enum import Enum

# Base folder of the PMC text-mining results; the disease hits are read from
# its "disease" subfolder.
resultBase = dataDir + "/miRExplore/textmine/results_pmc/"

diseaseSyns = SynfileMap(resultBase + "/disease/synfile.map")
# NOTE(review): the tuple looks like an (old prefix, new prefix) path
# replacement - confirm against SynfileMap.loadSynFiles.
diseaseSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# File ids are the *.index file names with ".index" removed, sorted in
# descending string order.
allfiles = glob.glob(resultBase + "/disease/*.index")
allfileIDs = sorted(
    (os.path.basename(indexPath).replace(".index", "") for indexPath in allfiles),
    reverse=True,
)

#allfileIDs = [894]

celloObo = GeneOntology(dataDir + "miRExplore/doid.obo")


def analyseFile(splitFileIDs, env):

    fileCoocs = []

    for splitFileID in splitFileIDs:

        diseaseFile = resultBase + "/disease/" + splitFileID + ".index"

        diseaseHits = SyngrepHitFile(diseaseFile, diseaseSyns)
        if len(diseaseHits) == 0:
            continue

        sentFile = "/mnt/c/dev/data/pmc/allsent/" + splitFileID + ".sent"
    dataDir
from database.Neo4JInterface import neo4jInterface
from utils.parallel import MapReduce
from enum import Enum

# Base folder of the PMC text-mining results; the cell line hits are read
# from its "cellline" subfolder.
resultBase = dataDir + "/miRExplore/textmine/results_pmc/"

# Despite its name, this map is loaded from the cellline synfile map.
diseaseSyns = SynfileMap(resultBase + "/cellline/synfile.map")
# NOTE(review): the tuple looks like an (old prefix, new prefix) path
# replacement - confirm against SynfileMap.loadSynFiles.
diseaseSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# One id per *.index file: the file name with ".index" removed, in
# descending string order.
allfiles = glob.glob(resultBase + "/cellline/*.index")
allfileIDs = sorted(
    [os.path.basename(indexPath).replace(".index", "") for indexPath in allfiles],
    reverse=True)

#allfileIDs = [894]

celloObo = GeneOntology(dataDir + "miRExplore/meta_cells.obo")


def getTerm(synid, obo):
    """Return ``obo.getID`` for a synonym id.

    Ids that are not contained in ``obo.dTerms`` get their first ``_``
    replaced by ``:`` before the lookup; known ids are looked up unchanged.
    """
    if synid not in obo.dTerms:
        synid = synid.replace('_', ':', 1)

    return obo.getID(synid)


def analyseFile(splitFileIDs, env):

    fileCoocs = []
Beispiel #7
0
    dataDir
from database.Neo4JInterface import neo4jInterface
from utils.parallel import MapReduce
from enum import Enum

# Base folder of the PMC text-mining results; the GO hits are read from its
# "go" subfolder.
resultBase = dataDir + "/miRExplore/textmine/results_pmc/"

# Despite its name, this map is loaded from the go synfile map.
diseaseSyns = SynfileMap(resultBase + "/go/synfile.map")
# NOTE(review): the tuple looks like an (old prefix, new prefix) path
# replacement - confirm against SynfileMap.loadSynFiles.
diseaseSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

allfiles = glob.glob(resultBase + "/go/*.index")

# Strip ".index" from each file name, then order the ids descending.
allfileIDs = [os.path.basename(indexPath).replace(".index", "") for indexPath in allfiles]
allfileIDs.sort(reverse=True)

#allfileIDs = [894]

fmaObo = GeneOntology(dataDir + "miRExplore/go/go.obo")


def analyseFile(splitFileIDs, env):

    fileCoocs = []

    for splitFileID in splitFileIDs:

        diseaseFile = resultBase + "/go/" + splitFileID + ".index"

        diseaseHits = SyngrepHitFile(diseaseFile, diseaseSyns)
        if len(diseaseHits) == 0:
            continue

        sentFile = "/mnt/c/dev/data/pmc/allsent/" + splitFileID + ".sent"
    args = parser.parse_args()

    #resultBase = dataDir + "/miRExplore/textmine/results_pmc/"
    resultBase = args.resultdir

    diseaseSyns = SynfileMap(resultBase + "/cellline/synfile.map")
    diseaseSyns.loadSynFiles(
        ('/home/users/joppich/ownCloud/data/', args.datadir))

    allfiles = glob.glob(resultBase + "/cellline/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    #allfileIDs = [894]

    celloObo = GeneOntology(args.datadir + "miRExplore/meta_cells.obo")

    def getTerm(synid, obo):
        """Return ``obo.getID`` for a synonym id.

        Ids that are not contained in ``obo.dTerms`` get their first ``_``
        replaced by ``:`` before the lookup; known ids are looked up
        unchanged.
        """
        if synid not in obo.dTerms:
            synid = synid.replace('_', ':', 1)

        return obo.getID(synid)

    def analyseFile(splitFileIDs, env):

        fileCoocs = []

        for splitFileID in splitFileIDs:
Beispiel #9
0
from collections import defaultdict
from nertoolkit.geneontology.GeneOntology import GeneOntology

from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID

from collections import defaultdict
from nertoolkit.geneontology.GeneOntology import GeneOntology

from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID

celloObo = GeneOntology(dataDir + "miRExplore/cell_ontology/cl.obo")
vAllSyns = []

# Term name -> set of term ids carrying that name.
allOboNames = defaultdict(set)

for cellID in celloObo.dTerms:
    oboNode = celloObo.dTerms[cellID]
    oboID = oboNode.id

    # Only terms whose id starts with "CL" are recorded.
    if oboID.startswith("CL"):
        oboName = oboNode.name
        allOboNames[oboName].add(oboID)
Beispiel #10
0
            taxName = " ".join(aTaxName[0:min(2, len(aTaxName))])

            try:
                int(taxName)
            except ValueError:
                globalKeywordExcludes.add(taxName)

        globalKeywordExcludes = sorted(globalKeywordExcludes)
        printToFile(
            globalKeywordExcludes,
            dataDir + "miRExplore/textmine/excludes/exclude_words.names.syn")
        print("Done: names")

# Optional pass: gather names of GO terms in the cellular_component
# namespace into globalKeywordExcludes.
if addGO:

    goObo = GeneOntology(dataDir + "miRExplore/textmine/excludes/go.obo")

    # This pass starts from a fresh, empty set.
    globalKeywordExcludes = set()

    # NOTE(review): the loop variable `id` shadows the builtin of the same name.
    for id in goObo.dTerms:

        child = goObo.dTerms[id]
        # Only terms whose namespace contains 'cellular_component' contribute.
        if 'cellular_component' in child.namespace:

            globalKeywordExcludes.add(child.name)

            # Both the synonym collection and its entries are checked
            # against None before use.
            if child.synonym != None:
                for syn in child.synonym:
                    if syn != None:

                        # NOTE(review): synWord is not used in the code
                        # visible here - presumably it is meant to be added
                        # to the excludes as well; confirm.
                        synWord = syn.syn
Beispiel #11
0
    args = parser.parse_args()

    #resultBase = dataDir + "/miRExplore/textmine/results_pmc/"
    resultBase = args.resultdir

    diseaseSyns = SynfileMap(resultBase + "/disease/synfile.map")
    diseaseSyns.loadSynFiles(
        ('/home/users/joppich/ownCloud/data/', args.datadir))

    allfiles = glob.glob(resultBase + "/disease/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    #allfileIDs = [894]

    celloObo = GeneOntology(args.datadir + "/miRExplore/doid.obo")

    def getTerm(synid, obo):
        """Look up the ontology term for a synonym id.

        The id is used as given when ``obo.dTerms`` contains it; otherwise
        its first ``_`` is turned into ``:`` before the lookup. Returns
        whatever ``obo.getID`` returns for the resulting id.
        """
        termID = synid if synid in obo.dTerms else synid.replace('_', ':', 1)
        return obo.getID(termID)

    def analyseFile(splitFileIDs, env):

        fileCoocs = []

        for splitFileID in splitFileIDs:
Beispiel #12
0
from collections import defaultdict
from nertoolkit.geneontology.GeneOntology import GeneOntology

from database.Neo4JInterface import neo4jInterface
from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID, eprint

celloObo = GeneOntology(dataDir + "miRExplore/cellosaurus/cellosaurus.obo")

# Lookup tables, all empty at this point.
tax2cells = defaultdict(set)
id2node = {}
id2species = defaultdict(set)
id2derived_from = defaultdict(set)

# String form of every value stored in speciesName2TaxID.
allowedTaxIDs = {str(speciesName2TaxID[species]) for species in speciesName2TaxID}

for cellID in celloObo.dTerms:

    oboNode = celloObo.dTerms[cellID]

    oboID = oboNode.id
    oboName = oboNode.name

    oboSyns = oboNode.synonym
    oboXRefs = oboNode.xref
    oboRels = oboNode.is_a

    taxID = set()
    if oboXRefs != None:
        for xref in oboXRefs:
    seenElements[iprElem.id] = iprElem

# Consistency report over seenElements: print every parent id that is not
# itself a key of seenElements, print every element, and count the elements
# that have no parents at all.
noParents = 0
for iprID in seenElements:

    iprElem = seenElements[iprID]

    if len(iprElem.parents) == 0:
        noParents += 1

    # Named parentID rather than `id` so the builtin id() is not shadowed
    # for the rest of the script.
    for parentID in iprElem.parents:
        if parentID not in seenElements:
            print(parentID)

    print(iprElem)

print("Elem Count", len(seenElements))
print("Root Count", noParents)

# Assemble an ontology from the collected elements and write it out.
interProObo = GeneOntology()

for iprID in seenElements:
    iprElem = seenElements[iprID]
    # Every element is stored under its own key in converted form.
    interProObo.dTerms[iprID] = iprElem.to_go_term()

interProObo.linkChildren()
interProObo.saveFile(interproFolder + "/interpro.obo")
Beispiel #14
0
from collections import defaultdict
from nertoolkit.geneontology.GeneOntology import GeneOntology

from database.Neo4JInterface import neo4jInterface
from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID

diseaseObo = GeneOntology(dataDir + "miRExplore/doid.obo")
tax2cells = defaultdict(set)

# id2node: term id -> {'id', 'name'} record.
# id2derived_from: term id -> ids of the terms it points to via is_a.
id2node = {}
id2derived_from = defaultdict(set)


for cellID in diseaseObo.dTerms:

    oboNode = diseaseObo.dTerms[cellID]

    oboID = oboNode.id
    oboName = oboNode.name
    oboRels = oboNode.is_a

    id2node[oboID] = {'id': oboID, 'name': oboName}

    # A term without is_a relations gets no id2derived_from entry at all,
    # hence the explicit add() per relation instead of a bulk update.
    if oboRels is not None:
        for rel in oboRels:
            term = rel.term
            id2derived_from[oboID].add(term.id)

db = neo4jInterface(simulate=False)
Beispiel #15
0
            if len(line) > 0:
                accept_pmids.add(line)

    #resultBase = dataDir + "/miRExplore/textmine/results_pmc/"
    resultBase = args.resultdir

    oboSyns = SynfileMap(resultBase + "/synfile.map")
    oboSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', args.datadir))

    allfiles = glob.glob(resultBase + "/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    #allfileIDs = [894]

    celloObo = GeneOntology(args.obo.name)

    def getTerm(synid, obo):
        """Return ``obo.getID`` for a synonym id.

        Ids that are not contained in ``obo.dTerms`` get their first ``_``
        replaced by ``:`` before the lookup; known ids are looked up
        unchanged.
        """
        if synid not in obo.dTerms:
            synid = synid.replace('_', ':', 1)

        return obo.getID(synid)

    def analyseFile(splitFileIDs, env):

        fileCoocs = []

        for splitFileID in splitFileIDs:
Beispiel #16
0
    args = parser.parse_args()

    #resultBase = dataDir + "/miRExplore/textmine/results_pmc/"
    resultBase = args.resultdir

    diseaseSyns = SynfileMap(resultBase + "/model_anatomy/synfile.map")
    diseaseSyns.loadSynFiles(
        ('/home/users/joppich/ownCloud/data/', args.datadir))

    allfiles = glob.glob(resultBase + "/model_anatomy/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    #allfileIDs = [894]

    fmaObo = GeneOntology(args.datadir +
                          "miRExplore/foundational_model_anatomy/fma_obo.obo")

    def getTerm(synid, obo):
        """Look up the ontology term for a synonym id.

        The id is used as given when ``obo.dTerms`` contains it; otherwise
        its first ``_`` is turned into ``:`` before the lookup. Returns
        whatever ``obo.getID`` returns for the resulting id.
        """
        termID = synid if synid in obo.dTerms else synid.replace('_', ':', 1)
        return obo.getID(termID)

    def analyseFile(splitFileIDs, env):

        fileCoocs = []

        for splitFileID in splitFileIDs:
Beispiel #17
0
from collections import defaultdict
from nertoolkit.geneontology.GeneOntology import GeneOntology

from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID

celloObo = GeneOntology(dataDir + "miRExplore/textmine/neutrophils.obo")
vAllSyns = []

# Print one Synonym per ontology term: the term id with the term name and
# all of the term's synonyms attached.
for cellID in celloObo.dTerms:

    oboNode = celloObo.dTerms[cellID]

    oboID = oboNode.id
    oboName = oboNode.name
    oboSyns = oboNode.synonym

    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    # Terms may come without a synonym collection.
    if oboSyns is not None:
        for x in oboSyns:
            newSyn.addSyn(x.syn)

    print(newSyn)