Esempio n. 1
0
    def export_flat_obo(self, filepath, termPrefix):
        """Export all synonyms of this object as terms of a new OBO file.

        A fresh, empty GeneOntology is created; every entry of ``self.mSyns``
        is converted with ``self.syn2goterm(syn, synid)`` and the resulting
        terms are added to the ontology, which is then saved.

        :param filepath: destination handed to ``GeneOntology.saveFile``.
        :param termPrefix: currently unused. The synonym id itself is passed
            to ``syn2goterm`` instead of a generated ``termPrefix + counter``
            id; the parameter is kept so existing callers keep working.
        """
        go = GeneOntology()

        newterms = []

        for synid in self.mSyns:
            got = self.syn2goterm(self.mSyns[synid], synid)

            # Conversions that produce None are left out. Use an identity
            # check so a custom __eq__/__ne__ on the term cannot interfere.
            if got is not None:
                newterms.append(got)

        go.addterms(newterms)
        go.saveFile(filepath)
Esempio n. 2
0
from synonymes.GeneOntology import GeneOntology
from textdb.NcitTerm2Symbols import NcitTermSymbolDB
from collections import defaultdict

from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID

# Parse the NCIT ontology from the OBO file below the shared data directory.
ncitObo = GeneOntology(dataDir + "miRExplore/obodir/ncit.obo")
# loadFromFolder() is called without arguments, so the location is whatever
# NcitTermSymbolDB uses by default.
ncitTerm2Sym = NcitTermSymbolDB.loadFromFolder()

# Nothing is appended to this list in the visible part of the script.
vAllSyns = []

# Create one Synonym object per ontology term.
for termID in ncitObo.dTerms:

    oboNode = ncitObo.dTerms[termID]

    oboID = oboNode.id
    oboName = oboNode.name

    oboSyns = oboNode.synonym
    oboRels = oboNode.is_a

    # The Synonym is created from the term id and always receives the
    # term's primary name.
    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    # synonym may be None (hence the guard); each entry carries its text in .syn.
    if oboSyns != None:
        for x in oboSyns:
            newSyn.addSyn(x.syn)

    # Everything that iterating org_term2symbol yields.
    # NOTE(review): presumably organism identifiers -- confirm against
    # NcitTermSymbolDB.
    allOrgs = [x for x in ncitTerm2Sym.org_term2symbol]
Esempio n. 3
0
                                           rtype="gene",
                                           normGeneSymbols=normGeneSymbols,
                                           switchLR=True,
                                           stopAfter=-1)

    relDBs = [mirelPMIDhsa, mirelPMIDmmu]

    requiredPMIDs = set()
    for rdb in relDBs:

        assert (isinstance(rdb, DataBaseDescriptor))

        for rpmid in rdb.get_evidence_docids():
            requiredPMIDs.add(rpmid)

    diseaseObo = GeneOntology(args.obodir + "/doid.obo")

    #{'group': 'disease', 'termid': 'DOID:1936', 'name': 'atherosclerosis'}
    #{'group': 'disease', 'termid': 'DOID:2349', 'name': 'arteriosclerosis'}
    #{'group': 'disease', 'termid': 'DOID:1287', 'name': 'cardiovascular system disease'},
    elemTerm = diseaseObo['DOID:1936']
    elemTerms = [x.term.id for x in elemTerm.getAllChildren()] + [elemTerm.id]

    cvTerm = diseaseObo['DOID:1287']
    cvTerms = [x.term.id
               for x in cvTerm.getAllChildren()] + [cvTerm.id] + elemTerms

    pmid2disease = PMID2XDB.loadFromFile(args.pmidBase + "/disease.pmid",
                                         diseaseObo, requiredPMIDs)

    # number of genes with interaction
Esempio n. 4
0
def _load_context_dbs(base, label, ontologies, requiredDocuments):
    """Return the context databases (GO, disease, FMA, cell line, NCIT) of one corpus.

    If ``base + "/dbs.pickle"`` exists, it is unpickled and returned as stored.
    Otherwise the five PMID2XDB instances are loaded from the ``*.pmid`` files
    below ``base``, written to that pickle file as a cache, and returned.

    :param base: directory of the corpus (PMID or PMC aggregation).
    :param label: corpus name; only used in the progress messages.
    :param ontologies: ``(goObo, diseaseObo, fmaObo, cellObo, ncitObo)``, in
        the same order as the returned databases.
    :param requiredDocuments: document ids forwarded to PMID2XDB.loadFromFile.
    :return: sequence ``(go, disease, fma, cell, ncit)``.
    """
    picklePath = base + "/dbs.pickle"

    if os.path.isfile(picklePath):
        print(datetime.datetime.now(), "Loading pickle", label)
        # SECURITY NOTE: unpickling executes code stored in the file. This is
        # only acceptable because the cache is written by this application;
        # never point it at files from an untrusted source.
        with open(picklePath, 'rb') as fin:
            contextDBs = pickle.load(fin)
        print(datetime.datetime.now(), "Loading pickle", label, "ended")

        return contextDBs

    # (progress label, file below base); order matches ``ontologies``.
    sources = (
        ("GO", "/go.pmid"),
        ("Disease", "/disease.pmid"),
        ("FMA", "/model_anatomy.pmid"),
        ("cellline", "/celllines.pmid"),
        ("ncit", "/ncit.pmid"),
    )

    loaded = []
    for (name, filename), obo in zip(sources, ontologies):
        print(datetime.datetime.now(), "Loading", name)
        loaded.append(
            PMID2XDB.loadFromFile(base + filename, obo, requiredDocuments))

    contextDBs = tuple(loaded)

    print(datetime.datetime.now(), "Writing Pickle")
    with open(picklePath, 'wb') as fout:
        pickle.dump(contextDBs, fout)
    print(datetime.datetime.now(), "Finished Writing Pickle")

    return contextDBs


def start_app_from_args(args):
    """Load all databases the application needs into module-level globals.

    Reads the following attributes of ``args``: ``textmine`` (base folder of
    the aggregated PMID/PMC text-mining results), ``obodir`` (folder with the
    ontology files), ``sentdir`` / ``sentdir_pmc`` (sentence folders),
    ``feedback`` (passed to feedbackDB) and the switches ``load_mirecords``,
    ``load_mirtarbase``, ``load_diana`` and ``load_pmc``.

    Also relies on the module-level ``fileurl`` for the location of the
    static database files. Nothing is returned; the results are published
    through the globals declared below.
    """

    global mirFeedback
    global mirandaDB_mm10
    global relDBs
    global diseaseObo
    global goObo
    global cellObo
    global pmid2go
    global pmid2disease
    global pmid2fma
    global pmid2cell
    global testRels
    global mirelPMID
    global sentDB
    global featureViewer
    global symbol2ensemblDB
    global pmid2ncit
    global ncitObo

    global humanGeneNeighbourDB
    global mouseGeneNeighbourDB
    global geneNeighbourHoods

    global mi2mirna
    global dateDB

    pmidBase = args.textmine + '/aggregated_pmid/'
    pmcBase = args.textmine + '/aggregated_pmc/'

    normGeneSymbols = normalize_gene_names(path=fileurl +
                                           "/hgnc_no_withdrawn.syn")

    #mouseGeneNeighbourDB = GeneNeighbourDB.loadFromFile("mmu", inputgff=args.obodir + "/mm10_primary_assembly_and_lncRNA.gtf")
    #geneNeighbourHoods[mouseGeneNeighbourDB.orgid] = mouseGeneNeighbourDB

    print(datetime.datetime.now(), "Loading Sym2Ens")
    symbol2ensemblDB = SymbolEnsemblDB.loadFromFile(fileurl + "/sym2ens/")

    print(datetime.datetime.now(), "Loading MI2Mirna")
    mi2mirna = MI2Mirna.loadFromFile(fileurl + "/dbs/mirnas_mirbase.csv")

    print(datetime.datetime.now(), "Loading miranda interactions mm10")
    # Disabled loader, kept for reference:
    # mirandaDB_mm10 = MirandaRelDB.loadFromFile(filepath=args.obodir + "/mm10_interactionsAllGenes.txt", symbol2ens=symbol2ensemblDB, org="mmu")
    mirandaDB_mm10 = None

    # Optional curated databases; each stays None unless its switch is set.
    recordsDB = None
    mirtarbaseDB = None
    dianaDB, celllInfos = None, None

    if args.load_mirecords:
        print(datetime.datetime.now(), "Loading miRecords")
        recordsDB = miRecordDB.loadFromFile(filelocation=fileurl +
                                            "/dbs/mirecords_v4.xlsx",
                                            normGeneSymbols=normGeneSymbols)

    if args.load_mirtarbase:
        print(datetime.datetime.now(), "Loading miRTarBase")
        mirtarbaseDB = MirTarBaseDB.loadFromFile(
            filepath=fileurl + "/dbs/miRTarBase.csv",
            normGeneSymbols=normGeneSymbols)

    if args.load_diana:
        print(datetime.datetime.now(), "Loading hsa_mmu.diana")
        dianaDB, celllInfos = DIANATarbaseDB.loadFromFile(
            fileurl + "/dbs/hsa_mmu.diana.csv",
            normGeneSymbols=normGeneSymbols)

    print(datetime.datetime.now(), "Loading PMID2PMC")
    excludePMIDs = None

    if args.load_pmc:
        pmid2pmcDB = PMID2PMCDB.loadFromFile(pmcBase + '/pmc2pmid',
                                             PMC2PMID=True)
        # These PMIDs are handed to the PMID relation loaders as excludeIDs.
        excludePMIDs = pmid2pmcDB.getAllPMIDs()
        print("Got", len(excludePMIDs), "exclude PMIDs")

        if len(excludePMIDs) > 5:
            print(list(excludePMIDs)[:5])

    print(datetime.datetime.now(), "Finished PMID2PMC")

    print(datetime.datetime.now(), "Loading mirel")

    testRels = None  # TestRelLoader.loadFromFile(pmidBase + "/test_rels_4")

    print(datetime.datetime.now(), "Loading mirel PMID")
    mirelPMIDhsa = MiGenRelDB.loadFromFile(pmidBase + "/mirna_gene.hsa.pmid",
                                           ltype="mirna",
                                           rtype="gene",
                                           normGeneSymbols=normGeneSymbols,
                                           switchLR=True,
                                           excludeIDs=excludePMIDs)
    mirelPMIDmmu = MiGenRelDB.loadFromFile(pmidBase + "/mirna_gene.mmu.pmid",
                                           ltype="mirna",
                                           rtype="gene",
                                           normGeneSymbols=normGeneSymbols,
                                           switchLR=True,
                                           excludeIDs=excludePMIDs)

    print(datetime.datetime.now(), "Loading mirel PMC")
    mirelPMChsa = None
    mirelPMCmmu = None

    if args.load_pmc:
        mirelPMChsa = MiGenRelDB.loadFromFile(pmcBase + "/mirna_gene.hsa.pmid",
                                              ltype="mirna",
                                              rtype="gene",
                                              normGeneSymbols=normGeneSymbols,
                                              switchLR=True)
        mirelPMCmmu = MiGenRelDB.loadFromFile(pmcBase + "/mirna_gene.mmu.pmid",
                                              ltype="mirna",
                                              rtype="gene",
                                              normGeneSymbols=normGeneSymbols,
                                              switchLR=True)

    lncMirPMID = None  #MiGenRelDB.loadFromFile(pmidBase + "/lncrna_mirna.pmid", ltype="lncrna", rtype="mirna")
    geneLncPMID = None  #MiGenRelDB.loadFromFile(pmidBase + "/gene_lncrna.pmid", ltype="gene", rtype="lncrna")

    print(datetime.datetime.now(), "Finished mirel")

    print(datetime.datetime.now(), "Loading Dates")
    dateDB = PubmedDateDB.loadFromFile(pmidBase + "/allpmids.date")

    if args.load_pmc:
        pmc_dateDB = PubmedDateDB.loadFromFile(pmcBase + "/allpmc.date")
        dateDB.add_database(pmc_dateDB)

    print(datetime.datetime.now(), "Finished Dates")

    print(datetime.datetime.now(), "Loading mirWalk")
    mirWalkMMU3UTRDB = None  #MirWalkDB.loadFromFile('/mnt/c/ownCloud/data/miRExplore/mirwalk/mmu_miRWalk_3UTR.txt', org="mmu", bindSite="3UTR", normGeneSymbols=normGeneSymbols)
    print(datetime.datetime.now(), "Loading mirWalk finished")

    # Disabled or unselected databases are None and are dropped here.
    candidateDBs = [
        recordsDB, mirtarbaseDB, dianaDB, mirelPMIDhsa, mirelPMIDmmu,
        mirelPMChsa, mirelPMCmmu, lncMirPMID, geneLncPMID, mirandaDB_mm10,
        mirWalkMMU3UTRDB
    ]
    relDBs = [x for x in candidateDBs if x is not None]

    mirFeedback = feedbackDB(args.feedback)

    # Only documents that back at least one relation need sentences/context.
    requiredDocuments = set()
    for relDB in relDBs:
        requiredDocuments.update(relDB.get_evidence_docids())

    print("Requiring", len(requiredDocuments), "documents")

    print(datetime.datetime.now(), "Loading sents")
    print(datetime.datetime.now(), "Loading sents PMID")
    sentDB = SentenceDB.loadFromFile(args.sentdir,
                                     pmidBase + "/pmid2sent",
                                     requiredIDs=requiredDocuments)

    if args.load_pmc:
        print(datetime.datetime.now(), "Loading sents PMC")
        sentDBPMC = SentenceDB.loadFromFile(args.sentdir_pmc,
                                            pmcBase + "/pmc2sent",
                                            requiredIDs=requiredDocuments)
        print(datetime.datetime.now(), "Merging sentence DBs")
        sentDB.add_database(sentDBPMC)
    print(datetime.datetime.now(), "Finished sents")

    print(datetime.datetime.now(), "Loading ontologies")

    diseaseObo = GeneOntology(args.obodir + "/doid.obo")
    goObo = GeneOntology(args.obodir + "/go.obo")
    cellObo = GeneOntology(args.obodir + "/meta_cells.obo")
    ncitObo = GeneOntology(args.obodir + "/ncit.obo")
    fmaObo = GeneOntology(args.obodir + "/fma_obo.obo")

    print(datetime.datetime.now(), "Loading ontologies finished")

    # Order must match the (go, disease, fma, cell, ncit) result order.
    ontologies = (goObo, diseaseObo, fmaObo, cellObo, ncitObo)

    pmid2go, pmid2disease, pmid2fma, pmid2cell, pmid2ncit = _load_context_dbs(
        pmidBase, "PMID", ontologies, requiredDocuments)[:5]

    if args.load_pmc:
        # The PMC cache lives below pmcBase; it must not be read from the
        # PMID folder, otherwise the PMID databases are merged with themselves.
        pmc2go, pmc2disease, pmc2fma, pmc2cell, pmc2ncit = _load_context_dbs(
            pmcBase, "PMC", ontologies, requiredDocuments)[:5]

        print(datetime.datetime.now(), "Merging Context DBs")
        print(datetime.datetime.now(), "Merging Context GO")
        pmid2go.add_database(pmc2go)
        print(datetime.datetime.now(), "Merging Context DISEASE")
        pmid2disease.add_database(pmc2disease)
        print(datetime.datetime.now(), "Merging Context FMA")
        pmid2fma.add_database(pmc2fma)
        print(datetime.datetime.now(), "Merging Context CELL")
        pmid2cell.add_database(pmc2cell)
        print(datetime.datetime.now(), "Merging Context NCIT")
        pmid2ncit.add_database(pmc2ncit)
        print(datetime.datetime.now(), "Finished Merging Context DBs")

    if celllInfos is not None:
        # Cell information delivered alongside the DIANA database is attached
        # to the cell context DB, keyed by document id.
        print(datetime.datetime.now(), "Adding CelllInfo Features")
        for celllInfo in celllInfos:
            pmid2cell.docid2info[celllInfo['docid']].append(celllInfo)

        print(datetime.datetime.now(), "Finished Adding CelllInfo Features")

    print(datetime.datetime.now(), "Loading Features")
    #rfDB = RFamDB.loadFromFile(fileurl + "/dbs/rfam.regions.mirexplore")
    #featureViewerMMU = FeatureViewer('mmu', args.obodir, rfamDB=rfDB)
    #featureViewerHSA = FeatureViewer('hsa', args.obodir, rfamDB=rfDB)

    print(datetime.datetime.now(), "Loading Features finished")
    print(datetime.datetime.now(), "Loading finished")
Esempio n. 5
0
import os, sys
sys.path.insert(0, str(os.path.dirname(os.path.realpath(__file__))) + "/../")
sys.path.insert(0, "/mnt/f/dev/git/NERtoolkit/")

from collections import defaultdict
from synonymes.GeneOntology import GeneOntology

from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID

# Both paths come from the command line; the trailing comments show example
# values (a disease ontology as input, a .syn file as output).
infile = sys.argv[1]  # dataDir + "miRExplore/doid.obo"
outfile = sys.argv[2]  #"/mnt/d/dev/data/pmid_jun2020/synonyms/disease.syn"

# NOTE(review): despite its name this variable holds whatever ontology
# `infile` points to; the example path above is the disease ontology.
celloObo = GeneOntology(infile)

# Term ids collected here are tested against each term's id in the loop below.
ignoreTerms = set()

ignoreTerms.add("DOID:4")
print("Total terms:", len(celloObo.dTerms), "Ignore terms", len(ignoreTerms))

# Nothing is appended to this list in the visible part of the script.
vAllSyns = []

for cellID in celloObo.dTerms:

    oboNode = celloObo.dTerms[cellID]

    oboID = oboNode.id
    oboName = oboNode.name

    if oboID in ignoreTerms:
Esempio n. 6
0
import os, sys
sys.path.insert(0, str(os.path.dirname(os.path.realpath(__file__))) + "/../")

from collections import defaultdict
from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID
from synonymes.GeneOntology import GeneOntology

# Parse the foundational model of anatomy (FMA) ontology from the data directory.
bodypartsObo = GeneOntology(
    dataDir + "miRExplore/foundational_model_anatomy/fma_obo.obo")
# Nothing is appended to this list in the visible part of the script.
vAllSyns = []

# Create one Synonym object per ontology term.
for cellID in bodypartsObo.dTerms:

    oboNode = bodypartsObo.dTerms[cellID]

    oboID = oboNode.id
    oboName = oboNode.name

    oboSyns = oboNode.synonym
    oboRels = oboNode.is_a

    # The Synonym is created from the term id and always receives the
    # term's primary name.
    newSyn = Synonym(oboID)
    newSyn.addSyn(oboName)

    # Split the name at single spaces to count its words.
    aName = oboName.split(' ')

    # Only names consisting of two to four parts enter this branch.
    # NOTE(review): presumably an acronym is assembled in `acro`; the rest of
    # the branch is not visible here.
    if len(aName) > 1 and len(aName) < 5:

        acro = ""
Esempio n. 7
0
from synonymes.GeneOntology import GeneOntology
from utils.tmutils import normalize_gene_names

sys.path.insert(0, str(os.path.dirname("/mnt/d/dev/git/poreSTAT/")))

from porestat.utils.DataFrame import DataFrame, DataRow, ExportTYPE

from synonymes.mirnaID import miRNA, miRNAPART, miRNACOMPARISONLEVEL
from textdb.makeNetworkView import DataBasePlotter
from utils.cytoscape_grapher import CytoscapeGrapher

import matplotlib.pyplot as plt
from natsort import natsorted
if __name__ == '__main__':

    # Cell ontology, read from a hard-coded local path.
    cellObo = GeneOntology(
        "/mnt/d/owncloud/data/miRExplore/obodir/meta_cells.obo")

    # Short cell-type label -> ontology term ids belonging to that label.
    # NOTE(review): the labels look like abbreviations of cell types;
    # confirm their meaning with the author.
    cellTypeName2Terms = {
        "EC": ["META:52"],
        "MC": ["META:148", "META:99"],
        "FC": ["CL:0000891"],
        "SMC": ["META:83"],
    }

    # Receives one set per cell-type label in the loop that follows.
    cellType2AccTerms = {}
    for cellT in cellTypeName2Terms:

        cellType2AccTerms[cellT] = set()

        for et in cellTypeName2Terms[cellT]:
Esempio n. 8
0
            if len(line) > 0:
                accept_pmids.add(line)

    #resultBase = dataDir + "/miRExplore/textmine/results_pmc/"
    resultBase = args.resultdir

    oboSyns = SynfileMap(resultBase + "/synfile.map")
    oboSyns.loadSynFiles((args.mine_path, args.datadir))

    allfiles = glob.glob(resultBase + "/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    #allfileIDs = [894]

    celloObo = GeneOntology(args.obo.name)

    def getTerm(synid, obo):
        """Fetch the term for `synid` from `obo` via obo.getID().

        The id is used unchanged when it is a key of obo.dTerms; otherwise
        its first '_' is replaced by ':' before the lookup.
        """
        lookupID = synid if synid in obo.dTerms else synid.replace('_', ':', 1)

        return obo.getID(lookupID)

    def analyseFile(splitFileIDs, env):

        fileCoocs = []

        for splitFileID in splitFileIDs:
Esempio n. 9
0
from collections import defaultdict

from synonymes.GeneOntology import GeneOntology
from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID
import math
# Parse the Cellosaurus ontology from the shared data directory.
celloObo = GeneOntology(dataDir + "miRExplore/cellosaurus/cellosaurus.obo")
# Not filled in the visible part of the script.
tax2cells = defaultdict(set)

# Values of speciesName2TaxID as strings, for comparison with ids parsed
# from xref text below.
allowedTaxIDs = set([str(speciesName2TaxID[x]) for x in speciesName2TaxID])

# Not filled in the visible part of the script.
xref2cello = defaultdict(set)

# NOTE(review): presumably the xref prefixes of interest; not used in the
# visible part of the script.
considerXRefs = ['CL', 'CLO', 'DOID', 'UBERON']

for cellID in celloObo.dTerms:

    oboNode = celloObo.dTerms[cellID]
    oboXRefs = oboNode.xref

    # Every term starts with the pseudo taxonomy id 'all'.
    taxID = {'all'}
    # xref may be None (hence the guard).
    if oboXRefs != None:
        for xref in oboXRefs:
            if xref.startswith('NCBI_TaxID'):

                # Take the text before the first space, then the part after
                # the first ':' -- e.g. "NCBI_TaxID:9606 ..." gives "9606".
                newTaxID = xref.split(' ')[0].split(':')[1]

                # Taxonomy ids outside the allowed set are ignored.
                if newTaxID in allowedTaxIDs:
                    taxID.add(newTaxID)
Esempio n. 10
0
from collections import defaultdict

import sys, os
sys.path.insert(0, str(os.path.dirname("/mnt/d/dev/git/miRExplore/python/")))

from synonymes.GeneOntology import GeneOntology
from synonymes.Synonym import Synonym
from synonymes.SynonymUtils import handleCommonExcludeWords
from utils.idutils import dataDir, loadExludeWords, printToFile, speciesName2TaxID

# Path of the OBO file to inspect, taken from the command line.
filepath = sys.argv[1]

fileObo = GeneOntology(filepath)
# Not filled in the visible part of the script.
namespace2syn = defaultdict(set)

# Values of speciesName2TaxID as strings.
allowedTaxIDs = set([str(speciesName2TaxID[x]) for x in speciesName2TaxID])

# Collect every term object of the ontology in iteration order of dTerms.
allNodes = []
for cellID in fileObo.dTerms:
    oboNode = fileObo.dTerms[cellID]

    allNodes.append(oboNode)

# Exclude-word collections; the four categories named here are switched off.
globalKeywordExcludes = loadExludeWords(common=False,
                                        cell_co=False,
                                        disease=False,
                                        generic=False)

# Report every key whose value contains 'membrane'.
# NOTE(review): presumably a debugging aid -- confirm before relying on it.
for x in globalKeywordExcludes:
    if 'membrane' in globalKeywordExcludes[x]:
        print("Membrane: " + x)