Example #1
0
                        'termname': cellline,
                        'evidences': []
                    }

                    foundCellInfos.append(celllInfo)

            entry = DIANATarbaseEntry(
                (tuple(docOrgs), cellline, tissue, method, measure, direction),
                docID, (geneID, "gene"), (mirna, "mirna"), "DIANA", idx)

            ret.ltype2rel[geneID].add(entry)
            ret.rtype2rel[mirna].add(entry)

            ret.all_ltypes.add(geneID)
            ret.all_rtypes.add(mirna)

        return ret, foundCellInfos


if __name__ == '__main__':

    # Manual smoke test: load the gene synonym table and the DIANA TarBase
    # dump from hard-coded local paths, then print every relation found for
    # the gene CXCR4 as JSON.
    synonymFile = ("/mnt/c/ownCloud/data/miRExplore/obodir/" +
                   "/hgnc_no_withdrawn.syn")
    normGeneSymbols = normalize_gene_names(path=synonymFile)

    dianaFile = "/mnt/c/ownCloud/data/miRExplore/diana/hsa_mmu.diana.csv"
    ret, celllinfo = DIANATarbaseDB.loadFromFile(
        dianaFile, normGeneSymbols=normGeneSymbols)

    for rel in ret.get_rels('gene', 'CXCR4'):
        print(rel.toJSON())
Example #2
0
import re

from textdb.MiGenRelDB import MiGenRelDB
from utils.tmutils import normalize_gene_names

# Root directory of the local miRExplore data checkout.
mainPath = "/mnt/d/owncloud/data/miRExplore/"

# Gene synonym table handed to the relation loader below.
synonymPath = mainPath + "/obodir/" + "/hgnc_no_withdrawn.syn"
normGeneSymbols = normalize_gene_names(path=synonymPath)

# Human miRNA-gene relations aggregated per PMID.
relationPath = (mainPath + "/textmine/aggregated_pmid/" +
                "/mirna_gene.hsa.pmid")
mirelPMIDhsa = MiGenRelDB.loadFromFile(relationPath,
                                       ltype="mirna",
                                       rtype="gene",
                                       normGeneSymbols=normGeneSymbols,
                                       switchLR=True)

mir758Rels = mirelPMIDhsa.get_rels("mirna", "miR-758")
print(mir758Rels)

# NOTE: the script terminates here unconditionally, so nothing defined below
# this point is ever executed when the module is run.
exit(0)


def makeListingGroups(baseHits, conjunts):

    resElems = {}

    for baseHit in baseHits:
        spos = baseHit.start()
        epos = baseHit.end()

        curGroup = []
Example #3
0
scaiBase = "/mnt/d/owncloud/data/miRExplore/scai_corpus/"

# The first CLI argument selects the corpus split (case-insensitive); any
# other value aborts the script with exit code -1.
corpusFiles = {
    "TRAIN": "miRNA_train_fixed.xml",
    "TEST": "miRNA_test_fixed.xml",
}
corpusMode = sys.argv[1].upper()

if corpusMode not in corpusFiles:
    exit(-1)

scaiFile = corpusFiles[corpusMode]

# The second and third CLI arguments name the two output files; both are
# opened for writing here and their names echoed.
sentFile = open(sys.argv[2], 'w')
synFile = open(sys.argv[3], 'w')

for outFile in (sentFile, synFile):
    print(outFile.name)

synonymPath = scaiBase + "/../obodir/" + "/hgnc_no_withdrawn.syn"
normGeneSymbols = normalize_gene_names(path=synonymPath)

relexAccepted = []

# Scan the relex output file that matches the chosen corpus split
# (relexfiles/scai_<train|test>_relex.out).
with open("relexfiles/scai_" + sys.argv[1].lower() + "_relex.out") as fin:

    # Set once a "#RELATIONS:" marker line has been seen.
    wasRelation = False
    # ID taken from the most recent ">" header line.
    curSentID = None
    for line in fin:

        # Header line: everything after the leading ">" (whitespace-stripped)
        # becomes the current sentence ID.
        if line.startswith(">"):
            curSentID = line.strip()[1:]

        # Marker line: remember it and move on to the next line.
        if line.startswith("#RELATIONS:"):
            wasRelation = True
            continue
Example #4
0
def start_app_from_args(args):
    """Load all relation, sentence, date and context databases into module globals.

    The function reads the following attributes from ``args``: ``textmine``,
    ``obodir``, ``sentdir``, ``sentdir_pmc``, ``feedback`` and the switches
    ``load_mirecords``, ``load_mirtarbase``, ``load_diana`` and ``load_pmc``.
    It also reads ``fileurl``, which is not defined in this function and is
    expected to exist at module level.

    Context databases (GO, disease, FMA, cell lines, NCIT) are cached as
    ``dbs.pickle`` inside the aggregated PMID/PMC directories: an existing
    pickle is loaded, otherwise the databases are built from the ``*.pmid``
    files and the pickle is written. When ``args.load_pmc`` is set, the PMC
    databases are merged into the PMID ones after loading.

    :param args: parsed command line options (see attribute list above)
    """

    global mirFeedback
    global mirandaDB_mm10
    global relDBs
    global diseaseObo
    global goObo
    global cellObo
    global pmid2go
    global pmid2disease
    global pmid2fma
    global pmid2cell
    global testRels
    global mirelPMID
    global sentDB
    global featureViewer
    global symbol2ensemblDB
    global pmid2ncit
    global ncitObo

    global humanGeneNeighbourDB
    global mouseGeneNeighbourDB
    global geneNeighbourHoods

    global mi2mirna
    global dateDB

    pmidBase = args.textmine + '/aggregated_pmid/'
    pmcBase = args.textmine + '/aggregated_pmc/'

    normGeneSymbols = normalize_gene_names(path=fileurl +
                                           "/hgnc_no_withdrawn.syn")

    #mouseGeneNeighbourDB = GeneNeighbourDB.loadFromFile("mmu", inputgff=args.obodir + "/mm10_primary_assembly_and_lncRNA.gtf")
    #geneNeighbourHoods[mouseGeneNeighbourDB.orgid] = mouseGeneNeighbourDB

    print(datetime.datetime.now(), "Loading PMID2PMC")

    # allInteractions = defaultdict(list)

    print(datetime.datetime.now(), "Loading Sym2Ens")

    symbol2ensemblDB = SymbolEnsemblDB.loadFromFile(fileurl + "/sym2ens/")

    print(datetime.datetime.now(), "Loading MI2Mirna")
    mi2mirna = MI2Mirna.loadFromFile(fileurl + "/dbs/mirnas_mirbase.csv")
    print(datetime.datetime.now(), "Loading miranda interactions mm10")
    # mirandaDB_mm10 = MirandaRelDB.loadFromFile(filepath=args.obodir + "/mm10_interactionsAllGenes.txt", symbol2ens=symbol2ensemblDB, org="mmu")
    # mirandaDB_hg38 = MirandaRelDB.loadFromFile(filepath=args.obodir + "/hg38_interactionsAllGenes.txt", org="hsa")

    # Optional databases default to None; None entries are filtered out of
    # relDBs further down.
    mirandaDB_mm10 = None
    mirandaDB_hg38 = None
    recordsDB = None
    mirtarbaseDB = None
    dianaDB, celllInfos = None, None

    if args.load_mirecords:
        print(datetime.datetime.now(), "Loading miRecords")
        recordsDB = miRecordDB.loadFromFile(filelocation=fileurl +
                                            "/dbs/mirecords_v4.xlsx",
                                            normGeneSymbols=normGeneSymbols)

    if args.load_mirtarbase:
        print(datetime.datetime.now(), "Loading miRTarBase")
        mirtarbaseDB = MirTarBaseDB.loadFromFile(
            filepath=fileurl + "/dbs/miRTarBase.csv",
            normGeneSymbols=normGeneSymbols)

    if args.load_diana:
        print(datetime.datetime.now(), "Loading hsa_mmu.diana")
        dianaDB, celllInfos = DIANATarbaseDB.loadFromFile(
            fileurl + "/dbs/hsa_mmu.diana.csv",
            normGeneSymbols=normGeneSymbols)

    allDBS = None

    print(datetime.datetime.now(), "Loading PMID2PMC")
    pmid2pmcDB = None
    excludePMIDs = None

    if args.load_pmc:
        pmid2pmcDB = PMID2PMCDB.loadFromFile(pmcBase + '/pmc2pmid',
                                             PMC2PMID=True)
        # These PMIDs are passed as excludeIDs to the PMID relation loaders
        # below.
        excludePMIDs = pmid2pmcDB.getAllPMIDs()
        print("Got", len(excludePMIDs), "exclude PMIDs")

        if len(excludePMIDs) > 5:
            print(list(excludePMIDs)[:5])

    print(datetime.datetime.now(), "Finished PMID2PMC")

    print(datetime.datetime.now(), "Loading mirel")

    testRels = None  # TestRelLoader.loadFromFile(pmidBase + "/test_rels_4")

    print(datetime.datetime.now(), "Loading mirel PMID")
    mirelPMIDhsa = MiGenRelDB.loadFromFile(pmidBase + "/mirna_gene.hsa.pmid",
                                           ltype="mirna",
                                           rtype="gene",
                                           normGeneSymbols=normGeneSymbols,
                                           switchLR=True,
                                           excludeIDs=excludePMIDs)
    mirelPMIDmmu = MiGenRelDB.loadFromFile(pmidBase + "/mirna_gene.mmu.pmid",
                                           ltype="mirna",
                                           rtype="gene",
                                           normGeneSymbols=normGeneSymbols,
                                           switchLR=True,
                                           excludeIDs=excludePMIDs)

    print(datetime.datetime.now(), "Loading mirel PMC")
    mirelPMChsa = None
    mirelPMCmmu = None

    if args.load_pmc:
        mirelPMChsa = MiGenRelDB.loadFromFile(pmcBase + "/mirna_gene.hsa.pmid",
                                              ltype="mirna",
                                              rtype="gene",
                                              normGeneSymbols=normGeneSymbols,
                                              switchLR=True)
        mirelPMCmmu = MiGenRelDB.loadFromFile(pmcBase + "/mirna_gene.mmu.pmid",
                                              ltype="mirna",
                                              rtype="gene",
                                              normGeneSymbols=normGeneSymbols,
                                              switchLR=True)

    lncMirPMID = None  #MiGenRelDB.loadFromFile(pmidBase + "/lncrna_mirna.pmid", ltype="lncrna", rtype="mirna")
    geneLncPMID = None  #MiGenRelDB.loadFromFile(pmidBase + "/gene_lncrna.pmid", ltype="gene", rtype="lncrna")

    print(datetime.datetime.now(), "Finished mirel")

    print(datetime.datetime.now(), "Loading Dates")
    dateDB = PubmedDateDB.loadFromFile(pmidBase + "/allpmids.date")

    if args.load_pmc:
        pmc_dateDB = PubmedDateDB.loadFromFile(pmcBase + "/allpmc.date")
        dateDB.add_database(pmc_dateDB)

    print(datetime.datetime.now(), "Finished Dates")

    print(datetime.datetime.now(), "Loading mirWalk")
    mirWalkMMU3UTRDB = None  #MirWalkDB.loadFromFile('/mnt/c/ownCloud/data/miRExplore/mirwalk/mmu_miRWalk_3UTR.txt', org="mmu", bindSite="3UTR", normGeneSymbols=normGeneSymbols)
    print(datetime.datetime.now(), "Loading mirWalk finished")

    relDBs = [
        recordsDB, mirtarbaseDB, dianaDB, mirelPMIDhsa, mirelPMIDmmu,
        mirelPMChsa, mirelPMCmmu, lncMirPMID, geneLncPMID, mirandaDB_mm10,
        mirWalkMMU3UTRDB
    ]
    # Identity check: drop only the databases that were not loaded.
    relDBs = [x for x in relDBs if x is not None]

    mirFeedback = feedbackDB(args.feedback)

    # Collect the document IDs referenced by any loaded relation database;
    # the sentence and context loaders below are restricted to these.
    requiredDocuments = set()
    for relDB in relDBs:
        requiredDocuments.update(relDB.get_evidence_docids())

    print("Requiring", len(requiredDocuments), "documents")

    print(datetime.datetime.now(), "Loading sents")
    print(datetime.datetime.now(), "Loading sents PMID")
    sentDB = SentenceDB.loadFromFile(args.sentdir,
                                     pmidBase + "/pmid2sent",
                                     requiredIDs=requiredDocuments)

    if args.load_pmc:
        print(datetime.datetime.now(), "Loading sents PMC")
        sentDBPMC = SentenceDB.loadFromFile(args.sentdir_pmc,
                                            pmcBase + "/pmc2sent",
                                            requiredIDs=requiredDocuments)
        print(datetime.datetime.now(), "Merging sentence DBs")
        sentDB.add_database(sentDBPMC)
    print(datetime.datetime.now(), "Finished sents")

    # SECURITY NOTE: pickle.load executes arbitrary code contained in the
    # file. The dbs.pickle caches must only ever be files written by this
    # application itself.
    allDBsPMID = None
    if os.path.isfile(pmidBase + "/dbs.pickle"):
        print(datetime.datetime.now(), "Loading pickle PMID")
        with open(pmidBase + "/dbs.pickle", 'rb') as fin:
            allDBsPMID = pickle.load(fin)

        pmid2go = allDBsPMID[0]
        pmid2disease = allDBsPMID[1]
        pmid2fma = allDBsPMID[2]
        pmid2cell = allDBsPMID[3]
        pmid2ncit = allDBsPMID[4]

        print(datetime.datetime.now(), "Loading pickle PMID ended")

    allDBsPMC = None
    if os.path.isfile(pmcBase + "/dbs.pickle") and args.load_pmc:
        print(datetime.datetime.now(), "Loading pickle PMC")
        # Open the PMC cache whose existence was just checked (previously the
        # PMID cache was opened here, so PMC data was never actually loaded).
        with open(pmcBase + "/dbs.pickle", 'rb') as fin:
            allDBsPMC = pickle.load(fin)

        pmc2go = allDBsPMC[0]
        pmc2disease = allDBsPMC[1]
        pmc2fma = allDBsPMC[2]
        pmc2cell = allDBsPMC[3]
        pmc2ncit = allDBsPMC[4]

        print(datetime.datetime.now(), "Loading pickle PMC ended")

    print(datetime.datetime.now(), "Loading ontologies")

    diseaseObo = GeneOntology(args.obodir + "/doid.obo")
    goObo = GeneOntology(args.obodir + "/go.obo")
    cellObo = GeneOntology(args.obodir + "/meta_cells.obo")
    ncitObo = GeneOntology(args.obodir + "/ncit.obo")
    fmaObo = GeneOntology(args.obodir + "/fma_obo.obo")

    print(datetime.datetime.now(), "Loading ontologies finished")

    # No PMID cache on disk: build the context databases and write the cache.
    if allDBsPMID is None:
        pmid2go = None
        pmid2disease = None
        pmid2fma = None
        pmid2cell = None
        pmid2ncit = None

        print(datetime.datetime.now(), "Loading GO")
        pmid2go = PMID2XDB.loadFromFile(pmidBase + "/go.pmid", goObo,
                                        requiredDocuments)
        print(datetime.datetime.now(), "Loading Disease")
        pmid2disease = PMID2XDB.loadFromFile(pmidBase + "/disease.pmid",
                                             diseaseObo, requiredDocuments)
        print(datetime.datetime.now(), "Loading FMA")
        pmid2fma = PMID2XDB.loadFromFile(pmidBase + "/model_anatomy.pmid",
                                         fmaObo, requiredDocuments)
        print(datetime.datetime.now(), "Loading cellline")
        pmid2cell = PMID2XDB.loadFromFile(pmidBase + "/celllines.pmid",
                                          cellObo, requiredDocuments)
        print(datetime.datetime.now(), "Loading ncit")
        pmid2ncit = PMID2XDB.loadFromFile(pmidBase + "/ncit.pmid", ncitObo,
                                          requiredDocuments)

        allDBsPMID = (pmid2go, pmid2disease, pmid2fma, pmid2cell, pmid2ncit)

        print(datetime.datetime.now(), "Writing Pickle")

        with open(pmidBase + "/dbs.pickle", 'wb') as fout:
            pickle.dump(allDBsPMID, fout)

        print(datetime.datetime.now(), "Finished Writing Pickle")

    # Same for PMC, but only when PMC loading was requested.
    if allDBsPMC is None and args.load_pmc:
        pmc2go = None
        pmc2disease = None
        pmc2fma = None
        pmc2cell = None
        pmc2ncit = None

        print(datetime.datetime.now(), "Loading GO")
        pmc2go = PMID2XDB.loadFromFile(pmcBase + "/go.pmid", goObo,
                                       requiredDocuments)
        print(datetime.datetime.now(), "Loading Disease")
        pmc2disease = PMID2XDB.loadFromFile(pmcBase + "/disease.pmid",
                                            diseaseObo, requiredDocuments)
        print(datetime.datetime.now(), "Loading FMA")
        pmc2fma = PMID2XDB.loadFromFile(pmcBase + "/model_anatomy.pmid",
                                        fmaObo, requiredDocuments)
        print(datetime.datetime.now(), "Loading cellline")
        pmc2cell = PMID2XDB.loadFromFile(pmcBase + "/celllines.pmid", cellObo,
                                         requiredDocuments)
        print(datetime.datetime.now(), "Loading ncit")
        pmc2ncit = PMID2XDB.loadFromFile(pmcBase + "/ncit.pmid", ncitObo,
                                         requiredDocuments)

        # Keep the PMC tuple in its own variable so the PMID tuple is not
        # overwritten.
        allDBsPMC = (pmc2go, pmc2disease, pmc2fma, pmc2cell, pmc2ncit)

        print(datetime.datetime.now(), "Writing Pickle")

        with open(pmcBase + "/dbs.pickle", 'wb') as fout:
            pickle.dump(allDBsPMC, fout)

        print(datetime.datetime.now(), "Finished Writing Pickle")

    # Merging happens after the caches are written, so each pickle only ever
    # contains its own (unmerged) databases.
    if args.load_pmc:
        print(datetime.datetime.now(), "Merging Context DBs")
        print(datetime.datetime.now(), "Merging Context GO")
        pmid2go.add_database(pmc2go)
        print(datetime.datetime.now(), "Merging Context DISEASE")
        pmid2disease.add_database(pmc2disease)
        print(datetime.datetime.now(), "Merging Context FMA")
        pmid2fma.add_database(pmc2fma)
        print(datetime.datetime.now(), "Merging Context CELL")
        pmid2cell.add_database(pmc2cell)
        print(datetime.datetime.now(), "Merging Context NCIT")
        pmid2ncit.add_database(pmc2ncit)
        print(datetime.datetime.now(), "Finished Merging Context DBs")

    # Cell-line records returned by the DIANA loader are appended to the
    # cell context DB under their document ID.
    if celllInfos is not None:
        print(datetime.datetime.now(), "Adding CelllInfo Features")
        for celllInfo in celllInfos:
            pmid2cell.docid2info[celllInfo['docid']].append(celllInfo)

        print(datetime.datetime.now(), "Finished Adding CelllInfo Features")

    print(datetime.datetime.now(), "Loading Features")
    #rfDB = RFamDB.loadFromFile(fileurl + "/dbs/rfam.regions.mirexplore")
    #featureViewerMMU = FeatureViewer('mmu', args.obodir, rfamDB=rfDB)
    #featureViewerHSA = FeatureViewer('hsa', args.obodir, rfamDB=rfDB)

    print(datetime.datetime.now(), "Loading Features finished")
    print(datetime.datetime.now(), "Loading finished")