'termname': cellline, 'evidences': [] } foundCellInfos.append(celllInfo) entry = DIANATarbaseEntry( (tuple(docOrgs), cellline, tissue, method, measure, direction), docID, (geneID, "gene"), (mirna, "mirna"), "DIANA", idx) ret.ltype2rel[geneID].add(entry) ret.rtype2rel[mirna].add(entry) ret.all_ltypes.add(geneID) ret.all_rtypes.add(mirna) return ret, foundCellInfos if __name__ == '__main__': normGeneSymbols = normalize_gene_names( path="/mnt/c/ownCloud/data/miRExplore/obodir/" + "/hgnc_no_withdrawn.syn") ret, celllinfo = DIANATarbaseDB.loadFromFile( "/mnt/c/ownCloud/data/miRExplore/diana/hsa_mmu.diana.csv", normGeneSymbols=normGeneSymbols) for x in ret.get_rels('gene', 'CXCR4'): print(x.toJSON())
import re from textdb.MiGenRelDB import MiGenRelDB from utils.tmutils import normalize_gene_names mainPath = "/mnt/d/owncloud/data/miRExplore/" normGeneSymbols = normalize_gene_names(path=mainPath + "/obodir/" + "/hgnc_no_withdrawn.syn") mirelPMIDhsa = MiGenRelDB.loadFromFile( mainPath + "/textmine/aggregated_pmid/" + "/mirna_gene.hsa.pmid", ltype="mirna", rtype="gene", normGeneSymbols=normGeneSymbols, switchLR=True) print(mirelPMIDhsa.get_rels("mirna", "miR-758")) exit(0) def makeListingGroups(baseHits, conjunts): resElems = {} for baseHit in baseHits: spos = baseHit.start() epos = baseHit.end() curGroup = []
scaiBase = "/mnt/d/owncloud/data/miRExplore/scai_corpus/" if sys.argv[1].upper() == "TRAIN": scaiFile = "miRNA_train_fixed.xml" elif sys.argv[1].upper() == "TEST": scaiFile = "miRNA_test_fixed.xml" else: exit(-1) sentFile = open(sys.argv[2], 'w') synFile = open(sys.argv[3], 'w') print(sentFile.name) print(synFile.name) normGeneSymbols = normalize_gene_names(path=scaiBase + "/../obodir/" + "/hgnc_no_withdrawn.syn") relexAccepted = [] with open("relexfiles/scai_" + sys.argv[1].lower() + "_relex.out") as fin: wasRelation = False curSentID = None for line in fin: if line.startswith(">"): curSentID = line.strip()[1:] if line.startswith("#RELATIONS:"): wasRelation = True continue
def start_app_from_args(args):
    """Load all databases required by the application into module globals.

    The function reads the following attributes from ``args``:
    ``textmine``, ``obodir``, ``sentdir``, ``sentdir_pmc``, ``feedback`` and
    the switches ``load_mirecords``, ``load_mirtarbase``, ``load_diana`` and
    ``load_pmc``. It also relies on the module-level name ``fileurl`` as the
    base directory for the bundled database files.

    Context databases (GO, disease, FMA, cell lines, NCIT) are cached as a
    5-tuple in ``<base>/dbs.pickle`` for the PMID and PMC base directories
    separately; a missing cache is rebuilt from the ``*.pmid`` files and
    written back. When ``args.load_pmc`` is set, the PMC databases are merged
    into their PMID counterparts.

    Nothing is returned; results are published through the globals declared
    below. Progress is reported on stdout with a timestamp per step.
    """
    global mirFeedback, mirandaDB_mm10, relDBs
    global diseaseObo, goObo, cellObo, ncitObo
    global pmid2go, pmid2disease, pmid2fma, pmid2cell, pmid2ncit
    global testRels, mirelPMID, sentDB, featureViewer
    global symbol2ensemblDB
    global humanGeneNeighbourDB, mouseGeneNeighbourDB, geneNeighbourHoods
    global mi2mirna, dateDB

    def log(message):
        # Same "timestamp message" output the individual print calls produced.
        print(datetime.datetime.now(), message)

    pmidBase = args.textmine + '/aggregated_pmid/'
    pmcBase = args.textmine + '/aggregated_pmc/'

    normGeneSymbols = normalize_gene_names(path=fileurl + "/hgnc_no_withdrawn.syn")

    # Gene neighbourhood loading is currently disabled:
    # mouseGeneNeighbourDB = GeneNeighbourDB.loadFromFile("mmu", inputgff=args.obodir + "/mm10_primary_assembly_and_lncRNA.gtf")
    # geneNeighbourHoods[mouseGeneNeighbourDB.orgid] = mouseGeneNeighbourDB

    log("Loading PMID2PMC")

    log("Loading Sym2Ens")
    symbol2ensemblDB = SymbolEnsemblDB.loadFromFile(fileurl + "/sym2ens/")

    log("Loading MI2Mirna")
    mi2mirna = MI2Mirna.loadFromFile(fileurl + "/dbs/mirnas_mirbase.csv")

    log("Loading miranda interactions mm10")
    # Miranda predictions are currently disabled:
    # mirandaDB_mm10 = MirandaRelDB.loadFromFile(filepath=args.obodir + "/mm10_interactionsAllGenes.txt", symbol2ens=symbol2ensemblDB, org="mmu")
    # mirandaDB_hg38 = MirandaRelDB.loadFromFile(filepath=args.obodir + "/hg38_interactionsAllGenes.txt", org="hsa")
    mirandaDB_mm10 = None

    # ------------------------------------------------------------------
    # Optional curated interaction databases
    # ------------------------------------------------------------------
    recordsDB = None
    mirtarbaseDB = None
    dianaDB, celllInfos = None, None

    if args.load_mirecords:
        log("Loading miRecords")
        recordsDB = miRecordDB.loadFromFile(
            filelocation=fileurl + "/dbs/mirecords_v4.xlsx",
            normGeneSymbols=normGeneSymbols)

    if args.load_mirtarbase:
        log("Loading miRTarBase")
        mirtarbaseDB = MirTarBaseDB.loadFromFile(
            filepath=fileurl + "/dbs/miRTarBase.csv",
            normGeneSymbols=normGeneSymbols)

    if args.load_diana:
        log("Loading hsa_mmu.diana")
        dianaDB, celllInfos = DIANATarbaseDB.loadFromFile(
            fileurl + "/dbs/hsa_mmu.diana.csv",
            normGeneSymbols=normGeneSymbols)

    # ------------------------------------------------------------------
    # PMID <-> PMC mapping: PMIDs that also exist as PMC documents are
    # excluded from the PMID relation databases below.
    # ------------------------------------------------------------------
    log("Loading PMID2PMC")
    excludePMIDs = None

    if args.load_pmc:
        pmid2pmcDB = PMID2PMCDB.loadFromFile(pmcBase + '/pmc2pmid', PMC2PMID=True)
        excludePMIDs = pmid2pmcDB.getAllPMIDs()

        print("Got", len(excludePMIDs), "exclude PMIDs")
        if len(excludePMIDs) > 5:
            print(list(excludePMIDs)[:5])

    log("Finished PMID2PMC")

    # ------------------------------------------------------------------
    # Text-mined miRNA/gene relations
    # ------------------------------------------------------------------
    log("Loading mirel")
    testRels = None  # TestRelLoader.loadFromFile(pmidBase + "/test_rels_4")

    log("Loading mirel PMID")
    mirelPMIDhsa = MiGenRelDB.loadFromFile(
        pmidBase + "/mirna_gene.hsa.pmid", ltype="mirna", rtype="gene",
        normGeneSymbols=normGeneSymbols, switchLR=True, excludeIDs=excludePMIDs)
    mirelPMIDmmu = MiGenRelDB.loadFromFile(
        pmidBase + "/mirna_gene.mmu.pmid", ltype="mirna", rtype="gene",
        normGeneSymbols=normGeneSymbols, switchLR=True, excludeIDs=excludePMIDs)

    log("Loading mirel PMC")
    mirelPMChsa = None
    mirelPMCmmu = None

    if args.load_pmc:
        mirelPMChsa = MiGenRelDB.loadFromFile(
            pmcBase + "/mirna_gene.hsa.pmid", ltype="mirna", rtype="gene",
            normGeneSymbols=normGeneSymbols, switchLR=True)
        mirelPMCmmu = MiGenRelDB.loadFromFile(
            pmcBase + "/mirna_gene.mmu.pmid", ltype="mirna", rtype="gene",
            normGeneSymbols=normGeneSymbols, switchLR=True)

    lncMirPMID = None  # MiGenRelDB.loadFromFile(pmidBase + "/lncrna_mirna.pmid", ltype="lncrna", rtype="mirna")
    geneLncPMID = None  # MiGenRelDB.loadFromFile(pmidBase + "/gene_lncrna.pmid", ltype="gene", rtype="lncrna")

    log("Finished mirel")

    # ------------------------------------------------------------------
    # Publication dates
    # ------------------------------------------------------------------
    log("Loading Dates")
    dateDB = PubmedDateDB.loadFromFile(pmidBase + "/allpmids.date")

    if args.load_pmc:
        pmc_dateDB = PubmedDateDB.loadFromFile(pmcBase + "/allpmc.date")
        dateDB.add_database(pmc_dateDB)

    log("Finished Dates")

    log("Loading mirWalk")
    mirWalkMMU3UTRDB = None  # MirWalkDB.loadFromFile('/mnt/c/ownCloud/data/miRExplore/mirwalk/mmu_miRWalk_3UTR.txt', org="mmu", bindSite="3UTR", normGeneSymbols=normGeneSymbols)
    log("Loading mirWalk finished")

    # Keep only the relation databases that were actually loaded.
    candidateDBs = [
        recordsDB, mirtarbaseDB, dianaDB,
        mirelPMIDhsa, mirelPMIDmmu, mirelPMChsa, mirelPMCmmu,
        lncMirPMID, geneLncPMID, mirandaDB_mm10, mirWalkMMU3UTRDB,
    ]
    relDBs = [db for db in candidateDBs if db is not None]

    mirFeedback = feedbackDB(args.feedback)

    # Every document referenced as evidence by any relation database; the
    # sentence and context loaders are restricted to these IDs.
    requiredDocuments = set()
    for relDB in relDBs:
        requiredDocuments = requiredDocuments.union(relDB.get_evidence_docids())

    print("Requiring", len(requiredDocuments), "documents")

    # ------------------------------------------------------------------
    # Sentences
    # ------------------------------------------------------------------
    log("Loading sents")
    log("Loading sents PMID")
    sentDB = SentenceDB.loadFromFile(
        args.sentdir, pmidBase + "/pmid2sent", requiredIDs=requiredDocuments)

    if args.load_pmc:
        log("Loading sents PMC")
        sentDBPMC = SentenceDB.loadFromFile(
            args.sentdir_pmc, pmcBase + "/pmc2sent", requiredIDs=requiredDocuments)

        log("Merging sentence DBs")
        sentDB.add_database(sentDBPMC)

    log("Finished sents")

    # ------------------------------------------------------------------
    # Cached context databases.
    # NOTE(review): pickle.load executes arbitrary code from the file; these
    # caches must only ever be files written by this application.
    # ------------------------------------------------------------------
    allDBsPMID = None
    pmidPicklePath = pmidBase + "/dbs.pickle"

    if os.path.isfile(pmidPicklePath):
        log("Loading pickle PMID")
        with open(pmidPicklePath, 'rb') as fin:
            allDBsPMID = pickle.load(fin)

        pmid2go, pmid2disease, pmid2fma, pmid2cell, pmid2ncit = allDBsPMID[:5]
        log("Loading pickle PMID ended")

    allDBsPMC = None
    pmcPicklePath = pmcBase + "/dbs.pickle"

    if os.path.isfile(pmcPicklePath) and args.load_pmc:
        log("Loading pickle PMC")
        # Read the PMC cache from the PMC directory - the file whose
        # existence was just checked - not from the PMID directory.
        with open(pmcPicklePath, 'rb') as fin:
            allDBsPMC = pickle.load(fin)

        pmc2go, pmc2disease, pmc2fma, pmc2cell, pmc2ncit = allDBsPMC[:5]
        log("Loading pickle PMC ended")

    # ------------------------------------------------------------------
    # Ontologies
    # ------------------------------------------------------------------
    log("Loading ontologies")
    diseaseObo = GeneOntology(args.obodir + "/doid.obo")
    goObo = GeneOntology(args.obodir + "/go.obo")
    cellObo = GeneOntology(args.obodir + "/meta_cells.obo")
    ncitObo = GeneOntology(args.obodir + "/ncit.obo")
    fmaObo = GeneOntology(args.obodir + "/fma_obo.obo")
    log("Loading ontologies finished")

    # ------------------------------------------------------------------
    # Build (and cache) context databases that were not found as pickle
    # ------------------------------------------------------------------
    if allDBsPMID is None:
        log("Loading GO")
        pmid2go = PMID2XDB.loadFromFile(pmidBase + "/go.pmid", goObo, requiredDocuments)

        log("Loading Disease")
        pmid2disease = PMID2XDB.loadFromFile(pmidBase + "/disease.pmid", diseaseObo, requiredDocuments)

        log("Loading FMA")
        pmid2fma = PMID2XDB.loadFromFile(pmidBase + "/model_anatomy.pmid", fmaObo, requiredDocuments)

        log("Loading cellline")
        pmid2cell = PMID2XDB.loadFromFile(pmidBase + "/celllines.pmid", cellObo, requiredDocuments)

        log("Loading ncit")
        pmid2ncit = PMID2XDB.loadFromFile(pmidBase + "/ncit.pmid", ncitObo, requiredDocuments)

        allDBsPMID = (pmid2go, pmid2disease, pmid2fma, pmid2cell, pmid2ncit)

        log("Writing Pickle")
        with open(pmidPicklePath, 'wb') as fout:
            pickle.dump(allDBsPMID, fout)
        log("Finished Writing Pickle")

    if allDBsPMC is None and args.load_pmc:
        log("Loading GO")
        pmc2go = PMID2XDB.loadFromFile(pmcBase + "/go.pmid", goObo, requiredDocuments)

        log("Loading Disease")
        pmc2disease = PMID2XDB.loadFromFile(pmcBase + "/disease.pmid", diseaseObo, requiredDocuments)

        log("Loading FMA")
        pmc2fma = PMID2XDB.loadFromFile(pmcBase + "/model_anatomy.pmid", fmaObo, requiredDocuments)

        log("Loading cellline")
        pmc2cell = PMID2XDB.loadFromFile(pmcBase + "/celllines.pmid", cellObo, requiredDocuments)

        log("Loading ncit")
        pmc2ncit = PMID2XDB.loadFromFile(pmcBase + "/ncit.pmid", ncitObo, requiredDocuments)

        # Keep the PMC tuple in its own variable so the PMID tuple is not
        # overwritten.
        allDBsPMC = (pmc2go, pmc2disease, pmc2fma, pmc2cell, pmc2ncit)

        log("Writing Pickle")
        with open(pmcPicklePath, 'wb') as fout:
            pickle.dump(allDBsPMC, fout)
        log("Finished Writing Pickle")

    # ------------------------------------------------------------------
    # Merge PMC context into the PMID databases
    # ------------------------------------------------------------------
    if args.load_pmc:
        log("Merging Context DBs")

        log("Merging Context GO")
        pmid2go.add_database(pmc2go)

        log("Merging Context DISEASE")
        pmid2disease.add_database(pmc2disease)

        log("Merging Context FMA")
        pmid2fma.add_database(pmc2fma)

        log("Merging Context CELL")
        pmid2cell.add_database(pmc2cell)

        log("Merging Context NCIT")
        pmid2ncit.add_database(pmc2ncit)

        log("Finished Merging Context DBs")

    # Cell line annotations delivered alongside the DIANA database are added
    # to the cell context DB after the cache was written, so they are not
    # part of the pickle.
    if celllInfos is not None:
        log("Adding CelllInfo Features")

        for celllInfo in celllInfos:
            # NOTE(review): assumes docid2info creates missing entries
            # (e.g. a defaultdict(list)) - confirm against PMID2XDB.
            pmid2cell.docid2info[celllInfo['docid']].append(celllInfo)

        log("Finished Adding CelllInfo Features")

    log("Loading Features")
    # Feature viewers are currently disabled:
    # rfDB = RFamDB.loadFromFile(fileurl + "/dbs/rfam.regions.mirexplore")
    # featureViewerMMU = FeatureViewer('mmu', args.obodir, rfamDB=rfDB)
    # featureViewerHSA = FeatureViewer('hsa', args.obodir, rfamDB=rfDB)
    log("Loading Features finished")

    log("Loading finished")