import glob
import os

from mjoppich.geneontology import GeneOntology
from porestat.utils.Parallel import MapReduce

from database.Neo4JInterface import neo4jInterface
from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, eprint

# Folder below which the text-mining results of every entity type are stored.
resultBase = dataDir + "/miRExplore/textmine/results/"

# Synonym-file map belonging to the disease results.
diseaseMap = SynfileMap(resultBase + "/disease/synfile.map")
# NOTE(review): the tuple looks like an (old prefix, new prefix) path
# substitution -- confirm against SynfileMap.loadSynFiles.
diseaseMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# Disease ontology read from the DOID obo file.
diseaseObo = GeneOntology(dataDir + "miRExplore/doid.obo")

# simulate=False: presumably statements are really sent to the database.
db = neo4jInterface(simulate=False)

# Called with the DISEASE / PUBMED labels and the DISEASE_MENTION relationship
# type; by its name this removes those relationships before they are rebuilt.
db.deleteRelationship('n', ['DISEASE'], None, 'm', ['PUBMED'], None, ['DISEASE_MENTION'], None)

# The index files of the hgnc run determine which medline chunks exist.
allfiles = glob.glob(resultBase + "/hgnc/medline17n*.index")


def _medlineFileID(indexPath):
    """Return the integer chunk number encoded in a medline17n*.index path."""
    fileName = os.path.basename(indexPath)
    return int(fileName.replace('medline17n', '').replace('.index', ''))


# Numeric chunk ids, largest first.
allfileIDs = sorted(map(_medlineFileID, allfiles), reverse=True)

addUnknownPubmeds = False

# Query for all PUBMED nodes, bound to the node name 'n'.
retVal = db.matchNodes(['PUBMED'], None, nodename='n')
relevantPMIDs = set()
'--resultdir', type=str, help='where are all the index-files?', required=True) parser.add_argument('-d', '--datadir', type=str, help='where is te miRExplore bsae?', required=True) args = parser.parse_args() #resultBase = dataDir + "/miRExplore/textmine/results_pmc/" resultBase = args.resultdir diseaseSyns = SynfileMap(resultBase + "/model_anatomy/synfile.map") diseaseSyns.loadSynFiles( ('/home/users/joppich/ownCloud/data/', args.datadir)) allfiles = glob.glob(resultBase + "/model_anatomy/*.index") allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles] allfileIDs = sorted(allfileIDs, reverse=True) #allfileIDs = [894] fmaObo = GeneOntology(args.datadir + "miRExplore/foundational_model_anatomy/fma_obo.obo") def getTerm(synid, obo): if synid in obo.dTerms:
from collections import Counter from synonymes.SynfileMap import SynfileMap from textmining.SyngrepHitFile import SyngrepHitFile from utils.idutils import dataDir, loadExludeWords resultBase = dataDir + "/miRExplore/textmine/results/" indexFoundSyns = Counter() excludedSyns = loadExludeWords() checkResultsFor = 'disease' analyseFiles = 100 maxFiles = 892 checkSynsMap = SynfileMap(resultBase + "/" + checkResultsFor + "/synfile.map") checkSynsMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir)) for splitFileID in range(maxFiles, maxFiles - analyseFiles - 1, -1): fileID = "{:>4}".format(splitFileID).replace(" ", "0") print(fileID) indexFile = resultBase + "/" + checkResultsFor + "/medline17n" + fileID + ".index" foundHits = SyngrepHitFile(indexFile, checkSynsMap) for doc in foundHits: docHits = foundHits.getHitsForDocument(doc) for hit in docHits:
import glob import os from mjoppich.geneontology import GeneOntology from porestat.utils.Parallel import MapReduce from database.Neo4JInterface import neo4jInterface from synonymes.SynfileMap import SynfileMap from textmining.SyngrepHitFile import SyngrepHitFile from utils.idutils import dataDir, speciesName2TaxID, eprint resultBase = dataDir + "/miRExplore/textmine/results/" celllinesMap = SynfileMap(resultBase + "/cellline/synfile.map") celllinesMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir)) knownTaxIDs = set() knownTaxIDs.add('all') for org in speciesName2TaxID: knownTaxIDs.add(str(speciesName2TaxID[org])) synfileID2tax = {} for synfileID in celllinesMap.synfiles: synfileName = celllinesMap.synfiles[synfileID] hitOrgs = [] for org in knownTaxIDs: if "." + org + "." in synfileName: hitOrgs.append(org) if len(hitOrgs) != 1: print("No or multiple files for org: " + str(synfileName) + " " +
import re

from database.ORGMIRs import ORGMIRDB
from synonymes.SynfileMap import SynfileMap
from synonymes.SynonymFile import Synfile
from synonymes.mirnaID import miRNA, miRNAPART
from textmining.SentenceDB import SentenceDB, RegPos
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import (
    ltype2label,
    makeDBGeneID,
    mirtarbase_exp_type,
    mirtarbase_function_label,
    speciesName2TaxID,
    dataDir,
)
from database.Neo4JInterface import neo4jInterface
from utils.parallel import MapReduce
from enum import Enum

# Folder below which the text-mining results of every entity type are stored.
resultBase = dataDir + "/miRExplore/textmine/results/"


def _loadSynfileMap(relativeMapPath):
    """Build the SynfileMap stored at resultBase + relativeMapPath and load its synonym files."""
    synMap = SynfileMap(resultBase + relativeMapPath)
    # NOTE(review): the tuple looks like an (old prefix, new prefix) path
    # substitution -- confirm against SynfileMap.loadSynFiles.
    synMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))
    return synMap


# Synonym maps for the miRNA and the HGNC (gene) results.
mirnaSyns = _loadSynfileMap("/mirna/synfile.map")
hgncSyns = _loadSynfileMap("/hgnc/synfile.map")

db = None

# Switched off on purpose: with the condition hard-wired to False no database
# connection is opened and no relationship is deleted, so db stays None.
if False:
    db = neo4jInterface(simulate=False)
    db.deleteRelationship('n', ['GENE'], None, 'm', ['PUBMED'], None, ['ST_MENTION'], None, 'r')
    db.deleteRelationship('n', ['PUBMED_AUTHOR'], None, 'm', ['PUBMED'], None, ['IS_AUTHOR'], None, 'r')
def _progress(message):
    """Write a progress message to stderr so that stdout stays free for results."""
    print(message, file=sys.stderr)


def _loadSynfileMap(message, mapPath):
    """Announce the step on stderr, then build the SynfileMap at mapPath and load its synonym files."""
    _progress(message)
    synMap = SynfileMap(mapPath)
    # NOTE(review): the tuple looks like an (old prefix, new prefix) path
    # substitution -- confirm against SynfileMap.loadSynFiles.
    synMap.loadSynFiles((args.mine_path, dataDir))
    return synMap


# Second spaCy pipeline, loaded from the path given on the command line.
nlp_ent = spacy.load(args.nlpent)
_progress("NLPs loaded")

_progress("Creating relChecker")
relChecker = SentenceRelationChecker(nlp, nlp_ent)

_progress("Creating relClassifier")
relClassifier = SentenceRelationClassifier(args.datadir + '/obodir/allrels.csv')

_progress("miRExplore relation extraction models loaded")

# Both locations come from the command line; an earlier variant derived
# resultBase from dataDir + "/miRExplore/textmine/results_pmc/".
resultBase = args.resultdir
dataDir = args.datadir

# Synonym maps of the two entity folders and of the relation terms.
ent1Syns = _loadSynfileMap("Getting Folder1 synfile.map", resultBase + "/" + args.folder1 + "/synfile.map")
ent2Syns = _loadSynfileMap("Getting Folder2 synfile.map", resultBase + "/" + args.folder2 + "/synfile.map")
relSyns = _loadSynfileMap("Getting relations synfile.map", resultBase + "/relations/synfile.map")

_progress("Getting obodir/allrels.csv")
relationSyns = AssocSynfile(args.datadir + '/obodir/allrels.csv')

_progress("All maps loaded")

# None until a PMID whitelist is assigned.
accept_pmids = None
# Command-line options. The --datadir help text previously contained typos
# ("te", "bsae"), which were shown verbatim in the --help output.
parser.add_argument('-d', '--datadir', type=str, help='where is the miRExplore base?', required=True)
parser.add_argument('-f1', '--folder1', type=str, help='entity 1: hgnc, mirna', default="hgnc", required=False)
parser.add_argument('-f2', '--folder2', type=str, help='entity 2: mgi, mirna', default="mirna", required=False)
parser.add_argument('-ft1', '--folderType1', type=str, help='entity type 1: entity: mirna, gene, lncrna, ...', default="gene", required=False)
parser.add_argument('-ft2', '--folderType2', type=str, help='entity type 2: entity: mirna', default="mirna", required=False)

args = parser.parse_args()

# Both locations are taken from the command line.
resultBase = args.resultdir
dataDir = args.datadir

# The same path pair is handed to every synonym map, so it is spelled once.
# NOTE(review): the tuple looks like an (old prefix, new prefix) path
# substitution -- confirm against SynfileMap.loadSynFiles.
_synfilePathPair = ('/mnt/c/ownCloud/data', dataDir)

# Synonym maps of the two entity folders and of the relation terms.
ent1Syns = SynfileMap(resultBase + "/" + args.folder1 + "/synfile.map")
ent1Syns.loadSynFiles(_synfilePathPair)

ent2Syns = SynfileMap(resultBase + "/" + args.folder2 + "/synfile.map")
ent2Syns.loadSynFiles(_synfilePathPair)

relSyns = SynfileMap(resultBase + "/relations/synfile.map")
relSyns.loadSynFiles(_synfilePathPair)

# dataDir is args.datadir (assigned above); use the one name consistently.
relationSyns = AssocSynfile(dataDir + '/miRExplore/relations/allrels.csv')

# Sets keyed on demand; by its name, maps an id tuple to PubMed ids.
idTuple2Pubmed = defaultdict(set)

orgmirDB = ORGMIRDB(dataDir + "/miRExplore/orgmir.tsv")

# Index files of the first entity folder.
allfiles = glob.glob(resultBase + "/" + args.folder1 + "/*.index")
# Optional PMID whitelist: when the option was given, collect every non-empty,
# whitespace-stripped line. Identity comparison is the correct test for None.
if args.accept_pmids is not None:
    accept_pmids = {
        pmid
        for pmid in (line.strip() for line in args.accept_pmids)
        if pmid
    }

# The result folder is taken from the command line.
resultBase = args.resultdir

oboSyns = SynfileMap(resultBase + "/synfile.map")
# NOTE(review): the tuple looks like an (old prefix, new prefix) path
# substitution -- confirm against SynfileMap.loadSynFiles.
oboSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', args.datadir))

allfiles = glob.glob(resultBase + "/*.index")

# File ids are the index file names without ".index", in descending string order.
allfileIDs = sorted(
    (os.path.basename(x).replace(".index", "") for x in allfiles),
    reverse=True,
)

# Ontology read from the file passed via the obo option.
celloObo = GeneOntology(args.obo.name)


def getTerm(synid, obo):
    """Look up *synid* in the ontology *obo*.

    When *synid* is contained in ``obo.dTerms`` the result of
    ``obo.getID(synid)`` is returned. No other case is handled in the part
    of this function covered by the reviewed excerpt.
    """
    if synid in obo.dTerms:
        return obo.getID(synid)
['FMA:67498', 'FMA:9637', 'FMA:68646']) doidSynIDs = getSynIDs(dataDir + "miRExplore/doid.obo", ['DOID:104']) goSynIDs = getSynIDs(dataDir + "miRExplore/textmine/neutrophils.obo", ['NP:001']) for x in neutrophilSynIDs: if x in tissueIDs: tissueIDs.remove(x) print(neutrophilSynIDs) print(tissueIDs) print(doidSynIDs) print(goSynIDs) resultBase = dataDir + "/miRExplore/textmine/results/" fmaSyns = SynfileMap(resultBase + "/model_anatomy/synfile.map") fmaSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir)) doidSyns = SynfileMap(resultBase + "/disease/synfile.map") doidSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir)) goSyns = SynfileMap(resultBase + "/neutrophils/synfile.map") goSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir)) allfiles = glob.glob(resultBase + "/hgnc/pubmed18n*.index") allfileIDs = [ int(os.path.basename(x).replace('pubmed18n', '').replace('.index', '')) for x in allfiles ] allfileIDs = sorted(allfileIDs, reverse=True)
# Optional PMID whitelist: when the option was given, collect every non-empty,
# whitespace-stripped line. Identity comparison is the correct test for None.
if args.accept_pmids is not None:
    accept_pmids = {
        pmid
        for pmid in (line.strip() for line in args.accept_pmids)
        if pmid
    }

# The result folder is taken from the command line.
resultBase = args.resultdir

oboSyns = SynfileMap(resultBase + "/synfile.map")
# NOTE(review): the tuple looks like an (old prefix, new prefix) path
# substitution -- confirm against SynfileMap.loadSynFiles.
oboSyns.loadSynFiles((args.mine_path, args.datadir))

allfiles = glob.glob(resultBase + "/*.index")

# File ids are the index file names without ".index", in descending string order.
allfileIDs = sorted(
    (os.path.basename(x).replace(".index", "") for x in allfiles),
    reverse=True,
)

# Ontology read from the file passed via the obo option.
celloObo = GeneOntology(args.obo.name)


def getTerm(synid, obo):
    """Look up *synid* in the ontology *obo*.

    When *synid* is contained in ``obo.dTerms`` the result of
    ``obo.getID(synid)`` is returned. No other case is handled in the part
    of this function covered by the reviewed excerpt.
    """
    if synid in obo.dTerms:
        return obo.getID(synid)