예제 #1
0
import glob
import os

from mjoppich.geneontology import GeneOntology
from porestat.utils.Parallel import MapReduce

from database.Neo4JInterface import neo4jInterface
from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, eprint

def _medlineFileID(indexPath):
    """Return the integer left over once 'medline17n' and '.index' are removed from the file name."""
    fileName = os.path.basename(indexPath)
    return int(fileName.replace('medline17n', '').replace('.index', ''))


# Folder with the textmining results; the disease and hgnc sub-folders used
# below are addressed relative to it.
resultBase = dataDir + "/miRExplore/textmine/results/"

# Disease synonym map and the synonym files it refers to.
_diseaseSynMapPath = resultBase + "/disease/synfile.map"
diseaseMap = SynfileMap(_diseaseSynMapPath)
diseaseMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# NOTE(review): this path is appended without a leading "/", unlike
# resultBase above -- presumably dataDir ends with a separator; confirm.
diseaseObo = GeneOntology(dataDir + "miRExplore/doid.obo")

# Database interface created with simulate=False, followed by a
# deleteRelationship call for DISEASE / PUBMED nodes and the DISEASE_MENTION
# relationship type (presumably clears earlier results -- confirm against
# neo4jInterface).
db = neo4jInterface(simulate=False)
db.deleteRelationship(
    'n', ['DISEASE'], None,
    'm', ['PUBMED'], None,
    ['DISEASE_MENTION'], None,
)

# Numbers of all medline17n*.index files in the hgnc folder, highest first.
allfiles = glob.glob(resultBase + "/hgnc/medline17n*.index")
allfileIDs = sorted(map(_medlineFileID, allfiles), reverse=True)

addUnknownPubmeds = False

# Result of matching the PUBMED nodes; relevantPMIDs starts out empty.
retVal = db.matchNodes(['PUBMED'], None, nodename='n')
relevantPMIDs = set()
예제 #2
0
                        '--resultdir',
                        type=str,
                        help='where are all the index-files?',
                        required=True)
    # Mandatory option naming the miRExplore data directory.
    parser.add_argument(
        '-d', '--datadir',
        required=True,
        type=str,
        help='where is te miRExplore bsae?',
    )

    args = parser.parse_args()

    # The result folder is taken from the command line; an earlier variant
    # (dataDir + "/miRExplore/textmine/results_pmc/") was hard-coded.
    resultBase = args.resultdir

    # Synonym map stored in the model_anatomy result folder.
    synMapPath = resultBase + "/model_anatomy/synfile.map"
    synLoadPaths = ('/home/users/joppich/ownCloud/data/', args.datadir)
    diseaseSyns = SynfileMap(synMapPath)
    diseaseSyns.loadSynFiles(synLoadPaths)

    # Index file names without ".index", in descending (string) order.
    allfiles = glob.glob(resultBase + "/model_anatomy/*.index")
    allfileIDs = sorted(
        (os.path.basename(indexPath).replace(".index", "")
         for indexPath in allfiles),
        reverse=True,
    )

    # Debug override left disabled: allfileIDs = [894]

    # NOTE(review): appended without a leading "/", so args.datadir
    # presumably has to end with a path separator -- confirm.
    fmaOboPath = (args.datadir +
                  "miRExplore/foundational_model_anatomy/fma_obo.obo")
    fmaObo = GeneOntology(fmaOboPath)

    def getTerm(synid, obo):

        if synid in obo.dTerms:
예제 #3
0
from collections import Counter

from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, loadExludeWords

# Settings read by the loop further down: the result sub-folder to inspect
# and the two numbers that define its range() of file IDs.
checkResultsFor = 'disease'
analyseFiles = 100
maxFiles = 892

# Folder with the textmining results.
resultBase = dataDir + "/miRExplore/textmine/results/"

# Empty counter and the exclusion words returned by loadExludeWords().
indexFoundSyns = Counter()
excludedSyns = loadExludeWords()

# Synonym map of the inspected result sub-folder.
_checkSynMapPath = resultBase + "/" + checkResultsFor + "/synfile.map"
checkSynsMap = SynfileMap(_checkSynMapPath)
checkSynsMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# Walk the file numbers downwards starting at maxFiles. The exclusive stop
# value maxFiles - analyseFiles - 1 means analyseFiles + 1 numbers are visited
# (NOTE(review): one more than analyseFiles -- confirm this is intended).
for splitFileID in range(maxFiles, maxFiles - analyseFiles - 1, -1):

    # Right-align the number in a 4-character field, then turn the padding
    # spaces into zeros (e.g. 892 -> "0892").
    fileID = "{:>4}".format(splitFileID).replace(" ", "0")

    print(fileID)

    # Index file with this number inside the inspected result sub-folder.
    indexFile = resultBase + "/" + checkResultsFor + "/medline17n" + fileID + ".index"
    foundHits = SyngrepHitFile(indexFile, checkSynsMap)

    # Each item yielded by the hit file is handed back to
    # getHitsForDocument (presumably a document ID -- confirm against
    # SyngrepHitFile).
    for doc in foundHits:

        docHits = foundHits.getHitsForDocument(doc)

        for hit in docHits:
예제 #4
0
import glob
import os

from mjoppich.geneontology import GeneOntology
from porestat.utils.Parallel import MapReduce

from database.Neo4JInterface import neo4jInterface
from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, speciesName2TaxID, eprint

# Folder with the textmining results and the cell line synonym map in it.
resultBase = dataDir + "/miRExplore/textmine/results/"
_celllineSynMapPath = resultBase + "/cellline/synfile.map"
celllinesMap = SynfileMap(_celllineSynMapPath)
celllinesMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# Known taxonomy IDs as strings: the literal 'all' plus str() of every
# speciesName2TaxID entry.
knownTaxIDs = {'all'}
knownTaxIDs.update(
    str(speciesName2TaxID[speciesName]) for speciesName in speciesName2TaxID
)

synfileID2tax = {}
for synfileID in celllinesMap.synfiles:
    synfileName = celllinesMap.synfiles[synfileID]

    hitOrgs = []
    for org in knownTaxIDs:
        if "." + org + "." in synfileName:
            hitOrgs.append(org)

    if len(hitOrgs) != 1:
        print("No or multiple files for org: " + str(synfileName) + " " +
예제 #5
0
import re

from database.ORGMIRs import ORGMIRDB
from synonymes.SynfileMap import SynfileMap
from synonymes.SynonymFile import Synfile
from synonymes.mirnaID import miRNA, miRNAPART
from textmining.SentenceDB import SentenceDB, RegPos
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import ltype2label, makeDBGeneID, mirtarbase_exp_type, mirtarbase_function_label, speciesName2TaxID, \
    dataDir
from database.Neo4JInterface import neo4jInterface
from utils.parallel import MapReduce
from enum import Enum

def _loadSynMap(mapSuffix):
    """Create the SynfileMap found at resultBase + mapSuffix and load its synonym files."""
    synMap = SynfileMap(resultBase + mapSuffix)
    synMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))
    return synMap


# Folder with the textmining results.
resultBase = dataDir + "/miRExplore/textmine/results/"

# Synonym maps of the mirna and hgnc result folders, loaded in that order.
mirnaSyns = _loadSynMap("/mirna/synfile.map")
hgncSyns = _loadSynMap("/hgnc/synfile.map")

# No database interface is created at this point.
db = None

# The constant False condition switches this branch off: none of the
# statements below are executed unless the condition is edited.
if False:

    # Database interface created with simulate=False.
    db = neo4jInterface(simulate=False)
    # deleteRelationship call for GENE / PUBMED nodes and the ST_MENTION
    # relationship type (presumably removes those relationships -- confirm
    # against neo4jInterface).
    db.deleteRelationship('n', ['GENE'], None, 'm', ['PUBMED'], None,
                          ['ST_MENTION'], None, 'r')

    # Same call shape for PUBMED_AUTHOR / PUBMED nodes and IS_AUTHOR.
    db.deleteRelationship('n', ['PUBMED_AUTHOR'], None, 'm', ['PUBMED'], None,
                          ['IS_AUTHOR'], None, 'r')
예제 #6
0
    def _status(message):
        # Every progress message of this section is written to stderr.
        print(message, file=sys.stderr)

    nlp_ent = spacy.load(args.nlpent)
    _status("NLPs loaded")

    # Relation checker (built from both spaCy models) and the relation
    # classifier (built from the allrels.csv table).
    _status("Creating relChecker")
    relChecker = SentenceRelationChecker(nlp, nlp_ent)
    _status("Creating relClassifier")
    relClassifier = SentenceRelationClassifier(
        args.datadir + '/obodir/allrels.csv')
    _status("miRExplore relation extraction models loaded")

    # Result and data folders come from the command line; an earlier variant
    # (dataDir + "/miRExplore/textmine/results_pmc/") was hard-coded.
    resultBase = args.resultdir
    dataDir = args.datadir

    def _loadSynMap(mapPath):
        # Create the SynfileMap for mapPath and load its synonym files.
        loadedMap = SynfileMap(mapPath)
        loadedMap.loadSynFiles((args.mine_path, dataDir))
        return loadedMap

    _status("Getting Folder1 synfile.map")
    ent1Syns = _loadSynMap(resultBase + "/" + args.folder1 + "/synfile.map")

    _status("Getting Folder2 synfile.map")
    ent2Syns = _loadSynMap(resultBase + "/" + args.folder2 + "/synfile.map")

    _status("Getting relations synfile.map")
    relSyns = _loadSynMap(resultBase + "/relations/synfile.map")

    _status("Getting obodir/allrels.csv")
    relationSyns = AssocSynfile(args.datadir + '/obodir/allrels.csv')
    _status("All maps loaded")

    # No PMID filter is set here.
    accept_pmids = None
예제 #7
0
    # Mandatory option naming the miRExplore data directory.
    parser.add_argument(
        '-d', '--datadir',
        required=True,
        type=str,
        help='where is te miRExplore bsae?',
    )

    # The four optional entity options share type=str and required=False;
    # only flags, help text and default differ. They are registered in the
    # listed order.
    optionalEntityArgs = (
        ('-f1', '--folder1', 'entity 1: hgnc, mirna', "hgnc"),
        ('-f2', '--folder2', 'entity 2: mgi, mirna', "mirna"),
        ('-ft1', '--folderType1',
         'entity type 1: entity: mirna, gene, lncrna, ...', "gene"),
        ('-ft2', '--folderType2', 'entity type 2: entity: mirna', "mirna"),
    )
    for shortFlag, longFlag, helpText, defaultValue in optionalEntityArgs:
        parser.add_argument(shortFlag, longFlag, type=str, help=helpText,
                            default=defaultValue, required=False)

    args = parser.parse_args()

    # Result and data folders come from the command line; an earlier variant
    # (dataDir + "/miRExplore/textmine/results_pmc/") was hard-coded.
    resultBase = args.resultdir
    dataDir = args.datadir

    def _loadSynMap(mapPath):
        # Create the SynfileMap for mapPath and load its synonym files.
        loadedMap = SynfileMap(mapPath)
        loadedMap.loadSynFiles(('/mnt/c/ownCloud/data', dataDir))
        return loadedMap

    # Synonym maps of both entity folders and of the relations folder.
    ent1Syns = _loadSynMap(resultBase + "/"+args.folder1+"/synfile.map")
    ent2Syns = _loadSynMap(resultBase + "/"+args.folder2+"/synfile.map")
    relSyns = _loadSynMap(resultBase + "/relations/synfile.map")

    relationSyns = AssocSynfile(
        args.datadir + '/miRExplore/relations/allrels.csv')

    # Empty mapping whose missing keys default to an empty set, plus the
    # ORGMIRDB built from orgmir.tsv.
    idTuple2Pubmed = defaultdict(set)
    orgmirDB = ORGMIRDB(dataDir + "/miRExplore/orgmir.tsv")

    # Index files of the first entity folder.
    allfiles = glob.glob(resultBase + "/"+args.folder1+"/*.index")
예제 #8
0
    # Optional PMID filter: when args.accept_pmids was supplied, collect its
    # non-empty, whitespace-stripped lines into a set. The identity test
    # `is not None` is the recommended way to compare against None.
    if args.accept_pmids is not None:

        accept_pmids = set()

        for line in args.accept_pmids:

            line = line.strip()

            # Skip lines that are empty after stripping.
            if line:
                accept_pmids.add(line)

    # The result folder is taken from the command line; an earlier variant
    # (dataDir + "/miRExplore/textmine/results_pmc/") was hard-coded.
    resultBase = args.resultdir

    # Synonym map stored directly in the result folder.
    oboSyns = SynfileMap(resultBase + "/synfile.map")
    oboSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', args.datadir))

    # Index file names without ".index", in descending (string) order.
    allfiles = glob.glob(resultBase + "/*.index")
    allfileIDs = sorted(
        (os.path.basename(x).replace(".index", "") for x in allfiles),
        reverse=True)

    # Debug override left disabled: allfileIDs = [894]

    # args.obo exposes a .name attribute that is passed to GeneOntology
    # (presumably an opened file object -- confirm against the parser).
    celloObo = GeneOntology(args.obo.name)

    def getTerm(synid, obo):

        if synid in obo.dTerms:
            return obo.getID(synid)
    ['FMA:67498', 'FMA:9637', 'FMA:68646'])
def _loadSynMap(mapSuffix):
    """Create the SynfileMap found at resultBase + mapSuffix and load its synonym files."""
    synMap = SynfileMap(resultBase + mapSuffix)
    synMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))
    return synMap


def _pubmedFileID(indexPath):
    """Return the integer left over once 'pubmed18n' and '.index' are removed from the file name."""
    fileName = os.path.basename(indexPath)
    return int(fileName.replace('pubmed18n', '').replace('.index', ''))


# Synonym IDs obtained from the disease and the neutrophil OBO files for the
# given root terms.
doidSynIDs = getSynIDs(dataDir + "miRExplore/doid.obo", ['DOID:104'])
goSynIDs = getSynIDs(
    dataDir + "miRExplore/textmine/neutrophils.obo", ['NP:001'])

# Remove from tissueIDs every ID that also appears in neutrophilSynIDs.
for x in neutrophilSynIDs:
    if x not in tissueIDs:
        continue
    tissueIDs.remove(x)

# Print the four ID collections, one per line.
for idCollection in (neutrophilSynIDs, tissueIDs, doidSynIDs, goSynIDs):
    print(idCollection)

# Folder with the textmining results and the three synonym maps in it,
# loaded in this order.
resultBase = dataDir + "/miRExplore/textmine/results/"
fmaSyns = _loadSynMap("/model_anatomy/synfile.map")
doidSyns = _loadSynMap("/disease/synfile.map")
goSyns = _loadSynMap("/neutrophils/synfile.map")

# Numbers of all pubmed18n*.index files in the hgnc folder, highest first.
allfiles = glob.glob(resultBase + "/hgnc/pubmed18n*.index")
allfileIDs = sorted(map(_pubmedFileID, allfiles), reverse=True)
예제 #10
0
    # Optional PMID filter: when args.accept_pmids was supplied, collect its
    # non-empty, whitespace-stripped lines into a set. The identity test
    # `is not None` is the recommended way to compare against None.
    if args.accept_pmids is not None:

        accept_pmids = set()

        for line in args.accept_pmids:

            line = line.strip()

            # Skip lines that are empty after stripping.
            if line:
                accept_pmids.add(line)

    # The result folder is taken from the command line; an earlier variant
    # (dataDir + "/miRExplore/textmine/results_pmc/") was hard-coded.
    resultBase = args.resultdir

    # Synonym map stored directly in the result folder; the synonym files are
    # loaded with the (args.mine_path, args.datadir) pair.
    oboSyns = SynfileMap(resultBase + "/synfile.map")
    oboSyns.loadSynFiles((args.mine_path, args.datadir))

    # Index file names without ".index", in descending (string) order.
    allfiles = glob.glob(resultBase + "/*.index")
    allfileIDs = sorted(
        (os.path.basename(x).replace(".index", "") for x in allfiles),
        reverse=True)

    # Debug override left disabled: allfileIDs = [894]

    # args.obo exposes a .name attribute that is passed to GeneOntology
    # (presumably an opened file object -- confirm against the parser).
    celloObo = GeneOntology(args.obo.name)

    def getTerm(synid, obo):

        if synid in obo.dTerms:
            return obo.getID(synid)