コード例 #1
0
import glob
import os

from mjoppich.geneontology import GeneOntology
from porestat.utils.Parallel import MapReduce

from database.Neo4JInterface import neo4jInterface
from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, eprint

# Text-mining result folder and the disease synonym map stored inside it.
resultBase = dataDir + "/miRExplore/textmine/results/"
diseaseMap = SynfileMap(resultBase + "/disease/synfile.map")
# NOTE(review): the tuple presumably maps the original synfile location to the
# local data directory -- confirm against SynfileMap.loadSynFiles.
diseaseMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))
diseaseObo = GeneOntology(dataDir + "miRExplore/doid.obo")

# Live (non-simulated) database handle.
# NOTE(review): this call presumably drops existing DISEASE_MENTION
# relationships between DISEASE and PUBMED nodes -- confirm in neo4jInterface.
db = neo4jInterface(simulate=False)
db.deleteRelationship(
    'n', ['DISEASE'], None,
    'm', ['PUBMED'], None,
    ['DISEASE_MENTION'], None,
)

# Numeric IDs of all medline17n index files, highest first.
allfiles = glob.glob(resultBase + "/hgnc/medline17n*.index")
allfileIDs = sorted(
    (
        int(
            os.path.basename(indexPath)
            .replace('medline17n', '')
            .replace('.index', '')
        )
        for indexPath in allfiles
    ),
    reverse=True,
)

addUnknownPubmeds = False

retVal = db.matchNodes(['PUBMED'], None, nodename='n')
relevantPMIDs = set()
コード例 #2
0
ファイル: createFMAAssoc.py プロジェクト: mjoppich/miRExplore
                        '--resultdir',
                        type=str,
                        help='where are all the index-files?',
                        required=True)
    parser.add_argument('-d',
                        '--datadir',
                        type=str,
                        help='where is the miRExplore base?',
                        required=True)

    args = parser.parse_args()

    # All index files and the synfile.map are read from the result directory
    # given on the command line.
    resultBase = args.resultdir

    # NOTE(review): despite its name, diseaseSyns holds the model_anatomy
    # synonym map; the name is kept because code outside this view may use it.
    diseaseSyns = SynfileMap(resultBase + "/model_anatomy/synfile.map")
    # NOTE(review): the tuple presumably maps the original synfile location to
    # the supplied data directory -- confirm against SynfileMap.loadSynFiles.
    diseaseSyns.loadSynFiles(
        ('/home/users/joppich/ownCloud/data/', args.datadir))

    # Index file names without the ".index" suffix, in descending string order.
    allfiles = glob.glob(resultBase + "/model_anatomy/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    fmaObo = GeneOntology(args.datadir +
                          "miRExplore/foundational_model_anatomy/fma_obo.obo")

    def getTerm(synid, obo):

        if synid in obo.dTerms:
コード例 #3
0
ファイル: tooCommonHits.py プロジェクト: mjoppich/miRExplore
from collections import Counter

from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, loadExludeWords

# Run configuration: the result folder to inspect and the file-count settings.
checkResultsFor = 'disease'
analyseFiles = 100
maxFiles = 892

resultBase = dataDir + "/miRExplore/textmine/results/"

# Counter for found synonyms and the loaded exclude-word list.
indexFoundSyns = Counter()
excludedSyns = loadExludeWords()

# Synonym map of the inspected result folder.
checkSynsMap = SynfileMap("/".join((resultBase, checkResultsFor, "synfile.map")))
# NOTE(review): the tuple presumably maps the original synfile location to the
# local data directory -- confirm against SynfileMap.loadSynFiles.
checkSynsMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

for splitFileID in range(maxFiles, maxFiles - analyseFiles - 1, -1):

    fileID = "{:>4}".format(splitFileID).replace(" ", "0")

    print(fileID)

    indexFile = resultBase + "/" + checkResultsFor + "/medline17n" + fileID + ".index"
    foundHits = SyngrepHitFile(indexFile, checkSynsMap)

    for doc in foundHits:

        docHits = foundHits.getHitsForDocument(doc)

        for hit in docHits:
コード例 #4
0
import glob
import os

from mjoppich.geneontology import GeneOntology
from porestat.utils.Parallel import MapReduce

from database.Neo4JInterface import neo4jInterface
from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, speciesName2TaxID, eprint

# Root folder of the text-mining results.
resultBase = dataDir + "/miRExplore/textmine/results/"
# Synonym-file map belonging to the cell-line results.
celllinesMap = SynfileMap(resultBase + "/cellline/synfile.map")
# NOTE(review): the tuple presumably maps the original synfile location to the
# local data directory -- confirm against SynfileMap.loadSynFiles.
celllinesMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# String form of every value in speciesName2TaxID, plus the literal 'all'.
knownTaxIDs = set()
knownTaxIDs.add('all')
for org in speciesName2TaxID:
    knownTaxIDs.add(str(speciesName2TaxID[org]))

synfileID2tax = {}
for synfileID in celllinesMap.synfiles:
    synfileName = celllinesMap.synfiles[synfileID]

    hitOrgs = []
    for org in knownTaxIDs:
        if "." + org + "." in synfileName:
            hitOrgs.append(org)

    if len(hitOrgs) != 1:
        print("No or multiple files for org: " + str(synfileName) + " " +
コード例 #5
0
import re

from database.ORGMIRs import ORGMIRDB
from synonymes.SynfileMap import SynfileMap
from synonymes.SynonymFile import Synfile
from synonymes.mirnaID import miRNA, miRNAPART
from textmining.SentenceDB import SentenceDB, RegPos
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import ltype2label, makeDBGeneID, mirtarbase_exp_type, mirtarbase_function_label, speciesName2TaxID, \
    dataDir
from database.Neo4JInterface import neo4jInterface
from utils.parallel import MapReduce
from enum import Enum

# Root folder of the text-mining results.
resultBase = dataDir + "/miRExplore/textmine/results/"
# Synonym-file map of the miRNA results.
# NOTE(review): the tuple passed to loadSynFiles presumably maps the original
# synfile location to the local data directory -- confirm in SynfileMap.
mirnaSyns = SynfileMap(resultBase + "/mirna/synfile.map")
mirnaSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# Synonym-file map of the HGNC results, loaded the same way.
hgncSyns = SynfileMap(resultBase + "/hgnc/synfile.map")
hgncSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# No database connection by default; only the disabled branch below creates one.
db = None

# Disabled on purpose: the condition is the literal False, so this branch never
# runs and db stays None.
if False:

    db = neo4jInterface(simulate=False)
    db.deleteRelationship('n', ['GENE'], None, 'm', ['PUBMED'], None,
                          ['ST_MENTION'], None, 'r')

    db.deleteRelationship('n', ['PUBMED_AUTHOR'], None, 'm', ['PUBMED'], None,
                          ['IS_AUTHOR'], None, 'r')
コード例 #6
0
    def _log(message):
        # Progress messages go to stderr.
        print(message, file=sys.stderr)

    def _loadSynMap(folder):
        # Build the SynfileMap of one result sub-folder and load its synfiles.
        # resultBase and dataDir are assigned below, before the first call.
        synMap = SynfileMap(resultBase + "/" + folder + "/synfile.map")
        synMap.loadSynFiles((args.mine_path, dataDir))
        return synMap

    nlp_ent = spacy.load(args.nlpent)
    _log("NLPs loaded")

    _log("Creating relChecker")
    relChecker = SentenceRelationChecker(nlp, nlp_ent)
    _log("Creating relClassifier")
    relClassifier = SentenceRelationClassifier(args.datadir + '/obodir/allrels.csv')
    _log("miRExplore relation extraction models loaded")

    # Both base locations come straight from the command line.
    resultBase = args.resultdir
    dataDir = args.datadir

    _log("Getting Folder1 synfile.map")
    ent1Syns = _loadSynMap(args.folder1)

    _log("Getting Folder2 synfile.map")
    ent2Syns = _loadSynMap(args.folder2)

    _log("Getting relations synfile.map")
    relSyns = _loadSynMap("relations")

    _log("Getting obodir/allrels.csv")
    relationSyns = AssocSynfile(args.datadir + '/obodir/allrels.csv')
    _log("All maps loaded")

    accept_pmids = None
コード例 #7
0
    parser.add_argument('-d', '--datadir', type=str, help='where is the miRExplore base?', required=True)

    # Result sub-folders of the two entities to associate.
    parser.add_argument('-f1', '--folder1', type=str, help='entity 1: hgnc, mirna', default="hgnc", required=False)
    parser.add_argument('-f2', '--folder2', type=str, help='entity 2: mgi, mirna', default="mirna", required=False)

    # Entity types belonging to the two folders above.
    parser.add_argument('-ft1', '--folderType1', type=str, help='entity type 1: entity: mirna, gene, lncrna, ...', default="gene", required=False)
    parser.add_argument('-ft2', '--folderType2', type=str, help='entity type 2: entity: mirna', default="mirna", required=False)


    args = parser.parse_args()

    # Both base locations come straight from the command line.
    resultBase = args.resultdir
    dataDir = args.datadir

    # NOTE(review): the tuple passed to loadSynFiles presumably maps the
    # original synfile location to the supplied data directory -- confirm in
    # SynfileMap.loadSynFiles.
    ent1Syns = SynfileMap(resultBase + "/"+args.folder1+"/synfile.map")
    ent1Syns.loadSynFiles(('/mnt/c/ownCloud/data', dataDir))

    ent2Syns = SynfileMap(resultBase + "/"+args.folder2+"/synfile.map")
    ent2Syns.loadSynFiles(('/mnt/c/ownCloud/data', dataDir))

    relSyns = SynfileMap(resultBase + "/relations/synfile.map")
    relSyns.loadSynFiles(('/mnt/c/ownCloud/data', dataDir))

    relationSyns = AssocSynfile(args.datadir + '/miRExplore/relations/allrels.csv')


    # Starts empty; missing keys default to an empty set.
    idTuple2Pubmed = defaultdict(set)
    orgmirDB = ORGMIRDB(dataDir + "/miRExplore/orgmir.tsv")

    # Every index file of the first entity's result folder.
    allfiles = glob.glob(resultBase + "/"+args.folder1+"/*.index")
コード例 #8
0
ファイル: createOboAssoc.py プロジェクト: mjoppich/miRExplore
    # Optional PMID whitelist: keep every non-empty, whitespace-stripped line.
    # NOTE(review): args.accept_pmids is presumably an open file (or another
    # iterable of lines) provided by argparse -- confirm at the parser setup.
    if args.accept_pmids is not None:

        accept_pmids = set()

        for line in args.accept_pmids:

            line = line.strip()

            if line:
                accept_pmids.add(line)

    # All index files and the synfile.map are read from the result directory
    # given on the command line.
    resultBase = args.resultdir

    oboSyns = SynfileMap(resultBase + "/synfile.map")
    # NOTE(review): the tuple presumably maps the original synfile location to
    # the supplied data directory -- confirm against SynfileMap.loadSynFiles.
    oboSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', args.datadir))

    # Index file names without the ".index" suffix, in descending string order.
    allfiles = glob.glob(resultBase + "/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    # The ontology is loaded from the path of the file passed as args.obo.
    celloObo = GeneOntology(args.obo.name)

    def getTerm(synid, obo):

        if synid in obo.dTerms:
            return obo.getID(synid)
コード例 #9
0
    ['FMA:67498', 'FMA:9637', 'FMA:68646'])
# Synonym IDs selected from the disease ontology and the neutrophil ontology.
doidSynIDs = getSynIDs(dataDir + "miRExplore/doid.obo", ['DOID:104'])
goSynIDs = getSynIDs(
    dataDir + "miRExplore/textmine/neutrophils.obo",
    ['NP:001'],
)

# Remove from tissueIDs every ID that is also a neutrophil ID.
for x in neutrophilSynIDs:
    if x not in tissueIDs:
        continue
    tissueIDs.remove(x)

print(neutrophilSynIDs)
print(tissueIDs)
print(doidSynIDs)
print(goSynIDs)

resultBase = dataDir + "/miRExplore/textmine/results/"

# One synonym map per result folder.
# NOTE(review): the tuple passed to loadSynFiles presumably maps the original
# synfile location to the local data directory -- confirm in SynfileMap.
fmaSyns = SynfileMap(resultBase + "/model_anatomy/synfile.map")
fmaSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

doidSyns = SynfileMap(resultBase + "/disease/synfile.map")
doidSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

goSyns = SynfileMap(resultBase + "/neutrophils/synfile.map")
goSyns.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# Numeric IDs of all pubmed18n index files, highest first.
allfiles = glob.glob(resultBase + "/hgnc/pubmed18n*.index")
allfileIDs = sorted(
    (
        int(
            os.path.basename(indexPath)
            .replace('pubmed18n', '')
            .replace('.index', '')
        )
        for indexPath in allfiles
    ),
    reverse=True,
)
コード例 #10
0
    # Optional PMID whitelist: keep every non-empty, whitespace-stripped line.
    # NOTE(review): args.accept_pmids is presumably an open file (or another
    # iterable of lines) provided by argparse -- confirm at the parser setup.
    if args.accept_pmids is not None:

        accept_pmids = set()

        for line in args.accept_pmids:

            line = line.strip()

            if line:
                accept_pmids.add(line)

    # All index files and the synfile.map are read from the result directory
    # given on the command line.
    resultBase = args.resultdir

    oboSyns = SynfileMap(resultBase + "/synfile.map")
    # NOTE(review): the tuple presumably maps the original synfile location
    # (args.mine_path) to the supplied data directory -- confirm against
    # SynfileMap.loadSynFiles.
    oboSyns.loadSynFiles((args.mine_path, args.datadir))

    # Index file names without the ".index" suffix, in descending string order.
    allfiles = glob.glob(resultBase + "/*.index")
    allfileIDs = [os.path.basename(x).replace(".index", "") for x in allfiles]
    allfileIDs = sorted(allfileIDs, reverse=True)

    # The ontology is loaded from the path of the file passed as args.obo.
    celloObo = GeneOntology(args.obo.name)

    def getTerm(synid, obo):

        if synid in obo.dTerms:
            return obo.getID(synid)