def analyseFile(file, envPMIDs):
    """Load citation pairs from a tab-separated file into the graph database.

    Each line of *file* is split on tabs; the first two columns are read as
    ``pmid_cites`` and ``pmid_cited_by``. A pair is kept only if both IDs are
    contained in *envPMIDs*. For every kept pair a ``PUBMED_CITED_BY``
    relationship is created between the two ``PUBMED`` nodes.

    :param file: path of the tab-separated citation file
    :param envPMIDs: container of PubMed IDs that may take part in a
        relationship (used with ``in``; a set keeps the lookups cheap)
    :return: None
    """
    print("Starting file: ", file)

    allpmids = defaultdict(set)

    with open(file, 'r') as infile:
        for line in infile:
            aline = line.strip().split('\t')

            # Blank or truncated lines carry no citation pair; indexing them
            # used to raise IndexError and abort the whole file.
            if len(aline) < 2:
                continue

            pmid_cites = aline[0]
            pmid_cited_by = aline[1]

            if pmid_cites in envPMIDs and pmid_cited_by in envPMIDs:
                allpmids[pmid_cites].add(pmid_cited_by)

    # Nothing to write: do not open a database connection at all.
    if not allpmids:
        return

    procDB = neo4jInterface(simulate=False, printQueries=True)

    try:
        for pmid, citedBy in allpmids.items():
            # Every ID stored in allpmids was already checked against
            # envPMIDs while reading, so no second membership test is needed.
            for opmid in citedBy:
                procDB.createRelationship('cpmid', ['PUBMED'], {'id': pmid},
                                          'opmid', ['PUBMED'], {'id': opmid},
                                          ['PUBMED_CITED_BY'], None)
    finally:
        # Release the connection even if a query fails.
        procDB.close()
def analyseFile(splitFileID, relPMIDs):
    """Create DISEASE_MENTION relationships for one disease hit index file.

    The file ``<resultBase>/disease/medline17n<fileID>.index`` is loaded, where
    ``fileID`` is *splitFileID* right-aligned to width 4 with spaces replaced
    by ``0``. For every document contained in *relPMIDs* the accepted synonym
    hits are collected and each distinct synonym ID (with ``_`` replaced by
    ``:``) is linked to the document's ``PUBMED`` node.

    A hit is rejected when the matched text is shorter than five characters
    and it is not flagged as a perfect hit.

    :param splitFileID: identifier of the index file to process
    :param relPMIDs: container of document IDs to consider
    :return: None
    """
    fileID = "{:>4}".format(splitFileID).replace(" ", "0")

    diseaseHitsFile = resultBase + "/disease/medline17n" + fileID + ".index"
    hitsFile = SyngrepHitFile(diseaseHitsFile, diseaseMap)

    if len(hitsFile) == 0:
        return

    print("Document: " + str(fileID))
    print("Start Document: " + str(fileID))

    procDB = neo4jInterface(simulate=False, printQueries=False)

    try:
        for docID in hitsFile:

            if docID not in relPMIDs:
                continue

            synHits = hitsFile.getHitsForDocument(docID)

            foundUniqueHits = set()

            for hit in synHits:
                # Short matches are only trusted when they are perfect hits.
                if len(hit.foundSyn) < 5 and not hit.perfectHit:
                    continue

                foundUniqueHits.add(hit.synonym.id.replace('_', ':'))

            # Without hits there is nothing to link, and no PUBMED node
            # should be created for this document.
            if not foundUniqueHits:
                continue

            # The document node is the same for all synonyms, so it is
            # checked (or created) once per document instead of per synonym.
            if addUnknownPubmeds:
                procDB.createNodeIfNotExists(['EVIDENCE', 'PUBMED'], {'id': docID})
                pubmedExists = True
            else:
                pubmedExists = bool(procDB.nodeExists(['PUBMED'], {'id': docID}))

            if not pubmedExists:
                continue

            for synonymID in foundUniqueHits:
                res = procDB.createRelationship('disease', ['DISEASE'], {'id': synonymID},
                                                'pubmed', ['PUBMED'], {'id': docID},
                                                ['DISEASE_MENTION'], None)

                # The previous comprehension tested `res != None` only after
                # it had started iterating res, so a None result raised
                # TypeError instead of printing an empty list.
                created = list(res) if res is not None else []

                print("Add: ", fileID, docID, synonymID, created)

        print("End Document: " + str(fileID))
    finally:
        # Release the connection even if a query fails.
        procDB.close()
def __init__(self, chemokines=None, db=None):
    """Set up the empty result containers and query each given chemokine.

    :param chemokines: iterable of chemokines to query; it is copied into a
        new list. ``None`` (the default) means no chemokines.
    :param db: database interface to use. When ``None`` (the default) a new
        ``neo4jInterface(simulate=False, printQueries=False)`` is created for
        this instance. Previously the default interface was built once, when
        the function was defined, and shared by every instance.
    """
    # Copy so that later changes to the caller's sequence do not leak in;
    # guard against None, which list() cannot consume.
    self.chemokines = list(chemokines) if chemokines is not None else []

    if db is None:
        db = neo4jInterface(simulate=False, printQueries=False)

    self.db = db

    self.all_nodes = dict()
    self.all_relation_ids = set()
    self.all_relations = dict()
    self.simple_connections = set()

    for chemokine in self.chemokines:
        self._queryDB(chemokine)
def addMIRTs(mirtarbaseEvidences, mirtarEvs):
    """Write miRTarBase evidence entries and their relationships to the database.

    Iterates over ``mirtarbaseEvidences[0]`` and, per entry, creates the
    PUBMED node, the MIRTARBASE node (once per miRTarBase ID) and the
    relationships to support type, experiment types, publication, organism,
    gene and miRNA. Local sets remember what has already been written for a
    miRTarBase ID so each relationship is only requested once per call.

    :param mirtarbaseEvidences: indexable object whose element ``0`` is the
        iterable of evidence entries; each entry is read by column name
    :param mirtarEvs: not used in this function
    """

    print("Starting MIRTS: ", len(mirtarbaseEvidences[0]))

    # Per-call bookkeeping: miRTarBase ID -> values already linked to it.
    dbcreatedMIRT2TAX = defaultdict(set)
    dbcreatedMIRT2PUBMED = defaultdict(set)
    dbcreatedMIRT2ExpTypes = defaultdict(set)
    dbcreatedMIRT2SupportTypes = defaultdict(set)
    dbcreatedMIRT2MIRNA = defaultdict(set)
    dbcreatedMIRT2GENE = defaultdict(set)

    # miRTarBase IDs for which the MIRTARBASE node was already created.
    dbcreatedMIRTIDs = set()

    procDB = neo4jInterface(simulate=False, printQueries=False)

    for mirnaEvidence in mirtarbaseEvidences[0]:

        # Columns named in the original note: Species (miRNA), Target Gene,
        # Target Gene (Entrez Gene ID), Species (Target Gene), Experiments,
        # Support Type, References (PMID)

        mirtarID = mirnaEvidence['miRTarBase ID']
        mirtarMIRNA = mirnaEvidence['miRNA']
        mirtarMIRNASpecies = mirnaEvidence['Species (miRNA)']
        mirtarGENE = mirnaEvidence['Target Gene'].upper()
        mirtarGENESpecies = mirnaEvidence['Species (Target Gene)']
        mirtarRefs = mirnaEvidence['References (PMID)']

        """

        EXPeriment and Functional Type

        """
        # The experiments field is split on "/" and each part again on ";";
        # every resulting element is mapped through mirtarbase_exp_type.
        mirtarExperiment = mirnaEvidence['Experiments']
        mirtarExperiment = mirtarExperiment.split("/") if mirtarExperiment != None else []

        mirtarExperimentNew = []
        for x in [y.split(";") for y in mirtarExperiment if len(y) > 0]:
            for elem in x:
                mirtarExperimentNew.append(mirtarbase_exp_type(elem))

        mirtarExperiment = mirtarExperimentNew

        mirtarSupport = mirtarbase_function_label(mirnaEvidence['Support Type'])

        # Species names are looked up in speciesName2TaxID; unknown names give None.
        mirnaSpeciesID = speciesName2TaxID.get(mirtarMIRNASpecies, None)
        geneSpeciesID = speciesName2TaxID.get(mirtarGENESpecies, None)

        # Skip the entry only when neither species could be resolved.
        if mirnaSpeciesID == None and geneSpeciesID == None:
            continue

        # A common taxonomy ID exists only if miRNA and gene species agree.
        if mirnaSpeciesID == geneSpeciesID:
            commonTaxID = mirnaSpeciesID
        else:
            commonTaxID = None

        # NOTE(review): this call passes five arguments; the meaning of the
        # trailing three is defined by neo4jInterface — confirm there.
        procDB.createNodeIfNotExists(['PUBMED', 'EVIDENCE'], {'id': mirtarRefs}, 'n', ['PUBMED'], {'id': mirtarRefs})

        if not mirtarID in dbcreatedMIRTIDs:
            dbcreatedMIRTIDs.add(mirtarID)
            procDB.createNode(['MIRTARBASE', 'EVIDENCE'], {'id': mirtarID, 'tax_gene': geneSpeciesID, 'tax_mirna': mirnaSpeciesID})

        # NOTE(review): the nesting of the guards below was reconstructed from
        # flattened text; they are assumed to sit at loop level because each
        # keeps its own per-ID set — confirm against the original file.
        if not mirtarSupport in dbcreatedMIRT2SupportTypes[mirtarID]:
            dbcreatedMIRT2SupportTypes[mirtarID].add(mirtarSupport)
            procDB.createRelationship('ms', ['MIRTARBASE_SUPPORT'], {'id': mirtarSupport}, 'mtb', ['MIRTARBASE'], {'id': mirtarID}, ['MIRTARBASE_FUNCTIONAL_SUPPORT'], None)

        for expType in mirtarExperiment:
            if not expType in dbcreatedMIRT2ExpTypes[mirtarID]:
                dbcreatedMIRT2ExpTypes[mirtarID].add(expType)
                procDB.createRelationship('me', ['MIRTARBASE_EXPERIMENT'], {'id': expType}, 'mtb', ['MIRTARBASE'], {'id': mirtarID}, ['MIRTARBASE_EXPERIMENT_SUPPORT'], None)

        # TODO add relation props?
        if not mirtarRefs in dbcreatedMIRT2PUBMED[mirtarID]:
            dbcreatedMIRT2PUBMED[mirtarID].add(mirtarRefs)
            procDB.createRelationship('pb', ['PUBMED'], {'id': mirtarRefs}, 'mtb', ['MIRTARBASE'], {'id': mirtarID}, ['MIRTARBASE_LITERATURE_SUPPORT'], {})

        # The organism link is only written when miRNA and gene share a species.
        if commonTaxID != None:
            if not mirnaSpeciesID in dbcreatedMIRT2TAX[mirtarID]:
                dbcreatedMIRT2TAX[mirtarID].add(mirnaSpeciesID)
                procDB.createRelationship('mtb', ['MIRTARBASE'], {'id': mirtarID}, 'taxid', ['TAX'], {'id': mirnaSpeciesID}, ['ORGANISM_SUPPORT'], {})

        if not mirtarGENE in dbcreatedMIRT2GENE[mirtarID]:
            dbcreatedMIRT2GENE[mirtarID].add(mirtarGENE)
            procDB.createRelationship('gene', ['GENE'], {'id': mirtarGENE}, 'mtb', ['MIRTARBASE'], {'id': mirtarID}, ['GENE_MENTION'], {'tax': geneSpeciesID})

        if not mirtarMIRNA in dbcreatedMIRT2MIRNA[mirtarID]:
            dbcreatedMIRT2MIRNA[mirtarID].add(mirtarMIRNA)
            procDB.createRelationship('mtb', ['MIRTARBASE'], {'id': mirtarID}, 'mirna', ['MIRNA'], {'name': mirtarMIRNA}, ['MIRNA_MENTION'], {'tax': mirnaSpeciesID})

    # NOTE(review): close() is not reached if any call above raises.
    procDB.close()
from porestat.utils.DataFrame import DataFrame
from utils.idutils import ltype2label, makeDBGeneID, mirtarbase_exp_type, mirtarbase_function_label, speciesName2TaxID, \
    dataDir
from database.Neo4JInterface import neo4jInterface
from utils.parallel import MapReduce

# Load the miRTarBase table; bConvertTextToNumber=False is passed to the parser.
mirtarbaseEvidences = DataFrame.parseFromFile(dataDir + "/miRExplore/miRTarBase.csv", bConvertTextToNumber=False)

print(mirtarbaseEvidences.getHeader())

# NOTE(review): Counter is not imported in the lines visible here — confirm
# that `from collections import Counter` exists elsewhere in the file.
experimentTypes = Counter()
supportTypes = Counter()
referencesWithComma = Counter()

db = neo4jInterface(simulate=False, printQueries=False)

# Remove the relationships attached to MIRTARBASE nodes first ...
db.deleteRelationship('n', ['GENE'], None, 'm', ['MIRTARBASE'], None, ['GENE_MENTION'], None, 'r')
db.deleteRelationship('n', ['MIRTARBASE'], None, 'm', ['MIRNA'], None, ['MIRNA_MENTION'], None, 'r')
db.deleteRelationship('n', ['MIRTARBASE'], None, 'm', ['PUBMED'], None, ['MIRTARBASE_LITERATURE_SUPPORT'], None, 'r')
db.deleteRelationship('n', ['MIRTARBASE_SUPPORT'], None, 'm', ['MIRTARBASE'], None, ['MIRTARBASE_FUNCTIONAL_SUPPORT'], None, 'r')
db.deleteRelationship('n', ['MIRTARBASE_EXPERIMENT'], None, 'm', ['MIRTARBASE'], None, ['MIRTARBASE_EXPERIMENT_SUPPORT'], None, 'r')
db.deleteRelationship('n', ['MIRTARBASE'], None, 'm', ['TAX'], None, ['ORGANISM_SUPPORT'], None, 'r')

# ... then the nodes themselves, before the import re-creates them.
db.deleteNode(["MIRTARBASE"], None)
db.deleteNode(["MIRTARBASE_SUPPORT"], None)
db.deleteNode(["MIRTARBASE_EXPERIMENT"], None)

db.createUniquenessConstraint('MIRTARBASE', 'id')

# Disabled early exit (the condition is the constant False).
# NOTE(review): exit(0) is assumed to belong to this branch; the original
# indentation is not recoverable from the flattened text — confirm.
if False:
    db.close()
    exit(0)
import os

from mjoppich.geneontology import GeneOntology
from porestat.utils.Parallel import MapReduce

from database.Neo4JInterface import neo4jInterface
from synonymes.SynfileMap import SynfileMap
from textmining.SyngrepHitFile import SyngrepHitFile
from utils.idutils import dataDir, eprint

# Base directory of the text-mining results.
resultBase = dataDir + "/miRExplore/textmine/results/"

# Synonym file map for the disease results; loadSynFiles receives a
# (path, dataDir) pair — NOTE(review): presumably a prefix substitution,
# confirm in SynfileMap.
diseaseMap = SynfileMap(resultBase + "/disease/synfile.map")
diseaseMap.loadSynFiles(('/home/users/joppich/ownCloud/data/', dataDir))

# NOTE(review): unlike the paths above, this one has no "/" between dataDir
# and "miRExplore" — confirm dataDir ends with a separator.
diseaseObo = GeneOntology(dataDir + "miRExplore/doid.obo")

db = neo4jInterface(simulate=False)

# Drop all existing DISEASE_MENTION relationships between DISEASE and PUBMED nodes.
db.deleteRelationship('n', ['DISEASE'], None, 'm', ['PUBMED'], None, ['DISEASE_MENTION'], None)

# NOTE(review): glob is not imported in the lines visible here — confirm
# `import glob` exists. The file list is taken from the "hgnc" directory
# although the disease index files are processed later — confirm intended.
allfiles = glob.glob(resultBase + "/hgnc/medline17n*.index")

# Numeric file IDs extracted from the file names, highest first.
allfileIDs = [int(os.path.basename(x).replace('medline17n', '').replace('.index', '')) for x in allfiles]
allfileIDs = sorted(allfileIDs, reverse=True)

# When False, mentions are only added for PUBMED nodes that already exist.
addUnknownPubmeds = False

retVal = db.matchNodes(['PUBMED'], None, nodename='n')

relevantPMIDs = set()
def analyseFile(splitFileID, relPMIDs):
    """Create CELLLINE_MENTION relationships for one cell-line hit index file.

    The file ``<resultBase>/cellline/medline17n<fileID>.index`` is loaded,
    where ``fileID`` is *splitFileID* right-aligned to width 4 with spaces
    replaced by ``0``. For every document contained in *relPMIDs* the accepted
    synonym hits are collected and each distinct synonym ID is linked to the
    document's ``PUBMED`` node.

    A hit is rejected when the matched text is shorter than five characters
    and it is not flagged as a perfect hit.

    :param splitFileID: identifier of the index file to process
    :param relPMIDs: container of document IDs to consider
    :return: None
    """
    fileID = "{:>4}".format(splitFileID).replace(" ", "0")

    diseaseHitsFile = resultBase + "/cellline/medline17n" + fileID + ".index"
    hitsFile = SyngrepHitFile(diseaseHitsFile, celllinesMap)

    if len(hitsFile) == 0:
        return

    print("Start Document: " + str(fileID))

    procDB = neo4jInterface(simulate=False, printQueries=False)

    try:
        for docID in hitsFile:

            if docID not in relPMIDs:
                continue

            synHits = hitsFile.getHitsForDocument(docID)

            foundUniqueHits = set()
            foundOrgs = set()

            for hit in synHits:
                # Short matches are only trusted when they are perfect hits.
                if len(hit.foundSyn) < 5 and not hit.perfectHit:
                    continue

                # Organism of the synonym file the hit came from.
                foundOrgs.add(synfileID2tax[hit.synonymID.synfile])
                foundUniqueHits.add(hit.synonym.id)

            if not foundUniqueHits:
                continue

            # The document node is the same for all cell lines, so it is
            # checked (or created) once per document instead of per hit.
            if addUnknownPubmeds:
                procDB.createNodeIfNotExists(['EVIDENCE', 'PUBMED'], {'id': docID})
                pubmedExists = True
            else:
                pubmedExists = bool(procDB.nodeExists(['PUBMED'], {'id': docID}))

            if pubmedExists:
                for celllineID in foundUniqueHits:
                    res = procDB.createRelationship('cellline', ['CELLLINE'], {'id': celllineID},
                                                    'pubmed', ['PUBMED'], {'id': docID},
                                                    ['CELLLINE_MENTION'], None)

                    # The previous comprehension tested `res != None` only
                    # after it had started iterating res, so a None result
                    # raised TypeError instead of printing an empty list.
                    created = list(res) if res is not None else []

                    print("Add: ", fileID, docID, celllineID, created)

            # Organisms left after removing those in allSet. Nothing is done
            # with the result yet.
            # TODO: exactly one remaining organism could be associated with
            # the document; none or several are ambiguous.
            foundOrgs = foundOrgs.difference(allSet)

        print("End Document: " + str(fileID))
    finally:
        # Release the connection even if a query fails.
        procDB.close()