def harmonizeMIRNA(mirna):
    """Split off a leading organism code and normalise the miRNA prefix.

    A leading ``mmu-`` or ``hsa-`` is removed and reported separately. A
    spelled-out prefix followed by a dash (``microRNA-``, ``MiRNA-``,
    ``MIRNA-``, ...) is rewritten to ``miR-``. The result is then parsed with
    ``miRNA`` and re-rendered from its MATURE, ID and PRECURSOR parts; if
    that fails, the prefix-normalised string is returned unchanged.

    Prefix frequencies noted by the original author::

        9761 microRNA
        7958 MicroRNA
        2311 MiRNA
        2191 miRNA
        1844 hsa
        1440 let
         578 miRNAS
         437 MICRORNA
         299 microRNAS
         256 MIRNA
         155 mmu
         125 Micro
         116 micro

    :param mirna: raw miRNA name
    :return: tuple ``(organism, name)``; organism is ``'mmu'``, ``'hsa'`` or
        ``None`` when no organism prefix was present
    """
    recOrg = None
    for org in ('mmu', 'hsa'):
        orgPrefix = org + "-"
        if mirna.startswith(orgPrefix):
            recOrg = org
            mirna = mirna[len(orgPrefix):]
            break

    # Only prefixes that are directly followed by "-" are rewritten.
    for start in ('microRNA', 'MicroRNA', 'MiRNA', 'miRNA', 'MICRORNA',
                  'MIRNA'):
        if mirna.startswith(start + "-"):
            mirna = "miR" + mirna[len(start):]
            break

    try:
        oMirna = miRNA(mirna)
        mirna = oMirna.getStringFromParts(
            [miRNAPART.MATURE, miRNAPART.ID, miRNAPART.PRECURSOR])
    except Exception:
        # Best effort: keep the prefix-normalised string when parsing fails.
        pass

    return (recOrg, mirna)
def handleHarmonizedNameMirna(x):
    """Return a normalised miRNA name for a synonym hit, or ``None``.

    The synonym that produced the hit (``x.hitSyn``) is parsed first. If it
    is not among ``x.synonym.syns`` or cannot be parsed, the first synonym
    that starts with ``"miR-"`` and does not contain ``"mediated"`` is used
    instead. A parse error in that fallback is not caught here.

    :param x: hit object exposing ``hitSyn`` and ``synonym.syns``
    :return: normalised name string, or ``None`` when nothing matched
    """

    def _render(name):
        # Built lazily so that nothing is parsed unless a candidate exists.
        return miRNA(name).getStringFromParts([
            miRNAPART.ORGANISM, miRNAPART.MATURE, miRNAPART.ID,
            miRNAPART.PRECURSOR, miRNAPART.MATURE_SEQS, miRNAPART.ARM
        ], normalized=True)

    syns = x.synonym.syns

    # list.index raises ValueError for a missing element (it never returns
    # -1); map that to -1 so the fallback below is actually reachable.
    try:
        idx = syns.index(x.hitSyn)
    except ValueError:
        idx = -1

    if idx >= 0:
        try:
            return _render(syns[idx])
        except Exception:
            # The hit synonym is not parseable; try the other synonyms.
            pass

    for mirnaSyn in syns:
        if mirnaSyn.startswith("miR-") and 'mediated' not in mirnaSyn:
            return _render(mirnaSyn)

    if __debug__:
        print("Could not match", x.hitSyn)

    return None
def processFile(fin, org):
    """Collect accepted miRNA-gene relations from a tab-separated file.

    Every line is split on tabs. Column 0 is read as the miRNA name, column 3
    as the gene symbol, column 6 as the document id and column 9 as a Python
    literal holding a list of evidence tuples. Gene symbols are mapped
    through the module-level ``normGeneSymbols`` (exact match first, then
    upper-cased). An evidence is accepted when any of its elements at
    positions -4, -3 or -2 is non-zero; each accepted evidence contributes
    one relation dict. Evidences whose miRNA name cannot be parsed are
    skipped.

    :param fin: iterable of text lines
    :param org: value stored under ``'org'`` in every relation
    :return: ``defaultdict(list)`` mapping document id to relation dicts with
        keys ``'mirna'``, ``'gene'``, ``'docid'`` and ``'org'``
    """
    foundRels = defaultdict(list)

    for rawLine in fin:
        fields = rawLine.strip().split("\t")

        # Example row:
        # miR-29-b Mir-29b MIRNA GRN PGRN GENE 20479936 True True [('12', '1V2', 'NEG', 'downregulat', '20479936.2.4', False, (0, 7), (74, 78), (8, 21), 'all_rels', 1, 1, 2, 0)]
        mirna = fields[0]
        gene = fields[3]
        docid = fields[6]

        # SECURITY NOTE(review): eval() executes arbitrary expressions found
        # in the input. Only feed trusted files; consider ast.literal_eval if
        # the column always holds plain literals.
        evs = eval(fields[9])

        if gene in normGeneSymbols:
            gene = normGeneSymbols[gene]
        elif gene.upper() in normGeneSymbols:
            gene = normGeneSymbols[gene.upper()]

        for ev in evs:
            if ev[-4] == 0 and ev[-3] == 0 and ev[-2] == 0:
                continue

            try:
                mirnaName = miRNA(mirna).getStringFromParts([
                    miRNAPART.MATURE, miRNAPART.ID, miRNAPART.PRECURSOR
                ])
            except Exception:
                # Unparseable miRNA name: drop this evidence.
                continue

            foundRels[docid].append({
                'mirna': mirnaName,
                'gene': gene,
                'docid': docid,
                'org': org
            })

    return foundRels
def loadFromFile(cls, infile="/mnt/c/ownCloud/data/miRExplore/obodir/mirnas_mirbase.csv"):
    """Build an ``MI2Mirna`` lookup from a tab-separated file.

    Column 0 of each non-empty line is taken as the MI identifier and column
    1 as the miRNA name. Only names starting with ``mmu`` or ``hsa`` are
    used. For each of them the ID part of the parsed name is stored in
    ``mi2mirna[miID]`` and the reverse mapping in
    ``mirnaNum2mi[(org, idPart)]``, where ``org`` is the first three
    characters of the name. Names that fail to parse or have no ID part are
    skipped.

    :param infile: path of the tab-separated input file
    :return: the populated ``MI2Mirna`` instance
    """
    retDB = MI2Mirna()

    with open(infile, 'r') as fin:
        for rawLine in fin:
            rawLine = rawLine.strip()
            if not rawLine:
                continue

            fields = rawLine.split("\t")
            miID = fields[0]
            mirna = fields[1]

            if not mirna.startswith(("mmu", "hsa")):
                continue

            try:
                miNum = miRNA(mirna).getPart(miRNAPART.ID, None)
                if miNum is None:
                    continue

                org = mirna[0:3]
                retDB.mi2mirna[miID] = miNum
                retDB.mirnaNum2mi[(org, miNum)] = miID
            except Exception:
                # Skip entries that cannot be parsed or stored.
                continue

    return retDB
def getOrgMIRNAID(mirnaid, listedIDs, prefix, getID):
    """Assign a running synonym id to the ID part of a miRNA name.

    The name is parsed with ``miRNA`` and ``getID`` extracts the part of
    interest. The upper-cased part is looked up in ``listedIDs`` and appended
    when it is new; its position in that list forms the numeric suffix of
    the returned id.

    :param mirnaid: miRNA name to parse
    :param listedIDs: list of already known upper-cased ID parts; extended in
        place when a new part is seen
    :param prefix: string placed in front of the list position
    :param getID: callable taking the parsed miRNA and returning the ID part
        (or ``None``)
    :return: ``(synid, listedIDs)``, or ``None`` when ``getID`` yields
        ``None``
    """
    # The parameter is used here; the previous body read the global
    # ``matureID1`` instead and ignored whatever the caller passed in.
    mirna = miRNA(mirnaid)
    idPart = getID(mirna)

    if idPart is None:
        print("no ID! " + mirnaid)
        return None

    idPart = idPart.upper()

    try:
        idpartIdx = listedIDs.index(idPart)
    except ValueError:
        idpartIdx = len(listedIDs)
        listedIDs.append(idPart)

    synid = prefix + str(idpartIdx)
    return (synid, listedIDs)
'miR-23a-3p', 'miR-338-3p', 'miR-103-3p', 'miR-362-3p', 'let-7g-5p', 'miR-155-5p', 'miR-140-5p', 'miR-122-5p', 'miR-22-3p', 'miR-3470a', 'let-7d-5p' ] } ti = 0 for x in interactions: ti += len(interactions[x]) print("Total Interactions", ti) for gene in interactions: mirlist = interactions[gene] mirids = [miRNA(x) for x in mirlist] interactions[gene] = mirids graph = nx.Graph() foundInteractions = defaultdict(set) graphConnections = defaultdict(list) saseRels = 0 def loadTextmining(path): global saseRels with open(path, 'r') as fin: #/tmp/mirtex/mirel_sase_new
# number of genes with interaction
allGenes = set()
for rdb in relDBs:
    # update() grows the set in place instead of rebuilding it per database
    allGenes.update(rdb.all_ltypes)

print("Number of genes with interaction", len(allGenes))

# number of miRNAs with interaction
## restrict to miR-x
allMirnas = set()
for rdb in relDBs:
    for mirna in rdb.all_rtypes:
        try:
            mirObj = miRNA(mirna)
            allMirnas.add(
                mirObj.getStringFromParts(
                    [miRNAPART.MATURE, miRNAPART.ID, miRNAPART.PRECURSOR]))
        except Exception:
            # An unparseable name is treated as fatal: print the offending
            # name and stop.
            print(mirna)
            exit(-1)

print("Number of mirnas with interaction", len(allMirnas))

# number of gene-mirna interactions with disease association
distinctInteractionsWithDisease = set()
interactionsWithDisease = set()
def fetchGenes(cls,
               requestDict,
               gene2name=None,
               minPMIDEvCount=0,
               minTgtCount=0,
               MIRNASTRPARTS=[miRNAPART.MATURE, miRNAPART.ID],
               acceptEv=None,
               verbose=False):
    """Fetch relations and build an interaction graph from them.

    The relations returned by ``cls.fetchSimple(requestDict)`` are walked
    twice. The first pass counts accepted evidences per edge and data source
    and collects cell-type annotations for ``pmid`` evidences. The second
    pass filters the edges and adds the surviving ones to the graph.

    :param requestDict: request passed on to ``cls.fetchSimple``
    :param gene2name: optional mapping ``name -> iterable of aliases``; an
        endpoint whose upper-cased id equals an upper-cased alias is replaced
        by the name
    :param minPMIDEvCount: edges backed only by ``pmid`` evidence need at
        least this many such evidences
    :param minTgtCount: minimum number of distinct sources a target needs
    :param MIRNASTRPARTS: parts handed to ``getStringFromParts``; the default
        list is only read here, never modified
    :param acceptEv: optional predicate deciding whether an evidence counts
    :param verbose: print the relation count and dropped edges
    :return: ``(graph, nodeCounter, edge2datasourceCount, jsonRes)``
    """

    def _normalizeMirna(name):
        # Returns the original name on any failure. Previously a failing
        # getStringFromParts left the parsed miRNA object in place of the
        # name, which then ended up as edge key and graph node.
        try:
            return miRNA(name).getStringFromParts(MIRNASTRPARTS,
                                                  normalized=True)
        except Exception:
            return name

    def _looksLikeMirna(name):
        return name.upper().startswith(("MIR", "LET"))

    def _mappedEnds(rel):
        # Endpoint ids of a relation after the optional alias mapping.
        source = rel['lid']
        target = rel['rid']

        if gene2name is not None:
            source = allGenes2Name.get(source.upper(), source)
            target = allGenes2Name.get(target.upper(), target)

        return source, target

    jsonRes = cls.fetchSimple(requestDict)
    graph = networkx.Graph()

    if verbose:
        print(len(jsonRes['rels']))

    nodeCounter = Counter()

    allGenes2Name = {}
    if gene2name is not None:
        for gene in gene2name:
            for elem in gene2name[gene]:
                allGenes2Name[elem.upper()] = gene

    targets2sources = defaultdict(set)
    edge2datasourceCount = defaultdict(lambda: Counter())
    edge2celltypes = defaultdict(set)
    edge2celltypePMID = defaultdict(lambda: defaultdict(set))

    # Term names that are not accepted as cell types.
    ignoredCellTerms = {
        'cell', 'protein', 'has', 'signaling', 'function', 'role', 'sfswt-1',
        'has-15'
    }

    # Pass 1: count evidences and collect cell-type annotations per edge.
    for rel in jsonRes['rels']:
        source, target = _mappedEnds(rel)
        target = _normalizeMirna(target)
        edge = (source, target)

        for ev in rel['evidences']:
            ds = ev['data_source']

            if acceptEv is not None and not acceptEv(ev):
                continue

            edge2datasourceCount[edge][ds] += 1

            if ds != "pmid":
                continue

            docid = ev["docid"]
            allCellEvs = jsonRes['pmidinfo'].get("cells", {}).get(docid, None)
            if allCellEvs is None:
                continue

            for cellEv in allCellEvs:
                cellInfo = (cellEv['termid'], cellEv['termname'])

                if cellInfo[1].lower() in ignoredCellTerms:
                    continue

                if not cellInfo[0].startswith("CL"):
                    continue

                if docid not in jsonRes['pmidinfo']['disease']:
                    continue

                edge2celltypes[edge].add(cellInfo)
                edge2celltypePMID[edge][cellInfo].add(docid)

        targets2sources[target].add(source)

    # Pass 2: filter the edges and build the graph.
    # NOTE(review): pass 1 normalises the target unconditionally, while this
    # pass normalises the target only if it starts with MIR/LET and otherwise
    # the source. When the resulting edge key differs from the pass-1 key, no
    # evidence is found for it and the edge is dropped below. Confirm whether
    # that is intended.
    for rel in jsonRes['rels']:
        source, target = _mappedEnds(rel)

        if _looksLikeMirna(target):
            target = _normalizeMirna(target)
        elif _looksLikeMirna(source):
            source = _normalizeMirna(source)

        edge = (source, target)
        edgeCounts = edge2datasourceCount[edge]

        # Number of distinct data sources (not evidences) behind the edge.
        allEvCount = len(edgeCounts)
        otherEvCount = sum(1 for ds in edgeCounts if ds != "pmid")

        if allEvCount == 0:
            if verbose:
                print("Removing edge", edge, "for 0 count")
            continue

        if otherEvCount == 0 and edgeCounts["pmid"] < minPMIDEvCount:
            continue

        if len(targets2sources[target]) < minTgtCount:
            continue

        graph.add_node(source, color='red')
        graph.add_node(target, color='blue')
        graph.add_edge(source,
                       target,
                       celldata=edge2celltypes[edge],
                       cellEvidence=edge2celltypePMID[edge])

        nodeCounter[source] += 1
        nodeCounter[target] += 1

    return graph, nodeCounter, edge2datasourceCount, jsonRes
def findCooccurrences(pubmed, hgncHits, mirnaHits, sentDB):
    """Pair every accepted miRNA hit with every accepted gene hit.

    Short hits (found synonym of at most five characters) are filtered: gene
    hits must be perfect hits, miRNA hits must start with ``mir`` or
    ``micro``. For each remaining (miRNA, gene) pair a ``Cooccurrence`` is
    filled with the ids, the id type derived from the miRNA synonym id, a
    normalised miRNA name, the same-paragraph / same-sentence flags and the
    relation returned by ``findRelation``.

    :param pubmed: document id stored on every co-occurrence
    :param hgncHits: gene synonym hits
    :param mirnaHits: miRNA synonym hits
    :param sentDB: passed through to ``findRelation``
    :return: list of ``Cooccurrence`` objects
    """

    def checkSynHit(synhit):
        if len(synhit.foundSyn) <= 5:
            return synhit.perfectHit == True
        return True

    def chekSynHitMirna(synhit):
        if len(synhit.foundSyn) <= 5:
            foundSyn = synhit.foundSyn.lower()
            return foundSyn.startswith('mir') or foundSyn.startswith('micro')
        return True

    def _idTypeFor(synId):
        # Order matters: more specific prefixes are tested first.
        for pattern, idtype in (('MIPF[0-9]+', "MIRNA_FAMILY"),
                                ('MIMAT[0-9]+', "MIRNA"),
                                ('MI[0-9]+', 'MIRNA_PRE'),
                                ('ORGMIR[0-9]+', 'MIRNA_ORGMIR'),
                                ('ORGMI[0-9]+', 'MIRNA_ORGMIR')):
            if re.match(pattern, synId) is not None:
                return idtype
        return 'UNKNOWN'

    def _renderMirna(name, **kwargs):
        return miRNA(name).getStringFromParts([
            miRNAPART.ORGANISM, miRNAPART.MATURE, miRNAPART.ID,
            miRNAPART.PRECURSOR, miRNAPART.MATURE_SEQS, miRNAPART.ARM
        ], **kwargs)

    setAllGenes = {hit for hit in hgncHits if checkSynHit(hit)}
    setAllMirnas = {hit for hit in mirnaHits if chekSynHitMirna(hit)}

    # (paragraph id, sentence id) for every hit
    hgncToSent = {}
    mirnaToSent = {}

    for hit in hgncHits:
        hgncToSent[hit] = (hit.documentID.parID, hit.documentID.senID)

    for hit in mirnaHits:
        mirnaToSent[hit] = (hit.documentID.parID, hit.documentID.senID)

    allCoocs = []

    for x in setAllMirnas:
        for y in setAllGenes:

            foundCooc = Cooccurrence()
            foundCooc.pubmed = pubmed
            foundCooc.idtype = _idTypeFor(x.synonym.id)

            foundCooc.gene = y.synonym.id
            foundCooc.mirna = x.synonym.id
            foundCooc.mirnadesc = str(x.synonym)
            foundCooc.mirnaFound = None

            # list.index raises ValueError for a missing element (it never
            # returns -1); map that to -1 so the fallback below can run.
            try:
                idx = x.synonym.syns.index(x.hitSyn)
            except ValueError:
                idx = -1

            if idx >= 0:
                hitName = x.synonym.syns[idx]
                try:
                    foundCooc.mirnaFound = _renderMirna(hitName,
                                                        normalized=True)
                except Exception:
                    print("cannot parse mirna ", hitName)
                    sys.stderr.write("cannot parse mirna: " + hitName)

                    if __debug__:
                        # Debug runs repeat the parse outside any handler so
                        # the failure surfaces, then abort.
                        miRNA(hitName)
                        exit(-1)

                    foundCooc.mirnaFound = None

            if idx < 0 or foundCooc.mirnaFound is None:
                # Fall back to the first plain "miR-" synonym.
                # NOTE(review): unlike the primary path this call does not
                # pass normalized=True - confirm whether that is intended.
                for mirnaSyn in x.synonym.syns:
                    if mirnaSyn.startswith(
                            "miR-") and 'mediated' not in mirnaSyn:
                        foundCooc.mirnaFound = _renderMirna(mirnaSyn)
                        break

            # Leftover debug output for one specific document.
            if pubmed == '21682933':
                print(pubmed)

            miRNALoc = mirnaToSent[x]
            hgncLoc = hgncToSent[y]

            if miRNALoc[0] == hgncLoc[0]:
                foundCooc.sameParagraph = True

                if miRNALoc[1] == hgncLoc[1]:
                    foundCooc.sameSentence = True

            foundCooc.relation = findRelation(x, y, sentDB)

            allCoocs.append(foundCooc)

    return allCoocs
# For each gene of a fixed panel, fetch its relations and record the
# simplified miRNA names together with the documents / data sources that
# support each (gene, miRNA) pair.
for geneName in ["MEG3", "NEAT1", "MMP2", "MMP9", "MMP12"]:

    requestData = {'gene': [geneName], 'sentences': "false"}

    #print(requestData)

    _, _, _, json = DataBasePlotter.fetchGenes(
        requestData,
        gene2name=None,
        minPMIDEvCount=0,
        minTgtCount=0,
        acceptEv=acceptEvidence,
        MIRNASTRPARTS=[miRNAPART.MATURE, miRNAPART.ID, miRNAPART.PRECURSOR])

    for x in json["rels"]:

        mirnastr = x["rid"]
        omir = miRNA(mirnastr)

        simpleStr = omir.getStringFromParts(
            miRNACOMPARISONLEVEL.PRECURSOR.value)

        # A name rendered as "miR-7" is recorded as "let-7".
        if simpleStr == "miR-7":
            simpleStr = "let-7"

        #print(x["lid"], x["rid"], simpleStr)

        gene2result[geneName].add(simpleStr)

        relKey = (geneName, simpleStr)
        for ev in x["evidences"]:
            # With a document id the entry is a (docid, data_source) tuple,
            # otherwise just the data source.
            if "docid" not in ev:
                rel2docs[relKey].add(ev["data_source"])
            else:
                rel2docs[relKey].add((ev["docid"], ev["data_source"]))
'miR-16-1-3p', 'miR-882', 'miR-497-5p', 'miR-26a-5p', 'miR-124-3p', 'miR-26b-5p', 'miR-5620-3p', 'mIR-19a-3p', 'miR-130a-3p', 'miR-690', 'miR-185-5p', 'miR-31-5p', 'miR-340-5p', 'miR-1843-5p', 'miR-466f-3p', 'miR-301a-3p', 'miR-101a-3p', 'miR-210-3p', 'miR-107-3p', 'miR-706', 'miR-23b-3p', 'miR-146a-5p', 'miR-467f', 'miR-322-5p', 'miR-15a-5p', 'miR-29b-1-5p', 'let-7e-5p', 'miR-23a-3p', 'miR-338-3p', 'miR-103-3p', 'miR-362-3p', 'let-7g-5p', 'miR-155-5p', 'miR-140-5p', 'miR-122-5p', 'miR-22-3p', 'miR-3470a', 'let-7d-5p' ] } for gene in interactions: mirlist = interactions[gene] mirids = [miRNA(x) for x in mirlist] interactions[gene] = mirids graph = nx.Graph() foundInteractions = defaultdict(set) pickleFile = '/home/mjoppich/chemokines.upd.graph.pickle' if os.path.isfile(pickleFile): with open(pickleFile, 'rb') as infile: graphConnections = pickle.load(infile) else: test = InteractionRetriever(
elemsByGene = defaultdict(lambda: defaultdict(set))

# miRNA names of the current network as listed, before normalisation
allMirna = set(networks[network])

miStr2mirna = {}
allTargetMirna = []
mirnaObj2str = {}
mirna2evs = defaultdict(set)

# Replace every parseable name by its MATURE/ID/PRECURSOR rendering and keep
# lookups in both directions; names that cannot be parsed are dropped.
newAllMirna = set()
for x in allMirna:
    try:
        oMirna = miRNA(x)
        allTargetMirna.append(oMirna)

        miStr = oMirna.getStringFromParts(
            [miRNAPART.MATURE, miRNAPART.ID, miRNAPART.PRECURSOR])

        miStr2mirna[miStr] = oMirna
        mirnaObj2str[oMirna] = miStr
        newAllMirna.add(miStr)
    except Exception:
        # NOTE(review): if rendering fails after parsing succeeded, the
        # object stays in allTargetMirna without a string entry.
        pass

allMirna = newAllMirna
matureAcc1 = None matureID1 = None matureAcc2 = None matureID2 = None MIid = mirnaAccession = row['Accession'] matureAcc1 = row['Mature1_Acc'] matureID1 = row['Mature1_ID'] matureID2 = row['Mature2_ID'] matureAcc2 = row['Mature2_Acc'] if not (matureAcc1 == None or matureAcc1 == 'None'): MIMAT2MIRNA[matureAcc1] = miRNA(matureID1) MI2MIMAT[MIid].add(matureAcc1) if not (matureAcc2 == None or matureAcc2 == 'None'): MIMAT2MIRNA[matureAcc2] = miRNA(matureID2) MI2MIMAT[MIid].add(matureAcc2) def makeFamilySynonymes(): vFamSyns = [] for family in familyDB: for (miID, miName) in family.childMIMATs:
interactions = networks[network] acceptedInteractions = defaultdict(set) typeByGene = defaultdict(lambda: Counter()) elemsByGene = defaultdict(lambda: defaultdict(set)) allMirna = set() miStr2mirna = {} normalizedInteractions = defaultdict(set) for gene in interactions: for mirna in interactions[gene]: try: oMirna = miRNA(mirna) mirnaN = oMirna.getStringFromParts( [miRNAPART.MATURE, miRNAPART.ID, miRNAPART.PRECURSOR]) mirna = mirnaN except: pass allMirna.add(mirna) normalizedInteractions[gene].add(mirna) allTargetMirna = [] for x in allMirna: try:
def hfi_general():
    """Summarise the interactions of one gene or miRNA as a JSON response.

    The JSON request body is read for ``type`` (``gene`` or ``mirna``,
    case-insensitive, default ``gene``) and ``name``. The relations returned
    by ``returnInteractions`` are reduced to the interactor types seen, the
    data sources seen, the total evidence count and, for gene queries, the
    numeric ids of the interacting miRNAs (the raw name where no integer id
    can be derived).

    :return: response with status 200 and the summary, or status 400 with an
        ``error`` entry for invalid JSON, an invalid entity type or a missing
        name
    """
    interactReq = request.get_json(force=True, silent=True)

    if interactReq is None:
        return app.make_response((jsonify({'error': 'invalid json'}), 400,
                                  None))

    entType = interactReq.get('type', "gene")
    entTypeUpper = entType.upper()

    if entTypeUpper not in ('GENE', 'MIRNA'):
        return app.make_response(
            (jsonify({'error': 'invalid enttity type ' + entType}), 400,
             None))

    isGeneQuery = entTypeUpper == 'GENE'

    mirnas = []
    genes = []

    entName = interactReq.get('name', None)
    if entName is not None:
        (genes if isGeneQuery else mirnas).append(entName)

    if not mirnas and not genes:
        return app.make_response(
            (jsonify({'error': 'no entity names given'}), 400, None))

    relObj = returnInteractions(genes, mirnas, None, loadSentences=False)
    rels = relObj['rels']

    seenInteractors = defaultdict(set)
    seenPMIDs = set()
    seenEvidenceTypes = Counter()

    # Each rel has the shape
    # {'lid': ..., 'rid': ..., 'ltype': ..., 'rtype': ..., 'evidences': [...]}
    for rel in rels:

        if isGeneQuery:
            for ev in rel['evidences']:
                docid = ev.get('docid', None)
                if docid is not None:
                    seenPMIDs.add(docid)

                seenEvidenceTypes[ev.get('data_source', None)] += 1

        # The interactor is whichever side is not of the queried type.
        if rel['ltype'].upper() == entTypeUpper:
            seenInteractors[rel['rtype']].add(rel['rid'])
        else:
            seenInteractors[rel['ltype']].add(rel['lid'])

    seenMirnaIDs = set()

    if isGeneQuery:
        # .get() avoids inserting an empty 'mirna' entry into the defaultdict,
        # which would otherwise be counted as an interactor type below.
        for txtMirna in seenInteractors.get('mirna', ()):
            try:
                mirID = miRNA(txtMirna).getPart(miRNAPART.ID, None)
                seenMirnaIDs.add(int(mirID))
            except Exception:
                # No integer id derivable: report the raw name instead.
                seenMirnaIDs.add(txtMirna)

    answer = {
        'search': genes,
        'interactor_count': len(seenInteractors),
        'interactor_types': list(seenInteractors),
        'data_source_count': len(seenEvidenceTypes),
        'data_sources': list(seenEvidenceTypes),
        'evidence_count': sum(seenEvidenceTypes.values()),
        'interactors': list(seenMirnaIDs)
    }

    return app.make_response((jsonify(answer), 200, None))
entText = entity.attrib['text'] entType = entity.attrib['type'] entOffset = tuple( [int(x) for x in entity.attrib['charOffset'].split("-")]) if entType in ["Specific_miRNAs", "Genes/Proteins"]: if "Genes" in entType: if entText in normGeneSymbols: entText = normGeneSymbols[entText] elif entText.upper() in normGeneSymbols: gene = normGeneSymbols[entText.upper()] else: try: entText = miRNA(entText).getStringFromParts([ miRNAPART.MATURE, miRNAPART.ID, miRNAPART.PRECURSOR ]) except: pass entTuple = (entText, entType, (entOffset[0], entOffset[1] + 1)) entId2elem[entId] = entTuple sentEntText = sentText[entTuple[2][0]:entTuple[2][1]] for pair in allPairs: validInteraction = pair.attrib['interaction'].lower() == "true" pairE1 = pair.attrib['e1'] pairE2 = pair.attrib['e2']
def getCoExpression():
    """Return co-expression edges for the requested genes as JSON.

    The request body must be JSON with a non-empty ``genes`` list. Each entry
    is resolved through ``symbol2ensemblDB.get_all_genes``. If that yields
    nothing, names starting with ``LNC`` are assigned to mouse or human
    according to an ``mm10`` / ``hg38`` substring, and all other names are
    parsed as miRNA and mapped through ``mi2mirna.mirnaNum2mi``. Edges are
    fetched per organism from an ``ExpressionNetwork`` and their endpoints
    are replaced by symbols where one is known.

    :return: response with status 200 and the edge list, or status 400 with
        an ``error`` entry for invalid JSON or a missing / empty gene list
    """
    interactReq = request.get_json(force=True, silent=True)

    if interactReq is None:
        return app.make_response((jsonify({'error': 'invalid json'}), 400,
                                  None))

    # .get() so that a missing 'genes' key produces the 400 below instead of
    # an unhandled KeyError.
    allGenes = interactReq.get('genes', None)

    if allGenes is None or len(allGenes) == 0:
        return app.make_response((jsonify({'error': 'no genes'}), 400, None))

    def _symbolFor(elemID):
        # Symbol for an edge endpoint, or None when nothing is known.
        symbol = symbol2ensemblDB.get_symbol_for_ensembl(elemID)
        if symbol is None and elemID.startswith("MI"):
            symbol = mi2mirna.mi2mirna.get(elemID, None)
        return symbol

    netw = ExpressionNetwork()

    org2networg = {'mmu': "Mouse", 'hsa': "Human"}

    genesByOrg = defaultdict(set)

    for gene in allGenes:

        ensGenes = symbol2ensemblDB.get_all_genes(gene)

        if ensGenes is None or len(ensGenes) == 0:
            ensGenes = {}

            if gene.startswith("LNC"):
                if "mm10" in gene:
                    ensGenes['mmu'] = {gene}
                elif "hg38" in gene:
                    ensGenes["hsa"] = {gene}
            else:
                try:
                    minum = miRNA(gene).getPart(miRNAPART.ID, None)

                    if minum is not None:
                        mmuMI = mi2mirna.mirnaNum2mi.get(("mmu", minum), None)
                        hsaMI = mi2mirna.mirnaNum2mi.get(("hsa", minum), None)

                        if mmuMI is not None:
                            ensGenes['mmu'] = {mmuMI}

                        if hsaMI is not None:
                            ensGenes['hsa'] = {hsaMI}
                except Exception:
                    # Not parseable as miRNA: the name stays unresolved.
                    pass

        for org in ensGenes:
            genesByOrg[org2networg[org]].update(ensGenes[org])

    print("Genes by org")
    for orgName in genesByOrg:
        print(orgName, genesByOrg[orgName])

    allEdges = []

    for org in genesByOrg:
        for edge in netw.getEdgesFeature(genesByOrg[org], org):

            newsrc = _symbolFor(edge['source'])
            if newsrc is not None:
                edge['source'] = newsrc

            newtgt = _symbolFor(edge['target'])
            if newtgt is not None:
                edge['target'] = newtgt

            allEdges.append(edge)

    print(allEdges)

    return app.make_response((jsonify(allEdges), 200, None))
requestData['mirna'] = [miRName] requestData['sentences'] = "true" #print(requestData) json = DataBasePlotter.fetchSimple(requestData) allDocEvidences = defaultdict(lambda: defaultdict(list)) for x in json["rels"]: genestr = x["lid"] mirnastr = x["rid"] accMir = None try: origTarget = miRNA(miRName) target = miRNA(mirnastr) accMir = origTarget if not origTarget.accept(target, compLevel=miRNACOMPARISONLEVEL.MATUREID): print("Not accepted:", target, " as ", origTarget) pass except: #print("skipping", x) continue #print(x["lid"], x["rid"], simpleStr)
gene2mirna = defaultdict(set) for mirelPMID in [mirelPMIDhsa, mirelPMIDmmu]: for gene in mirelPMID.ltype2rel: allMirRels = mirelPMID.ltype2rel[gene] allNormedMirs = set() for rel in allMirRels: relMir = rel.rid try: testMirna = miRNA(relMir) nMirna = testMirna.getStringFromParts( [miRNAPART.MATURE, miRNAPART.ID, miRNAPART.PRECURSOR]) allNormedMirs.add(nMirna) except: print("error loading miRNA", relMir) for x in allNormedMirs: gene2mirna[gene].add(x) with open("/mnt/c/ownCloud/data/mirpredict/mirexplore_rels.tsv", 'w') as outfile: for gene in gene2mirna: allmirs = gene2mirna[gene] for mirna in allmirs: