# miRNAs mirna_rnacentral = re.split('[:_]', row[0]) mirna_rnacentral_id = mirna_rnacentral[1] mirna_hgnc_id = 'None' with io.open(mirna_mapping_file, 'r', encoding='utf-8', newline='') as mm: mirna_mapping_reader = csv.reader(mm, delimiter='\t') next(mirna_mapping_reader, None) for mirna_mapping_row in mirna_mapping_reader: if mirna_mapping_row[0] == mirna_rnacentral_id: mirna_hgnc_id = mirna_mapping_row[2] break mirna_name = re.split('[" ]', row[4]) mirna_name = mirna_name[4] if mirna_hgnc_id != 'None': mirna = MiRNA([mirna_rnacentral_id, mirna_hgnc_id], [mirna_name]) network.add_node(mirna) else: mirna = MiRNA([mirna_rnacentral_id], [mirna_name]) network.add_node(mirna) # genes gene_ensembl = row[1].split(':') gene_ensembl_id = gene_ensembl[1] gene_hgnc_id = 'None' with io.open(gene_mapping_file, 'r', encoding='utf-8', newline='') as gm: gene_mapping_reader = csv.reader(gm, delimiter='\t') next(gene_mapping_reader, None) for gene_mapping_row in gene_mapping_reader: if gene_mapping_row[2] == gene_ensembl_id: gene_hgnc_id = 'HGNC:' + gene_mapping_row[1]
def add_rna(name, type, node_lookup):
    """Return the network node for an interactor, creating it on first use.

    Nodes are cached in ``node_lookup`` under the key ``name + '$' + type``;
    a cached node is returned without any further lookups.

    Args:
        name: interactor name as given in the data source.
        type: interactor type string (e.g. 'mRNA', 'miRNA', 'lncRNA', 'TF').
        node_lookup: dict used as cache; updated in place when a node is created.

    Returns:
        The cached or newly created node, or ``None`` when the resolved
        interactor id is the string 'None'.
    """
    cache_key = name + '$' + type
    if cache_key in node_lookup:
        return node_lookup[cache_key]

    is_gene_like = type in ('DNA', 'TF', 'protein', 'RBP')

    # Resolve the identifier(s); the helper used depends on the type.
    if is_gene_like or type == 'mRNA':
        interactor_id = check_hgnc_id(name)
    elif type == 'miRNA':
        interactor_id = get_mirna_id(name)
    else:
        rnacentral_id, interactor_id = get_rna_ids(name)
        if rnacentral_id == 'None':
            # no RNAcentral hit: fall back to the HGNC lookup
            interactor_id = check_hgnc_id(name)

    if interactor_id == 'None':
        return None

    if is_gene_like:
        node = Gene([interactor_id], [])
    elif type == 'miRNA':
        node = MiRNA([interactor_id], [name])
    elif type == 'mRNA':
        node = MRNA([interactor_id], [])
    else:
        # Every remaining type is an RNA subclass; unknown types become plain RNA.
        rna_classes = {
            'circRNA': CircRNA,
            'eRNA': ERNA,
            'lncRNA': LncRNA,
            'ncRNA': NcRNA,
            'piRNA': PiRNA,
            'pseudo': Pseudogene,
            'ribozyme': Ribozyme,
            'rRNA': RRNA,
            'scaRNA': ScaRNA,
            'scRNA': ScRNA,
            'snoRNA': SnoRNA,
            'snRNA': SnRNA,
        }
        node_class = rna_classes.get(type, RNA)
        if rnacentral_id == 'None':
            node_ids = [interactor_id]
        else:
            node_ids = [rnacentral_id, interactor_id]
        node = node_class(node_ids, [])

    network.add_node(node)
    node_lookup[cache_key] = node
    return node
# Handles one spreadsheet row (`sh.row_values(rownum)`); the loop providing `sh` and `rownum` is outside this excerpt.
# A row is used only when column 7 does not contain 'Weak' and columns 2 and 5 both equal the species literal.
# For such a row the miRNA name (column 1) is searched in column 2 of the tab-separated file
# `mirna_to_URS_mapping_file` (first line skipped); column 0 of the matching row becomes the miRNA id.
# A MiRNA node and a Gene node (ids 'HGNC:' + column 3 and 'Entrez:' + int(column 4)) are added and linked by a
# 'REGULATES' edge carrying source 'miRTarBase' and the row's PMID (column 8).
# When the key miRNA id + '$' + gene id is already in `edge_source_target_lookup`, the existing edges are deleted
# and re-created with the new PMID appended to the old 'pmid' attribute.
# NOTE(review): the statement nesting cannot be determined from this flattened line - confirm the indentation
# against the original file before editing.
# NOTE(review): 'H**o sapiens' looks like a masked species name - confirm against the input data.
row = sh.row_values(rownum) if 'Weak' not in row[7] and row[2] == 'H**o sapiens' and row[5] == 'H**o sapiens': mirna_name = row[1] gene_hgnc_id = 'HGNC:' + row[3] gene_entrez_id = int(row[4]) gene_entrez_id = 'Entrez:' + str(gene_entrez_id) pmid = int(row[8]) pmid = str(pmid) with io.open(mirna_to_URS_mapping_file, 'r', encoding='utf-8', newline='') as mapping_file: mapping_reader = csv.reader(mapping_file, delimiter='\t') next(mapping_reader, None) for mapping_row in mapping_reader: if mirna_name == mapping_row[2]: mirna_rnacentral_id = mapping_row[0] mirna = MiRNA([mirna_rnacentral_id], [mirna_name]) network.add_node(mirna) gene = Gene([gene_hgnc_id, gene_entrez_id], []) network.add_node(gene) if (mirna_rnacentral_id + '$' + gene_hgnc_id) in edge_source_target_lookup: edges = network.get_edges_from_to(mirna, gene, 'REGULATES') for edge in edges: pmid = edge.attributes['pmid'] + ', ' + str(pmid) network.delete_edge(edge) e = Edge(mirna, gene, 'REGULATES', {'source': 'miRTarBase', 'pmid': pmid}) network.add_edge(e) edge_source_target_lookup.append(mirna_rnacentral_id + '$' + gene_hgnc_id) else: e = Edge(mirna, gene, 'REGULATES', {'source': 'miRTarBase', 'pmid': pmid}) network.add_edge(e) edge_source_target_lookup.append(mirna_rnacentral_id + '$' + gene_hgnc_id)
def get_given_drugs_related_info(disease_pairs, drugs, threshold=10):
    """Build, describe and save one drug-centred subgraph per disease pair and drug.

    For every disease pair and each drug listed for it, a new ``Network`` is
    filled with the two diseases, the drug, the genes the drug targets, the
    most referenced common genes and variants, the gene variants with the
    smallest p-values and the RNAs connected to more than one relevant gene.
    A text summary (``..._results.txt``) and the graph (``..._graph.json``)
    are written below ``../analysis/disease_pairs/<id1>_<id2>/`` and the
    graph file is handed to ``draw_drug_subgraph``.

    Queries are run on the module-level ``session``.

    Args:
        disease_pairs: sequence of disease id pairs; items 0 and 1 of each
            pair are used.
        drugs: sequence parallel to ``disease_pairs``; ``drugs[i]`` holds the
            drug ids examined for ``disease_pairs[i]``.
        threshold: maximum number of entries kept in each ranked list
            (common genes, disease variants, gene variants, RNAs).

    Returns:
        A list with one entry per disease pair; each entry is the list of
        networks built for that pair's drugs.
    """
    all_networks = []  # contains an array for each disease pair
    for index, disease_pair in enumerate(disease_pairs):
        networks_per_drug = []  # contains a network for each drug
        d1_id = disease_pair[0]
        d2_id = disease_pair[1]
        temp_id1 = d1_id.replace(':', '-')
        temp_id2 = d2_id.replace(':', '-')
        path = '../analysis/disease_pairs/' + temp_id1 + '_' + temp_id2
        for drug_id in drugs[index]:
            # also creates missing parent directories; an existing directory is fine
            os.makedirs(path, exist_ok=True)
            network = Network()
            d1 = Disease([d1_id], [])
            network.add_node(d1)
            d2 = Disease([d2_id], [])
            network.add_node(d2)
            drug = Drug([drug_id], [])
            network.add_node(drug)
            file_prefix = path + '/' + temp_id1 + '_' + temp_id2 + '_' + drug_id.replace(':', '-')
            with io.open(file_prefix + '_results.txt', 'w', encoding='utf-8', newline='') as results_file:
                results_file.write('In this file all information about the connection between ' + d1_id +
                                   ' and ' + d2_id + ' and the drug ' + drug_id + ' is summarized:\n')

                # the drug INDICATES, CONTRAINDICATES or INDUCES the disease
                _link_drug_to_disease(network, results_file, drug, drug_id, d1, d1_id)
                _link_drug_to_disease(network, results_file, drug, drug_id, d2, d2_id)

                # the drug targets a gene which is associated to the disease
                d1_genes = _add_targeted_genes(network, drug, drug_id, d1, d1_id)
                d2_genes = _add_targeted_genes(network, drug, drug_id, d2, d2_id)
                common_drug_genes = d1_genes & d2_genes
                # genes associated to at least one disease and the drug; the most
                # referenced common genes are added below
                relevant_genes = d1_genes | d2_genes
                for disease_id, gene_ids in ((d1_id, d1_genes), (d2_id, d2_genes)):
                    if gene_ids:
                        results_file.write(drug_id + ' targets following ' + str(len(gene_ids)) +
                                           ' genes which are associated to ' + disease_id + ': ' +
                                           _join_ids(gene_ids) + '\n')
                if common_drug_genes:
                    results_file.write('The disease pair has ' + str(len(common_drug_genes)) +
                                       ' common genes which are targeted by the drug: ' +
                                       _join_ids(common_drug_genes) + '\n')

                relevant_genes.update(
                    _add_top_common_genes(network, results_file, disease_pair, d1, d2, threshold))

                variant_texts = _add_top_disease_variants(network, d1, d1_id, d2, d2_id, threshold)
                gene_variant_texts = _add_top_gene_variants(network, relevant_genes, threshold)
                if variant_texts or gene_variant_texts:
                    results_file.write('The disease pair has at least ' + str(len(variant_texts)) +
                                       ' variants associated to both diseases: ' + ', '.join(variant_texts) +
                                       ' and at least ' + str(len(gene_variant_texts)) +
                                       ' gene associated variants: ' + ', '.join(gene_variant_texts) + '\n')

                _add_regulating_rnas(network, results_file, relevant_genes, threshold)

            json_file = file_prefix + '_graph.json'
            network.save(json_file)
            draw_drug_subgraph(json_file)
            networks_per_drug.append(network)
        all_networks.append(networks_per_drug)
    return all_networks


def _join_ids(ids):
    """Render an iterable of ids as a comma separated string."""
    return ', '.join(str(item) for item in ids)


def _link_drug_to_disease(network, results_file, drug, drug_id, disease, disease_id):
    """Add one edge and one summary line per relationship type between drug and disease."""
    query = """
        MATCH (d:Disease)-[a]-(n:Drug)
        WHERE {d_id} IN d.ids AND {n_id} in n.ids
        RETURN distinct(type(a))
    """
    for record in session.run(query, parameters={'d_id': disease_id, 'n_id': drug_id}):
        edge_type = record['(type(a))']
        results_file.write(drug_id + ' ' + edge_type + ' ' + disease_id + '\n')
        network.add_edge(Edge(drug, disease, edge_type, {}))


def _add_targeted_genes(network, drug, drug_id, disease, disease_id):
    """Add the genes targeted by the drug and associated with the disease; return their ids."""
    query = """
        MATCH (n:Drug)-[:TARGETS]-(g:Gene)-[:ASSOCIATES_WITH]-(d:Disease)
        WHERE {d_id} IN d.ids AND {n_id} in n.ids
        RETURN g.`_id`
    """
    gene_ids = set()
    for record in session.run(query, parameters={'d_id': disease_id, 'n_id': drug_id}):
        gene_id = record['g.`_id`']
        gene_ids.add(gene_id)
        gene = Gene([gene_id], [])
        network.add_node(gene)
        network.add_edge(Edge(drug, gene, 'TARGETS', {'actions': []}))  # TODO
        network.add_edge(Edge(gene, disease, 'ASSOCIATES_WITH', {}))
    return gene_ids


def _count_gene_disease_pmids(gene_id, disease_id):
    """Sum ``num_pmids`` over all gene-disease edges; a missing value counts as 0."""
    query = """
        MATCH (d:Disease)-[a]-(g:Gene)
        WHERE {g_id} IN g.ids AND {d_id} IN d.ids
        RETURN a.num_pmids
    """
    total = 0
    # there may be multiple edges to the same gene
    for record in session.run(query, parameters={'g_id': gene_id, 'd_id': disease_id}):
        num_pmids = record['a.num_pmids']
        if num_pmids is not None:
            total = total + num_pmids
    return total


def _add_top_common_genes(network, results_file, disease_pair, d1, d2, threshold):
    """Add the most referenced genes shared by both diseases; return their ids."""
    all_d1_genes, all_d2_genes = get_genes(disease_pair)
    all_common_genes = all_d1_genes.intersection(all_d2_genes)
    if not all_common_genes:
        return []
    results_file.write('The disease pair has ' + str(len(all_common_genes)) +
                       ' common genes, not considering the connection to the drug.'
                       ' Following genes have the most references regarding their connection to both diseases:\n')
    ranked = []
    for gene_id in all_common_genes:
        num_pmids = (_count_gene_disease_pmids(gene_id, disease_pair[0]) +
                     _count_gene_disease_pmids(gene_id, disease_pair[1]))
        ranked.append((gene_id, num_pmids))
    ranked.sort(key=lambda item: item[1], reverse=True)  # sort by number of pmids
    top = ranked[:threshold]
    results_file.write(
        ', '.join('[' + str(gene_id) + ', ' + str(num_pmids) + ']' for gene_id, num_pmids in top) + '\n')
    for gene_id, _ in top:
        gene = Gene([gene_id], [])
        network.add_node(gene)
        network.add_edge(Edge(gene, d1, 'ASSOCIATES_WITH', {}))
        network.add_edge(Edge(gene, d2, 'ASSOCIATES_WITH', {}))
    return [gene_id for gene_id, _ in top]


def _collect_variant_pmids(totals, first_id, second_id):
    """Add ``num_pmids`` of the edges between the first disease and shared variants to ``totals``."""
    query = """
        MATCH (da:Disease)-[a]-(v:Variant)--(db:Disease)
        WHERE {da_id} in da.ids AND {db_id} in db.ids
        RETURN distinct(a.num_pmids), v.`_id`
    """
    for record in session.run(query, parameters={'da_id': first_id, 'db_id': second_id}):
        num_pmids = record['(a.num_pmids)']
        if num_pmids is None:  # no given num_pmids is similar to num_pmids = 0
            num_pmids = 0
        var_id = record['v.`_id`']
        totals[var_id] = totals.get(var_id, 0) + num_pmids


def _add_top_disease_variants(network, d1, d1_id, d2, d2_id, threshold):
    """Add the most referenced variants shared by both diseases; return their descriptions."""
    disease_variants = {}
    _collect_variant_pmids(disease_variants, d1_id, d2_id)
    _collect_variant_pmids(disease_variants, d2_id, d1_id)
    ranked = sorted(disease_variants.items(), key=lambda item: item[1], reverse=True)
    descriptions = []
    # the original cut-off kept only 9 entries, one less than every other ranked list
    for var_id, num_pmids in ranked[:threshold]:
        variant = Variant([var_id], [])
        network.add_node(variant)
        network.add_edge(Edge(variant, d1, 'ASSOCIATES_WITH', {}))
        network.add_edge(Edge(variant, d2, 'ASSOCIATES_WITH', {}))
        descriptions.append(var_id + ':' + str(num_pmids) + ' PMIDs')
    return descriptions


def _add_top_gene_variants(network, relevant_genes, threshold):
    """Add the gene variants with the smallest p-values; return their descriptions."""
    query = """
        MATCH (g:Gene)-[a]-(v:Variant)
        WHERE {g_id} in g.ids
        RETURN v.`_id`, a.pvalue, type(a)
    """
    gene_variants = []
    for gene_id in relevant_genes:
        for record in session.run(query, parameters={'g_id': gene_id}):
            pvalue = record['a.pvalue']
            # if no pvalue is given, pvalue is set to 1
            pvalue = 1 if pvalue is None else float(pvalue)
            # ids are kept as separate fields: joining them with '-' and splitting
            # again breaks ids that contain a dash themselves
            gene_variants.append((record['v.`_id`'], gene_id, pvalue, record['type(a)']))
    gene_variants.sort(key=lambda item: item[2])
    descriptions = []
    for var_id, gene_id, pvalue, edge_type in gene_variants[:threshold]:
        variant = Variant([var_id], [])
        network.add_node(variant)
        gene = Gene([gene_id], [])
        network.add_node(gene)
        network.add_edge(Edge(gene, variant, edge_type, {'pvalue': pvalue}))
        descriptions.append('[' + var_id + '-' + gene_id + ', ' + str(pvalue) + ', ' + str(edge_type) + ']')
    return descriptions


def _rna_class_for_labels(labels):
    """Return the RNA node class named by ``labels``; plain ``RNA`` if none is specific."""
    specific_classes = {
        'CircRNA': CircRNA,
        'ERNA': ERNA,
        'LncRNA': LncRNA,
        'MiRNA': MiRNA,
        'NcRNA': NcRNA,
        'PiRNA': PiRNA,
        'Pseudogene': Pseudogene,
        'Ribozyme': Ribozyme,
        'RRNA': RRNA,
        'ScaRNA': ScaRNA,
        'ScRNA': ScRNA,
        'SnoRNA': SnoRNA,
        'SnRNA': SnRNA,
    }
    chosen = RNA
    for label in labels:
        chosen = specific_classes.get(label, chosen)
    return chosen


def _add_regulating_rnas(network, results_file, relevant_genes, threshold):
    """Add RNAs connected to more than one relevant gene and the RNAs linked to the top one."""
    gene_query = """
        MATCH (r:RNA)--(g:Gene)
        WHERE {g_id} in g.ids AND NOT r.label_id CONTAINS "MRNA"
        return r.`_id`
    """
    genes_per_rna = {}  # RNA id -> set of connected relevant gene ids
    for gene_id in relevant_genes:
        for record in session.run(gene_query, parameters={'g_id': gene_id}):
            genes_per_rna.setdefault(record['r.`_id`'], set()).add(gene_id)

    # sort by the number of regulated genes only; only RNAs regulating more
    # than one gene are added and printed
    ranked = sorted(genes_per_rna.items(), key=lambda item: len(item[1]), reverse=True)
    multi_gene_rnas = [item for item in ranked if len(item[1]) > 1]
    if not multi_gene_rnas:
        return

    results_file.write('RNAs with the number and names of the genes they regulate: \n')
    for rna_id, gene_ids in multi_gene_rnas[:threshold]:
        rna = RNA([rna_id], [])
        network.add_node(rna)
        for gene_id in gene_ids:
            gene = Gene([gene_id], [])
            network.add_node(gene)
            network.add_edge(Edge(rna, gene, 'REGULATES', {}))
        results_file.write(rna_id + '\t' + str(len(gene_ids)) + '\t' + _join_ids(gene_ids) + '\n')

    # append regulating RNAs to the one RNA which regulates the most genes, MRNAs are not added
    top_rna_id = multi_gene_rnas[0][0]
    most_relevant_rna = RNA([top_rna_id], [])
    network.add_node(most_relevant_rna)
    rna_query = """
        MATCH (r:RNA)--(n:RNA)
        WHERE {r_id} in r.ids AND NOT n.label_id CONTAINS "MRNA"
        RETURN n.`_id`, labels(n)
    """
    regulator_ids = []
    for record in session.run(rna_query, parameters={'r_id': top_rna_id}):
        regulator_id = record['n.`_id`']
        # a node without a specific RNA label becomes a plain RNA instead of
        # reusing the node object of a previous record
        regulator = _rna_class_for_labels(record['labels(n)'])([regulator_id], [])
        network.add_node(regulator)
        network.add_edge(Edge(regulator, most_relevant_rna, 'REGULATES', {}))
        regulator_ids.append(regulator_id)
    results_file.write(top_rna_id + ' is the RNA which regulates the most genes in this subgraph. '
                       'It is regulated by ' + _join_ids(regulator_ids) + '.\n')
def load_from_dict(self, source: dict):
    """Populate this network from a dictionary export.

    ``source`` must provide:

    * ``'node_types'``: mapping of node label to the dotted module name that
      defines a class of the same name (labels containing ';' are ignored),
    * ``'nodes'``: node dicts with ``'_label'``, ``'ids'`` and ``'names'``;
      every key other than these and ``'_id'`` is copied into the node's
      ``attributes``,
    * ``'edges'``: edge dicts with ``'_source_id'``, ``'_source_label'``,
      ``'_target_id'``, ``'_target_label'`` and ``'_label'``; all other keys
      become the edge attributes.

    Nodes with a multi-label ``'_label'`` (containing ';') are loaded only
    when the label contains 'RNA'; others are reported and skipped. Edges
    whose source or target node cannot be found are reported and skipped.
    """
    import importlib  # local import so the block does not depend on the file header

    reserved_node_keys = ('_id', 'ids', 'names', '_label')
    reserved_edge_keys = ('_source_id', '_source_label', '_target_id', '_target_label', '_label')

    def rna_class_for(label):
        # Order matters: the first marker contained in the label wins.
        ordered_classes = (
            ('CircRNA', CircRNA), ('ERNA', ERNA), ('LncRNA', LncRNA), ('MiRNA', MiRNA),
            ('MRNA', MRNA), ('NcRNA', NcRNA), ('PiRNA', PiRNA), ('Pseudogene', Pseudogene),
            ('Ribozyme', Ribozyme), ('RRNA', RRNA), ('ScaRNA', ScaRNA), ('ScRNA', ScRNA),
            ('SnoRNA', SnoRNA), ('SnRNA', SnRNA),
        )
        for marker, node_class in ordered_classes:
            if marker in label:
                return node_class
        return RNA

    # NOTE(review): module names are taken from ``source`` and imported
    # dynamically - only load files from trusted origins.
    py_class_map = {}
    for label, module_name in source['node_types'].items():
        if ';' in label:
            continue
        py_class_map[label] = getattr(importlib.import_module(module_name), label)

    for node in source['nodes']:
        label = node['_label']
        if ';' not in label:
            node_class = py_class_map[label]
        elif 'RNA' in label:
            node_class = rna_class_for(label)
        else:
            print('[Err ] Failed to load node with multiple labels', node)
            continue
        node_instance = node_class(node['ids'], node['names'])
        for key, value in node.items():
            if key not in reserved_node_keys:
                node_instance.attributes[key] = value
        self.add_node(node_instance)

    for edge in source['edges']:
        params = {key: value for key, value in edge.items() if key not in reserved_edge_keys}
        source_node = self.get_node_by_id(edge['_source_id'], edge['_source_label'])
        if source_node is None:
            print('Failed to load edge: could not find source node with label %s and id %s' %
                  (edge['_source_label'], edge['_source_id']))
        target_node = self.get_node_by_id(edge['_target_id'], edge['_target_label'])
        if target_node is None:
            print('Failed to load edge: could not find target node with label %s and id %s' %
                  (edge['_target_label'], edge['_target_id']))
        if source_node is None or target_node is None:
            # an edge cannot be built without both end nodes
            continue
        self.add_edge(Edge(source_node, target_node, edge['_label'], params))