Ejemplo n.º 1
0
    def populate_map_with_edges(self):
        """Fill the adjacency, residual and Floyd-Warshall maps from self.edges.

        For every edge this method:
          * appends the edge to ``node_to_edges`` under the sequence of its
            start node, unless ``has_in_list`` reports it as already present;
          * appends a reversed edge built with a third argument of 0 to
            ``residual_network`` under the edge's end;
          * records the edge weight in ``floyd_warshall_map[start][end]``;
          * for undirected graphs, also registers the mirrored edge in
            ``node_to_edges`` (again guarded by ``has_in_list``).

        Finally every node returned by ``get_nodes`` gets at least an empty
        row in ``floyd_warshall_map``.
        """
        for edge in self.edges:
            source = edge.get_start()
            source_node = self.nodes_map[source]
            if not self.has_in_list(edge):
                self.node_to_edges[source_node.get_sequence()].append(edge)

            target = edge.get_end()
            self.residual_network[target].append(Edge(target, source, 0))

            if source not in self.floyd_warshall_map:
                self.floyd_warshall_map[source] = {}
            self.floyd_warshall_map[source][target] = edge.get_weight()

            if self.is_directed:
                continue

            # Undirected graph: the edge can also be walked end -> start.
            mirrored = Edge(target, source, edge.get_weight())
            if not self.has_in_list(mirrored):
                self.node_to_edges[mirrored.get_start()].append(mirrored)
            # TODO fill map when not directed

        for node in self.get_nodes():
            sequence = node.get_sequence()
            if sequence not in self.floyd_warshall_map:
                self.floyd_warshall_map[sequence] = {}
Ejemplo n.º 2
0
def add_lane(lane_id, source_location_number, destination_location_number):
    """Append two opposing edges for one lane to ``edges_vertex_list``.

    Both edges share ``lane_id`` and are built with a final constructor
    argument of 1 (presumably the weight/cost -- confirm against ``Edge``).
    The first runs source -> destination, the second destination -> source.
    The location numbers are used as indices into ``nodes_vertex_list``.
    """
    source_node = nodes_vertex_list[source_location_number]
    destination_node = nodes_vertex_list[destination_location_number]
    directions = (
        (source_node, destination_node),
        (destination_node, source_node),
    )
    for start, end in directions:
        edges_vertex_list.append(Edge(lane_id, start, end, 1))
Ejemplo n.º 3
0
 def add_lane(self, lane_id, source_loc_no, dest_loc_no):
     """Append two opposing edges for one lane to ``self.edges_list``.

     Both edges share ``lane_id`` and are built with a final constructor
     argument of 1 (presumably the weight/cost -- confirm against ``Edge``).
     The location numbers are used as indices into ``self.nodes_list``.
     """
     source_node = self.nodes_list[source_loc_no]
     dest_node = self.nodes_list[dest_loc_no]
     for start, end in ((source_node, dest_node), (dest_node, source_node)):
         self.edges_list.append(Edge(lane_id, start, end, 1))
Ejemplo n.º 4
0
def get_common_genes(disease_pairs, networks, writing_files):
    """Add the genes shared by each disease pair to that pair's network.

    For every pair, both diseases are added as nodes, and each gene id
    present in both gene sets returned by ``get_genes`` is added as a Gene
    node with an ASSOCIATES_WITH edge to each disease.

    Args:
        disease_pairs: sequence of pairs of disease id strings.
        networks: indexable collection aligned with ``disease_pairs``; the
            network at each index is mutated in place.
        writing_files: when truthy and at least one common gene exists, the
            gene ids are written to
            ``../analysis/disease_pairs/<id1>_<id2>/<id1>_<id2>_common_genes.tsv``
            (with ':' replaced by '-' in the ids).

    Returns:
        A list holding the same network objects, in input order.
    """
    new_networks = []
    for index, disease_pair in enumerate(disease_pairs):
        network = networks[index]
        d1_genes, d2_genes = get_genes(disease_pair)
        common_genes = d1_genes.intersection(d2_genes)
        d1 = Disease([disease_pair[0]], [])
        network.add_node(d1)
        d2 = Disease([disease_pair[1]], [])
        network.add_node(d2)
        for g_id in common_genes:
            gene = Gene([g_id], [])
            network.add_node(gene)
            network.add_edge(Edge(gene, d1, 'ASSOCIATES_WITH', {}))
            network.add_edge(Edge(gene, d2, 'ASSOCIATES_WITH', {}))
        if common_genes and writing_files:
            # ':' is replaced so the ids are usable as directory/file names.
            temp_id1 = disease_pair[0].replace(':', '-')
            temp_id2 = disease_pair[1].replace(':', '-')
            path = '../analysis/disease_pairs/' + temp_id1 + '_' + temp_id2
            # makedirs also creates missing parent directories, which a plain
            # mkdir would fail on with FileNotFoundError on a fresh checkout.
            os.makedirs(path, exist_ok=True)
            with io.open(path + '/' + temp_id1 + '_' + temp_id2 + '_common_genes.tsv', 'w', encoding='utf-8', newline='') as common_genes_file:
                common_genes_file.write('#Common genes of ' + disease_pair[0] + ' and ' + disease_pair[1] + '\n')
                for gene in common_genes:
                    common_genes_file.write(gene + '\n')
        new_networks.append(network)
    print('Done getting genes')
    return new_networks
Ejemplo n.º 5
0
 def draw(self):
     """Draw every edge of this object in its single color.

     The model-view matrix is pushed, multiplied by ``self._identity_mat``
     and popped again afterwards. Each entry of ``self._edges`` is a pair of
     indices into ``self._vertices``.
     """
     glMatrixMode(GL_MODELVIEW)
     glPushMatrix()
     glMultMatrixf(self._identity_mat)
     red, green, blue = self._color[0], self._color[1], self._color[2]
     glColor3f(red, green, blue)
     for edge in self._edges:
         segment = Edge(self._vertices[edge[0]], self._vertices[edge[1]])
         Edge.draw_edge(segment)
     glPopMatrix()
Ejemplo n.º 6
0
def get_common_variants(disease_pairs, networks, writing_files):
    """Add the variants shared by each disease pair to that pair's network.

    Two kinds of variants are collected per pair:
      * variants directly connected to both diseases ("disease associated");
      * variants connected to a gene that both diseases share according to
        ``get_genes`` ("gene associated").

    Args:
        disease_pairs: sequence of pairs of disease id strings.
        networks: indexable collection aligned with ``disease_pairs``; the
            network at each index is mutated in place.
        writing_files: when truthy and at least one variant was found, the
            variants are written to
            ``../analysis/disease_pairs/<id1>_<id2>/<id1>_<id2>_common_variants.tsv``
            (with ':' replaced by '-' in the ids).

    Returns:
        A list holding the same network objects, in input order.
    """
    disease_query = """ MATCH (d1:Disease)--(v:Variant)--(d2:Disease) WHERE {d1_id} in d1.ids AND {d2_id} in d2.ids RETURN v.`_id` """
    gene_query = """ MATCH (g:Gene)-[a]-(v:Variant) WHERE {g_id} in g.ids RETURN v.`_id`, type(a) """

    new_networks = []
    for index, disease_pair in enumerate(disease_pairs):
        network = networks[index]
        d1 = Disease([disease_pair[0]], [])
        network.add_node(d1)
        d2 = Disease([disease_pair[1]], [])
        network.add_node(d2)

        common_variants = []    # each variant is an [identifier, origin] pair
        results = session.run(disease_query, parameters={'d1_id': disease_pair[0], 'd2_id': disease_pair[1]})
        for result in results:
            v_id = result['v.`_id`']
            common_variants.append([v_id, 'disease associated'])
            variant = Variant([v_id], [])
            network.add_node(variant)
            network.add_edge(Edge(d1, variant, 'ASSOCIATES_WITH', {}))
            network.add_edge(Edge(d2, variant, 'ASSOCIATES_WITH', {}))

        # variants associated to common genes
        d1_genes, d2_genes = get_genes(disease_pair)
        common_genes_ids = d1_genes.intersection(d2_genes)
        for gene_id in common_genes_ids:
            results = session.run(gene_query, parameters={'g_id': gene_id})
            for result in results:
                v_id = result['v.`_id`']
                rel_type = result['type(a)']    # can be CODES or EQTL
                variant_pair = v_id + '-' + gene_id
                common_variants.append([variant_pair, 'gene associated'])
                variant = Variant([v_id], [])
                network.add_node(variant)
                gene = Gene([gene_id], [])
                network.add_node(gene)
                network.add_edge(Edge(gene, variant, rel_type, {}))
                network.add_edge(Edge(gene, d1, 'ASSOCIATES_WITH', {}))
                network.add_edge(Edge(gene, d2, 'ASSOCIATES_WITH', {}))

        new_networks.append(network)

        if common_variants and writing_files:
            # ':' is replaced so the ids are usable as directory/file names.
            temp_id1 = disease_pair[0].replace(':', '-')
            temp_id2 = disease_pair[1].replace(':', '-')
            path = '../analysis/disease_pairs/' + temp_id1 + '_' + temp_id2
            # makedirs also creates missing parent directories, which a plain
            # mkdir would fail on with FileNotFoundError on a fresh checkout.
            os.makedirs(path, exist_ok=True)
            with io.open(path + '/' + temp_id1 + '_' + temp_id2 + '_common_variants.tsv', 'w', encoding='utf-8',
                         newline='') as common_variants_file:
                common_variants_file.write(
                    '#Common variants associated with ' + disease_pair[0] + ' and ' + disease_pair[1] + '\n')
                for variant in common_variants:
                    common_variants_file.write(variant[0] + '\t' + variant[1] + '\n')
    print('Done getting variants')
    return new_networks
Ejemplo n.º 7
0
def populate_edges(edges_representation):
    """Build a list of Edge objects from indexable edge descriptions.

    Each item of ``edges_representation`` supplies the start, end and weight
    at indices 0, 1 and 2 respectively; further elements are ignored.
    """
    return [
        Edge(description[0], description[1], description[2])
        for description in edges_representation
    ]
Ejemplo n.º 8
0
    def get_transpose_graph(self):
        """Return a new Graph in which every edge of this graph is reversed.

        Root, nodes and the directed flag are passed through unchanged; each
        edge keeps its weight but has start and end swapped.
        """
        reversed_edges = [
            Edge(original.get_end(), original.get_start(),
                 original.get_weight())
            for original in self.edges
        ]
        return Graph(self.root, self.nodes, reversed_edges, self.is_directed)
Ejemplo n.º 9
0
def generate(filePath):
    """Read a problem file and build its city list and edge list.

    ``readValueFromFile`` supplies the number of cities plus parallel
    sequences of costs and reliabilities. The values are assigned to city
    pairs in row-major order over the strict upper triangle:
    (0, 1), (0, 2), ..., (0, n-1), (1, 2), ...

    Returns:
        ``(city_list, edge_list)`` where ``city_list`` is the first
        ``number_of_cities`` entries of ``alphabet_list`` and each edge is
        ``Edge(city_a, city_b, float(cost), float(reliability))``.
    """
    number_of_cities, costs, reliabilities = readValueFromFile(filePath)
    city_list = alphabet_list[0:int(number_of_cities)]
    last_index = len(city_list) - 1

    edge_list = []
    first, second = 0, 1
    for reliability, cost in zip(reliabilities, costs):
        edge = Edge(city_list[first], city_list[second], float(cost),
                    float(reliability))
        edge_list.append(edge)
        if second != last_index:
            second += 1
        else:
            # End of the current row: move to the next city and restart the
            # partner index just after it.
            first += 1
            second = first + 1
    return city_list, edge_list
Ejemplo n.º 10
0
    def draw(self):
        """Draw the edges with a fixed camera, one palette color per edge.

        The first three edges are drawn solid, all later ones dotted.
        The palette has six entries and is indexed by edge position without
        wrapping, so a seventh edge raises IndexError.
        """
        glMatrixMode(GL_MODELVIEW)
        glPushMatrix()
        gluLookAt(-2, 2, -6, 0, 0, 0, 0, 1, 0)
        glMultMatrixf(self._identity_mat)

        palette = [(1, 0, 0), (1, 1, 0), (0, 1, 1),
                   (1, 0, 0), (1, 1, 0), (0, 1, 1)]

        for position, edge in enumerate(self._edges):
            red, green, blue = palette[position]
            glColor3f(red, green, blue)
            segment = Edge(self._vertices[edge[0]], self._vertices[edge[1]])
            if position <= 2:
                Edge.draw_edge(segment)
            else:
                Edge.draw_dotted_edge(segment)

        glPopMatrix()
Ejemplo n.º 11
0
    # (Fragment: the enclosing loop header and the initialisation of
    # drug_ids are outside the visible source.)
    # Extend the drug's id list with any external ids known for row[0].
    if row[0] in external_id_lookup:
        drug_ids.extend(external_id_lookup[row[0]])
    drug = Drug(drug_ids, [row[1]])
    network.add_node(drug)
    # The gene is identified by its HGNC id and, when present, the extra id
    # in row[4].
    gene_ids = ['HGNC:%s' % row[2]]
    if row[4]:
        gene_ids.append(row[4])
    gene = Gene(gene_ids, [row[3]])
    network.add_node(gene)
    # NOTE(review): 'known_action' compares row[5] with the integer 1; if the
    # rows carry strings this is always False -- confirm the row source.
    rel = {
        'source': 'DrugBank',
        'known_action': row[5] == 1,
        'actions': row[6].split(',') if row[6] else [],
        'simplified_action': row[7]
    }
    network.add_edge(Edge(drug, gene, 'TARGETS', rel))
# Drug-drug interactions: one INTERACTS edge per row, carrying the
# description from row[4].
for row in interactions_results:
    drug1 = Drug(['DrugBank:%s' % row[0]], [row[1]])
    network.add_node(drug1)
    drug2 = Drug(['DrugBank:%s' % row[2]], [row[3]])
    network.add_node(drug2)
    rel = {
        'source': 'DrugBank',
        'description': row[4]
    }
    network.add_edge(Edge(drug1, drug2, 'INTERACTS', rel))
# Adverse drug reactions get synthetic, sequentially numbered ids.
adr_id_counter = 1
for row in snp_adrs_results:
    # drugbank_id, gene_symbol, rs_id, adverse_reaction, description, pubmed_id
    adr = AdverseDrugReaction(['GenCoNet:DrugBank_ADR_%s' % adr_id_counter], [])
    adr_id_counter += 1
Ejemplo n.º 12
0
def _rnas_regulating_genes(gene_ids):
    """Map each RNA id to the list of the given gene ids it has a REGULATES relation with."""
    query = """ MATCH (g:Gene)-[:REGULATES]-(r:RNA) WHERE {gene_id} IN g.ids RETURN distinct(r.`_id`) """
    rnas = {}
    for gene_id in gene_ids:
        for result in session.run(query, parameters={'gene_id': gene_id}):
            rnas.setdefault(result['(r.`_id`)'], []).append(gene_id)
    return rnas


def get_common_rnas(disease_pairs, networks, writing_files):
    """Add the RNAs linking each disease pair to that pair's network.

    An RNA counts as common to a pair when it regulates a gene associated
    with both diseases, or when it regulates at least one gene associated
    only with the first disease and one associated only with the second.
    For every such RNA, the non-MRNA RNAs connected to it by REGULATES are
    added as well.

    Args:
        disease_pairs: sequence of pairs of disease id strings.
        networks: indexable collection aligned with ``disease_pairs``; the
            network at each index is mutated in place.
        writing_files: when truthy and at least one RNA was found, a TSV is
            written to
            ``../analysis/disease_pairs/<id1>_<id2>/<id1>_<id2>_common_rnas.tsv``
            (with ':' replaced by '-' in the ids), sorted by the number of
            regulated genes in descending order.

    Returns:
        A list holding the same network objects, in input order.
    """
    new_networks = []
    for index, disease_pair in enumerate(disease_pairs):
        network = networks[index]
        d1 = Disease([disease_pair[0]], [])
        network.add_node(d1)
        d2 = Disease([disease_pair[1]], [])
        network.add_node(d2)

        d1_genes_ids, d2_genes_ids = get_genes(disease_pair)

        # this differentiation is done to get the correct number of regulated, in this subgraph present genes
        d1_only_genes_ids = d1_genes_ids.difference(d2_genes_ids)
        d2_only_genes_ids = d2_genes_ids.difference(d1_genes_ids)
        common_genes_ids = d1_genes_ids.intersection(d2_genes_ids)

        # dict with the RNA id as key and the regulated genes as a list as value
        common_rnas = _rnas_regulating_genes(common_genes_ids)
        for rna_id, gene_ids in common_rnas.items():
            # NOTE(review): only the first common gene seen for an RNA is added
            # to the network; further genes are only counted. This matches the
            # previous behaviour -- confirm it is intended.
            gene = Gene([gene_ids[0]], [])
            network.add_node(gene)
            rna = RNA([rna_id], [])
            network.add_node(rna)
            network.add_edge(Edge(rna, gene, 'REGULATES', {}))
            network.add_edge(Edge(gene, d1, 'ASSOCIATES_WITH', {}))
            network.add_edge(Edge(gene, d2, 'ASSOCIATES_WITH', {}))

        rnas_d1_only_genes = _rnas_regulating_genes(d1_only_genes_ids)
        rnas_d2_only_genes = _rnas_regulating_genes(d2_only_genes_ids)

        for rna_id, g1_ids in rnas_d1_only_genes.items():
            if rna_id in common_rnas:
                # common_rnas have already been added to the network, here the number of regulated genes is updated
                common_rnas[rna_id] = common_rnas[rna_id] + g1_ids
            elif rna_id in rnas_d2_only_genes:
                # RNA regulates genes associated to d1 and genes associated to d2, RNA does not regulate a common gene
                g2_ids = rnas_d2_only_genes[rna_id]
                common_rnas[rna_id] = g1_ids + g2_ids
                rna = RNA([rna_id], [])
                network.add_node(rna)
                for g_id in g1_ids:
                    gene = Gene([g_id], [])
                    network.add_node(gene)
                    network.add_edge(Edge(gene, d1, 'ASSOCIATES_WITH', {}))
                    network.add_edge(Edge(rna, gene, 'REGULATES', {}))
                for g_id in g2_ids:
                    gene = Gene([g_id], [])
                    network.add_node(gene)
                    network.add_edge(Edge(gene, d2, 'ASSOCIATES_WITH', {}))
                    network.add_edge(Edge(rna, gene, 'REGULATES', {}))
                # Already merged; removing it keeps the loop below from
                # counting the d2-only genes a second time.
                del rnas_d2_only_genes[rna_id]
        for rna_id, g2_ids in rnas_d2_only_genes.items():
            if rna_id in common_rnas:
                # common_rnas have already been added to the network, here the number of regulated genes is updated
                common_rnas[rna_id] = common_rnas[rna_id] + g2_ids

        # for each RNA add an array of RNAs, which regulate this RNA. MRNAs are not included
        second_rna_query = """MATCH (r:RNA)-[:REGULATES]-(n:RNA) WHERE {r_id} IN r.ids AND NOT n.label_id CONTAINS "MRNA" RETURN distinct(n.`_id`) """
        for rna_id in common_rnas:
            second_rnas = []
            results = session.run(second_rna_query, parameters={'r_id': rna_id})
            rna = RNA([rna_id], [])
            network.add_node(rna)
            for result in results:
                second_rna_id = result['(n.`_id`)']
                second_rnas.append(second_rna_id)
                second_rna = RNA([second_rna_id], [])
                network.add_node(second_rna)
                network.add_edge(Edge(second_rna, rna, 'REGULATES', {}))
            # the value of common_rnas is now changed to an array where at the first position the array with the regulated
            # genes from this subgraph is stored and at the second position the array with RNAs regulating the RNA is stored
            # (only values of existing keys are replaced, so iterating the dict stays valid)
            common_rnas[rna_id] = [common_rnas[rna_id], second_rnas]

        new_networks.append(network)

        if common_rnas and writing_files:
            # ':' is replaced so the ids are usable as directory/file names.
            temp_id1 = disease_pair[0].replace(':', '-')
            temp_id2 = disease_pair[1].replace(':', '-')
            path = '../analysis/disease_pairs/' + temp_id1 + '_' + temp_id2
            # makedirs also creates missing parent directories, which a plain
            # mkdir would fail on with FileNotFoundError on a fresh checkout.
            os.makedirs(path, exist_ok=True)
            with io.open(path + '/' + temp_id1 + '_' + temp_id2 + '_common_rnas.tsv', 'w', encoding='utf-8', newline='') as common_rnas_file:
                common_rnas_file.write('#Common rnas of ' + disease_pair[0] + ' and ' + disease_pair[1] + '\tsorted by number of regulated genes\tRegulated genes\tRNAs regulating the RNA\n')
                # sort by the number of genes in this subgraph which are regulated by the RNA
                for key, value in sorted(common_rnas.items(), key=lambda item: len(item[1][0]), reverse=True):
                    # Joining the ids directly keeps brackets or quotes that
                    # belong to an id intact.
                    regulated_genes = ', '.join(map(str, value[0]))
                    second_rnas = ', '.join(map(str, value[1]))
                    common_rnas_file.write(key + '\t' + str(len(value[0])) + '\t' + regulated_genes + '\t' + second_rnas + '\n')
    print('Done getting RNAs')
    return new_networks
Ejemplo n.º 13
0
def get_common_drugs(disease_pairs, networks, writing_files):
    """Add the drugs linking each disease pair to that pair's network.

    Three kinds of links are collected per pair:
      * a drug directly related to both diseases;
      * a drug related to one disease that targets a gene associated with
        the other disease (checked in both directions);
      * a drug targeting genes associated with each disease (the two genes
        may be the same gene).

    Args:
        disease_pairs: sequence of pairs of disease id strings.
        networks: indexable collection aligned with ``disease_pairs``; the
            network at each index is mutated in place.
        writing_files: when truthy and at least one drug was found, the drug
            ids are written to
            ``../analysis/disease_pairs/<id1>_<id2>/<id1>_<id2>_common_drugs.tsv``
            (with ':' replaced by '-' in the ids).

    Returns:
        A list holding the same network objects, in input order.
    """
    new_networks = []
    for index, disease_pair in enumerate(disease_pairs):
        network = networks[index]
        d1 = Disease([disease_pair[0]], [])
        network.add_node(d1)
        d2 = Disease([disease_pair[1]], [])
        network.add_node(d2)
        parameters = {'d1_id': disease_pair[0], 'd2_id': disease_pair[1]}

        # the drug INDICATES, CONTRAINDICATES or INDUCES both diseases
        common_drugs = set()
        query = """ MATCH (d1:Disease)-[a]-(n:Drug)--(d2:Disease) WHERE {d1_id} IN d1.ids AND {d2_id} IN d2.ids RETURN distinct(type(a)), n.`_id` """
        results = session.run(query, parameters=parameters)
        for result in results:
            drug_id = result['n.`_id`']
            rel_type = result['(type(a))']
            common_drugs.add(drug_id)
            drug = Drug([drug_id], [])
            network.add_node(drug)
            network.add_edge(Edge(drug, d1, rel_type, {}))
        query = """ MATCH (d1:Disease)--(n:Drug)-[a]-(d2:Disease) WHERE {d1_id} IN d1.ids AND {d2_id} IN d2.ids RETURN distinct(type(a)), n.`_id` """
        results = session.run(query, parameters=parameters)
        for result in results:
            drug_id = result['n.`_id`']
            rel_type = result['(type(a))']
            common_drugs.add(drug_id)
            drug = Drug([drug_id], [])
            network.add_node(drug)
            network.add_edge(Edge(drug, d2, rel_type, {}))

        # the drug targets a gene of one disease and is associated to the other disease
        query = """ MATCH (d1:Disease)-[a]-(n:Drug)-[:TARGETS]-(g:Gene)-[:ASSOCIATES_WITH]-(d2:Disease) WHERE {d1_id} IN d1.ids AND {d2_id} IN d2.ids RETURN distinct(type(a)), n.`_id`, g.`_id` """
        results = session.run(query, parameters=parameters)
        for result in results:
            drug_id = result['n.`_id`']
            rel_type = result['(type(a))']
            common_drugs.add(drug_id)
            drug = Drug([drug_id], [])
            network.add_node(drug)
            network.add_edge(Edge(drug, d1, rel_type, {}))
            gene_id = result['g.`_id`']
            gene = Gene([gene_id], [])
            network.add_node(gene)
            network.add_edge(Edge(drug, gene, 'TARGETS', {'actions': []}))
            network.add_edge(Edge(gene, d2, 'ASSOCIATES_WITH', {}))
        query = """ MATCH (d2:Disease)-[a]-(n:Drug)-[:TARGETS]-(g:Gene)-[:ASSOCIATES_WITH]-(d1:Disease) WHERE {d1_id} IN d1.ids AND {d2_id} IN d2.ids RETURN distinct(type(a)), n.`_id`, g.`_id` """
        results = session.run(query, parameters=parameters)
        for result in results:
            drug_id = result['n.`_id`']
            rel_type = result['(type(a))']
            common_drugs.add(drug_id)
            drug = Drug([drug_id], [])
            network.add_node(drug)
            network.add_edge(Edge(drug, d2, rel_type, {}))
            gene_id = result['g.`_id`']
            gene = Gene([gene_id], [])
            network.add_node(gene)
            network.add_edge(Edge(drug, gene, 'TARGETS', {'actions': []}))
            network.add_edge(Edge(gene, d1, 'ASSOCIATES_WITH', {}))

        # the drug targets one gene which is associated to both diseases or the drug targets two different genes
        # where each gene is associated to one of the diseases
        query = """ MATCH (d1:Disease)-[:ASSOCIATES_WITH]-(g1:Gene)-[:TARGETS]-(n:Drug)-[:TARGETS]-(g2:Gene)-
        [:ASSOCIATES_WITH]-(d2:Disease) WHERE {d1_id} IN d1.ids AND {d2_id} IN d2.ids RETURN n.`_id`, g1.`_id`, g2.`_id` """
        results = session.run(query, parameters=parameters)
        for result in results:
            drug_id = result['n.`_id`']
            common_drugs.add(drug_id)
            g1_id = result['g1.`_id`']
            g2_id = result['g2.`_id`']
            g1 = Gene([g1_id], [])
            network.add_node(g1)
            network.add_edge(Edge(g1, d1, 'ASSOCIATES_WITH', {}))
            drug = Drug([drug_id], [])
            network.add_node(drug)
            network.add_edge(Edge(drug, g1, 'TARGETS', {'actions': []}))
            g2 = Gene([g2_id], [])
            network.add_node(g2)
            network.add_edge(Edge(drug, g2, 'TARGETS', {'actions': []}))
            network.add_edge(Edge(g2, d2, 'ASSOCIATES_WITH', {}))

        new_networks.append(network)

        if common_drugs and writing_files:
            # ':' is replaced so the ids are usable as directory/file names.
            temp_id1 = disease_pair[0].replace(':', '-')
            temp_id2 = disease_pair[1].replace(':', '-')
            path = '../analysis/disease_pairs/' + temp_id1 + '_' + temp_id2
            # makedirs also creates missing parent directories, which a plain
            # mkdir would fail on with FileNotFoundError on a fresh checkout.
            os.makedirs(path, exist_ok=True)
            with io.open(path + '/' + temp_id1 + '_' + temp_id2 + '_common_drugs.tsv', 'w', encoding='utf-8', newline='') as common_drugs_file:
                common_drugs_file.write('#Common drugs of ' + disease_pair[0] + ' and ' + disease_pair[1] + '\n')
                for drug in common_drugs:
                    common_drugs_file.write(drug + '\n')
    print('Done getting drugs')
    return new_networks
Ejemplo n.º 14
0
#!/usr/bin/env python3

import io
import csv
from model.network import Network
from model.drug import Drug
from model.disease import Disease
from model.edge import Edge

# Build a drug-disease network from the PubMed CSV export and save it as JSON.
network = Network()

with io.open('../data/PubMed/drug_disease.csv', 'r', encoding='utf-8',
             newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    # Discard the first row (presumably the column header).
    next(reader, None)
    for row in reader:
        # Columns used: 0 drug name, 1 DrugBank id, 2 edge label,
        # 3 disease name, 4 disease id, 5 PubMed id.
        drug = Drug(['DrugBank:%s' % row[1]], [row[0]])
        disease = Disease([row[4]], [row[3]])
        network.add_node(drug)
        network.add_node(disease)
        attributes = {'source': 'PubMed', 'pmid': row[5]}
        network.add_edge(Edge(drug, disease, row[2], attributes))

network.save('../data/PubMed/graph.json')
Ejemplo n.º 15
0
    # (Fragment: the enclosing loop over associations and the extraction of
    # the from_* fields and association_type are outside the visible source.)
    to_namespace = association.find('to_namespace').text
    to_id = association.find('to_code').text
    to_name = association.find('to_name').text
    # Only RxNorm -> MeSH associations are kept.
    if from_namespace != 'RxNorm' or to_namespace != 'MeSH':
        continue
    # Only the three association types mapped to edge labels below are kept.
    if association_type not in ['induces', 'CI_with', 'may_treat']:
        continue
    # Create each drug node once; afterwards reuse the node already stored
    # in the network.
    drug_id = 'RxNorm:%s' % from_id
    if from_id not in added_rxnorm_drugs:
        drug = Drug([drug_id], [from_name])
        network.add_node(drug)
        added_rxnorm_drugs.add(from_id)
    else:
        drug = network.get_node_by_id(drug_id, 'Drug')
    # Same create-once logic for the disease node.
    disease_id = 'MeSH:%s' % to_id
    if to_id not in added_mesh_diseases:
        disease = Disease([disease_id], [to_name])
        network.add_node(disease)
        added_mesh_diseases.add(to_id)
    else:
        disease = network.get_node_by_id(disease_id, 'Disease')
    rel = {'source': 'MEDRT'}
    # Translate the MED-RT association type into the network's edge label.
    if association_type == 'induces':
        network.add_edge(Edge(drug, disease, 'INDUCES', rel))
    elif association_type == 'CI_with':
        network.add_edge(Edge(drug, disease, 'CONTRAINDICATES', rel))
    elif association_type == 'may_treat':
        network.add_edge(Edge(drug, disease, 'INDICATES', rel))

network.save('../data/MED-RT/graph.json')
Ejemplo n.º 16
0
 # (Fragment: the enclosing loop and the creation of mirna, gene and their
 # ids are outside the visible source.)
 # row[8] is split on ':' and the part after the first ':' is kept as the
 # PubMed id.
 pmid = row[8].split(':')
 pmid = pmid[1]
 # Strip double quotes from the source database name.
 source_database = row[12]
 source_database = source_database.replace('\"', '')
 # A "<miRNA id>$<gene id>" key marks pairs that already have an edge.
 # NOTE(review): the lookup is used with .append(), so it is presumably a
 # list and this membership test is linear -- confirm.
 if (mirna_rnacentral_id + '$' +
         gene_hgnc_id) in edge_source_target_lookup:
     reg_edges = network.get_edges_from_to(
         mirna, gene, 'REGULATES')
     for reg_edge in reg_edges:
         # Only an existing edge from the same source database is merged:
         # its PubMed ids are prepended to the new one and the edge is
         # replaced. If no edge matches, nothing is added for this row.
         if reg_edge.attributes['source'] == (
                 'EBI-GOA-miRNA, ' + source_database):
             # pmid is reassigned here, so it accumulates if more than one
             # edge matches.
             pmid = reg_edge.attributes['pmid'] + ', ' + pmid
             # NOTE(review): the edge is deleted and a new one added while
             # iterating reg_edges; safe only if get_edges_from_to returns
             # a copy -- confirm.
             network.delete_edge(reg_edge)
             e = Edge(
                 mirna, gene, 'REGULATES', {
                     'source':
                     'EBI-GOA-miRNA, ' + source_database,
                     'pmid': pmid
                 })
             network.add_edge(e)
             # The key is appended again although it is already in the lookup.
             edge_source_target_lookup.append(
                 mirna_rnacentral_id + '$' + gene_hgnc_id)
 else:
     # First edge for this miRNA/gene pair: add it and remember the pair.
     e = Edge(
         mirna, gene, 'REGULATES', {
             'source': 'EBI-GOA-miRNA, ' + source_database,
             'pmid': pmid
         })
     network.add_edge(e)
     edge_source_target_lookup.append(mirna_rnacentral_id +
                                      '$' + gene_hgnc_id)
 # GOs
Ejemplo n.º 17
0
# Build a drug-disease network from the SIDER files and save it as JSON.
network = Network()

# Lookup from the (stripped) first column of the drug file to the (stripped)
# second column, used below as the drug name.
drug_lookup = {}
with io.open(drug_file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    for row in reader:
        compound_key = row[0].strip()
        drug_lookup[compound_key] = row[1].strip()

# Columns of the indications file (1-based):
# 1: STITCH compound id (flat, see above)
# 2: UMLS concept id as it was found on the label
# 3: method of detection: NLP_indication / NLP_precondition / text_mention
# 4: concept name
# 5: MedDRA concept type (LLT = lowest level term, PT = preferred term; in a few cases the term is neither LLT nor PT)
# 6: UMLS concept id for MedDRA term
# 7: MedDRA concept name

# All side effects found on the labels are given as LLT. Additionally, the PT is shown. There is at least one
# PT for every LLT, but sometimes the PT is the same as the LLT.
with io.open(file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    for row in reader:
        # Drop the first four characters of the compound id and any leading
        # zeros to obtain the PubChem CID number.
        pubchem_id = row[0][4:].lstrip('0')
        if row[0] in drug_lookup:
            drug_names = [drug_lookup[row[0]]]
        else:
            drug_names = []
        drug = Drug(['PubChem:CID%s' % pubchem_id], drug_names)
        network.add_node(drug)
        disease_ids = ['UMLS:%s' % row[1], 'UMLS:%s' % row[5]]
        disease = Disease(disease_ids, [row[3], row[6]])
        network.add_node(disease)
        network.add_edge(Edge(drug, disease, 'INDICATES', {'source': 'SIDER'}))

network.save('../data/SIDER/graph.json')
Ejemplo n.º 18
0
# Build a drug-gene TARGETS network from the DGIdb export and save it as JSON.
network = Network()

with io.open(file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    # Discard the first row (presumably the column header).
    next(reader, None)
    for row in reader:
        row = [x.strip() for x in row]
        # Rows lacking the gene id (0), drug name (7) or ChEMBL id (8) are
        # skipped.
        if not (row[0] and row[7] and row[8]):
            continue

        gene_ids = {'HGNC:%s' % row[0]}
        if row[2]:
            gene_ids.add('Entrez:%s' % row[2])
        gene = Gene(gene_ids, [])
        network.add_node(gene)

        # Remove the ChEMBL id, with or without parentheses, from the name.
        drug_name = row[7].replace('(%s)' % row[8], '')
        drug_name = drug_name.replace(row[8], '').strip()
        if drug_name:
            drug = Drug(['ChEMBL:%s' % row[8]], [drug_name])
        else:
            drug = Drug(['ChEMBL:%s' % row[8]], [])
        network.add_node(drug)

        rel = {'source': 'DGIdb,%s' % row[3], 'actions': [row[4]]}
        if row[9]:
            # Append the comma-separated PubMed ids, each prefixed with PMID:.
            pubmed_ids = ','.join(
                'PMID:%s' % x for x in row[9].strip().split(','))
            rel['source'] += ',%s' % pubmed_ids
        network.add_edge(Edge(drug, gene, 'TARGETS', rel))

network.save('../data/DGIdb/graph.json')
Ejemplo n.º 19
0
    # (Fragment: the enclosing loop over owl_class elements is outside the
    # visible source.)
    id_node = owl_class.find(obo_in_owl_ns + 'id')
    obo_ns_node = owl_class.find(obo_in_owl_ns + 'hasOBONamespace')
    label_node = owl_class.find(rdfs_ns + 'label')
    # Classes without an id or an OBO namespace are ignored.
    # NOTE(review): label_node is not checked for None before .text is read.
    if id_node is not None and obo_ns_node is not None:
        go_class = GOClass([id_node.text], [label_node.text])
        network.add_node(go_class)
        go_class_ns_lookup[id_node.text] = obo_ns_node.text
        # Remember alternative ids so annotations using them can be
        # redirected to the primary id.
        for alternative_id_node in owl_class.findall(obo_in_owl_ns + 'hasAlternativeId'):
            go_class_redirects[alternative_id_node.text] = id_node.text

with io.open(annotations_file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    for row in reader:
        # Skip lines whose first field starts with '!' and keep only rows
        # whose column 12 is taxon:9606.
        # NOTE(review): row[0][0] raises IndexError if the first field is empty.
        if not row[0][0].startswith('!') and row[12] == 'taxon:9606':
            gene = Gene(['UniProtKB:%s' % row[1], 'HGNC:%s' % row[2]], [])
            network.add_node(gene)
            # Ids missing from the namespace lookup are replaced by their
            # redirect target; an id without a redirect raises KeyError.
            if row[4] not in go_class_ns_lookup:
                # print('[WARN] GO id %s is obsolete, redirecting to %s' % (row[4], go_class_redirects[row[4]]))
                row[4] = go_class_redirects[row[4]]
            # The edge label is derived from the upper-cased OBO namespace
            # of the GO class.
            label = go_class_ns_lookup[row[4]].upper()
            if label == 'MOLECULAR_FUNCTION':
                label = 'HAS_' + label
            elif label == 'BIOLOGICAL_PROCESS':
                label = 'BELONGS_TO_' + label
            elif label == 'CELLULAR_COMPONENT':
                label = 'IN_' + label
            e = Edge(gene, network.get_node_by_id(row[4], 'GOClass'), label, {'source': 'GO,%s' % row[5]})
            network.add_edge(e)

network.save('../data/GO/graph.json')
Ejemplo n.º 20
0
# 30 OR or BETA
# 31 95% CI (TEXT)
# 32 PLATFORM [SNPS PASSING QC]
# 33 CNV
# Gene names consisting solely of "LOC" followed by digits are skipped below.
loc_pattern = re.compile(r'LOC[0-9]+')
with io.open(file, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    # Discard the first row (presumably the column header).
    next(reader, None)
    for row in reader:
        if not row[14]:
            continue
        # Column 14 may list several genes separated by ", ", " x " or " - ";
        # normalise the separators before splitting.
        normalised = row[14].replace(' x ', ', ').replace(' - ', ', ')
        gene_ids = normalised.split(', ')
        print(row[14])
        print('\t', gene_ids)
        for gene_id in gene_ids:
            if loc_pattern.fullmatch(gene_id):
                continue
            gene = Gene(['HGNC:%s' % gene_id], [])
            network.add_node(gene)
            # Column 21 holds ';'-separated variant ids; the set removes
            # duplicates after stripping whitespace.
            for variant_id in {x.strip() for x in row[21].split(';')}:
                variant = Variant(['dbSNP:%s' % variant_id], [])
                network.add_node(variant)
                attributes = {'source': 'GWASCatalog', 'pmid': row[1]}
                network.add_edge(Edge(gene, variant, 'CODES', attributes))

network.save('../data/GWAS-Catalog/graph.json')
Ejemplo n.º 21
0
    def load_from_dict(self, source: dict):
        """Populate this network from a dictionary of node types, nodes and edges.

        ``source['node_types']`` maps a node label to the dotted name of the
        module that defines the class of the same name. ``source['nodes']`` is
        a list of dicts with the keys ``_label``, ``ids`` and ``names``; every
        other key except ``_id`` is copied into the node's ``attributes``.
        ``source['edges']`` is a list of dicts with the keys ``_source_id``,
        ``_source_label``, ``_target_id``, ``_target_label`` and ``_label``;
        the remaining keys are passed to ``Edge`` as its parameters.

        Nodes with several labels are only supported when one of the labels
        is an RNA type; other multi-label nodes are reported and skipped.
        Edges whose source or target node cannot be found are reported and
        skipped.
        """
        py_class_map = {}
        for label in source['node_types']:
            if ';' not in label:
                module_name = source['node_types'][label]
                # __import__ returns the top-level package, so walk down the
                # dotted path to reach the module that defines the class.
                module = __import__(module_name)
                for package in module_name.split('.')[1:]:
                    module = getattr(module, package)
                py_class_map[label] = getattr(module, label)
        for node in source['nodes']:
            node_instance: Node
            if ';' not in node['_label']:
                class_ = py_class_map[node['_label']]
                node_instance = class_(node['ids'], node['names'])
            elif 'RNA' in node['_label']:
                # Substring tests: the order matters, e.g. 'LncRNA' has to be
                # checked before 'NcRNA' because it contains it.
                label = node['_label']
                if 'CircRNA' in label:
                    node_instance = CircRNA(node['ids'], node['names'])
                elif 'ERNA' in label:
                    node_instance = ERNA(node['ids'], node['names'])
                elif 'LncRNA' in label:
                    node_instance = LncRNA(node['ids'], node['names'])
                elif 'MiRNA' in label:
                    node_instance = MiRNA(node['ids'], node['names'])
                elif 'MRNA' in label:
                    node_instance = MRNA(node['ids'], node['names'])
                elif 'NcRNA' in label:
                    node_instance = NcRNA(node['ids'], node['names'])
                elif 'PiRNA' in label:
                    node_instance = PiRNA(node['ids'], node['names'])
                elif 'Pseudogene' in label:
                    node_instance = Pseudogene(node['ids'], node['names'])
                elif 'Ribozyme' in label:
                    node_instance = Ribozyme(node['ids'], node['names'])
                elif 'RRNA' in label:
                    node_instance = RRNA(node['ids'], node['names'])
                elif 'ScaRNA' in label:
                    node_instance = ScaRNA(node['ids'], node['names'])
                elif 'ScRNA' in label:
                    node_instance = ScRNA(node['ids'], node['names'])
                elif 'SnoRNA' in label:
                    node_instance = SnoRNA(node['ids'], node['names'])
                elif 'SnRNA' in label:
                    node_instance = SnRNA(node['ids'], node['names'])
                else:
                    node_instance = RNA(node['ids'], node['names'])
            else:
                print('[Err ] Failed to load node with multiple labels', node)
                continue
            for key in node.keys():
                if key not in ['_id', 'ids', 'names', '_label']:
                    node_instance.attributes[key] = node[key]
            self.add_node(node_instance)

        for edge in source['edges']:
            # Everything except the structural keys becomes an edge parameter.
            params = dict(edge)
            for structural_key in ('_source_id', '_source_label',
                                   '_target_id', '_target_label', '_label'):
                del params[structural_key]
            source_node = self.get_node_by_id(edge['_source_id'],
                                              edge['_source_label'])
            if source_node is None:
                print(
                    'Failed to load edge: could not find source node with label %s and id %s'
                    % (edge['_source_label'], edge['_source_id']))
            target_node = self.get_node_by_id(edge['_target_id'],
                                              edge['_target_label'])
            if target_node is None:
                print(
                    'Failed to load edge: could not find target node with label %s and id %s'
                    % (edge['_target_label'], edge['_target_id']))
            # An edge with a missing endpoint cannot be built; it has been
            # reported above, so skip it instead of adding a broken edge.
            if source_node is None or target_node is None:
                continue
            self.add_edge(
                Edge(source_node, target_node, edge['_label'], params))
Ejemplo n.º 22
0
             drug_names.append(prop[1])
         elif property_defs[prop[0]] == 'UMLS_CUI':
             drug_ids.append('UMLS:%s' % prop[1])
         elif property_defs[prop[0]] == 'Synonym':
             drug_names.append(prop[1])
     # Strip the '[VA Product]' marker and surrounding whitespace from names.
     drug_names = [
         name.replace('[VA Product]', '').strip() for name in drug_names
     ]
     drug = Drug(drug_ids, drug_names)
     network.add_node(drug)
     # Translate the supported NDFRT roles into drug -> disease edges; any
     # other role is ignored.
     for role in concept['roles']:
         role_name = role_defs[role[0]]
         rel = {'source': 'NDFRT'}
         if role_name == 'induces {NDFRT}':
             edge_label = 'INDUCES'
         elif role_name == 'CI_with {NDFRT}':
             edge_label = 'CONTRAINDICATES'
         elif role_name == 'may_treat {NDFRT}':
             edge_label = 'INDICATES'
         else:
             continue
         # The target is given as an (id, label) tuple rather than a node.
         disease_ref = ('NDFRT:%s' % role[1], 'Disease')
         network.add_edge(Edge(drug, disease_ref, edge_label, rel))
 elif concept['kind'] == kind_defs_rev['DISEASE_KIND']:
     disease_ids = ['NDFRT:%s' % concept['code']]
     disease_names = [concept['name']]
     for prop in concept['properties']:
         if property_defs[prop[0]] == 'SNOMED_CID':
             disease_ids.append('SnoMedCT:%s' % prop[1])
         elif property_defs[prop[0]] == 'UMLS_CUI':
Ejemplo n.º 23
0
def _strip_set_repr(ids):
    """Return ``str(ids)`` of a set without braces and single quotes."""
    return str(ids).replace('{', '').replace('}', '').replace('\'', '')


def _strip_outer_repr(values):
    """Return ``str(values)`` without its enclosing brackets and single quotes."""
    return str(values)[1:-1].replace('\'', '')


def _add_num_pmids(results, total):
    """Add every non-null ``a.num_pmids`` of the query results to ``total``."""
    for result in results:  # multiple edges to the same gene
        num_pmids = result['a.num_pmids']
        if num_pmids is not None:
            total = total + num_pmids
    return total


def _accumulate_variant_pmids(results, totals):
    """Sum ``(a.num_pmids)`` per variant id into ``totals``; null counts as 0."""
    for record in results:
        num_pmids = record['(a.num_pmids)']
        if num_pmids is None:
            num_pmids = 0
        var_id = record['v.`_id`']
        if var_id in totals:
            totals[var_id] = totals[var_id] + num_pmids
        else:
            totals[var_id] = num_pmids


def _rna_node_for_label(label, rna_id):
    """Build the RNA node matching ``label``; unknown labels give a plain RNA."""
    rna_classes = {
        'CircRNA': CircRNA,
        'ERNA': ERNA,
        'LncRNA': LncRNA,
        'MiRNA': MiRNA,
        'NcRNA': NcRNA,
        'PiRNA': PiRNA,
        'Pseudogene': Pseudogene,
        'Ribozyme': Ribozyme,
        'RRNA': RRNA,
        'ScaRNA': ScaRNA,
        'ScRNA': ScRNA,
        'SnoRNA': SnoRNA,
        'SnRNA': SnRNA,
    }
    return rna_classes.get(label, RNA)([rna_id], [])


def get_given_drugs_related_info(disease_pairs, drugs):   # first disease pair with first drug array
    """Build one subgraph and one text report per (disease pair, drug).

    ``disease_pairs`` is a sequence of two-element sequences of disease id
    strings and ``drugs`` is a parallel sequence: ``drugs[i]`` holds the drug
    ids to analyse for ``disease_pairs[i]``.

    For every combination the function runs Cypher queries through the
    module-level ``session``, writes a ``*_results.txt`` summary and a
    ``*_graph.json`` network below ``../analysis/disease_pairs/<d1>_<d2>/``
    and passes the JSON file to ``draw_drug_subgraph``.

    Returns a list with one entry per disease pair; each entry is the list
    of networks built for that pair's drugs, in input order.
    """
    all_networks = []   # contains an array for each disease pair
    for index, disease_pair in enumerate(disease_pairs):
        networks_per_drug = []  # contains a network for each drug
        pair_drugs_ids = drugs[index]
        d1_id = disease_pair[0]
        d2_id = disease_pair[1]
        # ':' is replaced so the ids can be used in file and directory names
        temp_id1 = d1_id.replace(':', '-')
        temp_id2 = d2_id.replace(':', '-')
        path = '../analysis/disease_pairs/' + temp_id1 + '_' + temp_id2
        for drug_id in pair_drugs_ids:
            # Only create the directory when there is something to write.
            os.makedirs(path, exist_ok=True)

            network = Network()
            d1 = Disease([d1_id], [])
            network.add_node(d1)
            d2 = Disease([d2_id], [])
            network.add_node(d2)
            drug = Drug([drug_id], [])
            network.add_node(drug)
            temp_drug_id = drug_id.replace(':', '-')
            file_prefix = path + '/' + temp_id1 + '_' + temp_id2 + '_' + temp_drug_id
            with io.open(file_prefix + '_results.txt', 'w', encoding='utf-8', newline='') as results_file:
                results_file.write('In this file all information about the connection between ' + d1_id +
                                   ' and ' + d2_id + ' and the drug ' + drug_id + ' is summarized:\n')

                # the drug INDICATES, CONTRAINDICATES or INDUCES the disease
                query = """ MATCH (d:Disease)-[a]-(n:Drug) WHERE {d1_id} IN d.ids AND {n_id} in n.ids RETURN distinct(type(a)) """
                d1_results = session.run(query, parameters={'d1_id': d1_id, 'n_id': drug_id})
                for result in d1_results:
                    relation = result['(type(a))']
                    results_file.write(drug_id + ' ' + relation + ' ' + d1_id + '\n')
                    network.add_edge(Edge(drug, d1, relation, {}))
                query = """ MATCH (d:Disease)-[a]-(n:Drug) WHERE {d2_id} IN d.ids AND {n_id} in n.ids RETURN distinct(type(a)) """
                d2_results = session.run(query, parameters={'d2_id': d2_id, 'n_id': drug_id})
                for result in d2_results:
                    relation = result['(type(a))']
                    results_file.write(drug_id + ' ' + relation + ' ' + d2_id + '\n')
                    network.add_edge(Edge(drug, d2, relation, {}))

                # the drug targets a gene which is associated to the disease
                d1_genes = set()
                query = """ MATCH (n:Drug)-[:TARGETS]-(g:Gene)-[:ASSOCIATES_WITH]-(d:Disease) WHERE {d1_id} IN d.ids AND {n_id} in n.ids RETURN g.`_id` """
                d1_results = session.run(query, parameters={'d1_id': d1_id, 'n_id': drug_id})
                for gene in d1_results:
                    d1_genes.add(gene['g.`_id`'])
                    g = Gene([gene['g.`_id`']], [])
                    network.add_node(g)
                    network.add_edge(Edge(drug, g, 'TARGETS', {'actions': []})) #TODO
                    network.add_edge(Edge(g, d1, 'ASSOCIATES_WITH', {}))
                d2_genes = set()
                query = """ MATCH (n:Drug)-[:TARGETS]-(g:Gene)-[:ASSOCIATES_WITH]-(d:Disease) WHERE {d2_id} IN d.ids AND {n_id} in n.ids RETURN g.`_id` """
                d2_results = session.run(query, parameters={'d2_id': d2_id, 'n_id': drug_id})
                for gene in d2_results:
                    d2_genes.add(gene['g.`_id`'])
                    g = Gene([gene['g.`_id`']], [])
                    network.add_node(g)
                    network.add_edge(Edge(drug, g, 'TARGETS', {'actions': []})) #TODO
                    network.add_edge(Edge(g, d2, 'ASSOCIATES_WITH', {}))

                common_drug_genes = d1_genes.intersection(d2_genes) # genes associated to the drug and both diseases
                # relevant_genes are all genes associated to at least one disease and the drug, below the common genes
                # with the most disease associated references are added
                relevant_genes = d1_genes.union(d2_genes)
                if len(d1_genes) > 0:
                    results_file.write(drug_id + ' targets following ' + str(len(d1_genes)) + ' genes which are associated to ' + d1_id + ': ' + _strip_set_repr(d1_genes) + '\n')
                if len(d2_genes) > 0:
                    results_file.write(drug_id + ' targets following ' + str(len(d2_genes)) + ' genes which are associated to ' + d2_id + ': ' + _strip_set_repr(d2_genes) + '\n')
                if len(common_drug_genes) > 0:
                    results_file.write('The disease pair has ' + str(len(common_drug_genes)) + ' common genes which are targeted by the drug: ' + _strip_set_repr(common_drug_genes) + '\n')

                # add the common genes with the most disease associated references
                # no given num_pmids is similar to num_pmids = 0
                all_d1_genes, all_d2_genes = get_genes(disease_pair)
                all_common_genes = all_d1_genes.intersection(all_d2_genes)
                relevant_common_genes = []  # the genes with the most cited gene-disease association, threshold 10
                if len(all_common_genes) > 0:
                    results_file.write('The disease pair has ' + str(len(all_common_genes)) + ' common genes, not considering the connection to the drug.'
                                        ' Following genes have the most references regarding their connection to both diseases:\n')
                    for gene in all_common_genes:
                        query = """ MATCH (d1:Disease)-[a]-(g:Gene) WHERE {g_id} IN g.ids AND {d1_id} IN d1.ids RETURN a.num_pmids """
                        results = session.run(query, parameters={'g_id': gene, 'd1_id': d1_id})
                        num_pmids = _add_num_pmids(results, 0)
                        query = """ MATCH (d2:Disease)-[a]-(g:Gene) WHERE {g_id} IN g.ids AND {d2_id} IN d2.ids RETURN a.num_pmids """
                        results = session.run(query, parameters={'g_id': gene, 'd2_id': d2_id})
                        num_pmids = _add_num_pmids(results, num_pmids)
                        relevant_common_genes.append([gene, num_pmids])
                    # sort by number of pmids
                    relevant_common_genes = sorted(relevant_common_genes, key=lambda item: item[1], reverse=True)
                    relevant_common_genes = relevant_common_genes[:10]  # threshold
                    results_file.write(_strip_outer_repr(relevant_common_genes) + '\n')
                    for g in relevant_common_genes:
                        gene = Gene([g[0]], [])
                        network.add_node(gene)
                        network.add_edge(Edge(gene, d1, 'ASSOCIATES_WITH', {}))
                        network.add_edge(Edge(gene, d2, 'ASSOCIATES_WITH', {}))
                        relevant_genes.add(g[0])

                # add the common disease associated variants with most references
                # no given num_pmids is similar to num_pmids = 0
                disease_variants = {}
                query = """ MATCH (d1:Disease)-[a]-(v:Variant)--(d2:Disease) WHERE {d1_id} in d1.ids AND {d2_id} in d2.ids RETURN distinct(a.num_pmids), v.`_id` """
                results = session.run(query, parameters={'d1_id': d1_id, 'd2_id': d2_id})
                _accumulate_variant_pmids(results, disease_variants)
                query = """ MATCH (d2:Disease)-[a]-(v:Variant)--(d1:Disease) WHERE {d1_id} in d1.ids AND {d2_id} in d2.ids RETURN distinct(a.num_pmids), v.`_id` """
                results = session.run(query, parameters={'d1_id': d1_id, 'd2_id': d2_id})
                _accumulate_variant_pmids(results, disease_variants)
                # NOTE(review): this threshold keeps 9 variants while the gene
                # and RNA thresholds keep 10 - confirm whether that is intended.
                top_variants = sorted(disease_variants.items(), key=lambda item: item[1], reverse=True)[:9]
                dvs_parts = []
                for var_id, num_pmids in top_variants:
                    variant = Variant([var_id], [])
                    network.add_node(variant)
                    network.add_edge(Edge(variant, d1, 'ASSOCIATES_WITH', {}))
                    network.add_edge(Edge(variant, d2, 'ASSOCIATES_WITH', {}))
                    dvs_parts.append(var_id + ':' + str(num_pmids) + ' PMIDs')
                dvs = ', '.join(dvs_parts)

                # add the gene associated variants with smallest pvalues
                # if no pvalue is given, pvalue is set to 1
                # Entries are (variant id, gene id, pvalue, edge type). The ids
                # are kept separate because gene ids may themselves contain
                # '-', so joining and re-splitting on '-' would truncate them.
                gene_variants = []
                for gene in relevant_genes:
                    query = """ MATCH (g:Gene)-[a]-(v:Variant) WHERE {g_id} in g.ids RETURN v.`_id`, a.pvalue, type(a) """
                    results = session.run(query, parameters={'g_id': gene})
                    for variant in results:
                        pvalue = variant['a.pvalue']
                        if pvalue is None:
                            pvalue = 1
                        else:
                            pvalue = float(pvalue)
                        gene_variants.append((variant['v.`_id`'], gene, pvalue, variant['type(a)']))
                gene_variants = sorted(gene_variants, key=lambda item: item[2])
                gene_variants = gene_variants[:10]  # threshold
                for v_id, g_id, pvalue, edge_type in gene_variants:
                    variant = Variant([v_id], [])
                    network.add_node(variant)
                    gene = Gene([g_id], [])
                    network.add_node(gene)
                    network.add_edge(Edge(gene, variant, edge_type, {'pvalue': pvalue}))
                if len(gene_variants) > 0:
                    # the report shows each pair as '<variant id>-<gene id>'
                    gvs = _strip_outer_repr([[v_id + '-' + g_id, pvalue, edge_type]
                                             for v_id, g_id, pvalue, edge_type in gene_variants])
                else:
                    gvs = ''

                if len(disease_variants) > 0 or len(gene_variants) > 0:
                    results_file.write('The disease pair has at least ' + str(len(top_variants)) + ' variants associated to both diseases: ' +
                                       dvs + ' and at least ' + str(len(gene_variants)) + ' gene associated variants: ' + gvs + '\n')

                # dict with RNA id as key and the set of relevant genes it is connected to as value
                relevant_rnas = {}
                for gene in relevant_genes:
                    query = """ MATCH (r:RNA)--(g:Gene) WHERE {g_id} in g.ids AND NOT r.label_id CONTAINS "MRNA" return r.`_id` """
                    results = session.run(query, parameters={'g_id': gene})
                    for result in results:
                        relevant_rnas.setdefault(result['r.`_id`'], set()).add(gene)

                if len(relevant_rnas) > 0:
                    # sort by the number of regulated genes only; the sort is
                    # stable, so ties keep their insertion order
                    ranked_rnas = sorted(relevant_rnas.items(), key=lambda item: len(item[1]), reverse=True)
                    # only add and print RNAs which regulate more than one gene, threshold 10
                    reported_rnas = [item for item in ranked_rnas if len(item[1]) > 1][:10]
                    if reported_rnas:
                        results_file.write('RNAs with the number and names of the genes they regulate: \n')
                    for rna_id, regulated in reported_rnas:
                        for gene_id in regulated:
                            rna = RNA([rna_id], [])
                            network.add_node(rna)
                            gene = Gene([gene_id], [])
                            network.add_node(gene)
                            network.add_edge(Edge(rna, gene, 'REGULATES', {}))
                        results_file.write(rna_id + '\t' + str(len(regulated)) + '\t' + _strip_outer_repr(regulated) + '\n')

                    # append regulating RNAs to one RNA which regulates the most genes, MRNAs are not added
                    top_rna_id, top_regulated = ranked_rnas[0]
                    if len(top_regulated) > 1:
                        most_relevant_rna = RNA([top_rna_id], [])
                        network.add_node(most_relevant_rna)
                        query = """ MATCH (r:RNA)--(n:RNA) WHERE {r_id} in r.ids AND NOT n.label_id CONTAINS "MRNA" RETURN n.`_id`, labels(n) """
                        results = session.run(query, parameters={'r_id': top_rna_id})
                        reg_rnas = []
                        for result in results:
                            rna_id = result['n.`_id`']
                            for label in result['labels(n)']:
                                if label != 'RNA':
                                    # an unknown label falls back to a plain RNA
                                    # node instead of reusing the previous one
                                    rna = _rna_node_for_label(label, rna_id)
                                    network.add_node(rna)
                                    network.add_edge(Edge(rna, most_relevant_rna, 'REGULATES', {}))
                                    reg_rnas.append(rna_id)
                        results_file.write(top_rna_id + ' is the RNA which regulates the most genes in this subgraph. It is regulated by ' + ', '.join(reg_rnas) + '.\n')
            json_file = file_prefix + '_graph.json'
            network.save(json_file)
            draw_drug_subgraph(json_file)
            networks_per_drug.append(network)
        all_networks.append(networks_per_drug)
    return all_networks
Ejemplo n.º 24
0
    next(reader, None)
    # One variant per row, linked by an 'EQTL' edge to every gene listed in
    # column 13; rows lacking a variant id or a gene list are ignored.
    for row in reader:
        if not value_empty(row[1]) and not value_empty(row[13]):
            variant = Variant(['dbSNP:{}'.format(row[1])], [])
            network.add_node(variant)
            for gene_id in row[13].split(','):
                gene = Gene(['HGNC:{}'.format(gene_id)], [])
                network.add_node(gene)
                # A fresh attribute dict is built for every edge.
                rel = dict([
                    ('source', 'PMID:24013639'),
                    ('pvalue', row[0]),
                    ('snp_chr', row[2]),
                    ('cis_trans', row[7]),
                ])
                network.add_edge(Edge(gene, variant, 'EQTL', rel))

with io.open(file_trans, 'r', encoding='utf-8', newline='') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    next(reader, None)
    for row in reader:
        if value_empty(row[1]) or value_empty(row[16]):
            continue
        variant = Variant(['dbSNP:%s' % row[1]], [])
        network.add_node(variant)
        for gene_id in row[16].split(','):
            gene = Gene(['HGNC:%s' % gene_id], [])
            network.add_node(gene)
            rel = {
                'source': 'PMID:24013639',
                'pvalue': row[0],
Ejemplo n.º 25
0
 def getEdge(self, fr, to):
     """Return the stored edge equal to ``Edge(fr, to)``, or None if absent.

     The lookup relies on ``self.edges.index``, i.e. on the equality
     comparison of ``Edge`` objects.
     """
     try:
         # index() signals a miss by raising ValueError; it never returns -1.
         index = self.edges.index(Edge(fr, to))
     except ValueError:
         return None
     return self.edges[index]
Ejemplo n.º 26
0
# Build a gene -> disease 'ASSOCIATES_WITH' network from the filtered OMIM
# association table and save it as JSON.
network = Network()

# Column layout of filtered_associations.csv:
# 0 Location
# 1 Phenotype
# 2 Phenotype MIM number
# 3 Inheritance
# 4 Phenotype mapping key
# 5 Gene/Locus
# 6 Gene/Locus MIM number
with io.open('../data/OMIM/filtered_associations.csv',
             'r',
             encoding='utf-8',
             newline='') as f:
    reader = csv.reader(f, delimiter=',', quotechar='"')
    next(reader, None)  # skip the header row
    for row in reader:
        disease = Disease(['OMIM:%s' % row[2]], [])
        network.add_node(disease)
        gene = Gene(['HGNC:%s' % row[5]], [])  # , 'OMIM:%s' % row[6]
        network.add_node(gene)
        rel = {
            'source': 'OMIM',
            'location': row[0],
            'phenotype': row[1],
            # Inheritance is column 3; column 2 is the phenotype MIM number
            # that is already used as the disease id above.
            'inheritance': row[3],
            'phenotype_mapping_key': row[4]
        }
        network.add_edge(Edge(gene, disease, 'ASSOCIATES_WITH', rel))

network.save('../data/OMIM/graph.json')
Ejemplo n.º 27
0
        reader = csv.reader(f, delimiter='\t', quotechar='"')
        next(reader, None)
        # Import every interaction where both interactors (columns 3 and 6)
        # are human and the numeric value in column 7 exceeds 0.9.
        for row in reader:
            # The species literal has to be spelled out in full; a masked
            # spelling never matches and silently drops every row.
            if row[3] == 'Homo sapiens' and row[6] == 'Homo sapiens' and float(
                    row[7]) > 0.9:
                interactor_a_name = row[1]
                interactor_a_type = row[2]
                interactor_b_name = row[4]
                interactor_b_type = row[5]
                interactor_a = add_rna(interactor_a_name, interactor_a_type,
                                       node_lookup)
                interactor_b = add_rna(interactor_b_name, interactor_b_type,
                                       node_lookup)

                # add_rna may return None; such interactions are skipped.
                if interactor_a is not None and interactor_b is not None:
                    # An mRNA interactor also gets a gene node that
                    # TRANSCRIBES it.
                    # NOTE(review): with 'elif', only interactor_a gets a gene
                    # when both interactors are mRNAs - confirm this is wanted.
                    if interactor_a_type == 'mRNA':
                        gene = Gene([interactor_a.id], [])
                        network.add_node(gene)
                        e = Edge(gene, interactor_a, 'TRANSCRIBES', {})
                        network.add_edge(e)
                    elif interactor_b_type == 'mRNA':
                        gene = Gene([interactor_b.id], [])
                        network.add_node(gene)
                        e = Edge(gene, interactor_b, 'TRANSCRIBES', {})
                        network.add_edge(e)
                    e = Edge(interactor_a, interactor_b, 'REGULATES',
                             {'source': 'RNAInter'})
                    network.add_edge(e)

    network.save('../data/RNAInter/graph.json')
Ejemplo n.º 28
0
    Button(SCREEN_SIZE[0] - 110, 230, 50, 8, 'test_image.png'),
    Button(SCREEN_SIZE[0] - 50, 290, 50, 9, 'test_image.png'),
    Button(SCREEN_SIZE[0] - 110, 290, 50, 10, 'test_image.png'),
]

# The eight corners of the cube spanning -1..1 on every axis.
vertex0 = Vertex(1, -1, -1)
vertex1 = Vertex(1, 1, -1)
vertex2 = Vertex(-1, 1, -1)
vertex3 = Vertex(-1, -1, -1)
vertex4 = Vertex(1, -1, 1)
vertex5 = Vertex(1, 1, 1)
vertex6 = Vertex(-1, -1, 1)
vertex7 = Vertex(-1, 1, 1)

edges = (
    # The twelve cube edges; each joins two corners that differ in exactly
    # one coordinate.
    *(Edge(start, end) for start, end in (
        (vertex0, vertex1),
        (vertex0, vertex3),
        (vertex0, vertex4),
        (vertex2, vertex1),
        (vertex2, vertex3),
        (vertex2, vertex7),
        (vertex6, vertex3),
        (vertex6, vertex4),
        (vertex6, vertex7),
        (vertex5, vertex1),
        (vertex5, vertex4),
        (vertex5, vertex7),
    )),
    # Three segments starting at the origin, one per axis; each uses its own
    # freshly created origin vertex.
    Edge(Vertex(0, 0, 0), Vertex(-2, 0, 0)),
    Edge(Vertex(0, 0, 0), Vertex(0, 2, 0)),
    Edge(Vertex(0, 0, 0), Vertex(0, 0, -2)),
)
Ejemplo n.º 29
0
        gene_hgnc_id = 'HGNC:' + row[3]
        gene_entrez_id = int(row[4])
        gene_entrez_id = 'Entrez:' + str(gene_entrez_id)
        pmid = int(row[8])
        pmid = str(pmid)

        # Scan the mapping file for the first row whose column 2 equals the
        # miRNA name; that row's column 0 is used as the miRNA node id.
        with io.open(mirna_to_URS_mapping_file, 'r', encoding='utf-8', newline='') as mapping_file:
            mapping_reader = csv.reader(mapping_file, delimiter='\t')
            next(mapping_reader, None)
            for mapping_row in mapping_reader:
                if mirna_name != mapping_row[2]:
                    continue
                mirna_rnacentral_id = mapping_row[0]
                mirna = MiRNA([mirna_rnacentral_id], [mirna_name])
                network.add_node(mirna)
                gene = Gene([gene_hgnc_id, gene_entrez_id], [])
                network.add_node(gene)
                if (mirna_rnacentral_id + '$' + gene_hgnc_id) not in edge_source_target_lookup:
                    # First time this miRNA/gene pair is seen: add a new edge.
                    e = Edge(mirna, gene, 'REGULATES', {'source': 'miRTarBase', 'pmid': pmid})
                    network.add_edge(e)
                    edge_source_target_lookup.append(mirna_rnacentral_id + '$' + gene_hgnc_id)
                else:
                    # Pair already present: replace each existing edge by one
                    # whose 'pmid' attribute also lists the current PMID.
                    for existing_edge in network.get_edges_from_to(mirna, gene, 'REGULATES'):
                        pmid = existing_edge.attributes['pmid'] + ', ' + str(pmid)
                        network.delete_edge(existing_edge)
                        e = Edge(mirna, gene, 'REGULATES', {'source': 'miRTarBase', 'pmid': pmid})
                        network.add_edge(e)
                        edge_source_target_lookup.append(mirna_rnacentral_id + '$' + gene_hgnc_id)
                # Only the first matching mapping row is used.
                break
network.save('data/miRTarBase/graph.json')
Ejemplo n.º 30
0
    next(reader, None)
    # One variant node per row, plus a 'CODES' edge from every gene listed
    # in the comma-separated column 2.
    for row in reader:
        variant_ids = {'PharmGKB:{}'.format(row[0])}
        if row[1]:
            # Column 1, when filled, is added as a dbSNP id.
            variant_ids.add('dbSNP:{}'.format(row[1]))
        variant = Variant(variant_ids, [])
        variant.attributes['location'] = row[4]
        network.add_node(variant)
        if not row[2]:
            continue
        for raw_gene_id in row[2].split(','):
            gene = Gene(['PharmGKB:{}'.format(raw_gene_id.strip())], [])
            network.add_node(gene)
            network.add_edge(
                Edge(gene, variant, "CODES", {'source': 'PharmGKB'}))

with open_file_in_zip('../data/PharmGKB/phenotypes.zip',
                      'phenotypes.tsv') as f:
    reader = csv.reader(f, delimiter='\t', quotechar='"')
    next(reader, None)
    for row in reader:
        disease_ids = {'PharmGKB:%s' % row[0]}
        disease_names = {row[1]}
        for id_name_pair in process_disease_external_vocabulary(
                split_list(row[4])):
            disease_ids.add(id_name_pair[0])
            if id_name_pair[1] is not None:
                disease_names.add(id_name_pair[1])
        disease = Disease(disease_ids, disease_names)
        network.add_node(disease)