def _rename_network_nodes(self, nodeclass_name, nodeset_name, node_name,
                          new_node_name):
    new_mapping = {node_name: new_node_name}
    for nk in self.networks:
        if (nk.graph['sourceType'] == nodeclass_name and nk.graph['source'] == nodeset_name) or \
                (nk.graph['targetType'] == nodeclass_name and nk.graph['target'] == nodeset_name):
            # Relabel in place; by default relabel_nodes returns a copy that would be discarded here.
            nx.relabel_nodes(nk, new_mapping, copy=False)
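For context: nx.relabel_nodes returns a relabeled copy by default and only modifies the given graph when copy=False is passed, which is what the method above relies on. A minimal sketch of the difference:

import networkx as nx

G = nx.Graph([("a", "b")])
H = nx.relabel_nodes(G, {"a": "x"})             # returns a relabeled copy
print(sorted(G.nodes()), sorted(H.nodes()))     # ['a', 'b'] ['b', 'x']

nx.relabel_nodes(G, {"a": "x"}, copy=False)     # relabels G itself
print(sorted(G.nodes()))                        # ['b', 'x']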
Example #2
def test_relabel_toposort():
    # Both graphs are complete graphs on 4 nodes; relabeling must preserve isomorphism.
    K4 = nx.complete_graph(4)
    G = nx.complete_graph(4)
    G = nx.relabel_nodes(G, {i: i + 1 for i in range(4)}, copy=False)
    assert nx.is_isomorphic(K4, G)

    G = nx.complete_graph(4)
    G = nx.relabel_nodes(G, {i: i - 1 for i in range(4)}, copy=False)
    assert nx.is_isomorphic(K4, G)
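The test above exercises the overlapping-mapping case: with copy=False the old and new labels overlap (i maps to i + 1), so networkx applies the mapping in topological order of its dependency graph. A minimal sketch; the exception shown for circular mappings matches my reading of recent networkx and should be treated as an assumption for older releases:

import networkx as nx

G = nx.path_graph(3)                                  # nodes 0, 1, 2
nx.relabel_nodes(G, {0: 1, 1: 2, 2: 3}, copy=False)   # 2->3 is applied before 1->2, etc.
print(sorted(G.nodes()))                              # [1, 2, 3]

# A circular mapping cannot be resolved in place.
try:
    nx.relabel_nodes(G, {1: 2, 2: 1}, copy=False)
except nx.NetworkXUnfeasible:
    print("circular mapping needs copy=True")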
Example #3
def test_relabel_selfloop():
    G = nx.DiGraph([(1, 1), (1, 2), (2, 3)])
    G = nx.relabel_nodes(G, {1: 'One', 2: 'Two', 3: 'Three'}, copy=False)
    assert sorted(G.nodes()) == ['One', 'Three', 'Two']
    G = nx.MultiDiGraph([(1, 1), (1, 2), (2, 3)])
    G = nx.relabel_nodes(G, {1: 'One', 2: 'Two', 3: 'Three'}, copy=False)
    assert sorted(G.nodes()) == ['One', 'Three', 'Two']
    G = nx.MultiDiGraph([(1, 1)])
    G = nx.relabel_nodes(G, {1: 0}, copy=False)
    assert sorted(G.nodes()) == [0]
Example #5
def make_(self):
    """Return one (w, k, attr_dim) list (usually (w, k, 1)) of inputs for a 1D CNN."""
    forcnn = []
    self.all_subgraph = []
    for graph in self.select_node_sequence():
        # Rename the nodes according to their 'labeling' attribute.
        frelabel = nx.relabel_nodes(graph,
                                    nx.get_node_attributes(graph, 'labeling'))
        self.all_subgraph.append(frelabel)
        # Node attributes in the canonical order given by the new labels.
        attrs = sorted(nx.get_node_attributes(frelabel, 'attr_name').items(),
                       key=lambda x: x[0])
        if self.one_hot > 0:
            forcnn.append([utils.indices_to_one_hot(attr, self.one_hot)
                           for _, attr in attrs])
        else:
            forcnn.append([attr for _, attr in attrs])
    shape = np.array(forcnn).shape
    if shape[0] != self.w or shape[1] != self.k:
        raise BadShapeError('Shapes do not match: {0} instead of {1}'.format(
            shape, (self.w, self.k)))
    return forcnn
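utils.indices_to_one_hot is project-specific and not shown here; a stand-in with the behaviour the call above appears to expect (an attribute index expanded into a one-hot vector of width self.one_hot) might look like the sketch below. The name and signature are assumptions:

import numpy as np

def indices_to_one_hot(index, width):
    """Return a one-hot vector of length `width` with a 1 at position `index`."""
    one_hot = np.zeros(width, dtype=int)
    one_hot[int(index)] = 1
    return one_hot

print(indices_to_one_hot(2, 5))   # [0 0 1 0 0]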
Example #6
    def create_all_rfs(self):
        """
        Method that transforms the graph attribute of the PCSN object into suitable input for CNN

        :return: (width, rf_size, attr_dim) input for CNN
        """

        input_to_cnn = list()

        # node_sequence_selection returns the full list of receptive fields created
        receptive_fields = self.node_sequence_selection()

        for field in receptive_fields:
            relabeled_nodes = nx.relabel_nodes(field, nx.get_node_attributes(field, 'labeling'))

            input_to_cnn.append(
                [x[1] for x in sorted(nx.get_node_attributes(relabeled_nodes, 'attr_name').items(),
                                      key=lambda x: x[0])])

        return input_to_cnn
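Both receptive-field builders rely on the same trick: relabel each subgraph by its 'labeling' attribute so that sorting the new node ids yields the receptive field in canonical order. A minimal sketch of just that step, using the same attribute names on a toy graph:

import networkx as nx

field = nx.Graph([("u", "v"), ("v", "w")])
nx.set_node_attributes(field, {"u": 2, "v": 0, "w": 1}, "labeling")
nx.set_node_attributes(field, {"u": "C", "v": "A", "w": "B"}, "attr_name")

relabeled = nx.relabel_nodes(field, nx.get_node_attributes(field, "labeling"))
ordered_attrs = [attr for _, attr in
                 sorted(nx.get_node_attributes(relabeled, "attr_name").items())]
print(ordered_attrs)   # ['A', 'B', 'C']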
Example #7
def plot_so(so, terms_to_include, name, joiners=()):
    g = so.subgraph([
        node for node, data in so.nodes(data=True)
        if data['name'] in terms_to_include or data['name'] in joiners
    ])
    g = g.reverse()

    # Mark joiner terms so they are drawn with a dashed outline.
    for node, data in g.nodes(data=True):
        if data['name'] in joiners:
            g.nodes[node]['style'] = 'dashed'

    # Relabel nodes by their human-readable 'name' attribute before drawing.
    g = nx.relabel_nodes(
        g, {node: data['name']
            for node, data in g.nodes(data=True)})

    a = nx_agraph.to_agraph(g)

    a.layout(
        'dot',
        args='-Nfontsize=12 -Nwidth=".2" -Nheight=".2" -Nshape="box" '
             '-Nmargin=.04 -Gfontsize=8 -Earrowsize=.5')
    a.draw('reports/' + name + '.png')
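One caveat for this relabel-by-'name' pattern: if two nodes carry the same name, relabel_nodes collapses them into a single node and their edges are merged. A minimal sketch of that behaviour, as I understand the default copy-relabeling:

import networkx as nx

g = nx.DiGraph([(1, 2), (3, 2)])
names = {1: "a", 2: "b", 3: "a"}               # nodes 1 and 3 share a name
h = nx.relabel_nodes(g, names)
print(sorted(h.nodes()), sorted(h.edges()))    # ['a', 'b'] [('a', 'b')]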
Example #8
    def wl_normalization(self, graph):

        result = {}

        labeled_graph = nx.Graph(graph)

        relabel_dict_ = {node: i for i, node in enumerate(graph.nodes())}

        inv_relabel_dict_ = {v: k for k, v in relabel_dict_.items()}

        graph_relabel = nx.relabel_nodes(graph, relabel_dict_)

        label_lookup = {}
        label_counter = 0

        l_aux = list(
            nx.get_node_attributes(graph_relabel, 'attr_name').values())
        labels = np.zeros(len(l_aux), dtype=np.int32)
        # Old-style adjacency list, as returned by networkx 1.x.
        adjency_list = [list(neighbors) for _, neighbors in graph_relabel.adjacency()]

        for j in range(len(l_aux)):
            if not (l_aux[j] in label_lookup):
                label_lookup[l_aux[j]] = label_counter
                labels[j] = label_counter
                label_counter += 1
            else:
                labels[j] = label_lookup[l_aux[j]]
            # labels are associated to a natural number
            # starting with 0.

        new_labels = copy.deepcopy(labels)

        # create an empty lookup table
        label_lookup = {}
        label_counter = 0

        for v in range(len(adjency_list)):
            # form a multiset label for node v from its own label and its
            # sorted neighbour labels, and convert it to a string

            long_label = np.concatenate(
                (np.array([labels[v]]), np.sort(labels[adjency_list[v]])))
            long_label_string = str(long_label)
            # if the multiset label has not yet occurred, add it to the
            # lookup table and assign a number to it
            if not (long_label_string in label_lookup):
                label_lookup[long_label_string] = label_counter
                new_labels[v] = label_counter
                label_counter += 1
            else:
                new_labels[v] = label_lookup[long_label_string]
        # keep the refined labels
        labels = copy.deepcopy(new_labels)

        dict_ = {inv_relabel_dict_[i]: labels[i] for i in range(len(labels))}

        nx.set_node_attributes(labeled_graph, dict_, 'labeling')

        result['labeled_graph'] = labeled_graph
        result['ordered_nodes'] = [
            x[0] for x in sorted(dict_.items(), key=lambda x: x[1])
        ]

        return result
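The body above is essentially one Weisfeiler-Lehman refinement step: each node's new label is built from its own label plus the sorted labels of its neighbours. A compressed standalone sketch of that step (ids are assigned here by sorted signature rather than by order of first occurrence, but both are consistent refinements):

import networkx as nx

G = nx.Graph([(0, 1), (1, 2), (2, 0), (2, 3)])
labels = {0: 0, 1: 0, 2: 1, 3: 0}                     # initial integer labels

# Signature of a node = (own label, sorted multiset of neighbour labels).
signatures = {v: (labels[v], tuple(sorted(labels[u] for u in G[v]))) for v in G}
lookup = {sig: i for i, sig in enumerate(sorted(set(signatures.values())))}
new_labels = {v: lookup[signatures[v]] for v in G}
print(new_labels)                                     # {0: 0, 1: 0, 2: 2, 3: 1}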
Example #9
def poly_aaa_consequences(variants):

    mutations_in_cds_hgvs_format = defaultdict(list)
    indels = Counter()
    total = Counter()

    for variant in all_poly_a_variants(variants, preserve_sources=True):

        for transcript in variant.affected_transcripts:

            if not transcript.poly_aaa:
                continue

            for alt, aaa_data in transcript.poly_aaa.items():

                if aaa_data.increased:
                    category = 'increased'
                elif aaa_data.decreased:
                    category = 'decreased'
                else:
                    category = 'constant'

                hgvs = transcript.as_hgvs(variant.ref, alt)
                if 'del' in hgvs or 'ins' in hgvs:
                    indels[category] += 1
                total[category] += 1

                mutations_in_cds_hgvs_format[category].append(hgvs)
                mutations_in_cds_hgvs_format['all'].append(hgvs)

    print('Indels enrichment:')
    for category in indels:
        print(category, indels[category] / total[category] * 100, '%')

    for category, muts in mutations_in_cds_hgvs_format.items():
        report(
            'Mutations which result in ' + category + ' in cds hgvs formats',
            muts)

    consequences = defaultdict(Counter)
    skipped = Counter()
    for category, muts in mutations_in_cds_hgvs_format.items():
        filename = report(
            'Mutations which result in ' + category + ' in cds hgvs formats',
            muts)
        vep_filename = vep(filename)
        for line in open(vep_filename):
            if line.startswith('#'):
                continue
            line = line.split('\t')
            tested_transcript = line[0].split(':')[0]
            vep_transcript = line[4]
            if line[5] != 'Transcript':
                skipped['Not a transcript feature'] += 1
                continue
            if tested_transcript != vep_transcript:
                skipped['Different transcript'] += 1
                continue

            variant_consequences = line[6].split(',')
            for consequence in variant_consequences:
                consequences[category][consequence] += 1

        print(skipped)
        print('Raw consequences')
        print(consequences)

    graph = load_sequence_ontology()
    expanded_consequences = propagate_consequences(graph, consequences)

    for category, counts in expanded_consequences.items():

        consequences_to_include = ['coding_sequence_variant']
        consequences_to_include.extend(counts.keys())
        g = graph.subgraph([
            node for node, data in graph.nodes(data=True)
            if data['name'] in consequences_to_include
        ])
        g = g.reverse()

        max_count = max(counts.values())

        for node, data in g.nodes(data=True):
            name = data['name']
            count = counts[name]
            # Log-scaled fill colour: higher counts give a stronger cyan tint.
            color = (255 - int(log((count / max_count) + 1) * 255), 255, 255)
            g.nodes[node]['style'] = 'filled'
            g.nodes[node]['shape'] = 'box'
            g.nodes[node]['fillcolor'] = '#%02x%02x%02x' % color
            if name not in consequences[category]:
                g.nodes[node]['style'] = 'dashed,filled'

        g = nx.relabel_nodes(
            g, {
                node: data['name'].replace('variant', 'v.') +
                ': %s' % counts.get(data['name'])
                for node, data in g.nodes(data=True)
            })

        a = nx_agraph.to_agraph(g)

        a.layout(
            'dot',
            args='-Nfontsize=14 -Nwidth=".2" -Nheight=".2" -Nmargin=.1 '
                 '-Gfontsize=8 -Earrowsize=.5')
        a.draw('reports/poly_a_consequences_dag_' + category + '.svg')

    selected_consequences_groups = {
        'General coding':
        ['synonymous_variant', 'frameshift_variant', 'inframe_variant'],
        'Inframe': [
            'inframe_deletion', 'inframe_insertion', 'missense_variant',
            'stop_gained', 'stop_lost'
        ]
    }

    for group, selected_consequences in selected_consequences_groups.items():
        for category, counts in expanded_consequences.items():
            data = {
                consequence: counts[consequence]
                for consequence in selected_consequences
            }
            data = OrderedDict(sorted(data.items(), key=itemgetter(1)))

            # Create a pie chart
            wedges = plt.pie(
                list(data.values()),
                labels=list(data.keys()),
                shadow=False,
                colors=plt.cm.tab20(
                    numpy.linspace(1, 0, len(selected_consequences))),
                startangle=0,
                autopct='%1.1f%%',
            )
            for pie_wedge in wedges[0]:
                pie_wedge.set_edgecolor('black')

            # Keep the pie chart circular.
            plt.axis('equal')

            plt.title(group + ' consequences for variants causing ' +
                      category + ' in poly(A) length')
            plt.tight_layout()
            save_plot(plt, hide_title=True)
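The fill colours in the consequence DAG above come from a log-scaled mapping of counts to a hex colour. A standalone sketch of that mapping (the helper name is mine):

from math import log

def count_to_fillcolor(count, max_count):
    """Map a count to a white-to-cyan hex colour on a log scale."""
    red = 255 - int(log((count / max_count) + 1) * 255)
    return '#%02x%02x%02x' % (red, 255, 255)

print(count_to_fillcolor(1, 100), count_to_fillcolor(100, 100))   # near-white vs. cyan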