def _rename_network_nodes(self, nodeclass_name, nodeset_name, node_name, new_node_name):
    """Rename a node in every network that references the given node set.

    A network is affected when its graph-level ``sourceType``/``source``
    or ``targetType``/``target`` attributes match ``nodeclass_name`` and
    ``nodeset_name``.

    :param nodeclass_name: node class of the set being renamed
    :param nodeset_name: name of the node set being renamed
    :param node_name: current node label
    :param new_node_name: replacement node label
    """
    new_mapping = {node_name: new_node_name}
    for nk in self.networks:
        if (nk.graph['sourceType'] == nodeclass_name and nk.graph['source'] == nodeset_name) or \
                (nk.graph['targetType'] == nodeclass_name and nk.graph['target'] == nodeset_name):
            # copy=False relabels nk in place; with the default (copy=True)
            # the relabeled graph is returned and discarded, leaving the
            # stored network unchanged.
            nx.relabel_nodes(nk, new_mapping, copy=False)
def test_relabel_toposort():
    # In-place relabeling with an overlapping mapping must succeed whether
    # the nodes are shifted up or down.
    reference = nx.complete_graph(4)
    for offset in (1, -1):
        graph = nx.complete_graph(4)
        graph = nx.relabel_nodes(
            graph, {i: i + offset for i in range(4)}, copy=False)
        assert nx.is_isomorphic(reference, graph)
def test_relabel_selfloop():
    # Self-loops must survive in-place relabeling on both DiGraph and
    # MultiDiGraph.
    mapping = {1: 'One', 2: 'Two', 3: 'Three'}

    digraph = nx.DiGraph([(1, 1), (1, 2), (2, 3)])
    digraph = nx.relabel_nodes(digraph, mapping, copy=False)
    assert sorted(digraph.nodes()) == ['One', 'Three', 'Two']

    multigraph = nx.MultiDiGraph([(1, 1), (1, 2), (2, 3)])
    multigraph = nx.relabel_nodes(multigraph, mapping, copy=False)
    assert sorted(multigraph.nodes()) == ['One', 'Three', 'Two']

    looped = nx.MultiDiGraph([(1, 1)])
    looped = nx.relabel_nodes(looped, {1: 0}, copy=False)
    assert sorted(looped.nodes()) == [0]
def _rename_network_nodes(self, nodeclass_name, nodeset_name, node_name, new_node_name):
    """Rename a node in every network whose source or target matches the
    given node class and node set.

    :param nodeclass_name: node class of the set being renamed, compared
        against the networks' ``sourceType``/``targetType`` attributes
    :param nodeset_name: node set name, compared against ``source``/``target``
    :param node_name: current node label
    :param new_node_name: replacement node label
    """
    new_mapping = {node_name: new_node_name}
    for nk in self.networks:
        if (
            nk.graph["sourceType"] == nodeclass_name
            and nk.graph["source"] == nodeset_name
            or nk.graph["targetType"] == nodeclass_name
            and nk.graph["target"] == nodeset_name
        ):
            # Relabel in place: with the default copy=True the relabeled
            # graph would be returned and discarded, leaving nk unchanged.
            nx.relabel_nodes(nk, new_mapping, copy=False)
def make_(self):
    """Build one (w, k, length_attri) list (usually (w, k, 1)) for a 1D CNN.

    Each receptive field returned by ``select_node_sequence`` is relabeled
    according to its 'labeling' node attribute, stored in
    ``self.all_subgraph``, and its 'attr_name' values are collected in
    label order (one-hot encoded when ``self.one_hot > 0``).

    :raises BadShapeError: if the result is not of shape (self.w, self.k)
    :return: nested list of node attributes for the CNN input
    """
    forcnn = []
    self.all_subgraph = []
    for graph in self.select_node_sequence():
        # rename the nodes wrt the labeling
        frelabel = nx.relabel_nodes(
            graph, nx.get_node_attributes(graph, 'labeling'))
        self.all_subgraph.append(frelabel)
        # attribute values ordered by the (relabeled) node number; shared
        # by both branches below instead of being duplicated
        attrs = [
            x[1] for x in sorted(
                nx.get_node_attributes(frelabel, 'attr_name').items(),
                key=lambda x: x[0])
        ]
        if self.one_hot > 0:
            forcnn.append([
                utils.indices_to_one_hot(a, self.one_hot) for a in attrs
            ])
        else:
            forcnn.append(attrs)
    # compute the array shape once instead of rebuilding the array for
    # every comparison and for the error message
    shape = np.array(forcnn).shape
    if shape[0] != self.w or shape[1] != self.k:
        raise BadShapeError(
            'Shapes do not match : {0} instead of {1}'.format(
                shape, (self.w, self.k)))
    return forcnn
def create_all_rfs(self):
    """
    Method that transforms the graph attribute of the PCSN object into suitable input for CNN
    :return: (width, rf_size, attr_dim) input for CNN
    """
    def field_to_attrs(rf):
        # relabel by the 'labeling' attribute, then emit the 'attr_name'
        # values sorted by the new node labels
        relabeled = nx.relabel_nodes(rf, nx.get_node_attributes(rf, 'labeling'))
        attrs = nx.get_node_attributes(relabeled, 'attr_name')
        return [value for _, value in sorted(attrs.items(), key=lambda item: item[0])]

    # node_sequence_selection returns the full list of receptive fields created
    return [field_to_attrs(rf) for rf in self.node_sequence_selection()]
def plot_so(so, terms_to_include, name, joiners=None):
    """Render a subgraph of the sequence ontology to reports/<name>.png.

    :param so: sequence-ontology graph (networkx 1.x-style, nodes carry a
        'name' attribute)
    :param terms_to_include: list of term names to keep in the plot
    :param name: basename of the PNG written to the reports/ directory
    :param joiners: optional list of term names drawn with a dashed style
        (e.g. terms included only to keep the plotted ontology connected)
    """
    # None sentinel instead of a mutable default argument, which would be
    # shared between calls.
    if joiners is None:
        joiners = []
    g = so.subgraph([
        node for node, data in so.nodes(data=True)
        if data['name'] in terms_to_include + joiners
    ])
    g = g.reverse()
    for node, data in g.nodes_iter(data=True):
        if data['name'] in joiners:
            g.node[node]['style'] = 'dashed'
    # show human-readable term names instead of ontology identifiers
    g = nx.relabel_nodes(
        g, {node: data['name'] for node, data in g.nodes(data=True)})
    a = nx_agraph.to_agraph(g)
    a.layout(
        'dot',
        args=
        '-Nfontsize=12 -Nwidth=".2" -Nheight=".2" -Nshape="box" -Nmargin=.04 -Gfontsize=8 -Earrowsize=.5'
    )
    a.draw('reports/' + name + '.png')
def wl_normalization(self, graph):
    """Apply one Weisfeiler-Lehman relabeling iteration to ``graph``.

    Each node's 'attr_name' label is combined with the sorted labels of
    its neighbours and compressed to a new integer label, which is stored
    as the 'labeling' attribute on a copy of the graph.

    :param graph: networkx graph whose nodes carry an 'attr_name' attribute
    :return: dict with keys 'labeled_graph' (copy of ``graph`` carrying the
        new 'labeling' node attribute) and 'ordered_nodes' (original node
        names sorted by their new label)
    """
    result = {}
    labeled_graph = nx.Graph(graph)

    # Map the (arbitrary) node names to consecutive integers so labels can
    # live in flat numpy arrays; keep the inverse mapping to translate back.
    # (The original loop carried a useless `i += 1` — the for-loop variable
    # is reassigned on every iteration anyway.)
    relabel_dict_ = {node: i for i, node in enumerate(graph.nodes())}
    inv_relabel_dict_ = {v: k for k, v in relabel_dict_.items()}
    graph_relabel = nx.relabel_nodes(graph, relabel_dict_)

    label_lookup = {}
    label_counter = 0
    l_aux = list(
        nx.get_node_attributes(graph_relabel, 'attr_name').values())
    labels = np.zeros(len(l_aux), dtype=np.int32)
    # old-style adjacency list, as in networkx 1.0
    adjency_list = list([
        list(x[1].keys()) for x in graph_relabel.adjacency()
    ])

    # Compress the initial attribute labels to natural numbers starting at 0.
    for j in range(len(l_aux)):
        if not (l_aux[j] in label_lookup):
            label_lookup[l_aux[j]] = label_counter
            labels[j] = label_counter
            label_counter += 1
        else:
            labels[j] = label_lookup[l_aux[j]]

    new_labels = copy.deepcopy(labels)
    # fresh, empty lookup table for the multiset labels
    label_lookup = {}
    label_counter = 0
    for v in range(len(adjency_list)):
        # Form the multiset label of node v: its own label followed by the
        # sorted labels of its neighbours, serialised to a string.
        long_label = np.concatenate(
            (np.array([labels[v]]), np.sort(labels[adjency_list[v]])))
        long_label_string = str(long_label)
        # First occurrence of a multiset gets the next free number.
        if not (long_label_string in label_lookup):
            label_lookup[long_label_string] = label_counter
            new_labels[v] = label_counter
            label_counter += 1
        else:
            new_labels[v] = label_lookup[long_label_string]

    labels = copy.deepcopy(new_labels)
    # Translate back to the original node names and attach as 'labeling'.
    dict_ = {inv_relabel_dict_[i]: labels[i] for i in range(len(labels))}
    nx.set_node_attributes(labeled_graph, dict_, 'labeling')

    result['labeled_graph'] = labeled_graph
    result['ordered_nodes'] = [
        x[0] for x in sorted(dict_.items(), key=lambda x: x[1])
    ]
    return result
def poly_aaa_consequences(variants):
    """Summarise VEP consequences of variants changing poly(A) track length.

    Every poly(A)-affecting variant is classified as 'increased',
    'decreased' or 'constant'; the HGVS-formatted mutations per category
    are reported, run through VEP, and the resulting consequence counts
    are rendered as sequence-ontology DAGs (SVG) and pie charts.

    :param variants: variant collection accepted by ``all_poly_a_variants``
    """
    mutations_in_cds_hgvs_format = defaultdict(list)
    indels = Counter()
    # renamed from `all` — the original shadowed the builtin all()
    totals = Counter()

    for variant in all_poly_a_variants(variants, preserve_sources=True):
        for transcript in variant.affected_transcripts:
            if not transcript.poly_aaa:
                continue
            for alt, aaa_data in transcript.poly_aaa.items():
                if aaa_data.increased:
                    category = 'increased'
                elif aaa_data.decreased:
                    category = 'decreased'
                else:
                    category = 'constant'
                hgvs = transcript.as_hgvs(variant.ref, alt)
                # deletions/insertions are counted separately so their
                # enrichment per category can be printed below
                if 'del' in hgvs or 'ins' in hgvs:
                    indels[category] += 1
                totals[category] += 1
                mutations_in_cds_hgvs_format[category].append(hgvs)
                mutations_in_cds_hgvs_format['all'].append(hgvs)

    print('Indels enrichment:')
    for category in indels:
        print(category, indels[category] / totals[category] * 100, '%')

    # NOTE(review): report() is invoked again with identical arguments in
    # the loop below; kept in case the duplicate call is intentional —
    # confirm and drop one of the two.
    for category, muts in mutations_in_cds_hgvs_format.items():
        report(
            'Mutations which result in ' + category + ' in cds hgvs formats',
            muts)

    consequences = defaultdict(Counter)
    skipped = Counter()

    for category, muts in mutations_in_cds_hgvs_format.items():
        filename = report(
            'Mutations which result in ' + category + ' in cds hgvs formats',
            muts)
        vep_filename = vep(filename)
        # `with` closes the VEP output file (the original leaked the handle)
        with open(vep_filename) as vep_file:
            for line in vep_file:
                if line.startswith('#'):
                    continue
                line = line.split('\t')
                tested_transcript = line[0].split(':')[0]
                vep_transcript = line[4]
                if line[5] != 'Transcript':
                    skipped['Not a transcript feature'] += 1
                    continue
                # only count consequences VEP assigned to the same
                # transcript the mutation was tested on
                if tested_transcript != vep_transcript:
                    skipped['Different transcript'] += 1
                    continue
                variant_consequences = line[6].split(',')
                for consequence in variant_consequences:
                    consequences[category][consequence] += 1

    print(skipped)
    print('Raw consequences')
    print(consequences)

    graph = load_sequence_ontology()
    expanded_consequences = propagate_consequences(graph, consequences)

    for category, counts in expanded_consequences.items():
        consequences_to_include = ['coding_sequence_variant']
        consequences_to_include.extend(counts.keys())
        g = graph.subgraph([
            node for node, data in graph.nodes(data=True)
            if data['name'] in consequences_to_include
        ])
        g = g.reverse()
        max_count = max(counts.values())
        for node, data in g.nodes_iter(data=True):
            name = data['name']
            count = counts[name]
            # rarer consequences get a lighter shade of red (log scale)
            color = (255 - int(log((count / max_count) + 1) * 255), 255, 255)
            g.node[node]['style'] = 'filled'
            g.node[node]['shape'] = 'box'
            color = '#%02x%02x%02x' % color
            g.node[node]['fillcolor'] = color
            # dashed outline marks consequences that were only added by
            # ontology propagation, not observed directly
            if name not in consequences[category]:
                g.node[node]['style'] = 'dashed,filled'
        g = nx.relabel_nodes(
            g, {
                node: data['name'].replace('variant', 'v.') +
                ': %s' % counts.get(data['name'])
                for node, data in g.nodes(data=True)
            })
        a = nx_agraph.to_agraph(g)
        a.layout(
            'dot',
            args=
            '-Nfontsize=14 -Nwidth=".2" -Nheight=".2" -Nmargin=.1 -Gfontsize=8 -Earrowsize=.5'
        )
        a.draw('reports/poly_a_consequences_dag_' + category + '.svg')

    selected_consequences_groups = {
        'General coding':
        ['synonymous_variant', 'frameshift_variant', 'inframe_variant'],
        'Inframe': [
            'inframe_deletion', 'inframe_insertion', 'missense_variant',
            'stop_gained', 'stop_lost'
        ]
    }

    for group, selected_consequences in selected_consequences_groups.items():
        for category, counts in expanded_consequences.items():
            data = {
                consequence: counts[consequence]
                for consequence in selected_consequences
            }
            data = OrderedDict(sorted(data.items(), key=itemgetter(1)))
            # Create a pie chart
            wedges = plt.pie(
                list(data.values()),
                labels=list(data.keys()),
                shadow=False,
                colors=plt.cm.tab20(
                    numpy.linspace(1, 0, len(selected_consequences))),
                startangle=0,
                autopct='%1.1f%%',
            )
            for pie_wedge in wedges[0]:
                pie_wedge.set_edgecolor('black')
            # View the plot drop above
            plt.axis('equal')
            plt.title(group + ' consequences for variants causing ' +
                      category + ' in poly(A) length')
            plt.tight_layout()
            save_plot(plt, hide_title=True)