def update_network(self):
    """Rebuild the sub-network shown around the current center nodes.

    Starting from ``self._center_nodes`` the selection grows for
    ``self._nhops`` rounds.  In every round each frontier node contributes
    those neighbours in ``self._network`` whose connecting edge has a
    ``'weight'`` greater than ``self._edge_threshold``, limited to
    ``self._n_max_neighbors`` per node.  The selected nodes and the weighted
    edges between them are copied into a new graph, ``self._hidden_nodes``
    are removed, the graph is passed through ``self._propagate`` and, if an
    explorer is attached, handed to ``self._nx_explorer.change_graph``.

    Does nothing when there are no center nodes.
    """
    if not self._center_nodes:
        return

    source = self._network
    selected = set(self._center_nodes)
    frontier = list(self._center_nodes)
    for _ in range(self._nhops):
        discovered = []
        for node in frontier:
            # NOTE(review): candidates are ordered by node id (descending),
            # not by edge weight, before the per-node cap is applied --
            # confirm that this is the intended ranking.
            candidates = sorted(
                (x for x in source.neighbors(node)
                 if source.edge[node][x]['weight'] > self._edge_threshold),
                reverse=True)
            for x in candidates[:self._n_max_neighbors]:
                if x not in selected:
                    selected.add(x)
                    discovered.append(x)
        # The next hop expands from every node found in this hop, not only
        # from the neighbours of the last node that happened to be visited.
        # Nodes selected earlier were already expanded, so they are skipped.
        frontier = discovered

    to_add = list(selected)
    subnet = network.Graph()
    subnet.add_nodes_from([(x, source.node[x]) for x in to_add])

    # Visit each selected node once and link it only to nodes not visited
    # yet, so every edge between selected nodes is added exactly once.
    pending = set(to_add)
    for node in to_add:
        pending.discard(node)
        # Rebind to the source graph's attribute dict (shared, not copied).
        subnet.node[node] = source.node[node]
        subnet.add_weighted_edges_from(
            [(node, x, source.edge[node][x]['weight'])
             for x in source.neighbors(node) if x in pending])

    subnet.remove_nodes_from(self._hidden_nodes)
    subnet = self._propagate(subnet)

    if self._nx_explorer is not None:
        self._nx_explorer.change_graph(subnet)
def to_single_mode(net, mode_mask, conn_mask, weighting):
    """
    Convert a two-mode network into a single-mode network.

    The result has ``mode_mask.sum()`` nodes, numbered from 0.  Edges are
    obtained from ``_filtered_edges`` and weighted by the function stored
    in ``Weighting[weighting]``.

    Args:
        net: network to convert
        mode_mask (boolean array): a mask with nodes to connect
        conn_mask (boolean array): a mask with nodes to use for connecting
        weighting (int): normalization for edge weights; selects the entry
            of ``Weighting`` to apply

    Returns:
        single-mode network; it has no edges when ``_filtered_edges``
        returns None
    """
    single = network.Graph()
    node_count = mode_mask.sum()
    single.add_nodes_from(range(node_count))

    shared = _filtered_edges(net, mode_mask, conn_mask, weighting > 1)
    if shared is None:
        return single

    # COO form exposes parallel row / col / data arrays, one entry per edge.
    weighted = Weighting[weighting].func(shared).tocoo()
    triples = zip(weighted.row, weighted.col, weighted.data)
    single.add_weighted_edges_from(triples)
    return single
def generateGraph(self, N_changed=False):
    """Rebuild the network from the distance matrix and send it onward.

    Clears the widget's error and warning state, then:

    * with no matrix loaded, blanks the info labels, zeroes the counters,
      drops the graph, calls ``sendSignals()`` and returns;
    * when the edge count estimated from the histogram exceeds 200000,
      reports an error and leaves ``self.graph`` as ``None``;
    * otherwise creates one node per matrix row, connects pairs whose
      distance is at most ``self.spinUpperThreshold`` (plus each row's
      nearest neighbours when ``self.include_knn`` is set) and keeps the
      nodes chosen by ``self.node_selection``.

    Afterwards ``self.graph_matrix``, ``self.pconnected``, ``self.nedges``
    and the info labels are refreshed, a warning is shown for large graphs,
    ``sendSignals()`` is called and the histogram region is reset.

    Args:
        N_changed: when true, ``self.node_selection`` is first reset to
            ``NodeSelection.COMPONENTS``.
    """
    self.error()
    matrix = None
    self.warning('')

    if N_changed:
        self.node_selection = NodeSelection.COMPONENTS

    if self.matrix is None:
        if hasattr(self, "infoa"):
            self.infoa.setText("No data loaded.")
        if hasattr(self, "infob"):
            self.infob.setText("")
        if hasattr(self, "infoc"):
            self.infoc.setText("")
        self.pconnected = 0
        self.nedges = 0
        self.graph = None
        self.sendSignals()
        return

    # Histogram bins at or below the threshold give a cheap estimate of how
    # many edges the threshold would produce.
    nEdgesEstimate = 2 * sum(
        y for x, y in zip(self.histogram.xData, self.histogram.yData)
        if x <= self.spinUpperThreshold)

    if nEdgesEstimate > 200000:
        self.graph = None
        self.error('Estimated number of edges is too high (%d).' % nEdgesEstimate)
    else:
        graph = network.Graph()
        graph.add_nodes_from(range(self.matrix.shape[0]))
        matrix = self.matrix

        if matrix is not None and matrix.row_items is not None:
            if isinstance(self.matrix.row_items, Orange.data.Table):
                graph.set_items(self.matrix.row_items)
            else:
                # Plain labels: wrap them in a one-column table of string metas.
                data = [[str(x)] for x in self.matrix.row_items]
                items = Orange.data.Table(
                    Orange.data.Domain(
                        [], metas=[Orange.data.StringVariable('label')]),
                    data)
                graph.set_items(items)

        # set the threshold
        # set edges where distance is lower than threshold
        self.warning(0)
        if self.kNN >= self.matrix.shape[0]:
            self.warning(0, "kNN larger then supplied distance matrix dimension. Using k = %i" % (self.matrix.shape[0] - 1))

        def edges_from_distance_matrix(matrix, upper, knn):
            """Yield ``(i, j, distance)`` for every edge to create.

            Pairs from the upper triangle with distance <= `upper` come
            first for each row, followed by the row's `knn` smallest
            entries when `knn` is non-zero.
            """
            rows, cols = matrix.shape
            for i in range(rows):
                for j in range(i + 1, cols):
                    if matrix[i, j] <= upper:
                        yield i, j, matrix[i, j]
                if not knn:
                    continue
                # NOTE(review): argsort of row i will normally rank i itself
                # first (zero self-distance), which yields a self-loop --
                # confirm whether that is intended.
                for j in np.argsort(matrix[i])[:knn]:
                    yield i, j, matrix[i, j]

        edge_list = edges_from_distance_matrix(
            self.matrix, self.spinUpperThreshold,
            min(self.kNN, self.matrix.shape[0] - 1) if self.include_knn else 0)
        if self.edge_weights == EdgeWeights.INVERSE:
            edge_list = list(edge_list)
            # default=0 keeps an empty edge list (nothing under the
            # threshold) from raising ValueError in max().
            max_weight = max((d for u, v, d in edge_list), default=0)
            graph.add_edges_from((u, v, {'weight': max_weight - d})
                                 for u, v, d in edge_list)
        else:
            graph.add_edges_from((u, v, {'weight': d})
                                 for u, v, d in edge_list)

        matrix = None
        self.graph = None
        component = []
        # exclude unconnected
        if self.node_selection == NodeSelection.COMPONENTS:
            component = list(chain.from_iterable(
                x for x in network.nx.connected_components(graph)
                if len(x) >= self.excludeLimit))
        # largest connected component only
        elif self.node_selection == NodeSelection.LARGEST_COMP:
            component = max(network.nx.connected_components(graph), key=len)
        else:
            self.graph = graph

        if len(component) > 1:
            if len(component) == graph.number_of_nodes():
                self.graph = graph
                matrix = self.matrix
            else:
                self.graph = graph.subgraph(component)
                matrix = self.matrix.submatrix(sorted(component))

    # `matrix` is still None when the estimate was too high, so the stored
    # matrix is cleared together with the graph in that case.
    if matrix is not None:
        matrix.row_items = self.graph.items()
    self.graph_matrix = matrix

    if self.graph is None:
        self.pconnected = 0
        self.nedges = 0
    else:
        self.pconnected = self.graph.number_of_nodes()
        self.nedges = self.graph.number_of_edges()

    if hasattr(self, "infoa"):
        self.infoa.setText("Data items on input: %d" % self.matrix.shape[0])
    if hasattr(self, "infob"):
        self.infob.setText("Network nodes: %d (%3.1f%%)" % (
            self.pconnected,
            self.pconnected / float(self.matrix.shape[0]) * 100))
    if hasattr(self, "infoc"):
        self.infoc.setText("Network edges: %d (%.2f edges/node)" % (
            self.nedges,
            self.nedges / float(self.pconnected) if self.pconnected else 0))

    self.warning(303)
    if self.pconnected > 1000 or self.nedges > 2000:
        self.warning(303, 'Large number of nodes/edges; performance will be hindered.')

    self.sendSignals()
    self.histogram.setRegion(0, self.spinUpperThreshold)
def generateGraph(self, N_changed=False):
    """Rebuild the network from the distance matrix and send it onward.

    Clears the widget's errors and warnings, then:

    * with no matrix loaded, blanks the info labels, zeroes the counters,
      drops the graph, calls ``sendSignals()`` and returns;
    * when the edge count estimated from the histogram exceeds 200000,
      raises ``Error.number_of_edges`` and leaves ``self.graph`` as
      ``None``;
    * otherwise creates one node per matrix row, attaches node items when
      their count matches the matrix size, connects pairs whose distance is
      at most ``self.spinUpperThreshold`` (plus each row's nearest
      neighbours when ``self.include_knn`` is set) and keeps the nodes
      chosen by ``self.node_selection``.

    Afterwards ``self.graph_matrix``, ``self.pconnected``, ``self.nedges``
    and the info labels are refreshed, a warning is shown for large graphs,
    ``sendSignals()`` is called and the histogram region is reset.

    Args:
        N_changed: when true, ``self.node_selection`` is first reset to
            ``NodeSelection.COMPONENTS``.
    """
    self.Error.clear()
    self.Warning.clear()
    matrix = None

    if N_changed:
        self.node_selection = NodeSelection.COMPONENTS

    if self.matrix is None:
        if hasattr(self, "infoa"):
            self.infoa.setText("No data loaded.")
        if hasattr(self, "infob"):
            self.infob.setText("")
        if hasattr(self, "infoc"):
            self.infoc.setText("")
        self.pconnected = 0
        self.nedges = 0
        self.graph = None
        self.sendSignals()
        return

    # Histogram bins at or below the threshold give a cheap estimate of how
    # many edges the threshold would produce.
    nEdgesEstimate = 2 * sum(
        y for x, y in zip(self.histogram.xData, self.histogram.yData)
        if x <= self.spinUpperThreshold)

    if nEdgesEstimate > 200000:
        self.graph = None
        self.Error.number_of_edges(nEdgesEstimate)
    else:
        graph = network.Graph()
        graph.add_nodes_from(range(self.matrix.shape[0]))
        matrix = self.matrix

        if matrix is not None and matrix.row_items is not None:
            row_items = self.matrix.row_items
            if isinstance(row_items, Table):
                if self.matrix.axis == 1:
                    items = row_items
                else:
                    # For the other axis the nodes are labelled with the
                    # names of the table's attributes.
                    items = [[v.name] for v in row_items.domain.attributes]
            else:
                items = [[str(x)] for x in self.matrix.row_items]
            if len(items) != self.matrix.shape[0]:
                self.Warning.invalid_number_of_items()
            else:
                # Plain labels: wrap them in a one-column table of string metas.
                if items and not isinstance(items, Table):
                    items = Table(
                        Domain([], metas=[StringVariable('label')]), items)
                graph.set_items(items)

        # set the threshold
        # set edges where distance is lower than threshold
        self.Warning.kNN_too_large.clear()
        if self.kNN >= self.matrix.shape[0]:
            self.Warning.kNN_too_large(self.matrix.shape[0] - 1)

        def edges_from_distance_matrix(matrix, upper, knn):
            """Yield ``(i, j, distance)`` for every edge to create.

            Pairs from the upper triangle with distance <= `upper` come
            first for each row, followed by the row's `knn` smallest
            entries when `knn` is non-zero.
            """
            rows, cols = matrix.shape
            for i in range(rows):
                for j in range(i + 1, cols):
                    if matrix[i, j] <= upper:
                        yield i, j, matrix[i, j]
                if not knn:
                    continue
                # NOTE(review): argsort of row i will normally rank i itself
                # first (zero self-distance), which yields a self-loop --
                # confirm whether that is intended.
                for j in np.argsort(matrix[i])[:knn]:
                    yield i, j, matrix[i, j]

        edge_list = edges_from_distance_matrix(
            self.matrix, self.spinUpperThreshold,
            min(self.kNN, self.matrix.shape[0] - 1) if self.include_knn else 0)
        if self.edge_weights == EdgeWeights.INVERSE:
            edge_list = list(edge_list)
            # default=0 keeps an empty edge list (nothing under the
            # threshold) from raising ValueError in max().
            max_weight = max((d for u, v, d in edge_list), default=0)
            graph.add_edges_from((u, v, {
                'weight': max_weight - d
            }) for u, v, d in edge_list)
        else:
            graph.add_edges_from((u, v, {
                'weight': d
            }) for u, v, d in edge_list)

        matrix = None
        self.graph = None
        component = []
        # exclude unconnected
        if self.node_selection == NodeSelection.COMPONENTS:
            component = list(
                chain.from_iterable(
                    x for x in network.nx.connected_components(graph)
                    if len(x) >= self.excludeLimit))
        # largest connected component only
        elif self.node_selection == NodeSelection.LARGEST_COMP:
            component = max(network.nx.connected_components(graph), key=len)
        else:
            self.graph = graph

        if len(component) > 1:
            if len(component) == graph.number_of_nodes():
                self.graph = graph
                matrix = self.matrix
            else:
                self.graph = graph.subgraph(component)
                matrix = self.matrix.submatrix(sorted(component))

    # `matrix` is still None when the estimate was too high, so the stored
    # matrix is cleared together with the graph in that case.
    if matrix is not None:
        matrix.row_items = self.graph.items()
    self.graph_matrix = matrix

    if self.graph is None:
        self.pconnected = 0
        self.nedges = 0
    else:
        self.pconnected = self.graph.number_of_nodes()
        self.nedges = self.graph.number_of_edges()

    if hasattr(self, "infoa"):
        self.infoa.setText("Data items on input: %d" % self.matrix.shape[0])
    if hasattr(self, "infob"):
        self.infob.setText("Network nodes: %d (%3.1f%%)" % (
            self.pconnected,
            self.pconnected / float(self.matrix.shape[0]) * 100))
    if hasattr(self, "infoc"):
        self.infoc.setText(
            "Network edges: %d (%.2f edges/node)" % (
                self.nedges,
                self.nedges / float(self.pconnected)
                if self.pconnected else 0))

    self.Warning.large_number_of_nodes.clear()
    if self.pconnected > 1000 or self.nedges > 2000:
        self.Warning.large_number_of_nodes()

    self.sendSignals()
    self.histogram.setRegion(0, self.spinUpperThreshold)
def extract_network(ppidb, query, geneinfo, include_neighborhood=True,
                    min_score=None, progress=None):
    """Build an interaction network around the queried identifiers.

    Args:
        ppidb: interaction database; used through ``ppidb.synonyms(key)``
            and ``ppidb.edges(key)``, the latter yielding
            ``(id1, id2, score)`` triples.
        query: identifiers to look up, either a dict mapping identifier to
            the name used in the query or any other iterable of
            identifiers (each then serves as its own query name).
        geneinfo: gene annotation lookup; used through
            ``geneinfo.matcher.umatch(name)`` and ``geneinfo[match]``.
        include_neighborhood: when true, nodes directly connected to a
            query node are added as well.
        min_score: when given, interactions scoring below it are ignored.
        progress: optional callable receiving the completion percentage
            while edges are being added.

    Returns:
        A ``network.Graph`` whose nodes are numbered consecutively from 0
        and whose items table holds the query name, database id, synonyms,
        gene symbol and a query-node ("source") flag for every node.

    Raises:
        ValueError: if `min_score` is given together with a
            ``ppi.BioGRID`` database.
    """
    if not isinstance(query, dict):
        query = {name: name for name in query}

    report_weights = True
    if isinstance(ppidb, ppi.BioGRID):
        # BioGRID scores are not comparable (they can be p values,
        # confidence scores, ..., i.e. whatever was reported in the source
        # publication)
        report_weights = False
        if min_score is not None:
            raise ValueError("min_score used with BioGrid")

    graph = network.Graph()
    # node ids in Orange.network.Graph need to be in [0 .. n-1]; the first
    # lookup of a database id assigns it the next free integer.
    nodeids = defaultdict(partial(next, count()))

    def gi_info(names):
        """Return the gene info entry matched by `names`, or None."""
        matched = [(name, geneinfo.matcher.umatch(name)) for name in names]
        entries = [(name, geneinfo[match]) for name, match in matched if match]
        if len(entries) > 1:
            # try to resolve conflicts by prioritizing entries whose
            # symbol/gene_id/locus_tag exactly matches the synonym name.
            exact = [
                (name, entry) for name, entry in entries
                if name in (entry.gene_id, entry.symbol, entry.locus_tag)
            ]
            if len(exact) == 1:
                entries = exact
        if not entries:
            return None
        # Need to report multiple mappings
        return entries[0][1]

    def add_node(ident, **extra):
        """Add the node for database id `ident` with its annotations."""
        synonyms = ppidb.synonyms(ident)
        entry = gi_info(synonyms)
        graph.add_node(nodeids[ident], key=ident, synonyms=synonyms,
                       symbol=entry.symbol if entry is not None else "",
                       **extra)

    # Add query nodes.
    for key, query_name in query.items():
        add_node(key, query_name=query_name)

    if include_neighborhood:
        # extend the set of nodes in the network with immediate neighbors
        edges_iter = (edge for key in query for edge in ppidb.edges(key))
        for id1, id2, score in edges_iter:
            if min_score is None or score >= min_score:
                for ident in (id1, id2):
                    if nodeids[ident] not in graph:
                        add_node(ident)

    # add edges between nodes
    total = len(nodeids)
    for i, id1 in enumerate(nodeids):
        if progress is not None:
            progress(100.0 * i / total)
        for _, id2, score in ppidb.edges(id1):
            if id2 in nodeids and (min_score is None or score >= min_score):
                nodeid1 = nodeids[id1]
                nodeid2 = nodeids[id2]
                assert nodeid1 in graph and nodeid2 in graph
                if score is not None and report_weights:
                    graph.add_edge(nodeid1, nodeid2, weight=score)
                else:
                    graph.add_edge(nodeid1, nodeid2)

    nodedomain = Orange.data.Domain(
        [], [],
        [
            Orange.data.StringVariable("Query name"),  # if applicable
            Orange.data.StringVariable("id"),  # ppidb primary key
            Orange.data.StringVariable("Synonyms"),  # ppidb synonyms
            Orange.data.StringVariable("Symbol"),  # ncbi gene name ??
            Orange.data.DiscreteVariable("source", values=["false", "true"])
        ],
    )
    N = len(graph.nodes())

    # Order rows by the integer node id itself.  Looking the node id up in
    # `nodeids` would be wrong: that mapping is keyed by database ids, so
    # the lookup either inserts fresh counter values or, for integer
    # database ids, returns the id of an unrelated node.
    node_items = sorted(graph.node.items(), key=lambda item: item[0])

    meta = [[
        node.get("query_name", ""),
        node.get("key", ""),
        ", ".join(node.get("synonyms", [])),
        node.get("symbol", nodeid),
        (1 if "query_name" in node else 0)
    ] for nodeid, node in node_items]
    if not meta:
        # Keep the meta array two-dimensional even without any nodes.
        meta = numpy.empty((0, len(nodedomain.metas)), dtype=object)

    nodeitems = Orange.data.Table.from_numpy(
        nodedomain,
        numpy.empty((N, 0)), numpy.empty((N, 0)),
        numpy.array(meta, dtype=object))
    graph.set_items(nodeitems)
    return graph