def convet_to_Network(self, graph):
    """Convert an adjacency mapping into a ``Network`` and send it.

    ``graph`` is indexed by ``0 .. len(graph) - 1``; each entry exposes
    ``keys()`` yielding the indices of the nodes it is connected to. Every
    such pair becomes an edge of weight 1.0. The result is stored in
    ``self.outNetwork`` and ``self.send_Network()`` is called.

    When ``graph`` is ``None`` an error is raised on the widget and the info
    label is updated instead.
    """
    if graph is None:
        self.Error.input_graph_is_none()
        self.infoa.setText(
            "Nothing on input yet, waiting to get something.")
        return

    self.Error.clear()
    node_count = len(graph)

    # One (source, target) pair per adjacency entry, in input order.
    pairs = [(source, target)
             for source in range(node_count)
             for target in list(graph[source].keys())]
    sources = np.array([source for source, _ in pairs])
    targets = np.array([target for _, target in pairs])
    unit_weights = np.array([1.0] * len(pairs))

    adjacency = sp.csr_matrix((unit_weights, (sources, targets)),
                              shape=(node_count, node_count))
    # One row per node, holding the node index in the 'label' meta column.
    node_table = Table(Domain([], metas=[StringVariable('label')]),
                       [[index] for index in range(len(graph))])
    self.outNetwork = Network(node_table, adjacency)
    self.send_Network()
def send_data(self):
    """Send the network and its node table extended with computed indices.

    Nothing is sent while jobs are still running. Without a graph, ``None``
    is sent on both outputs. Otherwise every node-level method that is
    enabled on the widget and already present in ``self.known`` contributes
    one continuous column to the node table; the table is sent on ``items``
    and, wrapped in a new ``Network`` with the original edges, name and
    coordinates, on ``network``.
    """
    if self.running_jobs:
        return
    if self.graph is None:
        self.Outputs.network.send(None)
        self.Outputs.items.send(None)
        # Without this return the code below would call
        # number_of_nodes() on None and raise AttributeError.
        return

    to_report = [
        method for attr, method in METHODS.items()
        if method.level == NODELEVEL
        and getattr(self, attr)
        and attr in self.known]

    items = self.items_analysis
    graph = self.graph
    n = graph.number_of_nodes()
    if isinstance(items, Table):
        dom = items.domain
        attrs, class_vars, metas = dom.attributes, dom.class_vars, dom.metas
        x, y, m = items.X, items.Y, items.metas
    else:
        # No node table: start from empty blocks with one row per node.
        attrs, class_vars, metas = [], [], []
        x = y = m = np.empty((n, 0))

    attrs = tuple(attrs) + tuple(
        ContinuousVariable(method.label) for method in to_report)
    # Each stored result becomes a single column appended to X.
    x = np.hstack(
        (x, ) + tuple(self.known[method.name].reshape((n, 1))
                      for method in to_report))

    domain = Domain(attrs, class_vars, metas)
    table = Table(domain, x, y, m)
    new_graph = Network(table, graph.edges, graph.name, graph.coordinates)
    self.Outputs.network.send(new_graph)
    self.Outputs.items.send(table)
def _send_data(self): if self.partition is None or self.data is None: return domain = self.data.domain # Compute the frequency of each cluster index counts = np.bincount(self.partition) indices = np.argsort(counts)[::-1] index_map = {n: o for n, o in zip(indices, range(len(indices)))} new_partition = list(map(index_map.get, self.partition)) cluster_var = DiscreteVariable( get_unique_names(domain, "Cluster"), values=[ "C%d" % (i + 1) for i, _ in enumerate(np.unique(new_partition)) ]) new_domain = add_columns(domain, metas=[cluster_var]) new_table = self.data.transform(new_domain) new_table.get_column_view(cluster_var)[0][:] = new_partition summary = len(new_table) if new_table else self.info.NoOutput details = format_summary_details(new_table) if new_table else "" self.info.set_output_summary(summary, details) self.Outputs.annotated_data.send(new_table) if Network is not None: n_edges = self.graph.number_of_edges() edges = sp.coo_matrix( (np.ones(n_edges), np.array(self.graph.edges()).T), shape=(n_edges, n_edges)) graph = Network(new_table, edges) self.Outputs.graph.send(graph)
def set_matrix(self, data):
    """Build a network from a distance matrix and send it with the matrix.

    An edge ``(i, j)`` is kept unless some third point ``k`` is strictly
    closer to both ``i`` and ``j`` than they are to each other. Pairs with
    ``i == j`` are never rejected, so every node keeps an edge to itself.
    Edge weights are the distances ``data[i][j]``.

    Missing or empty input clears both outputs. An empty matrix would
    otherwise divide by zero when the average degree is reported.
    """
    if data is None or len(data) == 0:
        self.infoa.setText(
            "No data on input yet, waiting to get something.")
        self.Outputs.network.send(None)
        self.Outputs.distances.send(None)
        return

    nb_data = len(data)
    row_data = []   # first index of the points for each edge
    col_data = []   # second index of the points for each edge
    data_data = []  # edge weight of each edge
    for i in range(nb_data):
        dist_i = data[i]
        for j in range(nb_data):
            dist_j = data[j]
            d_ij = dist_i[j]
            # The edge is blocked by any k (other than i) lying strictly
            # closer to both endpoints than the endpoints are to each other.
            blocked = i != j and any(
                i != k and dist_i[k] < d_ij and dist_j[k] < d_ij
                for k in range(nb_data))
            if not blocked:
                row_data.append(i)
                col_data.append(j)
                data_data.append(d_ij)

    # transform to np array
    row = np.array(row_data)
    col = np.array(col_data)
    new_data = np.array(data_data)
    # create a csr matrix in order to create a Network
    new_network = sp.csr_matrix((new_data, (row, col)),
                                shape=(nb_data, nb_data))
    # Create a table which contains each point of the network
    items = Table(Domain([], metas=[StringVariable('label')]),
                  [[i] for i in range(nb_data)])
    self.infoa.setText("Average edges per nodes : " +
                       str(len(row_data) / nb_data))
    # Send results
    self.Outputs.network.send(Network(items, new_network))
    self.Outputs.distances.send(data)
def generateGraph(self, N_changed=False):
    """Rebuild ``self.graph`` from ``self.matrix`` and the current settings.

    Pairs whose distance is at most ``self.spinUpperThreshold`` become
    edges; depending on ``self.node_selection`` the result is then reduced
    to components with at least ``self.excludeLimit`` nodes or to the
    largest component. Statistics are written to the info labels (when
    they exist), ``self.sendSignals()`` is called and the histogram region
    is synchronised with the threshold.

    No graph is built when there is no matrix or when the histogram-based
    edge estimate exceeds 200000.

    :param N_changed: when true, ``self.node_selection`` is first switched
        to ``NodeSelection.COMPONENTS``.
    """
    self.Error.clear()
    self.Warning.clear()
    if N_changed:
        self.node_selection = NodeSelection.COMPONENTS

    matrix = self.matrix
    if matrix is None:
        if hasattr(self, "infoa"):
            self.infoa.setText("No data loaded.")
        if hasattr(self, "infob"):
            self.infob.setText("")
        if hasattr(self, "infoc"):
            self.infoc.setText("")
        self.pconnected = 0
        self.nedges = 0
        self.graph = None
        self.sendSignals()
        return

    n_items = matrix.shape[0]
    nEdgesEstimate = 2 * sum(
        y for x, y in zip(self.histogram.xData, self.histogram.yData)
        if x <= self.spinUpperThreshold)

    if nEdgesEstimate > 200000:
        graph = None
        self.Error.number_of_edges(nEdgesEstimate)
    else:
        # Derive one label row per node from the matrix's row items.
        items = None
        row_items = matrix.row_items
        if row_items is not None:
            if isinstance(row_items, Table):
                if matrix.axis == 1:
                    items = row_items
                else:
                    items = [[v.name] for v in row_items.domain.attributes]
            else:
                items = [[str(x)] for x in row_items]
            if len(items) != n_items:
                self.Warning.invalid_number_of_items()
                items = None
        if items is None:
            # Fall back to the node indices, as one-element rows like the
            # other label lists (a flat list of ints is not a list of rows).
            items = [[str(i)] for i in range(n_items)]
        if not isinstance(items, Table):
            items = Table(
                Domain([], metas=[StringVariable('label')]), items)

        self.Warning.kNN_too_large.clear()
        if self.kNN >= n_items:
            self.Warning.kNN_too_large(n_items - 1)

        # set edges where distance is lower than threshold
        mask = matrix <= self.spinUpperThreshold
        if self.include_knn:
            # NOTE(review): this argsorts the boolean mask, not the
            # distances, and compares sort indices rather than ranks;
            # it likely does not select nearest neighbours. Left as is
            # pending confirmation of the intended semantics.
            mask |= mask.argsort() < self.kNN
        weights = matrix[mask]
        if weights.size and self.edge_weights == EdgeWeights.INVERSE:
            weights = np.max(weights) - weights
        # Pass the shape explicitly: an inferred shape stops at the largest
        # index present and cannot be inferred at all without entries.
        edges = sp.csr_matrix((weights, mask.nonzero()), shape=matrix.shape)
        graph = Network(items, edges)

        self.graph = None
        # exclude unconnected
        if self.node_selection != NodeSelection.ALL_NODES:
            _, components = csgraph.connected_components(edges)
            counts = np.bincount(components)
            if self.node_selection == NodeSelection.COMPONENTS:
                ind = np.flatnonzero(counts >= self.excludeLimit)
                mask = np.isin(components, ind)
            else:
                mask = components == np.argmax(counts)
            graph = graph.subgraph(mask)

    self.graph = graph
    if graph is None:
        self.pconnected = 0
        self.nedges = 0
    else:
        self.pconnected = self.graph.number_of_nodes()
        self.nedges = self.graph.number_of_edges()

    if hasattr(self, "infoa"):
        self.infoa.setText("Data items on input: %d" % n_items)
    if hasattr(self, "infob"):
        self.infob.setText("Network nodes: %d (%3.1f%%)" %
                           (self.pconnected,
                            self.pconnected / float(n_items) * 100))
    if hasattr(self, "infoc"):
        self.infoc.setText("Network edges: %d (%.2f edges/node)" % (
            self.nedges,
            self.nedges / float(self.pconnected) if self.pconnected else 0))

    self.Warning.large_number_of_nodes.clear()
    if self.pconnected > 1000 or self.nedges > 2000:
        self.Warning.large_number_of_nodes()

    self.sendSignals()
    self.histogram.setRegion(0, self.spinUpperThreshold)
class OWNxEpsilonGraph(widget.OWWidget):
    """Widget that turns a distance matrix into an epsilon-proximity network.

    Two items are connected when their distance is at most ``epsilon``.
    The threshold can be set with a spin box or by dragging the region of
    the distance histogram shown in the main area.
    """

    name = "Epsilon Proximity Graph Generator"
    description = ('Constructs Graph object using Epsilon algorithm. '
                   'Nodes from data table are connected only if the '
                   'distance between them is equal or less than a '
                   'given parameter (ε).')
    icon = "icons/EpsilonNetworkProximityGraph.svg"
    priority = 6440  # priority based on NetworkFromDistances widget

    class Inputs:
        distances = Input("Distances", DistMatrix)

    class Outputs:
        network = Output("Network", Network)
        data = Output("Data", Table)
        distances = Output("Distances", DistMatrix)

    resizing_enabled = False

    class Warning(widget.OWWidget.Warning):
        large_number_of_nodes = widget.Msg(
            'Large number of nodes/edges; performance will be hindered')
        # Raised by generateGraph; it was used there without being declared.
        invalid_number_of_items = widget.Msg(
            'Number of data items does not match the number of nodes')

    class Error(widget.OWWidget.Error):
        number_of_edges = widget.Msg(
            'Estimated number of edges is too high ({})')

    def __init__(self):
        super().__init__()
        self.epsilon = 0
        self.matrix = None
        self.graph = None
        self.graph_matrix = None
        # Node table of the current graph; this is what the "Data" output
        # carries.
        self.graph_items = None

        self.histogram = Histogram(self)
        self.mainArea.layout().addWidget(self.histogram)
        self.mainArea.setMinimumWidth(500)
        self.mainArea.setMinimumHeight(100)
        self.addHistogramControls()

        # info
        boxInfo = gui.widgetBox(self.controlArea, box="Info")
        self.infoa = gui.widgetLabel(boxInfo, "No data loaded.")
        self.infob = gui.widgetLabel(boxInfo, '')
        self.infoc = gui.widgetLabel(boxInfo, '')

        gui.rubber(self.controlArea)
        self.resize(600, 400)

    def addHistogramControls(self):
        """Create the epsilon spin box and link it with the histogram region."""
        boxHisto = gui.widgetBox(self.controlArea, box="Algorithm controls")
        ribg = gui.widgetBox(boxHisto, None, orientation="horizontal",
                             addSpace=False)
        self.spin_high = gui.doubleSpin(
            boxHisto, self, 'epsilon', 0, float('inf'), 0.001, decimals=3,
            label='Epsilon', callback=self.changeUpperSpin,
            callbackOnReturn=1, keyboardTracking=False, controlWidth=60)
        self.histogram.region.sigRegionChangeFinished.connect(
            self.spinboxFromHistogramRegion)

    # Processing distance input
    @Inputs.distances
    def set_matrix(self, data):
        """Accept a new distance matrix; an empty matrix counts as no input."""
        if data is not None and not data.size:
            data = None
        self.matrix = data
        if data is None:
            self.histogram.setValues([])
            self.generateGraph()
            return

        if self.matrix.row_items is None:
            self.matrix.row_items = list(range(self.matrix.shape[0]))

        # draw histogram
        self.matrix_values = values = sorted(self.matrix.flat)
        self.histogram.setValues(values)

        self.generateGraph()

    def generateGraph(self, N_changed=False):
        """Rebuild the network for the current matrix and epsilon.

        Pairs with distance at most ``self.epsilon`` become edges whose
        weight is the largest selected distance minus the pair's distance.
        No graph is built when there is no matrix or when the
        histogram-based edge estimate exceeds 200000. Afterwards the info
        labels are refreshed, the outputs are sent and the histogram region
        is synchronised with epsilon.

        :param N_changed: when true, ``self.node_selection`` is set to
            ``NodeSelection.COMPONENTS``; nothing else in this class reads
            that attribute.
        """
        self.Error.clear()
        self.Warning.clear()
        if N_changed:
            self.node_selection = NodeSelection.COMPONENTS

        # Forget the previous result so the outputs never pair a missing
        # graph with the matrix/items of an older one.
        self.graph = None
        self.graph_matrix = None
        self.graph_items = None

        matrix = self.matrix
        if matrix is None:
            if hasattr(self, "infoa"):
                self.infoa.setText("No data loaded.")
            if hasattr(self, "infob"):
                self.infob.setText("")
            if hasattr(self, "infoc"):
                self.infoc.setText("")
            self.pconnected = 0
            self.nedges = 0
            self.sendSignals()
            return

        n_items = matrix.shape[0]
        nEdgesEstimate = 2 * sum(
            y for x, y in zip(self.histogram.xData, self.histogram.yData)
            if x <= self.epsilon)

        if nEdgesEstimate > 200000:
            self.Error.number_of_edges(nEdgesEstimate)
        else:
            # Derive one label row per node from the matrix's row items.
            items = None
            row_items = matrix.row_items
            if row_items is not None:
                if isinstance(row_items, Table):
                    if matrix.axis == 1:
                        items = row_items
                    else:
                        items = [[v.name]
                                 for v in row_items.domain.attributes]
                else:
                    items = [[str(x)] for x in row_items]
                if len(items) != n_items:
                    self.Warning.invalid_number_of_items()
                    items = None
            if items is None:
                # One-element rows, like the other label lists; a flat list
                # of ints is not a list of rows.
                items = [[str(i)] for i in range(n_items)]
            if not isinstance(items, Table):
                items = Table(Domain([], metas=[StringVariable('label')]),
                              items)

            mask = matrix <= self.epsilon
            weights = matrix[mask]
            if weights.size:
                weights = np.max(weights) - weights
            # Pass the shape explicitly: an inferred shape stops at the
            # largest index present and cannot be inferred without entries.
            edges = sp.csr_matrix((weights, mask.nonzero()),
                                  shape=matrix.shape)
            self.graph = Network(items, edges)
            self.graph_matrix = matrix
            self.graph_items = items

        if self.graph is None:
            self.pconnected = 0
            self.nedges = 0
        else:
            self.pconnected = self.graph.number_of_nodes()
            self.nedges = self.graph.number_of_edges()

        if hasattr(self, "infoa"):
            self.infoa.setText("Data items on input: %d" % n_items)
        if hasattr(self, "infob"):
            self.infob.setText("Network nodes: %d (%3.1f%%)" %
                               (self.pconnected,
                                self.pconnected / float(n_items) * 100))
        if hasattr(self, "infoc"):
            self.infoc.setText(
                "Network edges: %d (%.2f edges/node)" %
                (self.nedges,
                 self.nedges / float(self.pconnected)
                 if self.pconnected else 0))

        self.Warning.large_number_of_nodes.clear()
        if self.pconnected > 1000 or self.nedges > 2000:
            self.Warning.large_number_of_nodes()

        self.sendSignals()
        self.histogram.setRegion(0, self.epsilon)

    # Outputs processing (has to be called if any modification on the
    # network happens)
    def sendSignals(self):
        """Send the graph, the matrix it was built from and its node table."""
        self.Outputs.network.send(self.graph)
        self.Outputs.distances.send(self.graph_matrix)
        # "Data" is declared as a Table output, so it carries the node
        # table rather than the distance matrix.
        self.Outputs.data.send(self.graph_items)

    def changeUpperSpin(self):
        """Clamp epsilon to the histogram range and rebuild the graph."""
        if self.matrix is None:
            return
        self.epsilon = np.clip(self.epsilon, *self.histogram.boundary())
        # Percentage of matrix values lying below epsilon.
        self.percentil = 100 * np.searchsorted(
            self.matrix_values, self.epsilon) / len(self.matrix_values)
        self.generateGraph()

    def spinboxFromHistogramRegion(self):
        """Take epsilon from the upper edge of the histogram region."""
        _, self.epsilon = self.histogram.getRegion()
        self.changeUpperSpin()
def generateGraph(self, N_changed=False):
    """Rebuild the network for the current matrix and epsilon.

    Pairs with distance at most ``self.epsilon`` become edges whose weight
    is the largest selected distance minus the pair's distance. No graph is
    built when there is no matrix or when the histogram-based edge estimate
    exceeds 200000. Afterwards the info labels (when they exist) are
    refreshed, ``self.sendSignals()`` is called and the histogram region is
    synchronised with epsilon.

    :param N_changed: when true, ``self.node_selection`` is set to
        ``NodeSelection.COMPONENTS`` before anything else happens.
    """
    self.Error.clear()
    self.Warning.clear()
    if N_changed:
        self.node_selection = NodeSelection.COMPONENTS

    matrix = self.matrix
    if matrix is None:
        if hasattr(self, "infoa"):
            self.infoa.setText("No data loaded.")
        if hasattr(self, "infob"):
            self.infob.setText("")
        if hasattr(self, "infoc"):
            self.infoc.setText("")
        self.pconnected = 0
        self.nedges = 0
        self.graph = None
        # Do not keep the matrix of an earlier graph around.
        self.graph_matrix = None
        self.sendSignals()
        return

    n_items = matrix.shape[0]
    nEdgesEstimate = 2 * sum(
        y for x, y in zip(self.histogram.xData, self.histogram.yData)
        if x <= self.epsilon)

    if nEdgesEstimate > 200000:
        self.graph = None
        self.graph_matrix = None
        self.Error.number_of_edges(nEdgesEstimate)
    else:
        # Derive one label row per node from the matrix's row items.
        items = None
        row_items = matrix.row_items
        if row_items is not None:
            if isinstance(row_items, Table):
                if matrix.axis == 1:
                    items = row_items
                else:
                    items = [[v.name] for v in row_items.domain.attributes]
            else:
                items = [[str(x)] for x in row_items]
            if len(items) != n_items:
                self.Warning.invalid_number_of_items()
                items = None
        if items is None:
            # One-element rows, like the other label lists; a flat list of
            # ints is not a list of rows.
            items = [[str(i)] for i in range(n_items)]
        if not isinstance(items, Table):
            items = Table(Domain([], metas=[StringVariable('label')]),
                          items)

        mask = matrix <= self.epsilon
        weights = matrix[mask]
        if weights.size:
            weights = np.max(weights) - weights
        # Pass the shape explicitly: an inferred shape stops at the largest
        # index present and cannot be inferred at all without entries.
        edges = sp.csr_matrix((weights, mask.nonzero()), shape=matrix.shape)
        self.graph = Network(items, edges)
        self.graph_matrix = matrix

    if self.graph is None:
        self.pconnected = 0
        self.nedges = 0
    else:
        self.pconnected = self.graph.number_of_nodes()
        self.nedges = self.graph.number_of_edges()

    if hasattr(self, "infoa"):
        self.infoa.setText("Data items on input: %d" % n_items)
    if hasattr(self, "infob"):
        self.infob.setText("Network nodes: %d (%3.1f%%)" %
                           (self.pconnected,
                            self.pconnected / float(n_items) * 100))
    if hasattr(self, "infoc"):
        self.infoc.setText(
            "Network edges: %d (%.2f edges/node)" %
            (self.nedges,
             self.nedges / float(self.pconnected)
             if self.pconnected else 0))

    self.Warning.large_number_of_nodes.clear()
    if self.pconnected > 1000 or self.nedges > 2000:
        self.Warning.large_number_of_nodes()

    self.sendSignals()
    self.histogram.setRegion(0, self.epsilon)
def generateGraph(self):
    """Rebuild ``self.graph`` as a nearest-neighbour network.

    For every row of ``self.graphMatrix`` the ``self.kNN`` smallest entries
    (ties resolved by column index) become edges weighted by their distance.
    The row's own diagonal entry takes part in this ranking. No graph is
    built when there is no matrix or when ``len(matrix) * kNN`` exceeds
    200000. Afterwards the info labels (when they exist) are refreshed and
    ``self.send_network()`` is called.
    """
    self.Error.clear()
    self.Warning.clear()

    matrix = self.graphMatrix
    if matrix is None:
        if hasattr(self, "infoa"):
            self.infoa.setText("No data loaded.")
        if hasattr(self, "infob"):
            self.infob.setText("")
        if hasattr(self, "infoc"):
            self.infoc.setText("")
        self.pconnected = 0
        self.nedges = 0
        self.graph = None
        # NOTE(review): nothing is sent on this path, unlike the normal
        # one; confirm that the caller clears the output.
        return

    n_items = matrix.shape[0]
    nEdges = len(matrix) * self.kNN
    if nEdges > 200000:
        self.graph = None
        self.Error.number_of_edges(nEdges)
    else:
        # Derive one label row per node from the matrix's row items.
        items = None
        row_items = matrix.row_items
        if row_items is not None:
            if isinstance(row_items, Table):
                if matrix.axis == 1:
                    items = row_items
                else:
                    items = [[v.name] for v in row_items.domain.attributes]
            else:
                items = [[str(x)] for x in row_items]
            if len(items) != n_items:
                self.Warning.invalid_number_of_items()
                items = None
        if items is None:
            # One-element rows, like the other label lists; a flat list of
            # ints is not a list of rows.
            items = [[str(i)] for i in range(n_items)]
        if not isinstance(items, Table):
            items = Table(
                Domain([], metas=[StringVariable('label')]), items)

        self.Warning.kNN_too_large.clear()
        if self.kNN >= n_items:
            self.Warning.kNN_too_large(n_items - 1)

        nb_data = len(matrix)
        # Only the leading square block is consulted, as before.
        distances = np.asarray(matrix)[:nb_data, :nb_data]
        # A row has nb_data entries; asking for more used to raise
        # IndexError right after the warning above was shown.
        n_neighbours = max(0, min(self.kNN, nb_data))
        # Stable sort: equal distances keep ascending column order.
        nearest = np.argsort(
            distances, axis=1, kind="stable")[:, :n_neighbours]
        row = np.repeat(np.arange(nb_data), n_neighbours)
        col = nearest.ravel()
        weights = distances[row, col]

        # Pass the shape explicitly: it cannot be inferred when there are
        # no entries, and an inferred shape stops at the largest index.
        edges = sp.csr_matrix((weights, (row, col)),
                              shape=(nb_data, nb_data))
        self.graph = Network(items, edges)

    if self.graph is None:
        self.pconnected = 0
        self.nedges = 0
    else:
        self.pconnected = self.graph.number_of_nodes()
        self.nedges = self.graph.number_of_edges()

    if hasattr(self, "infoa"):
        self.infoa.setText("Data items on input: %d" % n_items)
    if hasattr(self, "infob"):
        self.infob.setText("Network nodes: %d (%3.1f%%)" %
                           (self.pconnected,
                            self.pconnected / float(n_items) * 100))
    if hasattr(self, "infoc"):
        self.infoc.setText("Network edges: %d (%.2f edges/node)" % (
            self.nedges,
            self.nedges / float(self.pconnected) if self.pconnected else 0))

    self.Warning.large_number_of_nodes.clear()
    if self.pconnected > 1000 or self.nedges > 2000:
        self.Warning.large_number_of_nodes()

    self.send_network()