Exemple #1
0
    def convet_to_Network(self, graph):
        """Turn an adjacency-style graph into a ``Network`` and send it.

        ``graph`` is indexed by the integers ``0 .. len(graph) - 1``; each
        ``graph[i]`` must offer ``keys()`` yielding the neighbours of ``i``.
        Every such (i, neighbour) pair becomes an edge of weight 1.0.

        With ``graph`` set to ``None`` an error is raised on the widget and
        the info label is updated; nothing is built or sent.
        """
        if graph is None:
            self.Error.input_graph_is_none()
            self.infoa.setText(
                "Nothing on input yet, waiting to get something.")
            return

        self.Error.clear()
        node_count = len(graph)

        # One (source, target) pair per adjacency entry, in node order.
        pairs = [(source, target)
                 for source in range(node_count)
                 for target in graph[source].keys()]

        row = np.array([source for source, _ in pairs])
        col = np.array([target for _, target in pairs])
        data = np.array([1.0] * len(pairs))
        adjacency = sp.csr_matrix((data, (row, col)),
                                  shape=(node_count, node_count))

        # Node table: a single meta column holding the node index.
        label_domain = Domain([], metas=[StringVariable('label')])
        items = Table(label_domain, [[index] for index in range(node_count)])

        self.outNetwork = Network(items, adjacency)
        self.send_Network()
    def send_data(self):
        """Send the network and its item table extended with node indices.

        Nothing happens while jobs are still running. Without a graph,
        ``None`` is sent on both outputs. Otherwise every node-level method
        that is enabled on the widget and whose result is already present in
        ``self.known`` is appended to the item table as a continuous
        attribute; a new ``Network`` over that table and the table itself are
        then sent.
        """
        if self.running_jobs:
            return
        if self.graph is None:
            self.Outputs.network.send(None)
            self.Outputs.items.send(None)
            # Nothing to annotate; continuing would dereference None below.
            return

        to_report = [
            method for attr, method in METHODS.items()
            if method.level == NODELEVEL
            and getattr(self, attr) and attr in self.known]
        items = self.items_analysis
        graph = self.graph
        n = graph.number_of_nodes()
        if isinstance(items, Table):
            dom = items.domain
            attrs, class_vars, metas = dom.attributes, dom.class_vars, dom.metas
            x, y, m = items.X, items.Y, items.metas
        else:
            # No item table: start from empty columns, one row per node.
            attrs, class_vars, metas = [], [], []
            x = y = m = np.empty((n, 0))
        attrs += tuple(ContinuousVariable(method.label) for method in to_report)
        # Each reported index becomes one extra column of X.
        x = np.hstack(
            (x, ) + tuple(self.known[method.name].reshape((n, 1))
                          for method in to_report))
        domain = Domain(attrs, class_vars, metas)
        table = Table(domain, x, y, m)
        new_graph = Network(table, graph.edges, graph.name, graph.coordinates)
        self.Outputs.network.send(new_graph)
        self.Outputs.items.send(table)
Exemple #3
0
    def _send_data(self):
        if self.partition is None or self.data is None:
            return
        domain = self.data.domain
        # Compute the frequency of each cluster index
        counts = np.bincount(self.partition)
        indices = np.argsort(counts)[::-1]
        index_map = {n: o for n, o in zip(indices, range(len(indices)))}
        new_partition = list(map(index_map.get, self.partition))

        cluster_var = DiscreteVariable(
            get_unique_names(domain, "Cluster"),
            values=[
                "C%d" % (i + 1) for i, _ in enumerate(np.unique(new_partition))
            ])

        new_domain = add_columns(domain, metas=[cluster_var])
        new_table = self.data.transform(new_domain)
        new_table.get_column_view(cluster_var)[0][:] = new_partition

        summary = len(new_table) if new_table else self.info.NoOutput
        details = format_summary_details(new_table) if new_table else ""
        self.info.set_output_summary(summary, details)

        self.Outputs.annotated_data.send(new_table)

        if Network is not None:
            n_edges = self.graph.number_of_edges()
            edges = sp.coo_matrix(
                (np.ones(n_edges), np.array(self.graph.edges()).T),
                shape=(n_edges, n_edges))
            graph = Network(new_table, edges)
            self.Outputs.graph.send(graph)
Exemple #4
0
    def set_matrix(self, data):
        """Build a network from a distance matrix and send it.

        An edge (i, j) is kept unless some other point ``k`` is closer to
        both ``i`` and ``j`` than they are to each other, i.e.
        ``data[i][k] < data[i][j]`` and ``data[j][k] < data[i][j]``. Pairs
        with ``i == j`` are never rejected, so every point also gets a
        self-edge weighted ``data[i][i]``. Edge weights are the distances.

        The network and the unchanged distances are sent on the outputs; with
        ``data`` set to ``None`` both outputs are cleared instead.
        """
        if data is None:
            self.infoa.setText(
                "No data on input yet, waiting to get something.")
            self.Outputs.network.send(None)
            self.Outputs.distances.send(None)
            return

        nb_data = len(data)
        row_data = []  # first index of the points for each edge
        col_data = []  # second index of the points for each edge
        data_data = []  # weight of each edge

        for i in range(nb_data):
            for j in range(nb_data):
                d_ij = data[i][j]
                # A third point closer to both ends rules the edge out.
                blocked = i != j and any(
                    k != i and data[i][k] < d_ij and data[j][k] < d_ij
                    for k in range(nb_data))
                if not blocked:
                    row_data.append(i)
                    col_data.append(j)
                    data_data.append(d_ij)

        row = np.array(row_data)
        col = np.array(col_data)
        new_data = np.array(data_data)

        # Sparse adjacency matrix used to create the Network.
        new_network = sp.csr_matrix((new_data, (row, col)),
                                    shape=(nb_data, nb_data))

        # Table with one row per point of the network.
        items = Table(Domain([], metas=[StringVariable('label')]),
                      [[i] for i in range(nb_data)])

        # An empty matrix has no nodes; avoid dividing by zero.
        average = len(row_data) / nb_data if nb_data else 0.0
        self.infoa.setText("Average edges per nodes : " + str(average))

        # Send results
        self.Outputs.network.send(Network(items, new_network))
        self.Outputs.distances.send(data)
    def generateGraph(self, N_changed=False):
        """Rebuild the network from the distance matrix and refresh the UI.

        Pairs whose distance is at most ``self.spinUpperThreshold`` are
        connected; ``self.include_knn`` widens the edge mask. Depending on
        ``self.node_selection`` the result is reduced to components with at
        least ``self.excludeLimit`` nodes or to the largest component. If the
        histogram-based estimate exceeds 200000 edges, no graph is built and
        an error is shown. The info labels are updated, the signals are sent
        and the histogram region is synchronised with the threshold.

        Args:
            N_changed: when true, node selection is switched to
                ``NodeSelection.COMPONENTS`` before the graph is built.
        """
        self.Error.clear()
        self.Warning.clear()

        if N_changed:
            self.node_selection = NodeSelection.COMPONENTS

        if self.matrix is None:
            if hasattr(self, "infoa"):
                self.infoa.setText("No data loaded.")
            if hasattr(self, "infob"):
                self.infob.setText("")
            if hasattr(self, "infoc"):
                self.infoc.setText("")
            self.pconnected = 0
            self.nedges = 0
            self.graph = None
            self.sendSignals()
            return

        nEdgesEstimate = 2 * sum(y for x, y in zip(self.histogram.xData, self.histogram.yData)
                                 if x <= self.spinUpperThreshold)

        if nEdgesEstimate > 200000:
            graph = None
            self.Error.number_of_edges(nEdgesEstimate)
        else:
            items = None
            matrix = self.matrix
            if matrix is not None and matrix.row_items is not None:
                row_items = self.matrix.row_items
                if isinstance(row_items, Table):
                    if self.matrix.axis == 1:
                        items = row_items
                    else:
                        items = [[v.name] for v in row_items.domain.attributes]
                else:
                    items = [[str(x)] for x in self.matrix.row_items]
            # items stays None when the matrix carries no row items; only a
            # real size mismatch deserves the warning.
            if items is not None and len(items) != self.matrix.shape[0]:
                self.Warning.invalid_number_of_items()
                items = None
            if items is None:
                items = list(range(self.matrix.shape[0]))
            if not isinstance(items, Table):
                items = Table(
                    Domain([], metas=[StringVariable('label')]),
                    items)

            # set the threshold
            # set edges where distance is lower than threshold
            self.Warning.kNN_too_large.clear()
            if self.kNN >= self.matrix.shape[0]:
                self.Warning.kNN_too_large(self.matrix.shape[0] - 1)

            mask = self.matrix <= self.spinUpperThreshold
            if self.include_knn:
                # NOTE(review): this ranks the boolean mask, not the
                # distances themselves - confirm that is intended.
                mask |= mask.argsort() < self.kNN
            weights = matrix[mask]
            if weights.size and self.edge_weights == EdgeWeights.INVERSE:
                # Inverse weighting: the largest selected distance maps to 0.
                weights = np.max(weights) - weights
            edges = sp.csr_matrix((weights, mask.nonzero()))
            graph = Network(items, edges)

            # Cleared first so a failure below does not leave an old graph.
            self.graph = None
            # exclude unconnected
            if self.node_selection != NodeSelection.ALL_NODES:
                n_components, components = csgraph.connected_components(edges)
                counts = np.bincount(components)
                if self.node_selection == NodeSelection.COMPONENTS:
                    ind = np.flatnonzero(counts >= self.excludeLimit)
                    mask = np.in1d(components, ind)
                else:
                    mask = components == np.argmax(counts)
                graph = graph.subgraph(mask)

        self.graph = graph
        if graph is None:
            self.pconnected = 0
            self.nedges = 0
        else:
            self.pconnected = self.graph.number_of_nodes()
            self.nedges = self.graph.number_of_edges()
        if hasattr(self, "infoa"):
            self.infoa.setText("Data items on input: %d" % self.matrix.shape[0])
        if hasattr(self, "infob"):
            self.infob.setText("Network nodes: %d (%3.1f%%)" % (self.pconnected,
                self.pconnected / float(self.matrix.shape[0]) * 100))
        if hasattr(self, "infoc"):
            self.infoc.setText("Network edges: %d (%.2f edges/node)" % (
                self.nedges, self.nedges / float(self.pconnected)
                if self.pconnected else 0))

        self.Warning.large_number_of_nodes.clear()
        if self.pconnected > 1000 or self.nedges > 2000:
            self.Warning.large_number_of_nodes()

        self.sendSignals()
        self.histogram.setRegion(0, self.spinUpperThreshold)
class OWNxEpsilonGraph(widget.OWWidget):
    """Widget that builds an epsilon-proximity network from distances.

    Every pair of items whose distance is at most ``epsilon`` is connected.
    The threshold is set through a spin box or by changing the region shown
    in the histogram of distances.
    """
    name = "Epsilon Proximity Graph Generator"
    description = ('Constructs Graph object using Epsilon algorithm. '
                   'Nodes from data table are connected only if the '
                   'distance between them is equal or less than a '
                   'given parameter (ε).')
    icon = "icons/EpsilonNetworkProximityGraph.svg"
    priority = 6440  # priority based on NetworkFromDistances widget

    class Inputs:
        distances = Input("Distances", DistMatrix)

    class Outputs:
        network = Output("Network", Network)
        data = Output("Data", Table)
        distances = Output("Distances", DistMatrix)

    resizing_enabled = False

    class Warning(widget.OWWidget.Warning):
        large_number_of_nodes = widget.Msg(
            'Large number of nodes/edges; performance will be hindered')
        # Shown by generateGraph when the matrix' row items and the matrix
        # itself disagree in size.
        invalid_number_of_items = widget.Msg(
            'Number of data items does not match the number of nodes')

    class Error(widget.OWWidget.Error):
        number_of_edges = widget.Msg(
            'Estimated number of edges is too high ({})')

    def __init__(self):
        super().__init__()

        self.epsilon = 0

        self.matrix = None
        self.graph = None
        self.graph_matrix = None

        self.histogram = Histogram(self)
        self.mainArea.layout().addWidget(self.histogram)
        self.mainArea.setMinimumWidth(500)
        self.mainArea.setMinimumHeight(100)
        self.addHistogramControls()

        # info
        boxInfo = gui.widgetBox(self.controlArea, box="Info")
        self.infoa = gui.widgetLabel(boxInfo, "No data loaded.")
        self.infob = gui.widgetLabel(boxInfo, '')
        self.infoc = gui.widgetLabel(boxInfo, '')

        gui.rubber(self.controlArea)

        self.resize(600, 400)

    def addHistogramControls(self):
        """Create the epsilon spin box and hook up the histogram region."""
        boxHisto = gui.widgetBox(self.controlArea, box="Algorithm controls")
        # Horizontal box kept for its effect on the layout; nothing is
        # placed into it here.
        gui.widgetBox(boxHisto,
                      None,
                      orientation="horizontal",
                      addSpace=False)
        self.spin_high = gui.doubleSpin(boxHisto,
                                        self,
                                        'epsilon',
                                        0,
                                        float('inf'),
                                        0.001,
                                        decimals=3,
                                        label='Epsilon',
                                        callback=self.changeUpperSpin,
                                        callbackOnReturn=1,
                                        keyboardTracking=False,
                                        controlWidth=60)
        self.histogram.region.sigRegionChangeFinished.connect(
            self.spinboxFromHistogramRegion)

    # Processing distance input
    @Inputs.distances
    def set_matrix(self, data):
        """Store the incoming distance matrix and rebuild the graph.

        A matrix with no elements is treated like a missing input.
        """
        if data is not None and not data.size:
            data = None
        self.matrix = data
        if data is None:
            self.histogram.setValues([])
            self.generateGraph()
            return

        if self.matrix.row_items is None:
            # Fall back to plain indices; this writes to the input matrix.
            self.matrix.row_items = list(range(self.matrix.shape[0]))

        # draw histogram
        self.matrix_values = values = sorted(self.matrix.flat)
        self.histogram.setValues(values)

        self.generateGraph()

    def generateGraph(self, N_changed=False):
        """Rebuild the network for the current epsilon and refresh the UI.

        Matrix cells with a value of at most ``self.epsilon`` become edges,
        weighted by the largest selected distance minus the cell's distance.
        If the histogram-based estimate exceeds 200000 edges, no graph is
        built and an error is shown. The info labels are updated, the
        signals are sent and the histogram region is set to ``[0, epsilon]``.

        Args:
            N_changed: when true, ``self.node_selection`` is set to
                ``NodeSelection.COMPONENTS`` first.
        """
        self.Error.clear()
        self.Warning.clear()

        if N_changed:
            # NOTE(review): NodeSelection has to come from module scope and
            # nothing in this class reads node_selection - confirm that this
            # branch is still needed.
            self.node_selection = NodeSelection.COMPONENTS

        if self.matrix is None:
            if hasattr(self, "infoa"):
                self.infoa.setText("No data loaded.")
            if hasattr(self, "infob"):
                self.infob.setText("")
            if hasattr(self, "infoc"):
                self.infoc.setText("")
            self.pconnected = 0
            self.nedges = 0
            self.graph = None
            # Drop the matrix of the previous input as well, otherwise
            # sendSignals would send it again.
            self.graph_matrix = None
            self.sendSignals()
            return

        nEdgesEstimate = 2 * sum(
            y for x, y in zip(self.histogram.xData, self.histogram.yData)
            if x <= self.epsilon)

        if nEdgesEstimate > 200000:
            self.graph = None
            self.Error.number_of_edges(nEdgesEstimate)
        else:
            items = None
            matrix = self.matrix
            if matrix is not None and matrix.row_items is not None:
                row_items = self.matrix.row_items
                if isinstance(row_items, Table):
                    if self.matrix.axis == 1:
                        items = row_items
                    else:
                        items = [[v.name] for v in row_items.domain.attributes]
                else:
                    items = [[str(x)] for x in self.matrix.row_items]
            # items stays None when the matrix carries no row items; only a
            # real size mismatch deserves the warning.
            if items is not None and len(items) != self.matrix.shape[0]:
                self.Warning.invalid_number_of_items()
                items = None
            if items is None:
                items = list(range(self.matrix.shape[0]))
            if not isinstance(items, Table):
                items = Table(Domain([], metas=[StringVariable('label')]),
                              items)

            # The mask is taken over the whole matrix, diagonal included.
            mask = self.matrix <= self.epsilon
            weights = matrix[mask]
            if weights.size:
                # The largest selected distance maps to weight 0.
                weights = np.max(weights) - weights
            edges = sp.csr_matrix((weights, mask.nonzero()))
            self.graph = Network(items, edges)

        self.graph_matrix = self.matrix

        if self.graph is None:
            self.pconnected = 0
            self.nedges = 0
        else:
            self.pconnected = self.graph.number_of_nodes()
            self.nedges = self.graph.number_of_edges()
        if hasattr(self, "infoa"):
            self.infoa.setText("Data items on input: %d" %
                               self.matrix.shape[0])
        if hasattr(self, "infob"):
            self.infob.setText("Network nodes: %d (%3.1f%%)" %
                               (self.pconnected, self.pconnected /
                                float(self.matrix.shape[0]) * 100))
        if hasattr(self, "infoc"):
            self.infoc.setText(
                "Network edges: %d (%.2f edges/node)" %
                (self.nedges, self.nedges /
                 float(self.pconnected) if self.pconnected else 0))

        self.Warning.large_number_of_nodes.clear()
        if self.pconnected > 1000 or self.nedges > 2000:
            self.Warning.large_number_of_nodes()

        self.sendSignals()
        self.histogram.setRegion(0, self.epsilon)

    # Outputs processing (has to be called if any modification on the network happens)
    def sendSignals(self):
        """Send the current graph, distance matrix and data."""
        self.Outputs.network.send(self.graph)
        self.Outputs.distances.send(self.graph_matrix)
        # NOTE(review): the "Data" output is declared as Table, but the
        # distance matrix is sent here - confirm what consumers expect.
        self.Outputs.data.send(self.matrix)

    def changeUpperSpin(self):
        """Clip epsilon to the histogram boundary and rebuild the graph."""
        if self.matrix is None:
            return
        self.epsilon = np.clip(self.epsilon, *self.histogram.boundary())
        # Percentage of the sorted matrix values positioned before epsilon.
        self.percentil = 100 * np.searchsorted(
            self.matrix_values, self.epsilon) / len(self.matrix_values)
        self.generateGraph()

    def spinboxFromHistogramRegion(self):
        """Take epsilon from the upper end of the histogram region."""
        _, self.epsilon = self.histogram.getRegion()
        self.changeUpperSpin()
    def generateGraph(self, N_changed=False):
        """Rebuild the network for the current epsilon and refresh the UI.

        Matrix cells with a value of at most ``self.epsilon`` become edges,
        weighted by the largest selected distance minus the cell's distance.
        If the histogram-based estimate exceeds 200000 edges, no graph is
        built and an error is shown. The info labels are updated, the
        signals are sent and the histogram region is set to ``[0, epsilon]``.

        Args:
            N_changed: when true, ``self.node_selection`` is set to
                ``NodeSelection.COMPONENTS`` first.
        """
        self.Error.clear()
        self.Warning.clear()

        if N_changed:
            self.node_selection = NodeSelection.COMPONENTS

        if self.matrix is None:
            if hasattr(self, "infoa"):
                self.infoa.setText("No data loaded.")
            if hasattr(self, "infob"):
                self.infob.setText("")
            if hasattr(self, "infoc"):
                self.infoc.setText("")
            self.pconnected = 0
            self.nedges = 0
            self.graph = None
            # Forget the matrix of the previous input too, so that it is not
            # handed out again once the input is gone.
            self.graph_matrix = None
            self.sendSignals()
            return

        nEdgesEstimate = 2 * sum(
            y for x, y in zip(self.histogram.xData, self.histogram.yData)
            if x <= self.epsilon)

        if nEdgesEstimate > 200000:
            self.graph = None
            self.Error.number_of_edges(nEdgesEstimate)
        else:
            items = None
            matrix = self.matrix
            if matrix is not None and matrix.row_items is not None:
                row_items = self.matrix.row_items
                if isinstance(row_items, Table):
                    if self.matrix.axis == 1:
                        items = row_items
                    else:
                        items = [[v.name] for v in row_items.domain.attributes]
                else:
                    items = [[str(x)] for x in self.matrix.row_items]
            # items stays None when the matrix carries no row items; only a
            # real size mismatch deserves the warning.
            if items is not None and len(items) != self.matrix.shape[0]:
                self.Warning.invalid_number_of_items()
                items = None
            if items is None:
                items = list(range(self.matrix.shape[0]))
            if not isinstance(items, Table):
                items = Table(Domain([], metas=[StringVariable('label')]),
                              items)

            # The mask is taken over the whole matrix, diagonal included.
            mask = self.matrix <= self.epsilon
            weights = matrix[mask]
            if weights.size:
                # The largest selected distance maps to weight 0.
                weights = np.max(weights) - weights
            edges = sp.csr_matrix((weights, mask.nonzero()))
            self.graph = Network(items, edges)

        self.graph_matrix = self.matrix

        if self.graph is None:
            self.pconnected = 0
            self.nedges = 0
        else:
            self.pconnected = self.graph.number_of_nodes()
            self.nedges = self.graph.number_of_edges()
        if hasattr(self, "infoa"):
            self.infoa.setText("Data items on input: %d" %
                               self.matrix.shape[0])
        if hasattr(self, "infob"):
            self.infob.setText("Network nodes: %d (%3.1f%%)" %
                               (self.pconnected, self.pconnected /
                                float(self.matrix.shape[0]) * 100))
        if hasattr(self, "infoc"):
            self.infoc.setText(
                "Network edges: %d (%.2f edges/node)" %
                (self.nedges, self.nedges /
                 float(self.pconnected) if self.pconnected else 0))

        self.Warning.large_number_of_nodes.clear()
        if self.pconnected > 1000 or self.nedges > 2000:
            self.Warning.large_number_of_nodes()

        self.sendSignals()
        self.histogram.setRegion(0, self.epsilon)
    def generateGraph(self):
        """Build a k-nearest-neighbour network from ``self.graphMatrix``.

        For every row the ``self.kNN`` smallest entries (ties broken by
        column index) become edges weighted by the distance. If the expected
        number of edges (rows times ``kNN``) exceeds 200000, no graph is
        built and an error is shown. The info labels are updated and the
        network is sent. Without a matrix the labels and counters are reset
        and nothing is sent.
        """
        self.Error.clear()
        self.Warning.clear()

        if self.graphMatrix is None:
            if hasattr(self, "infoa"):
                self.infoa.setText("No data loaded.")
            if hasattr(self, "infob"):
                self.infob.setText("")
            if hasattr(self, "infoc"):
                self.infoc.setText("")
            self.pconnected = 0
            self.nedges = 0
            self.graph = None
            return

        nEdges = len(self.graphMatrix) * self.kNN

        if nEdges > 200000:
            self.graph = None
            self.Error.number_of_edges(nEdges)
        else:
            items = None
            matrix = self.graphMatrix
            if matrix is not None and matrix.row_items is not None:
                row_items = self.graphMatrix.row_items
                if isinstance(row_items, Table):
                    if self.graphMatrix.axis == 1:
                        items = row_items
                    else:
                        items = [[v.name] for v in row_items.domain.attributes]
                else:
                    items = [[str(x)] for x in self.graphMatrix.row_items]
            # items stays None when the matrix carries no row items; only a
            # real size mismatch deserves the warning.
            if items is not None and len(items) != self.graphMatrix.shape[0]:
                self.Warning.invalid_number_of_items()
                items = None
            if items is None:
                items = list(range(self.graphMatrix.shape[0]))
            if not isinstance(items, Table):
                items = Table(
                    Domain([], metas=[StringVariable('label')]),
                    items)

            self.Warning.kNN_too_large.clear()
            if self.kNN >= self.graphMatrix.shape[0]:
                self.Warning.kNN_too_large(self.graphMatrix.shape[0] - 1)

            nb_data = len(matrix)
            row_index = []
            col_index = []
            distances_data = []

            # Slicing never runs past the end of a row, so a kNN larger than
            # the number of items no longer raises IndexError.
            neighbour_count = max(self.kNN, 0)
            for i in range(nb_data):
                # NOTE(review): column i itself is among the candidates -
                # confirm whether self-edges are wanted.
                distances = sorted(
                    (matrix[i][j], j) for j in range(nb_data))
                for distance, j in distances[:neighbour_count]:
                    row_index.append(i)
                    col_index.append(j)
                    distances_data.append(distance)

            row = np.array(row_index)
            col = np.array(col_index)
            weights = np.array(distances_data)

            # Explicit shape keeps the matrix square and lets an edgeless
            # result (kNN == 0 or an empty matrix) be constructed.
            edges = sp.csr_matrix((weights, (row, col)),
                                  shape=(nb_data, nb_data))
            graph = Network(items, edges)

            self.graph = graph

        if self.graph is None:
            self.pconnected = 0
            self.nedges = 0
        else:
            self.pconnected = self.graph.number_of_nodes()
            self.nedges = self.graph.number_of_edges()
        if hasattr(self, "infoa"):
            self.infoa.setText("Data items on input: %d" % self.graphMatrix.shape[0])
        if hasattr(self, "infob"):
            self.infob.setText("Network nodes: %d (%3.1f%%)" % (self.pconnected,
                self.pconnected / float(self.graphMatrix.shape[0]) * 100))
        if hasattr(self, "infoc"):
            self.infoc.setText("Network edges: %d (%.2f edges/node)" % (
                self.nedges, self.nedges / float(self.pconnected)
                if self.pconnected else 0))

        self.Warning.large_number_of_nodes.clear()
        if self.pconnected > 1000 or self.nedges > 2000:
            self.Warning.large_number_of_nodes()

        self.send_network()