Esempio n. 1
0
    def __init__(self):
        super().__init__()

        self.data = None
        self.data_normalized = None
        self.db = None
        self.model = None

        box = gui.widgetBox(self.controlArea, "Parameters")
        gui.spin(box, self, "min_samples", 1, 100, 1,
                 callback=self._min_samples_changed,
                 label="Core point neighbors")
        gui.doubleSpin(box, self, "eps", EPS_BOTTOM_LIMIT, 1000, 0.01,
                       callback=self._eps_changed,
                       label="Neighborhood distance")

        box = gui.widgetBox(self.controlArea, self.tr("Distance Metric"))
        gui.comboBox(box, self, "metric_idx",
                     items=list(zip(*self.METRICS))[0],
                     callback=self._metirc_changed)
        gui.checkBox(box, self, "normalize", "Normalize features",
                     callback=self._on_normalize_changed)

        gui.auto_apply(self.buttonsArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="Data items sorted by score",
            y_axis_label="Distance to the k-th nearest neighbour",
            callback=self._on_cut_changed
        )

        self.mainArea.layout().addWidget(self.plot)
Esempio n. 2
0
    def __init__(self):
        super().__init__()

        self.data = None
        self.data_normalized = None
        self.db = None
        self.model = None

        box = gui.widgetBox(self.controlArea, "参数")
        gui.spin(box, self, "min_samples", 1, 100, 1,
                 callback=self._min_samples_changed,
                 label="核心店邻近数(Core point neighbors)")
        gui.doubleSpin(box, self, "eps", EPS_BOTTOM_LIMIT, 1000, 0.01,
                       callback=self._eps_changed,
                       label="临近点距离")

        box = gui.widgetBox(self.controlArea, self.tr("距离度量"))
        gui.comboBox(box, self, "metric_idx",
                     items=list(zip(*self.METRICS))[0],
                     callback=self._metirc_changed)

        gui.auto_apply(self.controlArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="根据评分排序的数据",
            y_axis_label="到第 k 个最邻近点的距离",
            callback=self._on_cut_changed
        )

        self.mainArea.layout().addWidget(self.plot)
Esempio n. 3
0
    def __init__(self):
        super().__init__()
        self.data = None

        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self._init_projector()

        # Components Selection
        box = gui.vBox(self.controlArea, "Components Selection")
        form = QFormLayout()
        box.layout().addLayout(form)

        self.components_spin = gui.spin(
            box, self, "ncomponents", 1, MAX_COMPONENTS,
            callback=self._update_selection_component_spin,
            keyboardTracking=False
        )
        self.components_spin.setSpecialValueText("All")

        self.variance_spin = gui.spin(
            box, self, "variance_covered", 1, 100,
            callback=self._update_selection_variance_spin,
            keyboardTracking=False
        )
        self.variance_spin.setSuffix("%")

        form.addRow("Components:", self.components_spin)
        form.addRow("Variance covered:", self.variance_spin)

        # Options
        self.options_box = gui.vBox(self.controlArea, "Options")
        self.normalize_box = gui.checkBox(
            self.options_box, self, "normalize",
            "Normalize data", callback=self._update_normalize
        )

        self.maxp_spin = gui.spin(
            self.options_box, self, "maxp", 1, MAX_COMPONENTS,
            label="Show only first", callback=self._setup_plot,
            keyboardTracking=False
        )

        self.controlArea.layout().addStretch()

        gui.auto_commit(self.controlArea, self, "auto_commit", "Apply",
                        checkbox_label="Apply automatically")

        self.plot = SliderGraph(
            "Principal Components", "Proportion of variance",
            self._on_cut_changed)

        self.mainArea.layout().addWidget(self.plot)
        self._update_normalize()
Esempio n. 4
0
    def __init__(self):
        super().__init__()
        self.plot = SliderGraph(x_axis_label="label1",
                                y_axis_label="label2",
                                callback=lambda x: x)

        self.mainArea.layout().addWidget(self.plot)
Esempio n. 5
0
class OWPCA(widget.OWWidget):
    name = "PCA"
    description = "Principal component analysis with a scree-diagram."
    icon = "icons/PCA.svg"
    priority = 3050
    keywords = ["principal component analysis", "linear transformation"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        transformed_data = Output("Transformed Data",
                                  Table,
                                  replaces=["Transformed data"])
        data = Output("Data", Table, default=True)
        components = Output("Components", Table)
        pca = Output("PCA", PCA, dynamic=False)

    ncomponents = settings.Setting(2)
    variance_covered = settings.Setting(100)
    auto_commit = settings.Setting(True)
    normalize = settings.Setting(True)
    maxp = settings.Setting(20)
    axis_labels = settings.Setting(10)

    graph_name = "plot.plotItem"

    class Warning(widget.OWWidget.Warning):
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        no_features = widget.Msg("At least 1 feature is required")
        no_instances = widget.Msg("At least 1 data instance is required")

    def __init__(self):
        super().__init__()
        self.data = None

        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self._init_projector()

        # Components Selection
        box = gui.vBox(self.controlArea, "Components Selection")
        form = QFormLayout()
        box.layout().addLayout(form)

        self.components_spin = gui.spin(
            box,
            self,
            "ncomponents",
            1,
            MAX_COMPONENTS,
            callback=self._update_selection_component_spin,
            keyboardTracking=False)
        self.components_spin.setSpecialValueText("All")

        self.variance_spin = gui.spin(
            box,
            self,
            "variance_covered",
            1,
            100,
            callback=self._update_selection_variance_spin,
            keyboardTracking=False)
        self.variance_spin.setSuffix("%")

        form.addRow("Components:", self.components_spin)
        form.addRow("Explained variance:", self.variance_spin)

        # Options
        self.options_box = gui.vBox(self.controlArea, "Options")
        self.normalize_box = gui.checkBox(self.options_box,
                                          self,
                                          "normalize",
                                          "Normalize variables",
                                          callback=self._update_normalize)

        self.maxp_spin = gui.spin(self.options_box,
                                  self,
                                  "maxp",
                                  1,
                                  MAX_COMPONENTS,
                                  label="Show only first",
                                  callback=self._setup_plot,
                                  keyboardTracking=False)

        self.controlArea.layout().addStretch()

        gui.auto_apply(self.controlArea, self, "auto_commit")

        self.plot = SliderGraph("Principal Components",
                                "Proportion of variance", self._on_cut_changed)

        self.mainArea.layout().addWidget(self.plot)
        self._update_normalize()

    @Inputs.data
    def set_data(self, data):
        self.clear_messages()
        self.clear()
        self.information()
        self.data = None
        if not data:
            self.clear_outputs()
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information("Data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(2000, partial=True)
                data = Table(data_sample)
        if isinstance(data, Table):
            if not data.domain.attributes:
                self.Error.no_features()
                self.clear_outputs()
                return
            if not data:
                self.Error.no_instances()
                self.clear_outputs()
                return

        self._init_projector()

        self.data = data
        self.fit()

    def fit(self):
        self.clear()
        self.Warning.trivial_components.clear()
        if self.data is None:
            return

        data = self.data

        if self.normalize:
            self._pca_projector.preprocessors = \
                self._pca_preprocessors + [preprocess.Normalize(center=False)]
        else:
            self._pca_projector.preprocessors = self._pca_preprocessors

        if not isinstance(data, SqlTable):
            pca = self._pca_projector(data)
            variance_ratio = pca.explained_variance_ratio_
            cumulative = numpy.cumsum(variance_ratio)

            if numpy.isfinite(cumulative[-1]):
                self.components_spin.setRange(0, len(cumulative))
                self._pca = pca
                self._variance_ratio = variance_ratio
                self._cumulative = cumulative
                self._setup_plot()
            else:
                self.Warning.trivial_components()

            self.unconditional_commit()

    def clear(self):
        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self.plot.clear_plot()

    def clear_outputs(self):
        self.Outputs.transformed_data.send(None)
        self.Outputs.data.send(None)
        self.Outputs.components.send(None)
        self.Outputs.pca.send(self._pca_projector)

    def _setup_plot(self):
        if self._pca is None:
            self.plot.clear_plot()
            return

        explained_ratio = self._variance_ratio
        explained = self._cumulative
        cutpos = self._nselected_components()
        p = min(len(self._variance_ratio), self.maxp)

        self.plot.update(numpy.arange(1, p + 1),
                         [explained_ratio[:p], explained[:p]],
                         [Qt.red, Qt.darkYellow],
                         cutpoint_x=cutpos,
                         names=LINE_NAMES)

        self._update_axis()

    def _on_cut_changed(self, components):
        if components == self.ncomponents \
                or self.ncomponents == 0 \
                or self._pca is not None \
                and components == len(self._variance_ratio):
            return

        self.ncomponents = components
        if self._pca is not None:
            var = self._cumulative[components - 1]
            if numpy.isfinite(var):
                self.variance_covered = int(var * 100)

        self._invalidate_selection()

    def _update_selection_component_spin(self):
        # cut changed by "ncomponents" spin.
        if self._pca is None:
            self._invalidate_selection()
            return

        if self.ncomponents == 0:
            # Special "All" value
            cut = len(self._variance_ratio)
        else:
            cut = self.ncomponents

        var = self._cumulative[cut - 1]
        if numpy.isfinite(var):
            self.variance_covered = int(var * 100)

        self.plot.set_cut_point(cut)
        self._invalidate_selection()

    def _update_selection_variance_spin(self):
        # cut changed by "max variance" spin.
        if self._pca is None:
            return

        cut = numpy.searchsorted(self._cumulative,
                                 self.variance_covered / 100.0) + 1
        cut = min(cut, len(self._cumulative))
        self.ncomponents = cut
        self.plot.set_cut_point(cut)
        self._invalidate_selection()

    def _update_normalize(self):
        self.fit()
        if self.data is None:
            self._invalidate_selection()

    def _init_projector(self):
        self._pca_projector = PCA(n_components=MAX_COMPONENTS, random_state=0)
        self._pca_projector.component = self.ncomponents
        self._pca_preprocessors = PCA.preprocessors

    def _nselected_components(self):
        """Return the number of selected components."""
        if self._pca is None:
            return 0

        if self.ncomponents == 0:
            # Special "All" value
            max_comp = len(self._variance_ratio)
        else:
            max_comp = self.ncomponents

        var_max = self._cumulative[max_comp - 1]
        if var_max != numpy.floor(self.variance_covered / 100.0):
            cut = max_comp
            assert numpy.isfinite(var_max)
            self.variance_covered = int(var_max * 100)
        else:
            self.ncomponents = cut = numpy.searchsorted(
                self._cumulative, self.variance_covered / 100.0) + 1
        return cut

    def _invalidate_selection(self):
        self.commit()

    def _update_axis(self):
        p = min(len(self._variance_ratio), self.maxp)
        axis = self.plot.getAxis("bottom")
        d = max((p - 1) // (self.axis_labels - 1), 1)
        axis.setTicks([[(i, str(i)) for i in range(1, p + 1, d)]])

    def commit(self):
        transformed = data = components = None
        if self._pca is not None:
            if self._transformed is None:
                # Compute the full transform (MAX_COMPONENTS components) once.
                self._transformed = self._pca(self.data)
            transformed = self._transformed

            domain = Domain(transformed.domain.attributes[:self.ncomponents],
                            self.data.domain.class_vars,
                            self.data.domain.metas)
            transformed = transformed.from_table(domain, transformed)
            # prevent caching new features by defining compute_value
            proposed = [a.name for a in self._pca.orig_domain.attributes]
            meta_name = get_unique_names(proposed, 'components')
            dom = Domain([
                ContinuousVariable(name, compute_value=lambda _: None)
                for name in proposed
            ],
                         metas=[StringVariable(name=meta_name)])
            metas = numpy.array(
                [['PC{}'.format(i + 1) for i in range(self.ncomponents)]],
                dtype=object).T
            components = Table(dom,
                               self._pca.components_[:self.ncomponents],
                               metas=metas)
            components.name = 'components'

            data_dom = Domain(self.data.domain.attributes,
                              self.data.domain.class_vars,
                              self.data.domain.metas + domain.attributes)
            data = Table.from_numpy(data_dom,
                                    self.data.X,
                                    self.data.Y,
                                    numpy.hstack(
                                        (self.data.metas, transformed.X)),
                                    ids=self.data.ids)

        self._pca_projector.component = self.ncomponents
        self.Outputs.transformed_data.send(transformed)
        self.Outputs.components.send(components)
        self.Outputs.data.send(data)
        self.Outputs.pca.send(self._pca_projector)

    def send_report(self):
        if self.data is None:
            return
        self.report_items(
            (("Normalize data", str(self.normalize)), ("Selected components",
                                                       self.ncomponents),
             ("Explained variance", "{:.3f} %".format(self.variance_covered))))
        self.report_plot()

    @classmethod
    def migrate_settings(cls, settings, version):
        if "variance_covered" in settings:
            # Due to the error in gh-1896 the variance_covered was persisted
            # as a NaN value, causing a TypeError in the widgets `__init__`.
            vc = settings["variance_covered"]
            if isinstance(vc, numbers.Real):
                if numpy.isfinite(vc):
                    vc = int(vc)
                else:
                    vc = 100
                settings["variance_covered"] = vc
        if settings.get("ncomponents", 0) > MAX_COMPONENTS:
            settings["ncomponents"] = MAX_COMPONENTS

        # Remove old `decomposition_idx` when SVD was still included
        settings.pop("decomposition_idx", None)

        # Remove RemotePCA settings
        settings.pop("batch_size", None)
        settings.pop("address", None)
        settings.pop("auto_update", None)
Esempio n. 6
0
class OWDBSCAN(widget.OWWidget):
    name = "DBSCAN"
    description = "Density-based spatial clustering."
    icon = "icons/DBSCAN.svg"
    priority = 2150

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    class Error(widget.OWWidget.Error):
        not_enough_instances = Msg("Not enough unique data instances. "
                                   "At least two are required.")

    METRICS = [
        ("Euclidean", "euclidean"),
        ("Manhattan", "cityblock"),
        ("Cosine", "cosine")
    ]

    min_samples = Setting(4)
    eps = Setting(0.5)
    metric_idx = Setting(0)
    normalize = Setting(True)
    auto_commit = Setting(True)
    k_distances = None
    cut_point = None

    def __init__(self):
        super().__init__()

        self.data = None
        self.data_normalized = None
        self.db = None
        self.model = None

        box = gui.widgetBox(self.controlArea, "Parameters")
        gui.spin(box, self, "min_samples", 1, 100, 1,
                 callback=self._min_samples_changed,
                 label="Core point neighbors")
        gui.doubleSpin(box, self, "eps", EPS_BOTTOM_LIMIT, 1000, 0.01,
                       callback=self._eps_changed,
                       label="Neighborhood distance")

        box = gui.widgetBox(self.controlArea, self.tr("Distance Metric"))
        gui.comboBox(box, self, "metric_idx",
                     items=list(zip(*self.METRICS))[0],
                     callback=self._metirc_changed)
        gui.checkBox(box, self, "normalize", "Normalize features",
                     callback=self._on_normalize_changed)

        gui.auto_apply(self.buttonsArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="Data items sorted by score",
            y_axis_label="Distance to the k-th nearest neighbour",
            callback=self._on_cut_changed
        )

        self.mainArea.layout().addWidget(self.plot)

    def check_data_size(self, data):
        if data is None:
            return False
        if len(data) < 2:
            self.Error.not_enough_instances()
            return False
        return True

    def commit(self):
        self.cluster()

    def cluster(self):
        if not self.check_data_size(self.data):
            return
        self.model = DBSCAN(
            eps=self.eps,
            min_samples=self.min_samples,
            metric=self.METRICS[self.metric_idx][1]
        ).get_model(self.data_normalized)
        self.send_data()

    def _compute_and_plot(self, cut_point=None):
        self._compute_kdistances()
        if cut_point is None:
            self._compute_cut_point()
        self._plot_graph()

    def _plot_graph(self):
        nonzero = np.sum(self.k_distances > EPS_BOTTOM_LIMIT)
        self.plot.update(np.arange(len(self.k_distances)),
                         [self.k_distances],
                         colors=[QColor('red')],
                         cutpoint_x=self.cut_point,
                         selection_limit=(0, nonzero - 1))

    def _compute_kdistances(self):
        self.k_distances = get_kth_distances(
            self.data_normalized, metric=self.METRICS[self.metric_idx][1],
            k=self.min_samples
        )

    def _compute_cut_point(self):
        self.cut_point = int(DEFAULT_CUT_POINT * len(self.k_distances))
        self.eps = self.k_distances[self.cut_point]

        mask = self.k_distances >= EPS_BOTTOM_LIMIT
        if self.eps < EPS_BOTTOM_LIMIT and sum(mask):
            self.eps = np.min(self.k_distances[mask])
            self.cut_point = self._find_nearest_dist(self.eps)

    @Inputs.data
    def set_data(self, data):
        self.Error.clear()
        if not self.check_data_size(data):
            data = None
        self.data = self.data_normalized = data
        if self.data is None:
            self.Outputs.annotated_data.send(None)
            self.plot.clear_plot()
            return

        if self.data is None:
            return

        self._preprocess_data()

        self._compute_and_plot()
        self.unconditional_commit()

    def _preprocess_data(self):
        self.data_normalized = self.data
        for pp in PREPROCESSORS:
            if isinstance(pp, Normalize) and not self.normalize:
                continue
            self.data_normalized = pp(self.data_normalized)

    def send_data(self):
        model = self.model

        clusters = [c if c >= 0 else np.nan for c in model.labels]
        k = len(set(clusters) - {np.nan})
        clusters = np.array(clusters)
        core_samples = set(model.projector.core_sample_indices_)
        in_core = np.array([1 if (i in core_samples) else 0
                            for i in range(len(self.data))])

        domain = self.data.domain
        attributes, classes = domain.attributes, domain.class_vars
        meta_attrs = domain.metas
        names = [var.name for var in chain(attributes, classes, meta_attrs) if var]

        u_clust_var = get_unique_names(names, "Cluster")
        clust_var = DiscreteVariable(
            u_clust_var, values=["C%d" % (x + 1) for x in range(k)])

        u_in_core = get_unique_names(names + [u_clust_var], "DBSCAN Core")
        in_core_var = DiscreteVariable(u_in_core, values=("0", "1"))

        new_table = self.data.add_column(clust_var, clusters, to_metas=True)
        new_table = new_table.add_column(in_core_var, in_core, to_metas=True)

        self.Outputs.annotated_data.send(new_table)

    def _invalidate(self):
        self.commit()

    def _find_nearest_dist(self, value):
        array = np.asarray(self.k_distances)
        idx = (np.abs(array - value)).argmin()
        return idx

    def _eps_changed(self):
        # find the closest value to eps
        if self.data is None:
            return
        self.cut_point = self._find_nearest_dist(self.eps)
        self.plot.set_cut_point(self.cut_point)
        self._invalidate()

    def _metirc_changed(self):
        if self.data is not None:
            self._compute_and_plot()
            self._invalidate()

    def _on_cut_changed(self, value):
        # cut changed by means of a cut line over the scree plot.
        self.cut_point = value
        self.eps = self.k_distances[value]

        self.commit()

    def _min_samples_changed(self):
        if self.data is None:
            return
        self._compute_and_plot(cut_point=self.cut_point)
        self._invalidate()

    def _on_normalize_changed(self):
        if not self.data:
            return
        self._preprocess_data()
        self._compute_and_plot()
        self._invalidate()
Esempio n. 7
0
    def __init__(self):
        super().__init__()
        self.data = None

        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self._init_projector()

        # Components Selection
        form = QFormLayout()
        box = gui.widgetBox(self.controlArea,
                            "Components Selection",
                            orientation=form)

        self.components_spin = gui.spin(
            box,
            self,
            "ncomponents",
            1,
            MAX_COMPONENTS,
            callback=self._update_selection_component_spin,
            keyboardTracking=False,
            addToLayout=False)
        self.components_spin.setSpecialValueText("All")

        self.variance_spin = gui.spin(
            box,
            self,
            "variance_covered",
            1,
            100,
            callback=self._update_selection_variance_spin,
            keyboardTracking=False,
            addToLayout=False)
        self.variance_spin.setSuffix("%")

        form.addRow("Components:", self.components_spin)
        form.addRow("Explained variance:", self.variance_spin)

        # Options
        self.options_box = gui.vBox(self.controlArea, "Options")
        self.normalize_box = gui.checkBox(self.options_box,
                                          self,
                                          "normalize",
                                          "Normalize variables",
                                          callback=self._update_normalize,
                                          attribute=Qt.WA_LayoutUsesWidgetRect)

        self.maxp_spin = gui.spin(self.options_box,
                                  self,
                                  "maxp",
                                  1,
                                  MAX_COMPONENTS,
                                  label="Show only first",
                                  callback=self._setup_plot,
                                  keyboardTracking=False)

        gui.rubber(self.controlArea)

        gui.auto_apply(self.buttonsArea, self, "auto_commit")

        self.plot = SliderGraph("Principal Components",
                                "Proportion of variance", self._on_cut_changed)

        self.mainArea.layout().addWidget(self.plot)
        self._update_normalize()
Esempio n. 8
0
class OWDBSCAN(widget.OWWidget):
    name = "DBSCAN"
    description = "基于密度的空间聚类."
    icon = "icons/DBSCAN.svg"
    priority = 2150

    class Inputs:
        data = Input("数据(Data)", Table, replaces=['Data'])

    class Outputs:
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_Chinese_NAME, Table, replaces=['Data'])

    class Error(widget.OWWidget.Error):
        not_enough_instances = Msg("Not enough unique data instances. "
                                   "At least two are required.")

    METRICS = [
        ("欧几里得", "euclidean"),
        ("曼哈顿", "cityblock"),
        ("余弦", "cosine")
    ]

    min_samples = Setting(4)
    eps = Setting(0.5)
    metric_idx = Setting(0)
    auto_commit = Setting(True)
    k_distances = None
    cut_point = None

    def __init__(self):
        super().__init__()

        self.data = None
        self.data_normalized = None
        self.db = None
        self.model = None

        box = gui.widgetBox(self.controlArea, "参数")
        gui.spin(box, self, "min_samples", 1, 100, 1,
                 callback=self._min_samples_changed,
                 label="核心店邻近数(Core point neighbors)")
        gui.doubleSpin(box, self, "eps", EPS_BOTTOM_LIMIT, 1000, 0.01,
                       callback=self._eps_changed,
                       label="临近点距离")

        box = gui.widgetBox(self.controlArea, self.tr("距离度量"))
        gui.comboBox(box, self, "metric_idx",
                     items=list(zip(*self.METRICS))[0],
                     callback=self._metirc_changed)

        gui.auto_apply(self.controlArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="根据评分排序的数据",
            y_axis_label="到第 k 个最邻近点的距离",
            callback=self._on_cut_changed
        )

        self.mainArea.layout().addWidget(self.plot)

    def check_data_size(self, data):
        if data is None:
            return False
        if len(data) < 2:
            self.Error.not_enough_instances()
            return False
        return True

    def commit(self):
        self.cluster()

    def cluster(self):
        if not self.check_data_size(self.data):
            return
        self.model = DBSCAN(
            eps=self.eps,
            min_samples=self.min_samples,
            metric=self.METRICS[self.metric_idx][1]
        ).get_model(self.data_normalized)
        self.send_data()

    def _compute_and_plot(self, cut_point=None):
        self._compute_kdistances()
        if cut_point is None:
            self._compute_cut_point()
        self._plot_graph()

    def _plot_graph(self):
        nonzero = np.sum(self.k_distances > EPS_BOTTOM_LIMIT)
        self.plot.update(np.arange(len(self.k_distances)),
                         [self.k_distances],
                         colors=[QColor('red')],
                         cutpoint_x=self.cut_point,
                         selection_limit=(0, nonzero - 1))

    def _compute_kdistances(self):
        self.k_distances = get_kth_distances(
            self.data_normalized, metric=self.METRICS[self.metric_idx][1],
            k=self.min_samples
        )

    def _compute_cut_point(self):
        self.cut_point = int(DEFAULT_CUT_POINT * len(self.k_distances))
        self.eps = self.k_distances[self.cut_point]

        if self.eps < EPS_BOTTOM_LIMIT:
            self.eps = np.min(
                self.k_distances[self.k_distances >= EPS_BOTTOM_LIMIT])
            self.cut_point = self._find_nearest_dist(self.eps)

    @Inputs.data
    def set_data(self, data):
        self.Error.clear()
        if not self.check_data_size(data):
            data = None
        self.data = self.data_normalized = data
        if self.data is None:
            self.Outputs.annotated_data.send(None)
            self.plot.clear_plot()
            return

        if self.data is None:
            return

        # preprocess data
        for pp in PREPROCESSORS:
            self.data_normalized = pp(self.data_normalized)

        self._compute_and_plot()
        self.unconditional_commit()

    def send_data(self):
        model = self.model

        clusters = [c if c >= 0 else np.nan for c in model.labels]
        k = len(set(clusters) - {np.nan})
        clusters = np.array(clusters).reshape(len(self.data), 1)
        core_samples = set(model.projector.core_sample_indices_)
        in_core = np.array([1 if (i in core_samples) else 0
                            for i in range(len(self.data))])
        in_core = in_core.reshape(len(self.data), 1)

        clust_var = DiscreteVariable(
            "Cluster", values=["C%d" % (x + 1) for x in range(k)])
        in_core_var = DiscreteVariable("DBSCAN Core", values=["0", "1"])

        domain = self.data.domain
        attributes, classes = domain.attributes, domain.class_vars
        meta_attrs = domain.metas
        x, y, metas = self.data.X, self.data.Y, self.data.metas

        meta_attrs += (clust_var, )
        metas = np.hstack((metas, clusters))
        meta_attrs += (in_core_var, )
        metas = np.hstack((metas, in_core))

        domain = Domain(attributes, classes, meta_attrs)
        new_table = Table(domain, x, y, metas, self.data.W)

        self.Outputs.annotated_data.send(new_table)

    def _invalidate(self):
        self.commit()

    def _find_nearest_dist(self, value):
        array = np.asarray(self.k_distances)
        idx = (np.abs(array - value)).argmin()
        return idx

    def _eps_changed(self):
        # find the closest value to eps
        if self.data is None:
            return
        self.cut_point = self._find_nearest_dist(self.eps)
        self.plot.set_cut_point(self.cut_point)
        self._invalidate()

    def _metirc_changed(self):
        if self.data is not None:
            self._compute_and_plot()
            self._invalidate()

    def _on_cut_changed(self, value):
        # cut changed by means of a cut line over the scree plot.
        self.cut_point = value
        self.eps = self.k_distances[value]

        self.commit()

    def _min_samples_changed(self):
        if self.data is None:
            return
        self._compute_and_plot(cut_point=self.cut_point)
        self._invalidate()
Esempio n. 9
0
    def __init__(self):
        super().__init__()
        self.data = None

        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self._init_projector()

        # Components Selection
        box = gui.vBox(self.controlArea, "成分选择")
        form = QFormLayout()
        box.layout().addLayout(form)

        self.components_spin = gui.spin(
            box,
            self,
            "ncomponents",
            1,
            MAX_COMPONENTS,
            callback=self._update_selection_component_spin,
            keyboardTracking=False)
        self.components_spin.setSpecialValueText("All")

        self.variance_spin = gui.spin(
            box,
            self,
            "variance_covered",
            1,
            100,
            callback=self._update_selection_variance_spin,
            keyboardTracking=False)
        self.variance_spin.setSuffix("%")

        form.addRow("成分:", self.components_spin)
        form.addRow("覆盖的贡献(Variance covered):", self.variance_spin)

        # Options
        self.options_box = gui.vBox(self.controlArea, "选项")
        self.normalize_box = gui.checkBox(self.options_box,
                                          self,
                                          "normalize",
                                          "归一化数据",
                                          callback=self._update_normalize)

        self.maxp_spin = gui.spin(self.options_box,
                                  self,
                                  "maxp",
                                  1,
                                  MAX_COMPONENTS,
                                  label="只显示前...个",
                                  callback=self._setup_plot,
                                  keyboardTracking=False)

        self.controlArea.layout().addStretch()

        gui.auto_apply(self.controlArea, self, "auto_commit")

        self.plot = SliderGraph("主成分", "贡献率(Proportion of variance)",
                                self._on_cut_changed)

        self.mainArea.layout().addWidget(self.plot)
        self._update_normalize()
Esempio n. 10
0
class OPTICS_w(widget.OWWidget):
    name = "OPTICS"
    description = "dynamicaly clustering unlabeled data by density"
    icon = "icons/OPTICS.svg"
    priority = 20

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        annotated_data = Output("Data", Table)

    class Error(widget.OWWidget.Error):
        not_enough_instances = Msg("Not enough unique data instances. "
                                   "At least two are required.")

    minimum_samples = settings.Setting(5)
    metric_methode = settings.Setting(11)
    xi_value = settings.Setting(0.05)
    algorithm_base = settings.Setting(0)
    auto_commit = settings.Setting(False)
    cut_point = xi_value
    want_main_area = True
    

    def __init__(self):
        super().__init__()

        self.data = None
        self.dataset = None
        self.annotated_data = None

        # GUI
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.infoa = gui.widgetLabel(infobox, "No data on input yet, waiting to get something.")
        self.infob = gui.widgetLabel(infobox, "")
        self.infoc = gui.widgetLabel(infobox, "")
        self.infod = gui.widgetLabel(infobox, "")

        self.optionsBox = gui.widgetBox(self.controlArea, "OPTICS Options")
        gui.spin(
            self.optionsBox,
            self,
            "minimum_samples",
            minv=1,
            maxv=100,
            step=1,
            label="Core point neighbors ",
            callback=self._min_samples_changed
        )
        gui.comboBox(
            self.optionsBox,
            self,
            "metric_methode",
            orientation=Qt.Horizontal,
            label="Distance metric: ",
            items=[d[0] for d in OPTICS_METRICS],
            callback=self._metric_changed
        )
        gui.doubleSpin(
            self.optionsBox,
            self,
            "xi_value",
            minv=(0.000),
            maxv=(0.999),
            step=(0.001),
            label="Minimum steepness: ",
            callback=self._xi_changed
        )
        gui.comboBox(
            self.optionsBox,
            self,
            "algorithm_base",
            orientation=Qt.Horizontal,
            label="neighborhood algorithm: ",
            items=[d[0] for d in OPTICS_ALGORITHM],
            callback=self._algorithm_changed
        )
        self.optionsBox.setDisabled(True)
        
        gui.auto_apply(self.controlArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="Ordering of the points as processed by OPTICS",
            y_axis_label="Reachability distance (epsilon distance)",
            callback=self._on_changed
        )

        self.mainArea.layout().addWidget(self.plot)

    def check_data_size(self, data):
        if data is None:
            return False
        if len(data) < 2:
            self.Error.not_enough_instances()
            return False
        return True

    def normalizing(self,model):
        clusters = [c if c >= 0 else np.nan for c in model.labels_]
        k = len(set(clusters) - {np.nan})
        clusters = np.array(clusters).reshape(len(self.data), 1)

        clust_var = DiscreteVariable("Cluster", values=["C%d" % (x + 1) for x in range(k)])

        domain = self.data.domain
        attributes, classes = domain.attributes, domain.class_vars
        meta_attrs = domain.metas
        x, y, metas = self.data.X, self.data.Y, self.data.metas

        meta_attrs += (clust_var, )
        metas = np.hstack((metas, clusters))

        domain = Domain(attributes, classes, meta_attrs)
        new_table = Table(domain, x, y, metas, self.data.W)

        # self.Outputs.annotated_data.send(new_table)
        return new_table

    def commit(self):
        self.cluster()
        return

    def cluster(self):
        if not self.check_data_size(self.data):
            return

        model = OPTICS(min_samples=self.minimum_samples, 
                                   metric=OPTICS_METRICS[self.metric_methode][1],
                                   xi=self.xi_value,
                                   algorithm=OPTICS_ALGORITHM[self.algorithm_base][1],
                                   )
        model.fit(self.data.X)
        self._plot_graph(model)
        self.result_OPTICS = self.normalizing(model)
        self.send_data()

    def _plot_graph(self,model):
        reachability = model.reachability_[model.ordering_]
        space = np.arange(len(reachability))
        reachability[reachability == np.inf] = np.nanmax(reachability[reachability != np.inf])
        labels = model.labels_[model.ordering_]
        cluster_count = (len(np.unique(labels[labels[:]>=0])))
        self.infoc.setText("%d values in the cluster outcome" % cluster_count)
        noisy_counter = len(space[labels==-1])
        self.infod.setText("%d noisy samples in the leaf cluster" % noisy_counter)
        
        x_plot = space
        y_plot = reachability
        self.plot.clear_plot()

        colors = np.arange(150, (150+cluster_count))
        for klaster, color in zip(range(0, cluster_count), colors):
            Xk = space[labels == klaster]
            Rk = reachability[labels == klaster]
            self.plot.plot(Xk, Rk, pen=mkPen(intColor(color), width=2), antialias=True)
        self.plot.plot(x_plot[labels==-1], y_plot[labels==-1], pen=mkPen(QColor('black'), width=2), antialias=True)

    @Inputs.data
    def set_data(self, dataset):
        self.Error.clear()
        if not self.check_data_size(dataset):
            self.optionsBox.setDisabled(True)
            self.plot.clear_plot()
            self.infoa.setText(
                "No data on input yet, waiting to get something.")
            self.infob.setText('')
            self.infoc.setText('')
            self.infod.setText('')
            self.dataset = None
            self.annotated_data = None
            self.Outputs.annotated_data.send(None)
            return

        self.data = dataset
        self.optionsBox.setDisabled(False)
            
        self.numberOfInputInstances = len(self.data)
        self.infoa.setText("%d instances in input data set" % self.numberOfInputInstances)
        numOfclasses = len(self.data.domain.class_var.values)
        self.infob.setText("%d values in the categorical outcome" % numOfclasses)
        
        self.commit()

    def checkCommit(self):
        if self.commitOnChange:
            self.commit()

    def send_data(self):
        self.Outputs.annotated_data.send(self.result_OPTICS)

    def _min_samples_changed(self):
        if self.data is None:
            return
        self.commit()

    def _metric_changed(self):
        if self.data is None:
            return
        self.algorithm_base = 0
        self.commit()

    def _xi_changed(self):
        self.commit()

    def _algorithm_changed(self):
        if self.data is None:
            return

        if self.algorithm_base != 0:
            if OPTICS_METRICS[self.metric_methode][1] not in VALID_METRICS[OPTICS_ALGORITHM[self.algorithm_base][1]]:
                self.algorithm_base = 0

        self.commit()

    def _on_changed(self, value):
        self.cut_point = value
Esempio n. 11
0
    def __init__(self):
        super().__init__()

        self.data = None
        self.dataset = None
        self.annotated_data = None

        # GUI
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.infoa = gui.widgetLabel(infobox, "No data on input yet, waiting to get something.")
        self.infob = gui.widgetLabel(infobox, "")
        self.infoc = gui.widgetLabel(infobox, "")
        self.infod = gui.widgetLabel(infobox, "")

        self.optionsBox = gui.widgetBox(self.controlArea, "OPTICS Options")
        gui.spin(
            self.optionsBox,
            self,
            "minimum_samples",
            minv=1,
            maxv=100,
            step=1,
            label="Core point neighbors ",
            callback=self._min_samples_changed
        )
        gui.comboBox(
            self.optionsBox,
            self,
            "metric_methode",
            orientation=Qt.Horizontal,
            label="Distance metric: ",
            items=[d[0] for d in OPTICS_METRICS],
            callback=self._metric_changed
        )
        gui.doubleSpin(
            self.optionsBox,
            self,
            "xi_value",
            minv=(0.000),
            maxv=(0.999),
            step=(0.001),
            label="Minimum steepness: ",
            callback=self._xi_changed
        )
        gui.comboBox(
            self.optionsBox,
            self,
            "algorithm_base",
            orientation=Qt.Horizontal,
            label="neighborhood algorithm: ",
            items=[d[0] for d in OPTICS_ALGORITHM],
            callback=self._algorithm_changed
        )
        self.optionsBox.setDisabled(True)
        
        gui.auto_apply(self.controlArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="Ordering of the points as processed by OPTICS",
            y_axis_label="Reachability distance (epsilon distance)",
            callback=self._on_changed
        )

        self.mainArea.layout().addWidget(self.plot)