def __init__(self):
    """Create the DBSCAN controls and the k-distance plot.

    The control area receives, in order: the parameter spin boxes, the
    distance-metric box (with the normalization check box) and a stretch.
    The auto-apply button goes to the buttons area and the slider graph
    to the main area.
    """
    super().__init__()

    # Runtime state; everything is empty until data is received.
    self.model = None
    self.db = None
    self.data_normalized = None
    self.data = None

    # Clustering parameters.
    parameters_box = gui.widgetBox(self.controlArea, "Parameters")
    gui.spin(
        parameters_box, self, "min_samples", 1, 100, 1,
        label="Core point neighbors",
        callback=self._min_samples_changed,
    )
    gui.doubleSpin(
        parameters_box, self, "eps", EPS_BOTTOM_LIMIT, 1000, 0.01,
        label="Neighborhood distance",
        callback=self._eps_changed,
    )

    # Distance metric; the combo shows the first element of each pair.
    metric_labels = list(zip(*self.METRICS))[0]
    metric_box = gui.widgetBox(self.controlArea, self.tr("Distance Metric"))
    gui.comboBox(
        metric_box, self, "metric_idx",
        callback=self._metirc_changed,
        items=metric_labels,
    )
    gui.checkBox(
        metric_box, self, "normalize", "Normalize features",
        callback=self._on_normalize_changed,
    )

    gui.auto_apply(self.buttonsArea, self, "auto_commit")
    gui.rubber(self.controlArea)
    control_layout = self.controlArea.layout()
    control_layout.addStretch()

    # Plot whose cut line reports back through _on_cut_changed.
    graph = SliderGraph(
        callback=self._on_cut_changed,
        x_axis_label="Data items sorted by score",
        y_axis_label="Distance to the k-th nearest neighbour",
    )
    self.plot = graph
    self.mainArea.layout().addWidget(graph)
def __init__(self):
    """Create the DBSCAN controls (Chinese labels) and the k-distance plot.

    The control area receives the parameter spin boxes, the
    distance-metric combo box, the auto-apply button and a stretch; the
    slider graph is placed in the main area.
    """
    super().__init__()

    # Runtime state; everything is empty until data is received.
    self.model = None
    self.db = None
    self.data_normalized = None
    self.data = None

    # Clustering parameters.
    # NOTE(review): "核心店" in the label below looks like a typo for
    # "核心点" (core point) -- confirm before changing the UI text.
    parameters_box = gui.widgetBox(self.controlArea, "参数")
    gui.spin(
        parameters_box, self, "min_samples", 1, 100, 1,
        label="核心店邻近数(Core point neighbors)",
        callback=self._min_samples_changed,
    )
    gui.doubleSpin(
        parameters_box, self, "eps", EPS_BOTTOM_LIMIT, 1000, 0.01,
        label="临近点距离",
        callback=self._eps_changed,
    )

    # Distance metric; the combo shows the first element of each pair.
    metric_labels = list(zip(*self.METRICS))[0]
    metric_box = gui.widgetBox(self.controlArea, self.tr("距离度量"))
    gui.comboBox(
        metric_box, self, "metric_idx",
        callback=self._metirc_changed,
        items=metric_labels,
    )

    gui.auto_apply(self.controlArea, self, "auto_commit")
    gui.rubber(self.controlArea)
    control_layout = self.controlArea.layout()
    control_layout.addStretch()

    # Plot whose cut line reports back through _on_cut_changed.
    graph = SliderGraph(
        callback=self._on_cut_changed,
        x_axis_label="根据评分排序的数据",
        y_axis_label="到第 k 个最邻近点的距离",
    )
    self.plot = graph
    self.mainArea.layout().addWidget(graph)
def __init__(self):
    """Create the PCA controls and the scree plot.

    Builds the "Components Selection" form (component count and covered
    variance), the "Options" box, the auto-commit button and the slider
    graph, then calls ``_update_normalize`` once.
    """
    super().__init__()

    # Fitted-model state; filled in once data has been fitted.
    self._cumulative = None
    self._variance_ratio = None
    self._transformed = None
    self._pca = None
    self.data = None

    self._init_projector()

    # Components Selection
    selection_box = gui.vBox(self.controlArea, "Components Selection")
    form_layout = QFormLayout()
    selection_box.layout().addLayout(form_layout)

    components_spin = gui.spin(
        selection_box, self, "ncomponents", 1, MAX_COMPONENTS,
        keyboardTracking=False,
        callback=self._update_selection_component_spin,
    )
    components_spin.setSpecialValueText("All")
    self.components_spin = components_spin

    variance_spin = gui.spin(
        selection_box, self, "variance_covered", 1, 100,
        keyboardTracking=False,
        callback=self._update_selection_variance_spin,
    )
    variance_spin.setSuffix("%")
    self.variance_spin = variance_spin

    form_layout.addRow("Components:", components_spin)
    form_layout.addRow("Variance covered:", variance_spin)

    # Options
    options_box = gui.vBox(self.controlArea, "Options")
    self.options_box = options_box
    self.normalize_box = gui.checkBox(
        options_box, self, "normalize", "Normalize data",
        callback=self._update_normalize,
    )
    self.maxp_spin = gui.spin(
        options_box, self, "maxp", 1, MAX_COMPONENTS,
        keyboardTracking=False,
        callback=self._setup_plot,
        label="Show only first",
    )

    self.controlArea.layout().addStretch()

    gui.auto_commit(
        self.controlArea, self, "auto_commit", "Apply",
        checkbox_label="Apply automatically",
    )

    # Plot whose cut line reports back through _on_cut_changed.
    graph = SliderGraph(
        "Principal Components",
        "Proportion of variance",
        self._on_cut_changed,
    )
    self.plot = graph
    self.mainArea.layout().addWidget(graph)

    self._update_normalize()
def __init__(self):
    """Create a slider graph with placeholder labels in the main area."""
    super().__init__()
    # The callback simply returns whatever value it is given.
    graph = SliderGraph(
        callback=lambda x: x,
        y_axis_label="label2",
        x_axis_label="label1",
    )
    self.plot = graph
    main_layout = self.mainArea.layout()
    main_layout.addWidget(graph)
class OWPCA(widget.OWWidget):
    """Principal component analysis widget with a scree plot.

    The widget fits the PCA projector on the input table, plots the
    per-component and cumulative explained-variance ratios, and sends the
    transformed data, the input data extended with the components, the
    component loadings and the projector itself on its outputs.
    """

    name = "PCA"
    description = "Principal component analysis with a scree-diagram."
    icon = "icons/PCA.svg"
    priority = 3050
    keywords = ["principal component analysis", "linear transformation"]

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        transformed_data = Output("Transformed Data", Table,
                                  replaces=["Transformed data"])
        data = Output("Data", Table, default=True)
        components = Output("Components", Table)
        pca = Output("PCA", PCA, dynamic=False)

    # Number of selected components; 0 is the spin box's "All" value.
    ncomponents = settings.Setting(2)
    # Selected cumulative explained variance, in percent.
    variance_covered = settings.Setting(100)
    auto_commit = settings.Setting(True)
    normalize = settings.Setting(True)
    # Maximal number of components shown in the plot.
    maxp = settings.Setting(20)
    # Used by _update_axis to space the ticks on the bottom axis.
    axis_labels = settings.Setting(10)

    graph_name = "plot.plotItem"

    class Warning(widget.OWWidget.Warning):
        trivial_components = widget.Msg(
            "All components of the PCA are trivial (explain 0 variance). "
            "Input data is constant (or near constant).")

    class Error(widget.OWWidget.Error):
        no_features = widget.Msg("At least 1 feature is required")
        no_instances = widget.Msg("At least 1 data instance is required")

    def __init__(self):
        """Build the controls and the plot, then run an initial fit."""
        super().__init__()
        self.data = None

        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self._init_projector()

        # Components Selection
        box = gui.vBox(self.controlArea, "Components Selection")
        form = QFormLayout()
        box.layout().addLayout(form)

        self.components_spin = gui.spin(
            box, self, "ncomponents", 1, MAX_COMPONENTS,
            callback=self._update_selection_component_spin,
            keyboardTracking=False)
        self.components_spin.setSpecialValueText("All")

        self.variance_spin = gui.spin(
            box, self, "variance_covered", 1, 100,
            callback=self._update_selection_variance_spin,
            keyboardTracking=False)
        self.variance_spin.setSuffix("%")

        form.addRow("Components:", self.components_spin)
        form.addRow("Explained variance:", self.variance_spin)

        # Options
        self.options_box = gui.vBox(self.controlArea, "Options")
        self.normalize_box = gui.checkBox(
            self.options_box, self, "normalize", "Normalize variables",
            callback=self._update_normalize)

        self.maxp_spin = gui.spin(
            self.options_box, self, "maxp", 1, MAX_COMPONENTS,
            label="Show only first", callback=self._setup_plot,
            keyboardTracking=False)

        self.controlArea.layout().addStretch()

        gui.auto_apply(self.controlArea, self, "auto_commit")

        self.plot = SliderGraph(
            "Principal Components", "Proportion of variance",
            self._on_cut_changed)

        self.mainArea.layout().addWidget(self.plot)
        self._update_normalize()

    @Inputs.data
    def set_data(self, data):
        """Accept a new input table, validate it and refit the projection.

        ``SqlTable`` inputs are materialized first: tables whose
        approximate length is below ``AUTO_DL_LIMIT`` are converted whole,
        larger ones are sampled and an information message is shown.
        Tables without attributes or without rows raise the matching
        widget error, clear the outputs and leave ``self.data`` as None.
        """
        self.clear_messages()
        self.clear()
        self.information()
        self.data = None
        if not data:
            self.clear_outputs()
        if isinstance(data, SqlTable):
            if data.approx_len() < AUTO_DL_LIMIT:
                data = Table(data)
            else:
                self.information("Data has been sampled")
                data_sample = data.sample_time(1, no_cache=True)
                data_sample.download_data(2000, partial=True)
                data = Table(data_sample)
        if isinstance(data, Table):
            if not data.domain.attributes:
                self.Error.no_features()
                self.clear_outputs()
                return
            if not data:
                self.Error.no_instances()
                self.clear_outputs()
                return

        self._init_projector()

        self.data = data
        self.fit()

    def fit(self):
        """Fit the projector on ``self.data`` and refresh plot and outputs.

        Does nothing beyond clearing the cached results when there is no
        data.  When the cumulative explained variance is not finite the
        ``trivial_components`` warning is shown instead of a plot.
        """
        self.clear()
        self.Warning.trivial_components.clear()
        if self.data is None:
            return

        data = self.data

        if self.normalize:
            self._pca_projector.preprocessors = \
                self._pca_preprocessors + [preprocess.Normalize(center=False)]
        else:
            self._pca_projector.preprocessors = self._pca_preprocessors

        # NOTE(review): set_data converts SqlTable inputs to Table, so this
        # guard is presumably always true here; if it were false, `pca`
        # below would be undefined.
        if not isinstance(data, SqlTable):
            pca = self._pca_projector(data)
            variance_ratio = pca.explained_variance_ratio_
            cumulative = numpy.cumsum(variance_ratio)

            if numpy.isfinite(cumulative[-1]):
                # 0 is allowed so that the "All" special value is reachable.
                self.components_spin.setRange(0, len(cumulative))
                self._pca = pca
                self._variance_ratio = variance_ratio
                self._cumulative = cumulative
                self._setup_plot()
            else:
                self.Warning.trivial_components()

            self.unconditional_commit()

    def clear(self):
        """Drop the fitted model, its cached results and the plot."""
        self._pca = None
        self._transformed = None
        self._variance_ratio = None
        self._cumulative = None
        self.plot.clear_plot()

    def clear_outputs(self):
        """Send None on the table outputs and the current projector."""
        self.Outputs.transformed_data.send(None)
        self.Outputs.data.send(None)
        self.Outputs.components.send(None)
        self.Outputs.pca.send(self._pca_projector)

    def _setup_plot(self):
        """Redraw the variance curves for the first ``maxp`` components."""
        if self._pca is None:
            self.plot.clear_plot()
            return

        explained_ratio = self._variance_ratio
        explained = self._cumulative
        cutpos = self._nselected_components()
        p = min(len(self._variance_ratio), self.maxp)

        self.plot.update(
            numpy.arange(1, p + 1), [explained_ratio[:p], explained[:p]],
            [Qt.red, Qt.darkYellow], cutpoint_x=cutpos, names=LINE_NAMES)

        self._update_axis()

    def _on_cut_changed(self, components):
        """Handle a cut-point change reported by the plot.

        The change is ignored when it does not alter the selection, when
        the "All" value (0) is selected, or when the cut lands on the last
        component of a fitted model.
        """
        if components == self.ncomponents \
                or self.ncomponents == 0 \
                or (self._pca is not None
                    and components == len(self._variance_ratio)):
            return

        self.ncomponents = components
        if self._pca is not None:
            var = self._cumulative[components - 1]
            if numpy.isfinite(var):
                self.variance_covered = int(var * 100)

        self._invalidate_selection()

    def _update_selection_component_spin(self):
        """Handle a change of the "ncomponents" spin box."""
        # cut changed by "ncomponents" spin.
        if self._pca is None:
            self._invalidate_selection()
            return

        if self.ncomponents == 0:
            # Special "All" value
            cut = len(self._variance_ratio)
        else:
            cut = self.ncomponents

        var = self._cumulative[cut - 1]
        if numpy.isfinite(var):
            self.variance_covered = int(var * 100)

        self.plot.set_cut_point(cut)
        self._invalidate_selection()

    def _update_selection_variance_spin(self):
        """Handle a change of the "variance_covered" spin box."""
        # cut changed by "max variance" spin.
        if self._pca is None:
            return

        cut = numpy.searchsorted(self._cumulative,
                                 self.variance_covered / 100.0) + 1
        cut = min(cut, len(self._cumulative))
        self.ncomponents = cut
        self.plot.set_cut_point(cut)
        self._invalidate_selection()

    def _update_normalize(self):
        """Refit after the normalization option changed."""
        self.fit()
        if self.data is None:
            # fit() returns early without data, so commit explicitly.
            self._invalidate_selection()

    def _init_projector(self):
        """Create a fresh PCA projector and remember its preprocessors."""
        self._pca_projector = PCA(n_components=MAX_COMPONENTS, random_state=0)
        self._pca_projector.component = self.ncomponents
        self._pca_preprocessors = PCA.preprocessors

    def _nselected_components(self):
        """Return the number of selected components."""
        if self._pca is None:
            return 0

        if self.ncomponents == 0:
            # Special "All" value
            max_comp = len(self._variance_ratio)
        else:
            max_comp = self.ncomponents

        var_max = self._cumulative[max_comp - 1]
        # NOTE(review): the right-hand side is the floor of a fraction, so
        # it is presumably either 0.0 or 1.0 -- confirm the intended
        # comparison before touching this branch.
        if var_max != numpy.floor(self.variance_covered / 100.0):
            cut = max_comp
            assert numpy.isfinite(var_max)
            self.variance_covered = int(var_max * 100)
        else:
            self.ncomponents = cut = numpy.searchsorted(
                self._cumulative, self.variance_covered / 100.0) + 1
        return cut

    def _invalidate_selection(self):
        """Propagate a selection change to the outputs."""
        self.commit()

    def _update_axis(self):
        """Set the tick labels on the bottom axis of the plot."""
        p = min(len(self._variance_ratio), self.maxp)
        axis = self.plot.getAxis("bottom")
        # Guard the divisor: axis_labels == 1 would otherwise raise
        # ZeroDivisionError.
        d = max((p - 1) // max(self.axis_labels - 1, 1), 1)
        axis.setTicks([[(i, str(i)) for i in range(1, p + 1, d)]])

    def commit(self):
        """Build and send all outputs for the current selection.

        Without a fitted model, None is sent on the three table outputs.
        The projector is always sent on the ``pca`` output.
        """
        transformed = data = components = None
        if self._pca is not None:
            if self._transformed is None:
                # Compute the full transform (MAX_COMPONENTS components) once.
                self._transformed = self._pca(self.data)
            transformed = self._transformed

            # 0 is the spin box's special "All" value; slicing with it
            # directly would produce empty outputs, so expand it to the
            # number of fitted components.
            n_selected = self.ncomponents or len(self._variance_ratio)

            domain = Domain(
                transformed.domain.attributes[:n_selected],
                self.data.domain.class_vars,
                self.data.domain.metas
            )
            transformed = transformed.from_table(domain, transformed)

            # prevent caching new features by defining compute_value
            proposed = [a.name for a in self._pca.orig_domain.attributes]
            meta_name = get_unique_names(proposed, 'components')
            dom = Domain(
                [ContinuousVariable(name, compute_value=lambda _: None)
                 for name in proposed],
                metas=[StringVariable(name=meta_name)])
            metas = numpy.array(
                [['PC{}'.format(i + 1) for i in range(n_selected)]],
                dtype=object).T
            components = Table(
                dom, self._pca.components_[:n_selected], metas=metas)
            components.name = 'components'

            # Input data with the selected components appended as metas.
            data_dom = Domain(
                self.data.domain.attributes,
                self.data.domain.class_vars,
                self.data.domain.metas + domain.attributes)
            data = Table.from_numpy(
                data_dom, self.data.X, self.data.Y,
                numpy.hstack((self.data.metas, transformed.X)),
                ids=self.data.ids)

        self._pca_projector.component = self.ncomponents
        self.Outputs.transformed_data.send(transformed)
        self.Outputs.components.send(components)
        self.Outputs.data.send(data)
        self.Outputs.pca.send(self._pca_projector)

    def send_report(self):
        """Add the settings and the plot to the report; no-op without data."""
        if self.data is None:
            return
        self.report_items((
            ("Normalize data", str(self.normalize)),
            ("Selected components", self.ncomponents),
            ("Explained variance", "{:.3f} %".format(self.variance_covered))
        ))
        self.report_plot()

    @classmethod
    def migrate_settings(cls, settings, version):
        """Sanitize stored settings in place.

        Non-finite ``variance_covered`` values are replaced by 100, finite
        ones are cast to int, ``ncomponents`` is capped at
        ``MAX_COMPONENTS`` and obsolete keys are removed.
        """
        if "variance_covered" in settings:
            # Due to the error in gh-1896 the variance_covered was persisted
            # as a NaN value, causing a TypeError in the widgets `__init__`.
            vc = settings["variance_covered"]
            if isinstance(vc, numbers.Real):
                if numpy.isfinite(vc):
                    vc = int(vc)
                else:
                    vc = 100
                settings["variance_covered"] = vc
        if settings.get("ncomponents", 0) > MAX_COMPONENTS:
            settings["ncomponents"] = MAX_COMPONENTS

        # Remove old `decomposition_idx` when SVD was still included
        settings.pop("decomposition_idx", None)

        # Remove RemotePCA settings
        settings.pop("batch_size", None)
        settings.pop("address", None)
        settings.pop("auto_update", None)
class OWDBSCAN(widget.OWWidget):
    """DBSCAN clustering widget with an interactive k-distance plot.

    The widget preprocesses the input, plots the distances returned by
    ``get_kth_distances`` and lets the user choose ``eps`` either through
    the spin box or through the plot's cut point.  The output is the input
    table with two extra meta columns: the cluster label and a flag
    marking core samples.
    """

    name = "DBSCAN"
    description = "Density-based spatial clustering."
    icon = "icons/DBSCAN.svg"
    priority = 2150

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_NAME, Table)

    class Error(widget.OWWidget.Error):
        not_enough_instances = Msg("Not enough unique data instances. "
                                   "At least two are required.")

    # (label shown in the combo box, metric identifier passed to the model)
    METRICS = [
        ("Euclidean", "euclidean"),
        ("Manhattan", "cityblock"),
        ("Cosine", "cosine")
    ]

    min_samples = Setting(4)
    eps = Setting(0.5)
    metric_idx = Setting(0)
    normalize = Setting(True)
    auto_commit = Setting(True)
    # Distances plotted in the graph and the index of the selected cut.
    k_distances = None
    cut_point = None

    def __init__(self):
        """Build the parameter controls and the k-distance plot."""
        super().__init__()

        self.data = None
        self.data_normalized = None
        self.db = None
        self.model = None

        box = gui.widgetBox(self.controlArea, "Parameters")
        gui.spin(box, self, "min_samples", 1, 100, 1,
                 callback=self._min_samples_changed,
                 label="Core point neighbors")
        gui.doubleSpin(box, self, "eps", EPS_BOTTOM_LIMIT, 1000, 0.01,
                       callback=self._eps_changed,
                       label="Neighborhood distance")

        box = gui.widgetBox(self.controlArea, self.tr("Distance Metric"))
        gui.comboBox(box, self, "metric_idx",
                     items=list(zip(*self.METRICS))[0],
                     callback=self._metirc_changed)
        gui.checkBox(box, self, "normalize", "Normalize features",
                     callback=self._on_normalize_changed)

        gui.auto_apply(self.buttonsArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="Data items sorted by score",
            y_axis_label="Distance to the k-th nearest neighbour",
            callback=self._on_cut_changed
        )

        self.mainArea.layout().addWidget(self.plot)

    def check_data_size(self, data):
        """Return True when ``data`` has at least two instances.

        A table with fewer than two rows also raises the
        ``not_enough_instances`` error; None just returns False.
        """
        if data is None:
            return False
        if len(data) < 2:
            self.Error.not_enough_instances()
            return False
        return True

    def commit(self):
        """Recompute the clustering and send the output."""
        self.cluster()

    def cluster(self):
        """Fit DBSCAN on the preprocessed data and send the result."""
        if not self.check_data_size(self.data):
            return
        self.model = DBSCAN(
            eps=self.eps,
            min_samples=self.min_samples,
            metric=self.METRICS[self.metric_idx][1]
        ).get_model(self.data_normalized)
        self.send_data()

    def _compute_and_plot(self, cut_point=None):
        """Recompute the k-distances and redraw the plot.

        The cut point (and ``eps``) is recomputed only when ``cut_point``
        is None; otherwise the current ``self.cut_point`` is kept.
        """
        self._compute_kdistances()
        if cut_point is None:
            self._compute_cut_point()
        self._plot_graph()

    def _plot_graph(self):
        """Draw the k-distance curve with the current cut point."""
        # The selection is limited to distances above EPS_BOTTOM_LIMIT.
        nonzero = np.sum(self.k_distances > EPS_BOTTOM_LIMIT)
        self.plot.update(np.arange(len(self.k_distances)),
                         [self.k_distances],
                         colors=[QColor('red')],
                         cutpoint_x=self.cut_point,
                         selection_limit=(0, nonzero - 1))

    def _compute_kdistances(self):
        """Store the distances for the current metric and ``min_samples``."""
        self.k_distances = get_kth_distances(
            self.data_normalized, metric=self.METRICS[self.metric_idx][1],
            k=self.min_samples
        )

    def _compute_cut_point(self):
        """Choose the default cut point and the matching ``eps``.

        When the default ``eps`` falls below ``EPS_BOTTOM_LIMIT`` and some
        distance reaches the limit, ``eps`` is moved to the smallest such
        distance and the cut point is moved along with it.
        """
        self.cut_point = int(DEFAULT_CUT_POINT * len(self.k_distances))
        self.eps = self.k_distances[self.cut_point]

        mask = self.k_distances >= EPS_BOTTOM_LIMIT
        if self.eps < EPS_BOTTOM_LIMIT and np.any(mask):
            self.eps = np.min(self.k_distances[mask])
            self.cut_point = self._find_nearest_dist(self.eps)

    @Inputs.data
    def set_data(self, data):
        """Accept new input data, then preprocess, plot and cluster it.

        Too-small or missing data clears the plot and sends None.
        """
        self.Error.clear()
        if not self.check_data_size(data):
            data = None
        self.data = self.data_normalized = data
        if self.data is None:
            self.Outputs.annotated_data.send(None)
            self.plot.clear_plot()
            return

        self._preprocess_data()

        self._compute_and_plot()
        self.unconditional_commit()

    def _preprocess_data(self):
        """Apply PREPROCESSORS, skipping Normalize when it is disabled."""
        self.data_normalized = self.data
        for pp in PREPROCESSORS:
            if isinstance(pp, Normalize) and not self.normalize:
                continue
            self.data_normalized = pp(self.data_normalized)

    def send_data(self):
        """Send the input table annotated with cluster and core columns."""
        model = self.model

        # Negative labels become NaN, i.e. a missing cluster value.
        clusters = [c if c >= 0 else np.nan for c in model.labels]
        # Every missing label is the same np.nan object, so the set
        # difference removes it.
        k = len(set(clusters) - {np.nan})
        clusters = np.array(clusters)

        # 1 for rows listed among the core samples, 0 for the others.
        in_core = np.zeros(len(self.data), dtype=int)
        core_indices = np.asarray(
            model.projector.core_sample_indices_, dtype=int)
        in_core[core_indices] = 1

        domain = self.data.domain
        attributes, classes = domain.attributes, domain.class_vars
        meta_attrs = domain.metas
        names = [var.name for var in chain(attributes, classes, meta_attrs)
                 if var]

        u_clust_var = get_unique_names(names, "Cluster")
        clust_var = DiscreteVariable(
            u_clust_var, values=["C%d" % (x + 1) for x in range(k)])

        u_in_core = get_unique_names(names + [u_clust_var], "DBSCAN Core")
        in_core_var = DiscreteVariable(u_in_core, values=("0", "1"))

        new_table = self.data.add_column(clust_var, clusters, to_metas=True)
        new_table = new_table.add_column(in_core_var, in_core, to_metas=True)

        self.Outputs.annotated_data.send(new_table)

    def _invalidate(self):
        """Propagate a parameter change to the output."""
        self.commit()

    def _find_nearest_dist(self, value):
        """Return the index of the k-distance closest to ``value``."""
        array = np.asarray(self.k_distances)
        idx = (np.abs(array - value)).argmin()
        return idx

    def _eps_changed(self):
        """Move the cut point to match the ``eps`` spin box."""
        # find the closest value to eps
        if self.data is None:
            return
        self.cut_point = self._find_nearest_dist(self.eps)
        self.plot.set_cut_point(self.cut_point)
        self._invalidate()

    def _metirc_changed(self):
        """Recompute distances and clustering for the new metric."""
        if self.data is not None:
            self._compute_and_plot()
            self._invalidate()

    def _on_cut_changed(self, value):
        """Update ``eps`` from the cut point chosen in the plot."""
        # cut changed by means of a cut line over the scree plot.
        self.cut_point = value
        self.eps = self.k_distances[value]

        self.commit()

    def _min_samples_changed(self):
        """Recompute distances for the new ``min_samples``, keeping the cut."""
        if self.data is None:
            return
        self._compute_and_plot(cut_point=self.cut_point)
        self._invalidate()

    def _on_normalize_changed(self):
        """Re-run preprocessing after the normalization option changed."""
        if self.data is None:
            return
        self._preprocess_data()
        self._compute_and_plot()
        self._invalidate()
def __init__(self):
    """Create the PCA controls and the scree plot.

    The component and variance spin boxes are created without being added
    to a layout and are then placed into the form that serves as the
    layout of the "Components Selection" box.  The auto-apply button goes
    to the buttons area.  ``_update_normalize`` is called once at the end.
    """
    super().__init__()

    # Fitted-model state; filled in once data has been fitted.
    self._cumulative = None
    self._variance_ratio = None
    self._transformed = None
    self._pca = None
    self.data = None

    self._init_projector()

    # Components Selection
    form_layout = QFormLayout()
    selection_box = gui.widgetBox(
        self.controlArea, "Components Selection", orientation=form_layout)

    components_spin = gui.spin(
        selection_box, self, "ncomponents", 1, MAX_COMPONENTS,
        addToLayout=False,
        keyboardTracking=False,
        callback=self._update_selection_component_spin,
    )
    components_spin.setSpecialValueText("All")
    self.components_spin = components_spin

    variance_spin = gui.spin(
        selection_box, self, "variance_covered", 1, 100,
        addToLayout=False,
        keyboardTracking=False,
        callback=self._update_selection_variance_spin,
    )
    variance_spin.setSuffix("%")
    self.variance_spin = variance_spin

    form_layout.addRow("Components:", components_spin)
    form_layout.addRow("Explained variance:", variance_spin)

    # Options
    options_box = gui.vBox(self.controlArea, "Options")
    self.options_box = options_box
    self.normalize_box = gui.checkBox(
        options_box, self, "normalize", "Normalize variables",
        attribute=Qt.WA_LayoutUsesWidgetRect,
        callback=self._update_normalize,
    )
    self.maxp_spin = gui.spin(
        options_box, self, "maxp", 1, MAX_COMPONENTS,
        keyboardTracking=False,
        callback=self._setup_plot,
        label="Show only first",
    )

    gui.rubber(self.controlArea)

    gui.auto_apply(self.buttonsArea, self, "auto_commit")

    # Plot whose cut line reports back through _on_cut_changed.
    graph = SliderGraph(
        "Principal Components",
        "Proportion of variance",
        self._on_cut_changed,
    )
    self.plot = graph
    self.mainArea.layout().addWidget(graph)

    self._update_normalize()
class OWDBSCAN(widget.OWWidget):
    """DBSCAN clustering widget (Chinese UI) with a k-distance plot.

    The widget runs the input through ``PREPROCESSORS``, plots the
    distances returned by ``get_kth_distances`` and lets the user choose
    ``eps`` through the spin box or the plot's cut point.  The output is
    the input table with two extra meta columns: the cluster label and a
    flag marking core samples.
    """

    name = "DBSCAN"
    description = "基于密度的空间聚类."
    icon = "icons/DBSCAN.svg"
    priority = 2150

    class Inputs:
        data = Input("数据(Data)", Table, replaces=['Data'])

    class Outputs:
        annotated_data = Output(ANNOTATED_DATA_SIGNAL_Chinese_NAME, Table,
                                replaces=['Data'])

    class Error(widget.OWWidget.Error):
        not_enough_instances = Msg("Not enough unique data instances. "
                                   "At least two are required.")

    # (label shown in the combo box, metric identifier passed to the model)
    METRICS = [
        ("欧几里得", "euclidean"),
        ("曼哈顿", "cityblock"),
        ("余弦", "cosine")
    ]

    min_samples = Setting(4)
    eps = Setting(0.5)
    metric_idx = Setting(0)
    auto_commit = Setting(True)
    # Distances plotted in the graph and the index of the selected cut.
    k_distances = None
    cut_point = None

    def __init__(self):
        """Build the parameter controls and the k-distance plot."""
        super().__init__()

        self.data = None
        self.data_normalized = None
        self.db = None
        self.model = None

        # NOTE(review): "核心店" in the label below looks like a typo for
        # "核心点" (core point) -- confirm before changing the UI text.
        box = gui.widgetBox(self.controlArea, "参数")
        gui.spin(box, self, "min_samples", 1, 100, 1,
                 callback=self._min_samples_changed,
                 label="核心店邻近数(Core point neighbors)")
        gui.doubleSpin(box, self, "eps", EPS_BOTTOM_LIMIT, 1000, 0.01,
                       callback=self._eps_changed,
                       label="临近点距离")

        box = gui.widgetBox(self.controlArea, self.tr("距离度量"))
        gui.comboBox(box, self, "metric_idx",
                     items=list(zip(*self.METRICS))[0],
                     callback=self._metirc_changed)

        gui.auto_apply(self.controlArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="根据评分排序的数据",
            y_axis_label="到第 k 个最邻近点的距离",
            callback=self._on_cut_changed
        )

        self.mainArea.layout().addWidget(self.plot)

    def check_data_size(self, data):
        """Return True when ``data`` has at least two instances.

        A table with fewer than two rows also raises the
        ``not_enough_instances`` error; None just returns False.
        """
        if data is None:
            return False
        if len(data) < 2:
            self.Error.not_enough_instances()
            return False
        return True

    def commit(self):
        """Recompute the clustering and send the output."""
        self.cluster()

    def cluster(self):
        """Fit DBSCAN on the preprocessed data and send the result."""
        if not self.check_data_size(self.data):
            return
        self.model = DBSCAN(
            eps=self.eps,
            min_samples=self.min_samples,
            metric=self.METRICS[self.metric_idx][1]
        ).get_model(self.data_normalized)
        self.send_data()

    def _compute_and_plot(self, cut_point=None):
        """Recompute the k-distances and redraw the plot.

        The cut point (and ``eps``) is recomputed only when ``cut_point``
        is None; otherwise the current ``self.cut_point`` is kept.
        """
        self._compute_kdistances()
        if cut_point is None:
            self._compute_cut_point()
        self._plot_graph()

    def _plot_graph(self):
        """Draw the k-distance curve with the current cut point."""
        # The selection is limited to distances above EPS_BOTTOM_LIMIT.
        nonzero = np.sum(self.k_distances > EPS_BOTTOM_LIMIT)
        self.plot.update(np.arange(len(self.k_distances)),
                         [self.k_distances],
                         colors=[QColor('red')],
                         cutpoint_x=self.cut_point,
                         selection_limit=(0, nonzero - 1))

    def _compute_kdistances(self):
        """Store the distances for the current metric and ``min_samples``."""
        self.k_distances = get_kth_distances(
            self.data_normalized, metric=self.METRICS[self.metric_idx][1],
            k=self.min_samples
        )

    def _compute_cut_point(self):
        """Choose the default cut point and the matching ``eps``.

        When the default ``eps`` falls below ``EPS_BOTTOM_LIMIT``, ``eps``
        is moved to the smallest distance that reaches the limit and the
        cut point is moved along with it.  If no distance reaches the
        limit, the default cut point is kept.
        """
        self.cut_point = int(DEFAULT_CUT_POINT * len(self.k_distances))
        self.eps = self.k_distances[self.cut_point]

        # np.min raises ValueError on an empty selection, which happens
        # when every distance is below the limit; only adjust when at
        # least one distance qualifies.
        mask = self.k_distances >= EPS_BOTTOM_LIMIT
        if self.eps < EPS_BOTTOM_LIMIT and np.any(mask):
            self.eps = np.min(self.k_distances[mask])
            self.cut_point = self._find_nearest_dist(self.eps)

    @Inputs.data
    def set_data(self, data):
        """Accept new input data, then preprocess, plot and cluster it.

        Too-small or missing data clears the plot and sends None.
        """
        self.Error.clear()
        if not self.check_data_size(data):
            data = None
        self.data = self.data_normalized = data
        if self.data is None:
            self.Outputs.annotated_data.send(None)
            self.plot.clear_plot()
            return

        # preprocess data
        for pp in PREPROCESSORS:
            self.data_normalized = pp(self.data_normalized)

        self._compute_and_plot()
        self.unconditional_commit()

    def send_data(self):
        """Send the input table annotated with cluster and core columns."""
        model = self.model

        # Negative labels become NaN, i.e. a missing cluster value.
        clusters = [c if c >= 0 else np.nan for c in model.labels]
        # Every missing label is the same np.nan object, so the set
        # difference removes it.
        k = len(set(clusters) - {np.nan})
        clusters = np.array(clusters).reshape(len(self.data), 1)
        core_samples = set(model.projector.core_sample_indices_)
        in_core = np.array([1 if (i in core_samples) else 0
                            for i in range(len(self.data))])
        in_core = in_core.reshape(len(self.data), 1)

        clust_var = DiscreteVariable(
            "Cluster", values=["C%d" % (x + 1) for x in range(k)])
        in_core_var = DiscreteVariable("DBSCAN Core", values=["0", "1"])

        domain = self.data.domain
        attributes, classes = domain.attributes, domain.class_vars
        meta_attrs = domain.metas
        x, y, metas = self.data.X, self.data.Y, self.data.metas

        # Append the two new columns to the existing metas.
        meta_attrs += (clust_var, )
        metas = np.hstack((metas, clusters))
        meta_attrs += (in_core_var, )
        metas = np.hstack((metas, in_core))

        domain = Domain(attributes, classes, meta_attrs)
        new_table = Table(domain, x, y, metas, self.data.W)

        self.Outputs.annotated_data.send(new_table)

    def _invalidate(self):
        """Propagate a parameter change to the output."""
        self.commit()

    def _find_nearest_dist(self, value):
        """Return the index of the k-distance closest to ``value``."""
        array = np.asarray(self.k_distances)
        idx = (np.abs(array - value)).argmin()
        return idx

    def _eps_changed(self):
        """Move the cut point to match the ``eps`` spin box."""
        # find the closest value to eps
        if self.data is None:
            return
        self.cut_point = self._find_nearest_dist(self.eps)
        self.plot.set_cut_point(self.cut_point)
        self._invalidate()

    def _metirc_changed(self):
        """Recompute distances and clustering for the new metric."""
        if self.data is not None:
            self._compute_and_plot()
            self._invalidate()

    def _on_cut_changed(self, value):
        """Update ``eps`` from the cut point chosen in the plot."""
        # cut changed by means of a cut line over the scree plot.
        self.cut_point = value
        self.eps = self.k_distances[value]

        self.commit()

    def _min_samples_changed(self):
        """Recompute distances for the new ``min_samples``, keeping the cut."""
        if self.data is None:
            return
        self._compute_and_plot(cut_point=self.cut_point)
        self._invalidate()
def __init__(self):
    """Create the PCA controls (Chinese labels) and the scree plot.

    Builds the component-selection form, the options box, the auto-apply
    button and the slider graph, then calls ``_update_normalize`` once.
    """
    super().__init__()

    # Fitted-model state; filled in once data has been fitted.
    self._cumulative = None
    self._variance_ratio = None
    self._transformed = None
    self._pca = None
    self.data = None

    self._init_projector()

    # Components Selection
    selection_box = gui.vBox(self.controlArea, "成分选择")
    form_layout = QFormLayout()
    selection_box.layout().addLayout(form_layout)

    components_spin = gui.spin(
        selection_box, self, "ncomponents", 1, MAX_COMPONENTS,
        keyboardTracking=False,
        callback=self._update_selection_component_spin,
    )
    components_spin.setSpecialValueText("All")
    self.components_spin = components_spin

    variance_spin = gui.spin(
        selection_box, self, "variance_covered", 1, 100,
        keyboardTracking=False,
        callback=self._update_selection_variance_spin,
    )
    variance_spin.setSuffix("%")
    self.variance_spin = variance_spin

    form_layout.addRow("成分:", components_spin)
    form_layout.addRow("覆盖的贡献(Variance covered):", variance_spin)

    # Options
    options_box = gui.vBox(self.controlArea, "选项")
    self.options_box = options_box
    self.normalize_box = gui.checkBox(
        options_box, self, "normalize", "归一化数据",
        callback=self._update_normalize,
    )
    self.maxp_spin = gui.spin(
        options_box, self, "maxp", 1, MAX_COMPONENTS,
        keyboardTracking=False,
        callback=self._setup_plot,
        label="只显示前...个",
    )

    self.controlArea.layout().addStretch()

    gui.auto_apply(self.controlArea, self, "auto_commit")

    # Plot whose cut line reports back through _on_cut_changed.
    graph = SliderGraph(
        "主成分",
        "贡献率(Proportion of variance)",
        self._on_cut_changed,
    )
    self.plot = graph
    self.mainArea.layout().addWidget(graph)

    self._update_normalize()
class OPTICS_w(widget.OWWidget):
    """OPTICS clustering widget with a reachability plot.

    The widget fits ``OPTICS`` on the attribute matrix of the input table,
    draws the reachability values in processing order (one curve per
    cluster plus one for noise) and outputs the input table with an extra
    "Cluster" meta column.
    """

    name = "OPTICS"
    description = "dynamicaly clustering unlabeled data by density"
    icon = "icons/OPTICS.svg"
    priority = 20

    class Inputs:
        data = Input("Data", Table)

    class Outputs:
        annotated_data = Output("Data", Table)

    class Error(widget.OWWidget.Error):
        not_enough_instances = Msg("Not enough unique data instances. "
                                   "At least two are required.")

    minimum_samples = settings.Setting(5)
    # Index into OPTICS_METRICS.
    metric_methode = settings.Setting(11)
    xi_value = settings.Setting(0.05)
    # Index into OPTICS_ALGORITHM.
    algorithm_base = settings.Setting(0)
    auto_commit = settings.Setting(False)
    # NOTE(review): this binds cut_point to the same Setting object as
    # xi_value -- confirm that sharing the setting is intended.
    cut_point = xi_value

    want_main_area = True

    def __init__(self):
        """Build the info labels, the option controls and the plot."""
        super().__init__()

        self.data = None
        self.dataset = None
        self.annotated_data = None

        # GUI
        infobox = gui.widgetBox(self.controlArea, "Info")
        self.infoa = gui.widgetLabel(
            infobox, "No data on input yet, waiting to get something.")
        self.infob = gui.widgetLabel(infobox, "")
        self.infoc = gui.widgetLabel(infobox, "")
        self.infod = gui.widgetLabel(infobox, "")

        self.optionsBox = gui.widgetBox(self.controlArea, "OPTICS Options")
        gui.spin(
            self.optionsBox,
            self,
            "minimum_samples",
            minv=1,
            maxv=100,
            step=1,
            label="Core point neighbors ",
            callback=self._min_samples_changed
        )
        gui.comboBox(
            self.optionsBox,
            self,
            "metric_methode",
            orientation=Qt.Horizontal,
            label="Distance metric: ",
            items=[d[0] for d in OPTICS_METRICS],
            callback=self._metric_changed
        )
        gui.doubleSpin(
            self.optionsBox,
            self,
            "xi_value",
            minv=(0.000),
            maxv=(0.999),
            step=(0.001),
            label="Minimum steepness: ",
            callback=self._xi_changed
        )
        gui.comboBox(
            self.optionsBox,
            self,
            "algorithm_base",
            orientation=Qt.Horizontal,
            label="neighborhood algorithm: ",
            items=[d[0] for d in OPTICS_ALGORITHM],
            callback=self._algorithm_changed
        )
        # The options stay disabled until valid data arrives.
        self.optionsBox.setDisabled(True)

        gui.auto_apply(self.controlArea, self, "auto_commit")
        gui.rubber(self.controlArea)

        self.controlArea.layout().addStretch()

        self.plot = SliderGraph(
            x_axis_label="Ordering of the points as processed by OPTICS",
            y_axis_label="Reachability distance (epsilon distance)",
            callback=self._on_changed
        )

        self.mainArea.layout().addWidget(self.plot)

    def check_data_size(self, data):
        """Return True when ``data`` has at least two instances.

        A table with fewer than two rows also raises the
        ``not_enough_instances`` error; None just returns False.
        """
        if data is None:
            return False
        if len(data) < 2:
            self.Error.not_enough_instances()
            return False
        return True

    def normalizing(self, model):
        """Return the input table with a "Cluster" meta column appended.

        Negative labels of ``model`` become NaN, i.e. a missing value.
        """
        clusters = [c if c >= 0 else np.nan for c in model.labels_]
        # Every missing label is the same np.nan object, so the set
        # difference removes it.
        k = len(set(clusters) - {np.nan})
        clusters = np.array(clusters).reshape(len(self.data), 1)

        clust_var = DiscreteVariable(
            "Cluster", values=["C%d" % (x + 1) for x in range(k)])

        domain = self.data.domain
        attributes, classes = domain.attributes, domain.class_vars
        meta_attrs = domain.metas
        x, y, metas = self.data.X, self.data.Y, self.data.metas

        meta_attrs += (clust_var, )
        metas = np.hstack((metas, clusters))

        domain = Domain(attributes, classes, meta_attrs)
        new_table = Table(domain, x, y, metas, self.data.W)

        return new_table

    def commit(self):
        """Recompute the clustering and send the output."""
        self.cluster()

    def cluster(self):
        """Fit OPTICS with the current options, then plot and send."""
        if not self.check_data_size(self.data):
            return

        model = OPTICS(
            min_samples=self.minimum_samples,
            metric=OPTICS_METRICS[self.metric_methode][1],
            xi=self.xi_value,
            algorithm=OPTICS_ALGORITHM[self.algorithm_base][1],
        )
        model.fit(self.data.X)
        self._plot_graph(model)
        self.result_OPTICS = self.normalizing(model)

        self.send_data()

    def _plot_graph(self, model):
        """Draw the reachability plot and update the cluster/noise labels."""
        # Fancy indexing returns a copy, so the model's own array is not
        # modified by the assignment below.
        reachability = model.reachability_[model.ordering_]
        space = np.arange(len(reachability))
        # Replace infinite reachability by the largest finite value.
        reachability[reachability == np.inf] = np.nanmax(
            reachability[reachability != np.inf])
        labels = model.labels_[model.ordering_]
        is_noise = labels == -1

        cluster_count = len(np.unique(labels[labels >= 0]))
        self.infoc.setText("%d values in the cluster outcome" % cluster_count)
        noisy_counter = len(space[is_noise])
        self.infod.setText(
            "%d noisy samples in the leaf cluster" % noisy_counter)

        self.plot.clear_plot()

        # One curve per cluster, coloured by consecutive colour indices.
        colors = np.arange(150, 150 + cluster_count)
        for klaster, color in zip(range(cluster_count), colors):
            in_cluster = labels == klaster
            self.plot.plot(space[in_cluster], reachability[in_cluster],
                           pen=mkPen(intColor(color), width=2),
                           antialias=True)
        # Noise samples are drawn in black.
        self.plot.plot(space[is_noise], reachability[is_noise],
                       pen=mkPen(QColor('black'), width=2), antialias=True)

    @Inputs.data
    def set_data(self, dataset):
        """Accept new input data and cluster it.

        Missing or too-small data resets the widget: the options are
        disabled, the plot and info labels are cleared, the stored data is
        dropped and None is sent on the output.
        """
        self.Error.clear()
        if not self.check_data_size(dataset):
            self.optionsBox.setDisabled(True)
            self.plot.clear_plot()
            self.infoa.setText(
                "No data on input yet, waiting to get something.")
            self.infob.setText('')
            self.infoc.setText('')
            self.infod.setText('')
            # Drop the previous table too; otherwise later option changes
            # would re-cluster data that is no longer on the input.
            self.data = None
            self.dataset = None
            self.annotated_data = None
            self.Outputs.annotated_data.send(None)
            return

        self.data = dataset
        self.optionsBox.setDisabled(False)
        self.numberOfInputInstances = len(self.data)
        self.infoa.setText(
            "%d instances in input data set" % self.numberOfInputInstances)

        # The class variable may be missing or have no `values`; only
        # report the count when it is available.
        class_values = getattr(self.data.domain.class_var, "values", None)
        if class_values is not None:
            self.infob.setText(
                "%d values in the categorical outcome" % len(class_values))
        else:
            self.infob.setText('')

        self.commit()

    def checkCommit(self):
        """Commit when automatic committing is enabled."""
        if self.auto_commit:
            self.commit()

    def send_data(self):
        """Send the most recent clustering result."""
        self.Outputs.annotated_data.send(self.result_OPTICS)

    def _min_samples_changed(self):
        """Re-cluster after ``minimum_samples`` changed."""
        if self.data is None:
            return
        self.commit()

    def _metric_changed(self):
        """Reset the algorithm choice and re-cluster for the new metric."""
        if self.data is None:
            return
        self.algorithm_base = 0
        self.commit()

    def _xi_changed(self):
        """Re-cluster after ``xi_value`` changed."""
        self.commit()

    def _algorithm_changed(self):
        """Re-cluster, falling back to algorithm 0 on an invalid metric."""
        if self.data is None:
            return

        if self.algorithm_base != 0:
            metric = OPTICS_METRICS[self.metric_methode][1]
            algorithm = OPTICS_ALGORITHM[self.algorithm_base][1]
            if metric not in VALID_METRICS[algorithm]:
                self.algorithm_base = 0

        self.commit()

    def _on_changed(self, value):
        """Store the cut point reported by the plot."""
        self.cut_point = value
def __init__(self):
    """Create the info labels, the OPTICS option controls and the plot.

    The options box is created disabled.  The control area also receives
    the auto-apply button and a stretch; the slider graph goes to the
    main area.
    """
    super().__init__()

    # No data has been received yet.
    self.annotated_data = None
    self.dataset = None
    self.data = None

    # GUI
    info_box = gui.widgetBox(self.controlArea, "Info")
    self.infoa = gui.widgetLabel(
        info_box, "No data on input yet, waiting to get something.")
    self.infob = gui.widgetLabel(info_box, "")
    self.infoc = gui.widgetLabel(info_box, "")
    self.infod = gui.widgetLabel(info_box, "")

    options = gui.widgetBox(self.controlArea, "OPTICS Options")
    self.optionsBox = options

    gui.spin(
        options, self, "minimum_samples",
        minv=1, maxv=100, step=1,
        callback=self._min_samples_changed,
        label="Core point neighbors ",
    )

    metric_labels = [d[0] for d in OPTICS_METRICS]
    gui.comboBox(
        options, self, "metric_methode",
        items=metric_labels,
        callback=self._metric_changed,
        label="Distance metric: ",
        orientation=Qt.Horizontal,
    )

    gui.doubleSpin(
        options, self, "xi_value",
        minv=0.0, maxv=0.999, step=0.001,
        callback=self._xi_changed,
        label="Minimum steepness: ",
    )

    algorithm_labels = [d[0] for d in OPTICS_ALGORITHM]
    gui.comboBox(
        options, self, "algorithm_base",
        items=algorithm_labels,
        callback=self._algorithm_changed,
        label="neighborhood algorithm: ",
        orientation=Qt.Horizontal,
    )

    options.setDisabled(True)

    gui.auto_apply(self.controlArea, self, "auto_commit")
    gui.rubber(self.controlArea)
    control_layout = self.controlArea.layout()
    control_layout.addStretch()

    # Plot whose cut line reports back through _on_changed.
    graph = SliderGraph(
        callback=self._on_changed,
        x_axis_label="Ordering of the points as processed by OPTICS",
        y_axis_label="Reachability distance (epsilon distance)",
    )
    self.plot = graph
    self.mainArea.layout().addWidget(graph)