def set_data(self, data):
    self.information(1)
    self.__timer.stop()
    self.sampling.setVisible(False)
    self.sql_data = None
    if isinstance(data, SqlTable):
        if data.approx_len() < 4000:
            data = Table(data)
        else:
            self.information(1, "Large SQL table (showing a sample)")
            self.sql_data = data
            data_sample = data.sample_time(0.8, no_cache=True)
            data_sample.download_data(2000, partial=True)
            data = Table(data_sample)
            self.sampling.setVisible(True)
            if self.auto_sample:
                self.__timer.start()

    if data is not None and (len(data) == 0 or len(data.domain) == 0):
        data = None
    if self.data and data and self.data.checksum() == data.checksum():
        return

    self.closeContext()
    same_domain = (self.data and data and
                   data.domain.checksum() == self.data.domain.checksum())
    self.data = data
    self.data_metas_X = self.move_primitive_metas_to_X(data)

    if not same_domain:
        self.init_attr_values()
    self.vizrank._initialize()
    self.vizrank_button.setEnabled(
        self.data is not None and self.data.domain.class_var is not None
        and len(self.data.domain.attributes) > 1)
    self.openContext(self.data)
def commit(self):
    transformed = components = None
    if self._pca is not None:
        if self._transformed is None:
            # Compute the full transform (all components) only once.
            self._transformed = self._pca(self.data)
        transformed = self._transformed
        domain = Domain(
            transformed.domain.attributes[:self.ncomponents],
            self.data.domain.class_vars,
            self.data.domain.metas
        )
        transformed = transformed.from_table(domain, transformed)
        dom = Domain(self._pca.orig_domain.attributes,
                     metas=[StringVariable(name='component')])
        metas = numpy.array([['PC{}'.format(i + 1)
                              for i in range(self.ncomponents)]],
                            dtype=object).T
        components = Table(dom, self._pca.components_[:self.ncomponents],
                           metas=metas)
        components.name = 'components'

        self._pca_projector.component = self.ncomponents

    self.send("Transformed data", transformed)
    self.send("Components", components)
    self.send("PCA", self._pca_projector)
def extend_corpus(self, metadata, Y):
    """
    Append documents to corpus.

    Args:
        metadata (numpy.ndarray): Meta data
        Y (numpy.ndarray): Class variables
    """
    if np.prod(self.X.shape) != 0:
        raise ValueError("Extending corpus only works when X is empty, "
                         "while the shape of X is {}".format(self.X.shape))

    self.metas = np.vstack((self.metas, metadata))

    cv = self.domain.class_var
    for val in set(filter(None, Y)):
        if val not in cv.values:
            cv.add_value(val)
    new_Y = np.array([cv.to_val(i) for i in Y])[:, None]
    self._Y = np.vstack((self._Y, new_Y))

    self.X = self.W = np.zeros((self.metas.shape[0], 0))
    Table._init_ids(self)

    self._tokens = None  # invalidate tokens
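# A minimal usage sketch for extend_corpus() above. It assumes the
# Orange3-text Corpus class (orangecontrib.text.corpus.Corpus) and a
# corpus built from metas only, since extend_corpus() requires X to be
# empty; the documents and class values below are made up.
import numpy as np
from Orange.data import Domain, DiscreteVariable, StringVariable
from orangecontrib.text.corpus import Corpus

text_var = StringVariable("text")
topic_var = DiscreteVariable("topic", values=["sport"])
domain = Domain([], class_vars=[topic_var], metas=[text_var])
corpus = Corpus(Y=np.array([[0.0]]),
                metas=np.array([["first doc"]], dtype=object),
                domain=domain)

# Append two documents; the unseen class value "news" is added to the
# class variable before Y is mapped with to_val().
corpus.extend_corpus(
    metadata=np.array([["second doc"], ["third doc"]], dtype=object),
    Y=np.array(["sport", "news"], dtype=object))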
def __init__(self, X=None, Y=None, metas=None, domain=None,
             text_features=None):
    """
    Args:
        X (numpy.ndarray): attributes
        Y (numpy.ndarray): class variables
        metas (numpy.ndarray): meta attributes; e.g. text
        domain (Orange.data.domain): the domain for this Corpus
        text_features (list): meta attributes that are used for
            text mining. Infer them if None.
    """
    n_doc = _check_arrays(X, Y, metas)

    self.X = X if X is not None else np.zeros((n_doc, 0))
    self.Y = Y if Y is not None else np.zeros((n_doc, 0))
    self.metas = metas if metas is not None else np.zeros((n_doc, 0))
    self.W = np.zeros((n_doc, 0))
    self.domain = domain
    self.text_features = None  # list of text features for mining

    if domain is not None and text_features is None:
        self._infer_text_features()
    elif domain is not None:
        self.set_text_features(text_features)

    Table._init_ids(self)
def send_features(self):
    features = None
    if self.attr_x or self.attr_y:
        dom = Domain([], metas=(StringVariable(name="feature"),))
        features = Table(dom, [[self.attr_x], [self.attr_y]])
        features.name = "Features"
    self.Outputs.features.send(features)
def __call__(self, data):
    """
    Apply randomization of the given data. Returns a new data table.

    Parameters
    ----------
    data : Orange.data.Table
        A data table to be randomized.

    Returns
    -------
    data : Orange.data.Table
        Randomized data table.
    """
    new_data = Table(data)
    new_data.ensure_copy()

    if self.rand_type == Randomize.RandomizeClasses:
        self.randomize(new_data.Y)
    elif self.rand_type == Randomize.RandomizeAttributes:
        self.randomize(new_data.X)
    elif self.rand_type == Randomize.RandomizeMetas:
        self.randomize(new_data.metas)
    else:
        raise TypeError('Unsupported type')

    return new_data
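# A short usage sketch for the __call__ above, assuming it is the
# standard Orange.preprocess.Randomize preprocessor. Because the method
# copies the table before shuffling, the input is left untouched.
from Orange.data import Table
from Orange.preprocess import Randomize

iris = Table("iris")
shuffled = Randomize(rand_type=Randomize.RandomizeClasses)(iris)
assert shuffled is not iris  # the original table keeps its class column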
def set_data(self, data):
    self.information(1)
    if isinstance(data, SqlTable):
        if data.approx_len() < 4000:
            data = Table(data)
        else:
            self.information(1, "Data has been sampled")
            data_sample = data.sample_time(1, no_cache=True)
            data_sample.download_data(2000, partial=True)
            data = Table(data_sample)
    if data is not None and (len(data) == 0 or len(data.domain) == 0):
        data = None
    if self.data and data and self.data.checksum() == data.checksum():
        return

    self.closeContext()
    same_domain = \
        self.data and data and \
        data.domain.checksum() == self.data.domain.checksum()
    self.data = data
    self.data_metas_X = self.move_primitive_metas_to_X(data)

    # TODO: adapt scatter plot to work on SqlTables (avoid use of X and Y)
    if isinstance(self.data, SqlTable):
        self.data.download_data()

    if not same_domain:
        self.init_attr_values()
    self.vizrank._initialize()
    self.vizrank_button.setEnabled(
        self.data is not None and self.data.domain.class_var is not None
        and len(self.data.domain.attributes) > 1)
    self.openContext(self.data)
def set_test_data(self, data):
    """
    Set the input separate testing dataset.
    """
    self.error(1)
    self.information(1)
    if data and not data.domain.class_var:
        self.error(1, "Test data input requires a class variable")
        data = None

    if isinstance(data, SqlTable):
        if data.approx_len() < AUTO_DL_LIMIT:
            data = Table(data)
        else:
            self.information(1, "Test data has been sampled")
            data_sample = data.sample_time(1, no_cache=True)
            data_sample.download_data(AUTO_DL_LIMIT, partial=True)
            data = Table(data_sample)

    self.warning(4)
    self.test_data_missing_vals = data is not None and \
                                  np.isnan(data.Y).any()
    if self.train_data_missing_vals or self.test_data_missing_vals:
        self.warning(4, self._get_missing_data_warning(
            self.train_data_missing_vals, self.test_data_missing_vals
        ))
        if data:
            data = RemoveNaNClasses(data)

    self.test_data = data
    if self.resampling == OWTestLearners.TestOnTest:
        self._invalidate()
def commit(self):
    transformed = components = pp = None
    if self._pca is not None:
        if self._transformed is None:
            # Compute the full transform (MAX_COMPONENTS components)
            # only once.
            self._transformed = self._pca(self.data)
        transformed = self._transformed
        domain = Domain(
            transformed.domain.attributes[:self.ncomponents],
            self.data.domain.class_vars,
            self.data.domain.metas
        )
        transformed = transformed.from_table(domain, transformed)
        # prevent caching new features by defining compute_value
        dom = Domain(
            [ContinuousVariable(a.name, compute_value=lambda _: None)
             for a in self._pca.orig_domain.attributes],
            metas=[StringVariable(name='component')])
        metas = numpy.array([['PC{}'.format(i + 1)
                              for i in range(self.ncomponents)]],
                            dtype=object).T
        components = Table(dom, self._pca.components_[:self.ncomponents],
                           metas=metas)
        components.name = 'components'

        pp = ApplyDomain(domain, "PCA")
        self._pca_projector.component = self.ncomponents

    self.Outputs.transformed_data.send(transformed)
    self.Outputs.components.send(components)
    self.Outputs.pca.send(self._pca_projector)
    self.Outputs.preprocessor.send(pp)
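# Background for the "prevent caching" comment above: in Orange, a
# variable's compute_value is called when a table is transformed into a
# domain containing that variable, so giving the component features
# their own compute_value keeps Orange from deriving them from the
# original attributes of the same name. A minimal sketch of the
# mechanism; the "doubled" variable is hypothetical:
from Orange.data import Table, Domain, ContinuousVariable

iris = Table("iris")
doubled = ContinuousVariable(
    "doubled", compute_value=lambda table: 2 * table.X[:, 0])
derived = iris.transform(Domain([iris.domain.attributes[0], doubled]))
# derived.X[:, 1] now holds twice the first iris attribute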
def __call__(self, data):
    if not self.check_learner_adequacy(data.domain):
        raise ValueError(self.learner_adequacy_err_msg)

    origdomain = data.domain

    if isinstance(data, Instance):
        data = Table(data.domain, [data])
    data = self.preprocess(data)

    if len(data.domain.class_vars) > 1 and not self.supports_multiclass:
        raise TypeError("%s doesn't support multiple class variables" %
                        self.__class__.__name__)

    self.domain = data.domain

    if type(self).fit is Learner.fit:
        model = self.fit_storage(data)
    else:
        X, Y, W = data.X, data.Y, data.W if data.has_weights() else None
        model = self.fit(X, Y, W)
    model.domain = data.domain
    model.supports_multiclass = self.supports_multiclass
    model.name = self.name
    model.original_domain = origdomain
    return model
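# A usage sketch of the Learner.__call__ protocol above: calling a
# learner on a table preprocesses the data, fits, and returns a model
# that remembers both the training domain and the original input domain.
# LogisticRegressionLearner stands in for any concrete Learner.
from Orange.data import Table
from Orange.classification import LogisticRegressionLearner

iris = Table("iris")
model = LogisticRegressionLearner()(iris)  # Learner.__call__
predictions = model(iris)                  # Model.__call__
assert model.original_domain is iris.domain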
def set_train_data(self, data):
    """
    Set the input training dataset.
    """
    self.error(0)
    self.information(0)
    if data and not data.domain.class_var:
        self.error(0, "Train data input requires a class variable")
        data = None

    if isinstance(data, SqlTable):
        if data.approx_len() < AUTO_DL_LIMIT:
            data = Table(data)
        else:
            self.information(0, "Train data has been sampled")
            data_sample = data.sample_time(1, no_cache=True)
            data_sample.download_data(AUTO_DL_LIMIT, partial=True)
            data = Table(data_sample)

    self.warning(4)
    self.train_data_missing_vals = data is not None and \
                                   np.isnan(data.Y).any()
    if self.train_data_missing_vals or self.test_data_missing_vals:
        self.warning(4, self._get_missing_data_warning(
            self.train_data_missing_vals, self.test_data_missing_vals
        ))
        if data:
            data = RemoveNaNClasses(data)

    self.data = data
    self.closeContext()
    if data is not None:
        self._update_class_selection()
        self.openContext(data.domain.class_var)
    self._invalidate()
def test_constant_data(self):
    data = Table("iris")[::5]
    data.X[:, :] = 1.0
    self.send_signal("Data", data)
    self.assertTrue(self.widget.Warning.trivial_components.is_shown())
    self.assertIsNone(self.get_output("Transformed Data"))
    self.assertIsNone(self.get_output("Components"))
def prepare_data():
    data = Table("iris")
    values = list(range(15))
    class_var = DiscreteVariable("iris5", values=[str(v) for v in values])
    data = data.transform(Domain(attributes=data.domain.attributes,
                                 class_vars=[class_var]))
    data.Y = np.array(values * 10, dtype=float)
    return data
def test_format_combo(self):
    widget = self.widget
    filetype = widget.controls.filetype
    widget.save_file = Mock()

    data = Table("iris")
    sparse_data = Table("iris")
    sparse_data.is_sparse = Mock(return_value=True)

    self.send_signal(widget.Inputs.data, data)
    n_nonsparse = filetype.count()
    self.send_signal(widget.Inputs.data, sparse_data)
    n_sparse = filetype.count()
    self.assertGreater(n_nonsparse, n_sparse)

    self.send_signal(widget.Inputs.data, sparse_data)
    self.assertEqual(filetype.count(), n_sparse)

    self.send_signal(widget.Inputs.data, data)
    self.assertEqual(filetype.count(), n_nonsparse)

    self.send_signal(widget.Inputs.data, None)
    self.send_signal(widget.Inputs.data, data)
    self.assertEqual(filetype.count(), n_nonsparse)

    self.send_signal(widget.Inputs.data, None)
    self.send_signal(widget.Inputs.data, sparse_data)
    self.assertEqual(filetype.count(), n_sparse)
def set_data(self, data):
    self.closeContext()
    self.clear_messages()
    self.clear()
    self.information()
    self.data = None

    if isinstance(data, SqlTable):
        if data.approx_len() < AUTO_DL_LIMIT:
            data = Table(data)
        else:
            self.information("Data has been sampled")
            data_sample = data.sample_time(1, no_cache=True)
            data_sample.download_data(2000, partial=True)
            data = Table(data_sample)

    if isinstance(data, Table):
        if len(data.domain.attributes) == 0:
            self.Error.no_features()
            self.clear_outputs()
            return
        if len(data) == 0:
            self.Error.no_instances()
            self.clear_outputs()
            return

    self.openContext(data)
    self._init_projector()

    self.data = data
    self.fit()
def setUp(self):
    self.cont_data = Table.from_list(
        self.cont_domain,
        [[1, 3, 2], [-1, 5, 0], [1, 1, 1], [7, 2, 3]])
    self.cont_data2 = Table.from_list(
        self.cont_domain,
        [[2, 1, 3], [1, 2, 2]])
    self.disc_data = Table.from_list(
        self.disc_domain,
        [[0, 0, 0], [0, 1, 1], [1, 3, 1]])
    self.disc_data4 = Table.from_list(
        self.disc_domain,
        [[0, 0, 0], [0, 1, 1], [0, 1, 1], [1, 3, 1]])
    self.mixed_data = self.data = Table.from_numpy(
        self.domain,
        np.hstack((self.cont_data.X[:3], self.disc_data.X)))
def test_inputs_check_sql(self):
    """Test if check_sql_input is called when data is sent to a widget."""
    d = Table()
    self.send_signal(self.widget.Inputs.data, d)
    self.assertIs(self.widget.pop_called_with(), d)

    a_table = object()
    with patch("Orange.widgets.utils.sql.Table",
               MagicMock(return_value=a_table)) as table_mock:
        d = SqlTable(None, None, MagicMock())

        d.approx_len = MagicMock(return_value=AUTO_DL_LIMIT - 1)
        self.send_signal(self.widget.Inputs.data, d)
        table_mock.assert_called_once_with(d)
        self.assertIs(self.widget.pop_called_with(), a_table)
        table_mock.reset_mock()

        d.approx_len = MagicMock(return_value=AUTO_DL_LIMIT + 1)
        self.send_signal(self.widget.Inputs.data, d)
        table_mock.assert_not_called()
        self.assertIs(self.widget.pop_called_with(), None)
        self.assertTrue(self.widget.Error.download_sql_data.is_shown())
        table_mock.reset_mock()

        self.send_signal(self.widget.Inputs.data, None)
        table_mock.assert_not_called()
        self.assertIs(self.widget.pop_called_with(), None)
        self.assertFalse(self.widget.Error.download_sql_data.is_shown())
def test_data_with_similarity(self):
    widget = self.widget
    indices = np.array([5, 10, 15, 100])
    data = Table("iris")
    widget.data = data
    widget.distances = np.arange(1000, 1150).astype(float)
    neighbours = widget._data_with_similarity(indices)
    self.assertEqual(neighbours.metas.shape, (4, 1))
    np.testing.assert_almost_equal(
        neighbours.metas.flatten(), indices + 1000)
    np.testing.assert_almost_equal(neighbours.X, data.X[indices])

    domain = data.domain
    domain2 = Domain([domain[2]], domain.class_var, metas=domain[:2])
    data2 = data.transform(domain2)
    widget.data = data2
    widget.distances = np.arange(1000, 1150).astype(float)
    neighbours = widget._data_with_similarity(indices)
    self.assertEqual(len(neighbours.domain.metas), 3)
    self.assertEqual(neighbours.metas.shape, (4, 3))
    np.testing.assert_almost_equal(
        neighbours.get_column_view("distance")[0], indices + 1000)
    np.testing.assert_almost_equal(neighbours.X, data2.X[indices])
def test_constant_data(self):
    data = Table("iris")[::5]
    data.X[:, :] = 1.0
    self.send_signal(self.widget.Inputs.data, data)
    self.assertTrue(self.widget.Warning.trivial_components.is_shown())
    self.assertIsNone(
        self.get_output(self.widget.Outputs.transformed_data))
    self.assertIsNone(self.get_output(self.widget.Outputs.components))
def test_varying_between_combined(self):
    X = np.array([[0, 0, 0, 0, 0, 1],
                  [0, 0, 1, 1, 0, 1],
                  [0, 0, 0, 2, np.nan, np.nan],
                  [0, 1, 0, 0, 0, 0],
                  [0, 1, 0, 2, 0, 0],
                  [0, 1, 0, 0, np.nan, 0]])
    M = np.array([["A", 0, 0, 0, 0, 0, 1],
                  ["A", 0, 0, 1, 1, 0, 1],
                  ["A", 0, 0, 0, 2, np.nan, np.nan],
                  ["B", 0, 1, 0, 0, 0, 0],
                  ["B", 0, 1, 0, 2, 0, 0],
                  ["B", 0, 1, 0, 0, np.nan, 0]],
                 dtype=str)
    variables = [ContinuousVariable(name="F%d" % j)
                 for j in range(X.shape[1])]
    metas = [StringVariable(name="M%d" % j) for j in range(M.shape[1])]
    domain = Domain(attributes=variables, metas=metas)
    data = Table.from_numpy(X=X, domain=domain, metas=M)
    self.assertEqual(varying_between(data, idvar=data.domain.metas[0]),
                     [variables[2], variables[3],
                      metas[3], metas[4], metas[5], metas[6]])

    # scipy.sparse uses matrix; this filter can be removed when it's fixed
    warnings.filterwarnings(
        "ignore", ".*the matrix subclass.*", PendingDeprecationWarning)
    data = Table.from_numpy(X=sp.csr_matrix(X), domain=domain, metas=M)
    self.assertEqual(varying_between(data, idvar=data.domain.metas[0]),
                     [variables[2], variables[3],
                      metas[3], metas[4], metas[5], metas[6]])
def test_do_not_recluster_on_same_data(self):
    """Do not recluster data points when targets or metas change."""
    # Prepare some dummy data
    x = np.eye(5)
    y1, y2 = np.ones((5, 1)), np.ones((5, 2))
    meta1, meta2 = np.ones((5, 1)), np.ones((5, 2))

    table1 = Table.from_numpy(
        domain=Domain.from_numpy(X=x, Y=y1, metas=meta1),
        X=x, Y=y1, metas=meta1,
    )
    # X is same, should not cause update
    table2 = Table.from_numpy(
        domain=Domain.from_numpy(X=x, Y=y2, metas=meta2),
        X=x, Y=y2, metas=meta2,
    )
    # X is different, should cause update
    table3 = table1.copy()
    table3.X[:, 0] = 1

    with patch.object(self.widget, 'commit') as commit:
        self.send_signal(self.widget.Inputs.data, table1)
        self.commit_and_wait()
        call_count = commit.call_count

        # Sending data with same X should not recompute the clustering
        self.send_signal(self.widget.Inputs.data, table2)
        self.commit_and_wait()
        self.assertEqual(call_count, commit.call_count)

        # Sending data with different X should recompute the clustering
        self.send_signal(self.widget.Inputs.data, table3)
        self.commit_and_wait()
        self.assertEqual(call_count + 1, commit.call_count)
def check_data(self):
    self.clear_messages()
    self.__timer.stop()
    self.sampling.setVisible(False)
    self.sql_data = None
    if isinstance(self.data, SqlTable):
        if self.data.approx_len() < 4000:
            self.data = Table(self.data)
        else:
            self.Information.sampled_sql()
            self.sql_data = self.data
            data_sample = self.data.sample_time(0.8, no_cache=True)
            data_sample.download_data(2000, partial=True)
            self.data = Table(data_sample)
            self.sampling.setVisible(True)
            if self.auto_sample:
                self.__timer.start()

    if self.data is not None:
        if not self.data.domain.has_continuous_attributes(True, True):
            self.Warning.no_continuous_vars()
            self.data = None

    if self.data is not None and (len(self.data) == 0 or
                                  len(self.data.domain) == 0):
        self.data = None
def test_wrong_input(self):
    # no data
    self.data = None
    self.send_signal(self.widget.Inputs.data, self.data)
    self.assertIsNone(self.widget.data)

    # <2 rows
    self.data = Table(self.domain, [[1, 2, 3, 4, 5, 'STG1']])
    self.send_signal(self.widget.Inputs.data, self.data)
    self.assertIsNone(self.widget.data)
    self.assertTrue(self.widget.Error.not_enough_rows.is_shown())

    # no attributes
    self.data = Table(self.empty_domain, [['STG1']] * 2)
    self.send_signal(self.widget.Inputs.data, self.data)
    self.assertIsNone(self.widget.data)
    self.assertTrue(self.widget.Error.no_attributes.is_shown())

    # constant data
    self.data = Table(self.domain, [[1, 2, 3, 4, 5, 'STG1']] * 2)
    self.send_signal(self.widget.Inputs.data, self.data)
    self.assertIsNone(self.widget.data)
    self.assertTrue(self.widget.Error.constant_data.is_shown())

    # correct input
    self.data = Table(self.domain, [[1, 2, 3, 4, 5, 'STG1'],
                                    [5, 4, 3, 2, 1, 'STG1']])
    self.send_signal(self.widget.Inputs.data, self.data)
    self.assertIsNotNone(self.widget.data)
    self.assertFalse(self.widget.Error.not_enough_rows.is_shown())
    self.assertFalse(self.widget.Error.no_attributes.is_shown())
    self.assertFalse(self.widget.Error.constant_data.is_shown())
def set_train_data(self, data):
    """
    Set the input training dataset.

    Parameters
    ----------
    data : Optional[Orange.data.Table]
    """
    self.Information.data_sampled.clear()
    self.Error.train_data_empty.clear()
    self.Error.class_required.clear()
    self.Error.too_many_classes.clear()
    self.Error.no_class_values.clear()
    self.Error.only_one_class_var_value.clear()
    if data is not None and not len(data):
        self.Error.train_data_empty()
        data = None
    if data:
        conds = [not data.domain.class_vars,
                 len(data.domain.class_vars) > 1,
                 np.isnan(data.Y).all(),
                 data.domain.has_discrete_class and
                 len(data.domain.class_var.values) == 1]
        errors = [self.Error.class_required,
                  self.Error.too_many_classes,
                  self.Error.no_class_values,
                  self.Error.only_one_class_var_value]
        for cond, error in zip(conds, errors):
            if cond:
                error()
                data = None
                break

    if isinstance(data, SqlTable):
        if data.approx_len() < AUTO_DL_LIMIT:
            data = Table(data)
        else:
            self.Information.data_sampled()
            data_sample = data.sample_time(1, no_cache=True)
            data_sample.download_data(AUTO_DL_LIMIT, partial=True)
            data = Table(data_sample)

    self.train_data_missing_vals = \
        data is not None and np.isnan(data.Y).any()
    if self.train_data_missing_vals or self.test_data_missing_vals:
        self.Warning.missing_data(self._which_missing_data())
        if data:
            data = HasClass()(data)
    else:
        self.Warning.missing_data.clear()

    self.data = data
    self.closeContext()
    self._update_scorers()
    self._update_controls()
    if data is not None:
        self._update_class_selection()
        self.openContext(data.domain)
        if self.fold_feature_selected and bool(self.feature_model):
            self.resampling = OWTestLearners.FeatureFold
    self._invalidate()
def commit(self):
    if self.data is None or self.cont_data is None:
        self.Outputs.data.send(self.data)
        self.Outputs.features.send(None)
        self.Outputs.correlations.send(None)
        return

    attrs = [ContinuousVariable("Correlation"), ContinuousVariable("FDR")]
    metas = [StringVariable("Feature 1"), StringVariable("Feature 2")]
    domain = Domain(attrs, metas=metas)
    model = self.vizrank.rank_model
    x = np.array([[float(model.data(model.index(row, 0), role))
                   for role in (Qt.DisplayRole, CorrelationRank.PValRole)]
                  for row in range(model.rowCount())])
    x[:, 1] = FDR(list(x[:, 1]))
    # pylint: disable=protected-access
    m = np.array([[a.name for a in model.data(model.index(row, 0),
                                              CorrelationRank._AttrRole)]
                  for row in range(model.rowCount())], dtype=object)
    corr_table = Table(domain, x, metas=m)
    corr_table.name = "Correlations"

    self.Outputs.data.send(self.data)
    # data has been imputed; send original attributes
    self.Outputs.features.send(AttributeList(
        [self.data.domain[name] for name, _ in self.selection]))
    self.Outputs.correlations.send(corr_table)
def set_test_data(self, data):
    """
    Set the input separate testing dataset.
    """
    self.Information.test_data_sampled.clear()
    if data and not data.domain.class_var:
        self.Error.class_required()
        data = None
    else:
        self.Error.class_required_test.clear()

    if isinstance(data, SqlTable):
        if data.approx_len() < AUTO_DL_LIMIT:
            data = Table(data)
        else:
            self.Information.test_data_sampled()
            data_sample = data.sample_time(1, no_cache=True)
            data_sample.download_data(AUTO_DL_LIMIT, partial=True)
            data = Table(data_sample)

    self.test_data_missing_vals = \
        data is not None and np.isnan(data.Y).any()
    if self.train_data_missing_vals or self.test_data_missing_vals:
        self.Warning.missing_data(self._which_missing_data())
        if data:
            data = RemoveNaNClasses(data)
    else:
        self.Warning.missing_data.clear()

    self.test_data = data
    if self.resampling == OWTestLearners.TestOnTest:
        self._invalidate()
class SVMTest(unittest.TestCase):
    def setUp(self):
        self.data = Table('ionosphere')
        self.data.shuffle()

    def test_SVM(self):
        learn = SVMLearner()
        res = CrossValidation(self.data, [learn], k=2)
        self.assertGreater(CA(res)[0], 0.9)

    def test_LinearSVM(self):
        learn = LinearSVMLearner()
        res = CrossValidation(self.data, [learn], k=2)
        self.assertTrue(0.8 < CA(res)[0] < 0.9)

    def test_NuSVM(self):
        learn = NuSVMLearner(nu=0.01)
        res = CrossValidation(self.data, [learn], k=2)
        self.assertGreater(CA(res)[0], 0.9)

    def test_SVR(self):
        nrows, ncols = 200, 5
        X = np.random.rand(nrows, ncols)
        y = X.dot(np.random.rand(ncols))
        data = Table(X, y)
        learn = SVRLearner(kernel='rbf', gamma=0.1)
        res = CrossValidation(data, [learn], k=2)
        self.assertLess(RMSE(res)[0], 0.15)

    def test_NuSVR(self):
        nrows, ncols = 200, 5
        X = np.random.rand(nrows, ncols)
        y = X.dot(np.random.rand(ncols))
        data = Table(X, y)
        learn = NuSVRLearner(kernel='rbf', gamma=0.1)
        res = CrossValidation(data, [learn], k=2)
        self.assertLess(RMSE(res)[0], 0.1)

    def test_OneClassSVM(self):
        np.random.seed(42)
        domain = Domain((ContinuousVariable("c1"),
                         ContinuousVariable("c2")))
        X_in = 0.3 * np.random.randn(40, 2)
        X_out = np.random.uniform(low=-4, high=4, size=(20, 2))
        X_all = Table(domain, np.r_[X_in + 2, X_in - 2, X_out])
        n_true_in = len(X_in) * 2
        n_true_out = len(X_out)

        nu = 0.2
        learner = OneClassSVMLearner(nu=nu)
        cls = learner(X_all)
        y_pred = cls(X_all)
        n_pred_out_all = np.sum(y_pred == -1)
        n_pred_in_true_in = np.sum(y_pred[:n_true_in] == 1)
        n_pred_out_true_out = np.sum(y_pred[-n_true_out:] == -1)

        self.assertTrue(all(np.absolute(y_pred) == 1))
        self.assertTrue(n_pred_out_all <= len(X_all) * nu)
        self.assertTrue(np.absolute(n_pred_out_all - n_true_out) < 2)
        self.assertTrue(np.absolute(n_pred_in_true_in - n_true_in) < 4)
        self.assertTrue(np.absolute(n_pred_out_true_out - n_true_out) < 3)
def test_varying_between_combined(self):
    X = np.array([[0, 0, 0, 0, 0, 1],
                  [0, 0, 1, 1, 0, 1],
                  [0, 0, 0, 2, np.nan, np.nan],
                  [0, 1, 0, 0, 0, 0],
                  [0, 1, 0, 2, 0, 0],
                  [0, 1, 0, 0, np.nan, 0]])
    M = np.array([["A", 0, 0, 0, 0, 0, 1],
                  ["A", 0, 0, 1, 1, 0, 1],
                  ["A", 0, 0, 0, 2, np.nan, np.nan],
                  ["B", 0, 1, 0, 0, 0, 0],
                  ["B", 0, 1, 0, 2, 0, 0],
                  ["B", 0, 1, 0, 0, np.nan, 0]],
                 dtype=str)
    variables = [ContinuousVariable(name="F%d" % j)
                 for j in range(X.shape[1])]
    metas = [StringVariable(name="M%d" % j) for j in range(M.shape[1])]
    domain = Domain(attributes=variables, metas=metas)
    data = Table.from_numpy(X=X, domain=domain, metas=M)
    self.assertEqual(varying_between(data, idvar=data.domain.metas[0]),
                     [variables[2], variables[3],
                      metas[3], metas[4], metas[5], metas[6]])

    data = Table.from_numpy(X=sp.csr_matrix(X), domain=domain, metas=M)
    self.assertEqual(varying_between(data, idvar=data.domain.metas[0]),
                     [variables[2], variables[3],
                      metas[3], metas[4], metas[5], metas[6]])
def set_train_data(self, data):
    """
    Set the input training dataset.
    """
    self.Information.data_sampled.clear()
    if data and not data.domain.class_var:
        self.Error.class_required()
        data = None
    else:
        self.Error.class_required.clear()

    if isinstance(data, SqlTable):
        if data.approx_len() < AUTO_DL_LIMIT:
            data = Table(data)
        else:
            self.Information.data_sampled()
            data_sample = data.sample_time(1, no_cache=True)
            data_sample.download_data(AUTO_DL_LIMIT, partial=True)
            data = Table(data_sample)

    self.train_data_missing_vals = \
        data is not None and np.isnan(data.Y).any()
    if self.train_data_missing_vals or self.test_data_missing_vals:
        self.Warning.missing_data(self._which_missing_data())
        if data:
            data = RemoveNaNClasses(data)
    else:
        self.Warning.missing_data.clear()

    self.data = data
    self.closeContext()
    if data is not None:
        self._update_class_selection()
        self.openContext(data.domain.class_var)
    self._invalidate()
def _send_output_signals(self, embeddings):
    skipped_images_bool = np.array([x is None for x in embeddings])

    if np.any(skipped_images_bool):
        skipped_images = self._input_data[skipped_images_bool]
        skipped_images = Table(skipped_images)
        skipped_images.ids = self._input_data.ids[skipped_images_bool]
        self.send(_Output.SKIPPED_IMAGES, skipped_images)
    else:
        self.send(_Output.SKIPPED_IMAGES, None)

    embedded_images_bool = np.logical_not(skipped_images_bool)

    if np.any(embedded_images_bool):
        embedded_images = self._input_data[embedded_images_bool]
        embeddings = embeddings[embedded_images_bool]
        embeddings = np.stack(embeddings)
        embedded_images = self._construct_output_data_table(
            embedded_images, embeddings)
        embedded_images.ids = self._input_data.ids[embedded_images_bool]
        self.send(_Output.EMBEDDINGS, embedded_images)
    else:
        self.send(_Output.EMBEDDINGS, None)
def test_clone_context(self):
    context = self.handler.new_context()
    iris = Table('iris')
    attrs, metas = self.handler.encode_domain(iris.domain)
    self.handler.clone_context(context, iris.domain, attrs, metas)
def setUp(self):
    self.widget = self.create_widget(
        OWUnivariateRegression)  # type: OWUnivariateRegression
    self.data = Table("iris")
    self.data_housing = Table("housing")
def test_data_attributes(self):
    """No crash on data attributes of different types"""
    data = Table("iris")
    data.attributes = {"att 1": 1, "att 2": True, "att 3": 3}
    self.send_signal(self.widget.Inputs.data, data)
def test_empty_data(self):
    """No crash on empty data"""
    data = Table("iris")
    self.send_signal(self.widget.Inputs.data,
                     Table.from_domain(data.domain))
def test_data(self):
    """No crash on iris"""
    data = Table("iris")
    self.send_signal(self.widget.Inputs.data, data)
def setUp(self) -> None:
    super().setUp()
    data = Table("brown-selected")
    self.model = RichTableModel(data)
    self.view.setModel(self.model)
def apply(self):
    self.clear_messages()
    transformed_data = None
    if self.data and self.template_domain is not None:
        try:
            transformed_data = self.data.transform(self.template_domain)
        except Exception as ex:  # pylint: disable=broad-except
            self.Error.error(ex)

    data = transformed_data
    self.transformed_info = describe_data(data)
    self.Outputs.transformed_data.send(data)
    self.set_template_label_text()
    self.set_output_label_text(data)

def send_report(self):
    if self.data:
        self.report_data("Data", self.data)
    if self.template_domain is not None:
        self.report_domain("Template data", self.template_domain)
    if self.transformed_info:
        self.report_items("Transformed data", self.transformed_info)


if __name__ == "__main__":  # pragma: no cover
    from Orange.preprocess import Discretize

    table = Table("iris")
    WidgetPreview(OWTransform).run(
        set_data=table, set_template_data=Discretize()(table))
def setUp(self): self.data = Table("iris")
def setUp(self) -> None:
    self.widget = self.create_widget(OWGroupBy)
    self.iris = Table("iris")
    self.data = create_sample_data()
def setUpClass(cls):
    cls.data = Table(test_filename('datasets/ionosphere.tab'))
    cls.data.shuffle()
class TestOWDBSCAN(WidgetTest):
    def setUp(self):
        self.widget = self.create_widget(OWDBSCAN)
        self.iris = Table("iris")

    def tearDown(self):
        self.widgets.remove(self.widget)
        self.widget.onDeleteWidget()
        self.widget = None

    def test_cluster(self):
        w = self.widget

        self.send_signal(w.Inputs.data, self.iris)

        output = self.get_output(w.Outputs.annotated_data)
        self.assertIsNotNone(output)
        self.assertEqual(len(self.iris), len(output))
        self.assertTupleEqual(self.iris.X.shape, output.X.shape)
        self.assertTupleEqual(self.iris.Y.shape, output.Y.shape)
        self.assertEqual(2, output.metas.shape[1])

        self.assertEqual("Cluster", str(output.domain.metas[0]))
        self.assertEqual("DBSCAN Core", str(output.domain.metas[1]))

    def test_unique_domain(self):
        w = self.widget
        data = possible_duplicate_table("Cluster")
        self.send_signal(w.Inputs.data, data)
        output = self.get_output(w.Outputs.annotated_data)
        self.assertEqual(output.domain.metas[0].name, "Cluster (1)")

    def test_bad_input(self):
        w = self.widget

        self.send_signal(w.Inputs.data, self.iris[:1])
        self.assertTrue(w.Error.not_enough_instances.is_shown())

        self.send_signal(w.Inputs.data, self.iris[:2])
        self.assertFalse(w.Error.not_enough_instances.is_shown())

        self.send_signal(w.Inputs.data, self.iris)
        self.assertFalse(w.Error.not_enough_instances.is_shown())

    def test_data_none(self):
        w = self.widget

        self.send_signal(w.Inputs.data, None)

        output = self.get_output(w.Outputs.annotated_data)
        self.assertIsNone(output)

    def test_change_eps(self):
        w = self.widget

        self.send_signal(w.Inputs.data, self.iris)

        # change parameters
        self.widget.controls.eps.valueChanged.emit(0.5)
        output1 = self.get_output(w.Outputs.annotated_data)
        self.widget.controls.eps.valueChanged.emit(1)
        output2 = self.get_output(w.Outputs.annotated_data)

        # on this data, a higher eps gives a greater sum of cluster
        # labels - fewer nan values
        self.assertGreater(np.nansum(output2.metas[:, 0]),
                           np.nansum(output1.metas[:, 0]))

        # try when no data
        self.send_signal(w.Inputs.data, None)
        self.widget.controls.eps.valueChanged.emit(0.5)
        output = self.get_output(w.Outputs.annotated_data)
        self.assertIsNone(output)

    def test_change_min_samples(self):
        w = self.widget

        self.send_signal(w.Inputs.data, self.iris)

        # change parameters
        self.widget.controls.min_samples.valueChanged.emit(5)
        output1 = self.get_output(w.Outputs.annotated_data)
        self.widget.controls.min_samples.valueChanged.emit(1)
        output2 = self.get_output(w.Outputs.annotated_data)

        # on this data, a lower min_samples gives a greater sum of
        # cluster labels - fewer nan values
        self.assertGreater(np.nansum(output2.metas[:, 0]),
                           np.nansum(output1.metas[:, 0]))

        # try when no data
        self.send_signal(w.Inputs.data, None)
        self.widget.controls.min_samples.valueChanged.emit(3)
        output = self.get_output(w.Outputs.annotated_data)
        self.assertIsNone(output)

    def test_change_metric_idx(self):
        w = self.widget

        self.send_signal(w.Inputs.data, self.iris)

        # change parameters
        cbox = self.widget.controls.metric_idx
        simulate.combobox_activate_index(cbox, 0)  # Euclidean
        output1 = self.get_output(w.Outputs.annotated_data)
        simulate.combobox_activate_index(cbox, 1)  # Manhattan
        output2 = self.get_output(w.Outputs.annotated_data)

        # Manhattan has more nan clusters
        self.assertGreater(np.nansum(output1.metas[:, 0]),
                           np.nansum(output2.metas[:, 0]))

        # try when no data
        self.send_signal(w.Inputs.data, None)
        cbox = self.widget.controls.metric_idx
        simulate.combobox_activate_index(cbox, 0)  # Euclidean

    def test_sparse_csr_data(self):
        with self.iris.unlocked():
            self.iris.X = csr_matrix(self.iris.X)

        w = self.widget

        self.send_signal(w.Inputs.data, self.iris)

        output = self.get_output(w.Outputs.annotated_data)
        self.assertIsNotNone(output)
        self.assertEqual(len(self.iris), len(output))
        self.assertTupleEqual(self.iris.X.shape, output.X.shape)
        self.assertTupleEqual(self.iris.Y.shape, output.Y.shape)
        self.assertEqual(2, output.metas.shape[1])

        self.assertEqual("Cluster", str(output.domain.metas[0]))
        self.assertEqual("DBSCAN Core", str(output.domain.metas[1]))

    def test_sparse_csc_data(self):
        with self.iris.unlocked():
            self.iris.X = csc_matrix(self.iris.X)

        w = self.widget

        self.send_signal(w.Inputs.data, self.iris)

        output = self.get_output(w.Outputs.annotated_data)
        self.assertIsNotNone(output)
        self.assertEqual(len(self.iris), len(output))
        self.assertTupleEqual(self.iris.X.shape, output.X.shape)
        self.assertTupleEqual(self.iris.Y.shape, output.Y.shape)
        self.assertEqual(2, output.metas.shape[1])

        self.assertEqual("Cluster", str(output.domain.metas[0]))
        self.assertEqual("DBSCAN Core", str(output.domain.metas[1]))

    def test_get_kth_distances(self):
        dists = get_kth_distances(self.iris, "euclidean", k=5)
        self.assertEqual(len(self.iris), len(dists))
        # dists must be sorted in descending order
        np.testing.assert_array_equal(dists, np.sort(dists)[::-1])

        # test with a different distance - e.g. an Orange distance
        dists = get_kth_distances(self.iris, Euclidean, k=5)
        self.assertEqual(len(self.iris), len(dists))
        # dists must be sorted in descending order
        np.testing.assert_array_equal(dists, np.sort(dists)[::-1])

    def test_metric_changed(self):
        w = self.widget

        self.send_signal(w.Inputs.data, self.iris)
        cbox = w.controls.metric_idx
        simulate.combobox_activate_index(cbox, 2)

        output = self.get_output(w.Outputs.annotated_data)
        self.assertIsNotNone(output)
        self.assertEqual(len(self.iris), len(output))
        self.assertTupleEqual(self.iris.X.shape, output.X.shape)
        self.assertTupleEqual(self.iris.Y.shape, output.Y.shape)

    def test_large_data(self):
        """
        When data has more than 1000 instances, it is subsampled in the
        k-values computation.
        """
        w = self.widget

        data = Table(self.iris.domain,
                     np.repeat(self.iris.X, 10, axis=0),
                     np.repeat(self.iris.Y, 10, axis=0))
        self.send_signal(w.Inputs.data, data)

        output = self.get_output(w.Outputs.annotated_data)
        self.assertEqual(len(data), len(output))
        self.assertTupleEqual(data.X.shape, output.X.shape)
        self.assertTupleEqual(data.Y.shape, output.Y.shape)
        self.assertEqual(2, output.metas.shape[1])

    def test_titanic(self):
        """
        Titanic is a data set with many zeros among the k-nearest-neighbour
        distances, so some manipulation is required to set the cut point.
        This test checks whether the widget works on that kind of data.
        """
        w = self.widget
        data = Table("titanic")
        self.send_signal(w.Inputs.data, data)

    def test_data_retain_ids(self):
        self.send_signal(self.widget.Inputs.data, self.iris)
        output = self.get_output(self.widget.Outputs.annotated_data)
        np.testing.assert_array_equal(self.iris.ids, output.ids)

    def test_missing_data(self):
        w = self.widget
        with self.iris.unlocked():
            self.iris[1:5, 1] = np.nan
        self.send_signal(w.Inputs.data, self.iris)
        output = self.get_output(w.Outputs.annotated_data)
        self.assertTupleEqual((150, 1), output[:, "Cluster"].metas.shape)

    def test_normalize_data(self):
        # not normalized
        self.widget.controls.normalize.setChecked(False)

        data = Table("heart_disease")
        self.send_signal(self.widget.Inputs.data, data)

        kwargs = {"eps": self.widget.eps,
                  "min_samples": self.widget.min_samples,
                  "metric": "euclidean"}
        clusters = DBSCAN(**kwargs)(data)

        output = self.get_output(self.widget.Outputs.annotated_data)
        output_clusters = output.metas[:, 0].copy()
        output_clusters[np.isnan(output_clusters)] = -1
        np.testing.assert_array_equal(output_clusters, clusters)

        # normalized
        self.widget.controls.normalize.setChecked(True)

        kwargs = {"eps": self.widget.eps,
                  "min_samples": self.widget.min_samples,
                  "metric": "euclidean"}
        for pp in (Continuize(), Normalize(), SklImpute()):
            data = pp(data)
        clusters = DBSCAN(**kwargs)(data)

        output = self.get_output(self.widget.Outputs.annotated_data)
        output_clusters = output.metas[:, 0].copy()
        output_clusters[np.isnan(output_clusters)] = -1
        np.testing.assert_array_equal(output_clusters, clusters)

    def test_normalize_changed(self):
        self.send_signal(self.widget.Inputs.data, self.iris)
        simulate.combobox_run_through_all(self.widget.controls.metric_idx)
        self.widget.controls.normalize.setChecked(False)
        simulate.combobox_run_through_all(self.widget.controls.metric_idx)
def onDeleteWidget(self):
    self.clear()
    self.data = None
    self.shutdown()
    super().onDeleteWidget()

@classmethod
def migrate_settings(cls, settings, version):
    if version < 3:
        if "selection_indices" in settings:
            settings["selection"] = settings["selection_indices"]
    if version < 4:
        settings.pop("max_iter", None)

@classmethod
def migrate_context(cls, context, version):
    if version < 3:
        values = context.values
        values["attr_color"] = values["graph"]["attr_color"]
        values["attr_size"] = values["graph"]["attr_size"]
        values["attr_shape"] = values["graph"]["attr_shape"]
        values["attr_label"] = values["graph"]["attr_label"]


if __name__ == "__main__":
    import sys

    data = Table(sys.argv[1] if len(sys.argv) > 1 else "iris")
    WidgetPreview(OWtSNE).run(
        set_data=data,
        set_subset_data=data[np.random.choice(len(data), 10)],
    )
def test_string_variables(self):
    self.send_signal(self.widget.Inputs.data, Table("zoo"))
def setUp(self):
    self.widget = self.create_widget(OWDBSCAN)
    self.iris = Table("iris")
def data_table(cls, data, headers=None):
    """
    Return Orange.data.Table given rows of `headers` (iterable of
    iterable) and rows of `data` (iterable of iterable).

    Basically, the idea of subclasses is to produce those two iterables,
    however they might.

    If `headers` is not provided, the header rows are extracted from
    `data`, assuming they precede it.
    """
    if not headers:
        headers, data = cls.parse_headers(data)

    # Consider various header types (single-row, two-row, three-row, none)
    if len(headers) == 3:
        names, types, flags = map(list, headers)
    else:
        if len(headers) == 1:
            HEADER1_FLAG_SEP = '#'
            # First row format either:
            #   1) delimited column names
            #   2) -||- with type and flags prepended, separated by #,
            #      e.g. d#sex,c#age,cC#IQ
            _flags, names = zip(*[i.split(HEADER1_FLAG_SEP, 1)
                                  if HEADER1_FLAG_SEP in i else ('', i)
                                  for i in headers[0]])
            names = list(names)
        elif len(headers) == 2:
            names, _flags = map(list, headers)
        else:
            # Use heuristics for everything
            names, _flags = [], []
        types = [''.join(filter(str.isupper, flag)).lower()
                 for flag in _flags]
        flags = [Flags.join(filter(str.islower, flag)) for flag in _flags]

    # Determine maximum row length
    rowlen = max(map(len, (names, types, flags)))

    strip = False

    def _equal_length(lst):
        nonlocal strip
        if len(lst) > rowlen > 0:
            lst = lst[:rowlen]
            strip = True
        elif len(lst) < rowlen:
            lst.extend([''] * (rowlen - len(lst)))
        return lst

    # Ensure all data is of equal width in a column-contiguous array
    data = [_equal_length([s.strip() for s in row])
            for row in data if any(row)]
    data = np.array(data, dtype=object, order='F')

    if strip:
        warnings.warn("Columns with no headers were removed.")

    # Data may actually be longer than headers were
    try:
        rowlen = data.shape[1]
    except IndexError:
        pass
    else:
        for lst in (names, types, flags):
            _equal_length(lst)

    NAMEGEN = namegen('Feature ', 1)
    Xcols, attrs = [], []
    Mcols, metas = [], []
    Ycols, clses = [], []
    Wcols = []

    # Rename variables if necessary
    # Reusing across files still works if both files have same duplicates
    name_counts = Counter(names)
    del name_counts[""]
    if len(name_counts) != len(names) and name_counts:
        uses = {name: 0 for name, count in name_counts.items()
                if count > 1}
        for i, name in enumerate(names):
            if name in uses:
                uses[name] += 1
                names[i] = "{}_{}".format(name, uses[name])

    namask = np.empty(data.shape[0], dtype=bool)
    # Iterate through the columns
    for col in range(rowlen):
        flag = Flags(Flags.split(flags[col]))
        if flag.i:
            continue

        type_flag = types and types[col].strip()
        try:
            orig_values = data[:, col]
        except IndexError:
            orig_values = np.array([], dtype=object)

        namask = isnastr(orig_values, out=namask)

        coltype_kwargs = {}
        valuemap = None
        values = orig_values

        if type_flag in StringVariable.TYPE_HEADERS:
            coltype = StringVariable
            values = orig_values
        elif type_flag in ContinuousVariable.TYPE_HEADERS:
            coltype = ContinuousVariable
            values = np.empty(data.shape[0], dtype=float)
            try:
                np.copyto(values, orig_values, casting="unsafe",
                          where=~namask)
                values[namask] = np.nan
            except ValueError:
                for row, num in enumerate(orig_values):
                    if not isnastr(num):
                        try:
                            float(num)
                        except ValueError:
                            break
                raise ValueError('Non-continuous value in (1-based) '
                                 'line {}, column {}'.format(
                                     row + len(headers) + 1, col + 1))
        elif type_flag in TimeVariable.TYPE_HEADERS:
            coltype = TimeVariable
            values = np.where(namask, "", orig_values)
        elif (type_flag in DiscreteVariable.TYPE_HEADERS or
                _RE_DISCRETE_LIST.match(type_flag)):
            coltype = DiscreteVariable
            orig_values = values = np.where(namask, "", orig_values)
            if _RE_DISCRETE_LIST.match(type_flag):
                valuemap = Flags.split(type_flag)
                coltype_kwargs.update(ordered=True)
            else:
                valuemap = sorted(set(orig_values) - {""})
        else:
            # No known type specified, use heuristics
            valuemap, values, coltype = guess_data_type(orig_values, namask)

        if flag.m or coltype is StringVariable:
            append_to = (Mcols, metas)
        elif flag.w:
            append_to = (Wcols, None)
        elif flag.c:
            append_to = (Ycols, clses)
        else:
            append_to = (Xcols, attrs)

        cols, domain_vars = append_to

        if domain_vars is not None:
            var_name = names and names[col]
            if not var_name:
                var_name = next(NAMEGEN)

            values, var = sanitize_variable(
                valuemap, values, orig_values, coltype, coltype_kwargs,
                name=var_name)
        else:
            var = None
        if domain_vars is not None:
            var.attributes.update(flag.attributes)
            domain_vars.append(var)

        if isinstance(values, np.ndarray) and not values.flags.owndata:
            values = values.copy()  # might view `data` (string columns)
        cols.append(values)

        try:
            # allow gc to reclaim memory used by string values
            data[:, col] = None
        except IndexError:
            pass

    domain = Domain(attrs, clses, metas)

    if not data.size:
        return Table.from_domain(domain, 0)

    X = Y = M = W = None
    if Xcols:
        X = np.c_[tuple(Xcols)]
        assert X.dtype == np.float_
    else:
        X = np.empty((data.shape[0], 0), dtype=np.float_)
    if Ycols:
        Y = np.c_[tuple(Ycols)]
        assert Y.dtype == np.float_
    if Mcols:
        M = np.c_[tuple(Mcols)].astype(object)
    if Wcols:
        W = np.c_[tuple(Wcols)].astype(float)

    table = Table.from_numpy(domain, X, Y, M, W)
    return table
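# An illustrative sketch of the header conventions data_table() parses,
# written as the iterables a FileFormat subclass might produce; the
# column names and rows below are made up. In the three-row form, the
# second row holds types (d = discrete, c = continuous, s = string,
# t = time) and the third row holds flags (class, meta, ignore, weight);
# the one-row form fuses flags and names with '#', as in the comment in
# the code above.
three_row_headers = [
    ["sex", "age", "IQ"],   # row 1: names
    ["d", "c", "c"],        # row 2: types
    ["", "", "class"],      # row 3: flags
]
one_row_headers = [["d#sex", "c#age", "cC#IQ"]]
rows = [["M", "38", "110"],
        ["F", "42", "121"]]
# table = SomeFileFormat.data_table(rows, headers=three_row_headers)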
def setUp(self):
    self.widget = self.create_widget(
        OWLouvainClustering, stored_settings={'auto_commit': False})
    self.iris = Table('iris')
def init(self):
    self.data = Table("iris")
    self.same_input_output_domain = True
def setUp(self):
    self.widget = self.create_widget(owcolor.OWColor)
    self.iris = Table("iris")
def setUp(self):
    self.widget = self.create_widget(OWLookalike)
    self.zoo = Table("zoo-with-images")
def setUpClass(cls):
    cls.iris = Table("iris")
    cls.housing = Table("housing")
    else:
        self.invalidate(unconditional=True)

def send_report(self):
    # False positives (Setting is not recognized as int)
    # pylint: disable=invalid-sequence-index
    if self.optimize_k and self.selected_row() is not None:
        k_clusters = self.k_from + self.selected_row()
    else:
        k_clusters = self.k
    init_method = self.INIT_METHODS[self.smart_init][0]
    init_method = init_method[0].lower() + init_method[1:]
    self.report_items((
        ("Number of clusters", k_clusters),
        ("Optimization", "{}, {} re-runs limited to {} steps".format(
            init_method, self.n_init, self.max_iterations))))
    if self.data is not None:
        self.report_data("Data", self.data)
        if self.optimize_k:
            self.report_table(
                "Silhouette scores for different numbers of clusters",
                self.table_view)

def onDeleteWidget(self):
    self.cancel()
    super().onDeleteWidget()


if __name__ == "__main__":  # pragma: no cover
    WidgetPreview(OWKMeans).run(Table("heart_disease"))
        arrow1 = pg.ArrowItem(
            parent=self, angle=angle_1, brush=color, pen=pg.mkPen(color))
        arrow1.setPos(np.cos(angle - dangle), np.sin(angle - dangle))
        arrow2 = pg.ArrowItem(
            parent=self, angle=angle_2, brush=color, pen=pg.mkPen(color))
        arrow2.setPos(np.cos(angle + dangle), np.sin(angle + dangle))
        arc_x = np.fromfunction(
            lambda i: np.cos((angle - dangle) + (2 * dangle) * i / 120.),
            (121,), dtype=int)
        arc_y = np.fromfunction(
            lambda i: np.sin((angle - dangle) + (2 * dangle) * i / 120.),
            (121,), dtype=int)
        pg.PlotCurveItem(
            parent=self, x=arc_x, y=arc_y, pen=pg.mkPen(color),
            antialias=False)

    def paint(self, painter, option, widget):
        pass

    def boundingRect(self):
        return QRectF()


if __name__ == "__main__":  # pragma: no cover
    data = Table("brown-selected")
    WidgetPreview(OWRadviz).run(set_data=data, set_subset_data=data[::10])
        return SampleRandomN(n, self.stratified,
                             random_state=self.random_state)(table)


class SampleBootstrap(Reprable):
    def __init__(self, size=0, random_state=None):
        self.size = size
        self.random_state = random_state

    def __call__(self, table=None):
        """Bootstrap indices

        Args:
            table: Not used (but part of the signature)

        Returns:
            tuple (out_of_sample, sample) indices
        """
        # pylint: disable=no-member
        rgen = np.random.RandomState(self.random_state)
        sample = rgen.randint(0, self.size, self.size)
        sample.sort()  # not needed for the code below, just for the user
        insample = np.ones((self.size,), dtype=np.bool)
        insample[sample] = False
        remaining = np.flatnonzero(insample)
        return remaining, sample


if __name__ == "__main__":  # pragma: no cover
    WidgetPreview(OWDataSampler).run(Table("iris"))
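# A quick usage sketch for SampleBootstrap above: drawing a bootstrap
# sample of 10 indices and getting the out-of-bag remainder. Only numpy
# and the class itself are assumed.
import numpy as np

bootstrap = SampleBootstrap(size=10, random_state=42)
out_of_sample, sample = bootstrap()
assert len(sample) == 10                      # drawn with replacement
assert set(out_of_sample).isdisjoint(sample)  # out-of-bag indices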
def send_data(self):
    if self.optimize_k:
        row = self.selected_row()
        k = self.k_from + row if row is not None else None
    else:
        k = self.k

    km = self.clusterings.get(k)
    if self.data is None or km is None or isinstance(km, str):
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    domain = self.data.domain
    cluster_var = DiscreteVariable(
        get_unique_names(domain, "Cluster"),
        values=["C%d" % (x + 1) for x in range(km.k)])
    clust_ids = km.labels
    silhouette_var = ContinuousVariable(
        get_unique_names(domain, "Silhouette"))
    if len(self.data) <= SILHOUETTE_MAX_SAMPLES:
        self.Warning.no_silhouettes.clear()
        scores = self.samples_scores(clust_ids)
        clust_scores = []
        for i in range(km.k):
            in_clust = clust_ids == i
            if in_clust.any():
                clust_scores.append(np.mean(scores[in_clust]))
            else:
                clust_scores.append(0.)
        clust_scores = np.atleast_2d(clust_scores).T
    else:
        self.Warning.no_silhouettes()
        scores = np.nan
        clust_scores = np.full((km.k, 1), np.nan)

    new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
    new_table = self.data.transform(new_domain)
    new_table.get_column_view(cluster_var)[0][:] = clust_ids
    new_table.get_column_view(silhouette_var)[0][:] = scores

    centroid_attributes = [
        attr.compute_value.variable
        if isinstance(attr.compute_value, ReplaceUnknowns)
        and attr.compute_value.variable in domain.attributes
        else attr
        for attr in km.domain.attributes]
    centroid_domain = add_columns(
        Domain(centroid_attributes, [], domain.metas),
        metas=[cluster_var, silhouette_var])
    centroids = Table(
        centroid_domain, km.centroids, None,
        np.hstack((np.full((km.k, len(domain.metas)), np.nan),
                   np.arange(km.k).reshape(km.k, 1),
                   clust_scores)))
    if self.data.name == Table.name:
        centroids.name = "centroids"
    else:
        centroids.name = f"{self.data.name} centroids"

    self.Outputs.annotated_data.send(new_table)
    self.Outputs.centroids.send(centroids)
def setUp(self):
    self.widget = self.create_widget(OWEditDomain)
    self.iris = Table("iris")
def setUpClass(cls):
    super().setUpClass()
    cls.iris = Table("iris")[::5]
    cls.titanic = Table("titanic")[::10]
def test_continuous(self):
    table = Table("housing")
    self.send_signal(self.widget.Inputs.data, table)
    self.widget.unconditional_commit()
def get_learner_parameters(self):
    items = OrderedDict()
    items['Loss function'] = self.LOSS_FUNCTIONS[self.loss_function]
    if self.loss_function != self.SqLoss:
        items['Loss function'] += ", ε={}".format(self.epsilon)
    items['Penalty'] = self.PENALTIES[self.penalty_type]
    if self.penalty_type == self.ElasticNet:
        items['Penalty'] += ": L1 : L2 = {} : {}".format(
            self.l1_ratio, 1.0 - self.l1_ratio)
    items['Penalty'] += ', α={}'.format(self.alpha)
    items['Learning rate'] = self.LEARNING_RATES[self.learning_rate]
    items['Learning rate'] += ", η<sub>0</sub>={}".format(self.eta0)
    if self.learning_rate == self.InvScaling:
        items['Learning rate'] += ", power_t={}".format(self.power_t)
    items['Number of iterations'] = self.n_iter
    return items


if __name__ == "__main__":
    import sys
    from PyQt4.QtGui import QApplication

    a = QApplication(sys.argv)
    ow = OWSGDRegression()
    d = Table('housing')
    ow.set_data(d)
    ow.show()
    a.exec_()
    ow.saveSettings()
            preprocessors=self.preprocessors,
            algorithm=self.algorithms[self.algorithm_index],
            loss=self.losses[self.loss_index].lower())

@Inputs.learner
def set_base_learner(self, learner):
    self.Error.no_weight_support.clear()
    if learner and not learner.supports_weights:
        # Show the error and mark the base learner as invalid
        self.Error.no_weight_support()
        self.base_estimator = None
        self.base_label.setText("Base estimator: INVALID")
    else:
        self.base_estimator = learner or self.DEFAULT_BASE_ESTIMATOR
        self.base_label.setText(
            "Base estimator: %s" % self.base_estimator.name.title())
    if self.auto_apply:
        self.apply()

def get_learner_parameters(self):
    return (("Base estimator", self.base_estimator),
            ("Number of estimators", self.n_estimators),
            ("Algorithm (classification)",
             self.algorithms[self.algorithm_index].capitalize()),
            ("Loss (regression)",
             self.losses[self.loss_index].capitalize()))


if __name__ == "__main__":  # pragma: no cover
    WidgetPreview(OWAdaBoost).run(Table("iris"))
def setUp(self):
    Variable._clear_all_caches()  # pylint: disable=protected-access
    random.seed(42)
    self.zoo = Table("zoo")