def test_hash(self):
    """Check how a variable's hash reacts to its compute value."""
    first = ContinuousVariable("a")
    second = ContinuousVariable("a")
    # Same name, no compute value.
    self.assertEqual(hash(first), hash(second))

    # A lambda on one side only, then a separate lambda on each side.
    first._compute_value = lambda x: x
    self.assertNotEqual(hash(first), hash(second))
    second._compute_value = lambda x: x
    self.assertNotEqual(hash(first), hash(second))

    # Identity transformations over two separately created "a" variables.
    source_1 = ContinuousVariable("a")
    source_2 = ContinuousVariable("a")
    first._compute_value = Identity(source_1)
    self.assertNotEqual(hash(first), hash(second))
    second._compute_value = Identity(source_2)
    self.assertEqual(hash(first), hash(second))

    # Plain variables compared against a different name and a different type.
    time_var = TimeVariable("a")
    other_name = ContinuousVariable("b")
    self.assertEqual(hash(source_1), hash(source_2))
    self.assertNotEqual(hash(source_1), hash(other_name))
    self.assertNotEqual(hash(source_1), hash(time_var))
def test_eq_and_hash(self):
    """Identity transformations compare and hash by their variable."""
    var_x = ContinuousVariable("x")
    reference = Identity(var_x)
    same_variable = Identity(var_x)
    equal_variable = Identity(ContinuousVariable("x"))

    # Both the shared variable and a separately created "x" must match.
    for candidate in (same_variable, equal_variable):
        self.assertEqual(reference, candidate)
        self.assertEqual(hash(reference), hash(candidate))

    different = Identity(ContinuousVariable("y"))
    self.assertNotEqual(reference, different)
    self.assertNotEqual(hash(reference), hash(different))
def test_hash_eq(self):
    """Variables chained through Identity compare and hash as equal."""
    root = ContinuousVariable("a")
    child = ContinuousVariable("b", compute_value=Identity(root))
    grandchild = ContinuousVariable("b2", compute_value=Identity(child))
    unrelated = ContinuousVariable("b")

    equal_pairs = (
        (root, grandchild),
        (child, grandchild),
        (root, child),
    )

    for left, right in equal_pairs:
        self.assertEqual(left, right)
    self.assertNotEqual(child, unrelated)

    for left, right in equal_pairs:
        self.assertEqual(hash(left), hash(right))
    self.assertNotEqual(hash(child), hash(unrelated))
def test_identity(self):
    """Converting a table through Identity compute values keeps its data."""
    domain = Domain(
        [ContinuousVariable("X")],
        [DiscreteVariable("C", values=["0", "1", "2"])],
        [StringVariable("S")],
    )
    attr_data = np.random.normal(size=(4, 1))
    class_data = np.random.randint(3, size=(4, 1))
    meta_data = np.array(["A", "B", "C", "D"], dtype=object).reshape(-1, 1)
    table = Table.from_numpy(domain, attr_data, class_data, metas=meta_data)

    # Copy every variable, deriving each copy from its original via Identity.
    attr, class_var, meta = domain[0], domain[1], domain.metas[0]
    attr_copy = attr.copy(compute_value=Identity(attr))
    class_copy = class_var.copy(compute_value=Identity(class_var))
    meta_copy = meta.copy(compute_value=Identity(meta))
    derived_domain = Domain([attr_copy], [class_copy], [meta_copy])
    derived = Table.from_table(derived_domain, table)

    for part in ("X", "Y", "metas"):
        np.testing.assert_equal(getattr(derived, part), getattr(table, part))
def apply_transform_discete(var, trs):
    # type: (Orange.data.DiscreteVariable, ...) -> ...
    # pylint: disable=too-many-branches
    """Build a new discrete variable by applying transforms to ``var``.

    Each item of ``trs`` is dispatched on its type: ``Rename`` sets the new
    name, ``CategoriesMapping`` sets the category mapping, ``Annotate``
    replaces the annotations (parsed with ``_parse_attributes``) and
    ``ChangeOrdered`` sets the ``ordered`` flag. When several transforms of
    the same kind are given, the last one wins; other types are ignored.

    The result computes its values from ``var``: through ``Lookup`` when a
    category mapping is present, otherwise through ``Identity``.
    """
    name, annotations = var.name, var.attributes
    base_value = var.base_value
    mapping = None
    ordered = var.ordered
    for tr in trs:
        if isinstance(tr, Rename):
            name = tr.name
        elif isinstance(tr, CategoriesMapping):
            mapping = tr.mapping
        elif isinstance(tr, Annotate):
            annotations = _parse_attributes(tr.annotations)
        elif isinstance(tr, ChangeOrdered):
            ordered = tr.ordered

    source_values = var.values
    if mapping is not None:
        # Pairs whose destination is None contribute no category.
        dest_values = [cj for _, cj in mapping if cj is not None]
    else:
        dest_values = var.values

    def positions(values):
        """Map each value to its index; the values must be unique."""
        rval = {c: i for i, c in enumerate(values)}
        assert len(rval) == len(values)
        return rval

    source_codes = positions(source_values)
    dest_codes = positions(dest_values)
    if mapping is not None:
        # Construct a lookup table from source codes to destination codes.
        # Source codes without a destination stay NaN. The builtin ``float``
        # is used because the ``np.float`` alias was removed from NumPy.
        lookup = np.full(len(source_values), np.nan, dtype=float)
        for ci, cj in mapping:
            if ci is not None and cj is not None:
                i, j = source_codes[ci], dest_codes[cj]
                lookup[i] = j
        if base_value != -1:
            mapped_base = lookup[base_value]
            # The table holds floats; hand back an integer index, or -1
            # when the base category has no destination.
            base_value = -1 if np.isnan(mapped_base) else int(mapped_base)
        lookup = Lookup(var, lookup)
    else:
        lookup = Identity(var)

    variable = Orange.data.DiscreteVariable(
        name, values=dest_values, base_value=base_value,
        compute_value=lookup, ordered=ordered,
    )
    variable.attributes.update(annotations)
    return variable
def apply_transform_string(var, trs):
    # type: (Orange.data.StringVariable, ...) -> ...
    """Build a new string variable by applying transforms to ``var``.

    ``Rename`` items set the new name and ``Annotate`` items replace the
    annotations (parsed with ``_parse_attributes``); the last item of each
    kind wins and other item types are ignored. The result computes its
    values from ``var`` through ``Identity``.
    """
    new_name = var.name
    new_annotations = var.attributes
    for transform in trs:
        if isinstance(transform, Rename):
            new_name = transform.name
            continue
        if isinstance(transform, Annotate):
            new_annotations = _parse_attributes(transform.annotations)

    result = Orange.data.StringVariable(
        name=new_name, compute_value=Identity(var)
    )
    result.attributes.update(new_annotations)
    return result
def transform_discrete(var):
    """Return the continuous variables that replace discrete ``var``.

    The outcome depends on ``treat`` and ``self.zero_based`` taken from the
    enclosing scope: an empty list when the variable is dropped, a single
    ordinal or normalized-ordinal variable, or one indicator variable per
    value except the base value.
    """
    n_categories = len(var.values)
    if n_categories < 2 or treat == Continuize.Remove:
        return []
    if treat == Continuize.RemoveMultinomial and n_categories > 2:
        return []

    if treat == Continuize.AsOrdinal:
        ordinal = ContinuousVariable(
            var.name, compute_value=Identity(var), sparse=var.sparse)
        return [ordinal]

    if treat == Continuize.AsNormalizedOrdinal:
        span = max(1, len(var.values)) - 1
        if self.zero_based:
            offset, factor = 0, 1 / span
        else:
            offset, factor = span / 2, 2 / span
        normalized = ContinuousVariable(
            var.name,
            compute_value=Normalizer(var, offset, factor),
            sparse=var.sparse)
        return [normalized]

    # Index of the value that gets no indicator column (-1 skips none).
    if treat == Continuize.Indicators:
        skipped = -1
    elif treat in (Continuize.FirstAsBase, Continuize.RemoveMultinomial):
        skipped = 0
    else:
        # Presumably the most frequent value; verify against modus().
        skipped = dists[var_ptr].modus()

    indicator = [Indicator1, Indicator][self.zero_based]
    return [
        ContinuousVariable(
            "{}={}".format(var.name, value),
            compute_value=indicator(var, index),
            sparse=var.sparse)
        for index, value in enumerate(var.values)
        if index != skipped
    ]
def test_eq_with_compute_value(self):
    """Check variable equality as compute values are swapped around."""
    left = ContinuousVariable("a")
    right = ContinuousVariable("a")
    self.assertEqual(left, left)
    self.assertEqual(left, right)
    self.assertIsNot(left, right)

    # A lambda compute value on one side only.
    left._compute_value = lambda x: x
    self.assertEqual(left, left)
    self.assertNotEqual(left, right)

    source_1 = ContinuousVariable("a")
    source_2 = ContinuousVariable("a")
    other = ContinuousVariable("c")

    # Identity over a variable with the same name, on one side only.
    left._compute_value = Identity(source_1)
    self.assertEqual(left, left)
    self.assertEqual(left, right)
    self.assertEqual(hash(left), hash(right))

    # The same transformation object, a fresh Identity over the same
    # source, and an Identity over an equal source.
    matching = (left.compute_value, Identity(source_1), Identity(source_2))
    for compute_value in matching:
        right._compute_value = compute_value
        self.assertEqual(left, right)

    right._compute_value = Identity(other)
    self.assertNotEqual(left, right)

    # From here on the sources themselves receive compute values.
    right._compute_value = Identity(source_2)
    source_1._compute_value = lambda x: x
    self.assertNotEqual(left, right)
    source_1._compute_value = Identity(other)
    self.assertNotEqual(left, right)
    source_2._compute_value = Identity(other)
    self.assertEqual(left, right)
def __init__(self, domain, columnimputers=None):
    """Set up per-column imputers for every variable in ``domain``.

    ``columnimputers`` optionally maps variables of ``domain`` to an
    imputer. A ``ColumnImputerModel`` is used as is, and an
    ``Orange.classification.Model`` is wrapped in ``ColumnImputerFromModel``.
    A variable with no entry gets a ``NullColumnImputer`` built around
    ``Identity``.

    Raises:
        NotImplementedError: if an imputer is a plain callable.
    """
    # A fresh dict per call instead of a shared mutable default.
    if columnimputers is None:
        columnimputers = {}
    self.columnimputers = columnimputers
    self.domain = domain

    # Variables for the codomain.
    codomain_attrs = []
    codomain_class_vars = []
    # Column imputers for all variables in the domain.
    col_imputers = []
    n_attributes = len(domain.attributes)
    for i, var in enumerate(domain.variables):
        imp = columnimputers.get(var)
        if isinstance(imp, ColumnImputerModel):
            pass
        elif isinstance(imp, Orange.classification.Model):
            imp = ColumnImputerFromModel(domain, imp.class_vars, imp)
        elif callable(imp):
            # ``callable`` replaces the ``collections.Callable`` alias,
            # which no longer exists on current Python versions.
            raise NotImplementedError(
                "callable column imputers are not supported")
        elif imp is None:
            imp = NullColumnImputer(domain, (var, ), (Identity(var), ))

        col_imputers.append((var, imp))
        # The first len(domain.attributes) variables are attributes; the
        # remaining ones contribute class variables.
        if i < n_attributes:
            codomain_attrs.extend(imp.codomain)
        else:
            codomain_class_vars.extend(imp.codomain)

    self.codomain = Orange.data.Domain(codomain_attrs, codomain_class_vars,
                                       domain.metas)
    self.columnimputers = dict(col_imputers)
    self.transformers = [
        (var, tuple(zip(colimp.codomain, colimp.transformers)))
        for var, colimp in col_imputers
    ]
def ordinal_to_continuous(var):
    """Return a continuous variable named like ``var``.

    The new variable computes its values from ``var`` through ``Identity``.
    """
    passthrough = Identity(var)
    continuous = Orange.data.ContinuousVariable(
        var.name, compute_value=passthrough
    )
    return continuous
def renamed(self, new_name):
    """Return a copy named ``new_name`` that is computed from this variable.

    The copy's compute value is an ``Identity`` transformation of ``self``.
    """
    # Imported here to prevent a cyclic import.
    # pylint: disable=import-outside-toplevel
    from Orange.preprocess.transformation import Identity

    passthrough = Identity(variable=self)
    return self.copy(name=new_name, compute_value=passthrough)
def create_variable(self):
    """Return a copy of ``self.var`` named ``self.name`` with the palette set.

    The copy computes its values from the original through ``Identity`` and
    stores ``self.palette_name`` under the ``"palette"`` attribute.
    """
    source = self.var
    result = source.copy(name=self.name, compute_value=Identity(source))
    result.attributes["palette"] = self.palette_name
    return result
def create_variable(self):
    """Return a copy of ``self.var`` with this object's name, values, colors.

    The copy computes its values from the original through ``Identity``;
    its ``colors`` are set from ``self.colors`` converted to an array.
    """
    source = self.var
    result = source.copy(
        name=self.name,
        values=self.values,
        compute_value=Identity(source),
    )
    result.colors = np.asarray(self.colors)
    return result
def ordinal_to_continuous(var):
    """Return a continuous variable named like ``var``.

    The new variable computes its values from ``var`` through ``Identity``.
    """
    # Pass the transformation to the constructor rather than assigning
    # ``compute_value`` afterwards, matching how variables with compute
    # values are created elsewhere.
    return Orange.data.ContinuousVariable(
        var.name, compute_value=Identity(var)
    )