def __get_pivot_tab_domain(self, val_var, X, X_h, X_v, X_t, agg_funs):
    """
    Build the four domains used for the pivot table parts.

    Depending on `val_var` and `agg_funs`, the value columns are created as
    `TimeVariable`, `ContinuousVariable` or `DiscreteVariable` instances.
    In the discrete case the arrays `X`, `X_h`, `X_v` and `X_t` are modified
    in place (see `map_values`).

    Names that clash are made unique with `get_unique_names_duplicates`;
    every original name that had to be changed is appended to
    `self.renamed`, and `self._row_var` (and `self._col_var`, when it is the
    same object) may be replaced by a renamed copy.

    Returns a tuple of four `Domain` objects:
    `(row variable + aggregate + attrs[0],
      "Total" row variable + aggregate + attrs[1],
      attrs[2], attrs[3])`.
    """
    def map_values(index, _X):
        """
        Replace, in place, every distinct value in column `index` of `_X`
        by its position among the column's unique values (with the string
        "nan" excluded) and return those unique values.
        """
        values = np.unique(_X[:, index])
        values = np.delete(values, np.where(values == "nan")[0])
        for j, value in enumerate(values):
            _X[:, index][_X[:, index] == value] = j
        return values

    # Time variables are kept only if every aggregation is listed in
    # self.TimeVarFunctions.
    create_time_var = \
        isinstance(val_var, TimeVariable) and \
        all(fun in self.TimeVarFunctions for fun in agg_funs)
    # `and` binds tighter than `or`: true when there is no value variable,
    # or when it is continuous and either not a TimeVariable or every
    # aggregation is listed in self.FloatFunctions.
    create_cont_var = \
        not val_var or val_var.is_continuous and \
        (not isinstance(val_var, TimeVariable) or
         all(fun in self.FloatFunctions for fun in agg_funs))

    # Names of the column-variable values selected by self._col_var_groups.
    vals = np.array(self._col_var.values)[self._col_var_groups.astype(int)]
    if create_time_var:
        kwargs = {
            "have_date": val_var.have_date,
            "have_time": val_var.have_time
        }
        # `[...] * 2` repeats a reference: attrs[0] and attrs[1] are the
        # same list object, and so are attrs[2] and attrs[3].
        attrs = [[TimeVariable(f"{v}", **kwargs) for v in vals]] * 2
        attrs.extend([[TimeVariable("Total", **kwargs)]] * 2)
    elif create_cont_var:
        # Same list sharing as in the time-variable branch above.
        attrs = [[ContinuousVariable(f"{v}", 1) for v in vals]] * 2
        attrs.extend([[ContinuousVariable("Total", 1)]] * 2)
    else:
        # Discrete case: each array gets its own list of variables, and the
        # arrays are re-coded in place by map_values. Value columns are
        # numbered from 2 on.
        attrs = []
        for x in (X, X_h):
            attrs.append([
                DiscreteVariable(f"{v}", map_values(i, x))
                for i, v in enumerate(vals, 2)
            ])
        for x in (X_v, X_t):
            attrs.append([DiscreteVariable("Total", map_values(0, x))])
    row_var_h = DiscreteVariable(self._row_var.name, values=["Total"])
    aggr_attr = DiscreteVariable('Aggregate', [str(f) for f in agg_funs])

    same_row_col = self._col_var is self._row_var

    # Make the names of the row variable, the aggregate column and the value
    # columns mutually unique; idx 0 and 1 are the two extra variables,
    # idx >= 2 are entries of attrs[0].
    extra_vars = [self._row_var, aggr_attr]
    uniq_a = get_unique_names_duplicates([v.name for v in extra_vars] +
                                         [atr.name for atr in attrs[0]])
    for (idx, var), u in zip(enumerate(chain(extra_vars, attrs[0])), uniq_a):
        if var.name == u:
            continue
        if idx == 0:
            self.renamed.append(self._row_var.name)
            self._row_var = self._row_var.copy(name=u)
            if same_row_col:
                # keep row and column variable the same object
                self._col_var = self._row_var
            row_var_h = row_var_h.copy(name=u)
        elif idx == 1:
            self.renamed.append(aggr_attr.name)
            aggr_attr = aggr_attr.copy(name=u)
        else:
            self.renamed.append(var.name)
            attrs[0][idx - 2] = var.copy(name=u)
            # NOTE(review): attrs[1] receives a copy of the variable taken
            # from attrs[0]; in the discrete branch the two lists were built
            # from different arrays (X and X_h) - confirm this is intended.
            attrs[1][idx - 2] = var.copy(name=u)

    if same_row_col:
        # The return values of `make` are discarded here; presumably it is
        # called for a side effect - TODO confirm.
        vals = tuple(v.name for v in attrs[0])
        self._row_var.make(self._row_var.name, values=vals)
        vals = tuple(v.name for v in attrs[2])
        row_var_h.make(row_var_h.name, vals)

    return (Domain([self._row_var, aggr_attr] + attrs[0]),
            Domain([row_var_h, aggr_attr] + attrs[1]),
            Domain(attrs[2]), Domain(attrs[3]))
# Test methods with descriptive names can omit docstrings # pylint: disable=missing-docstring from unittest import TestCase from unittest.mock import Mock from Orange.data import ContinuousVariable, DiscreteVariable, Domain from Orange.widgets.settings import ContextSetting, PerfectDomainContextHandler, Context, Setting from Orange.widgets.utils import vartype Continuous = vartype(ContinuousVariable("x")) Discrete = vartype(DiscreteVariable("x")) class TestPerfectDomainContextHandler(TestCase): def setUp(self): self.domain = Domain(attributes=[ ContinuousVariable('c1'), DiscreteVariable('d1', values='abc'), DiscreteVariable('d2', values='def') ], class_vars=[DiscreteVariable('d3', values='ghi')], metas=[ ContinuousVariable('c2'), DiscreteVariable('d4', values='jkl') ]) self.args = (self.domain, (('c1', Continuous), ('d1', Discrete), ('d2', Discrete)), (('d3', Discrete), ), (('c2', Continuous), ('d4', Discrete))) self.args_match_all = (self.domain, (('c1', Continuous), ('d1', list('abc')),
def constr_vars(inds):
    """
    Turn a mapping of byte-string names to indices into a list of
    continuous variables ordered by index.

    Returns None when `inds` is empty or None.
    """
    if not inds:
        return None
    # Order by index first and by name second, exactly as sorting
    # (index, name) tuples would.
    by_index = sorted(inds.items(), key=lambda item: (item[1], item[0]))
    variables = []
    for raw_name, _ in by_index:
        variables.append(ContinuousVariable(raw_name.decode("utf-8")))
    return variables
def test_different_metas(self):
    """
    Check that the domain-mismatch error is not shown when data and
    reference differ only in metas (or in attribute order), and that it is
    shown when the reference has an additional attribute.
    """
    widget = self.widget
    domain = Domain([ContinuousVariable("a"), ContinuousVariable("b")],
                    metas=[ContinuousVariable("c")])
    data = Table(domain,
                 np.random.rand(15, len(domain.attributes)),
                 metas=np.random.rand(15, len(domain.metas)))

    def send_inputs(reference):
        self.send_signal(widget.Inputs.data, data)
        self.send_signal(widget.Inputs.reference, reference)

    def assert_accepted(reference):
        # no error is reported and ten rows are output
        send_inputs(reference)
        self.assertFalse(widget.Error.diff_domains.is_shown())
        output = self.get_output(widget.Outputs.data)
        self.assertEqual(10, len(output))

    # same domain with same metas - no error
    assert_accepted(data[:1])

    # same attributes, a different meta - no error
    domain_ref = Domain(domain.attributes,
                        metas=[ContinuousVariable("d")])
    assert_accepted(
        Table(domain_ref,
              np.random.rand(1, len(domain_ref.attributes)),
              metas=np.random.rand(1, len(domain.metas))))

    # same attributes in reversed order - no error
    domain_ref = Domain(domain.attributes[::-1])
    assert_accepted(
        Table(domain_ref, np.random.rand(1, len(domain_ref.attributes))))

    # same attributes, a different number of metas - no error
    domain_ref = Domain(
        domain.attributes,
        metas=[ContinuousVariable("d"), ContinuousVariable("e")])
    assert_accepted(
        Table(domain_ref,
              np.random.rand(1, len(domain_ref.attributes)),
              metas=np.random.rand(1, len(domain_ref.metas))))

    # an additional attribute with the same metas - error shown
    domain_ref = Domain(domain.attributes + (ContinuousVariable("e"), ),
                        metas=[ContinuousVariable("c")])
    send_inputs(
        Table(domain_ref,
              np.random.rand(1, len(domain_ref.attributes)),
              metas=np.random.rand(1, len(domain_ref.metas))))
    self.assertTrue(widget.Error.diff_domains.is_shown())
    self.assertIsNone(self.get_output(widget.Outputs.data))
def _new_var(self):
    """
    Create the output variable for the current operation.

    A `TimeVariable` is returned when the operation is listed in
    `self.TimePreserving` and every variable in `self.variables` is a
    `TimeVariable`; otherwise a `ContinuousVariable` is returned.
    """
    name = self._new_var_name()
    keeps_time = (
        self.operation in self.TimePreserving
        and all(isinstance(var, TimeVariable) for var in self.variables)
    )
    if not keeps_time:
        return ContinuousVariable(name)
    return TimeVariable(name)
class TestInstance(unittest.TestCase):
    """Tests for constructing, indexing, printing and comparing `Instance`."""

    # Shared fixtures: attribute and class names (turned into continuous
    # variables by `create_domain`) and three ready-made meta variables.
    attributes = ["Feature %i" % i for i in range(10)]
    class_vars = ["Class %i" % i for i in range(1)]
    metas = [DiscreteVariable("Meta 1", values="XYZ"),
             ContinuousVariable("Meta 2"),
             StringVariable("Meta 3")]

    def mock_domain(self, with_classes=False, with_metas=False):
        """Return a `MagicMock` of `Domain` built from the class fixtures."""
        attributes = self.attributes
        class_vars = self.class_vars if with_classes else []
        metas = self.metas if with_metas else []
        variables = attributes + class_vars
        return MagicMock(Domain,
                         attributes=attributes,
                         class_vars=class_vars,
                         metas=metas,
                         variables=variables)

    def create_domain(self, attributes=(), classes=(), metas=()):
        """
        Return a real `Domain`.

        Items given as strings become variables of that name (continuous
        for attributes and classes, discrete with values "0".."4" for
        metas); any other item is used as given.
        """
        attr_vars = [ContinuousVariable(name=a) if isinstance(a, str) else a
                     for a in attributes]
        class_vars = [ContinuousVariable(name=c) if isinstance(c, str) else c
                      for c in classes]
        meta_vars = [DiscreteVariable(name=m, values=map(str, range(5)))
                     if isinstance(m, str) else m
                     for m in metas]
        domain = Domain(attr_vars, class_vars, meta_vars)
        return domain

    def test_init_x_no_data(self):
        domain = self.mock_domain()
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape, (len(self.attributes), ))
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))
        self.assertTrue(all(isnan(x) for x in inst._values))
        self.assertTrue(all(isnan(x) for x in inst._x))

    def test_init_xy_no_data(self):
        domain = self.mock_domain(with_classes=True)
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape,
                         (len(self.attributes) + len(self.class_vars), ))
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (len(self.class_vars), ))
        self.assertEqual(inst._metas.shape, (0, ))
        self.assertTrue(all(isnan(x) for x in inst._values))
        self.assertTrue(all(isnan(x) for x in inst._x))
        self.assertTrue(all(isnan(x) for x in inst._y))

    def test_init_xym_no_data(self):
        domain = self.mock_domain(with_classes=True, with_metas=True)
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape,
                         (len(self.attributes) + len(self.class_vars), ))
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (len(self.class_vars), ))
        self.assertEqual(inst._metas.shape, (3, ))
        self.assertTrue(all(isnan(x) for x in inst._values))
        self.assertTrue(all(isnan(x) for x in inst._x))
        self.assertTrue(all(isnan(x) for x in inst._y))
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(inst._metas,
                               np.array([Unknown, Unknown, None]))

    def test_init_x_arr(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        vals = np.array([42, 0])
        inst = Instance(domain, vals)
        assert_array_equal(inst._values, vals)
        assert_array_equal(inst._x, vals)
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        domain = self.create_domain()
        inst = Instance(domain, np.empty((0,)))
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_x_list(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        lst = [42, 0]
        vals = np.array(lst)
        # Pass the list itself: this is the list counterpart of
        # test_init_x_arr, `vals` only holds the expected result.
        inst = Instance(domain, lst)
        assert_array_equal(inst._values, vals)
        assert_array_equal(inst._x, vals)
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        domain = self.create_domain()
        inst = Instance(domain, [])
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_arr(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        vals = np.array([42, 0, 1])
        inst = Instance(domain, vals)
        assert_array_equal(inst._values, vals)
        assert_array_equal(inst._x, vals[:2])
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 1)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_list(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        lst = [42, "M", "C"]
        vals = np.array([42, 0, 2])
        # Pass the list with symbolic values; `vals` is what it is expected
        # to be converted to ("M" -> 0, "C" -> 2).
        inst = Instance(domain, lst)
        assert_array_equal(inst._values, vals)
        assert_array_equal(inst._x, vals[:2])
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 2)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xym_arr(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = np.array([42, "M", "B", "X", 43, "Foo"], dtype=object)
        inst = Instance(domain, vals)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape, (3, ))
        self.assertEqual(inst._x.shape, (2, ))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._metas.shape, (3, ))
        assert_array_equal(inst._values, np.array([42, 0, 1]))
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y[0], 1)
        assert_array_equal(inst._metas,
                           np.array([0, 43, "Foo"], dtype=object))

    def test_init_xym_list(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape, (3, ))
        self.assertEqual(inst._x.shape, (2, ))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._metas.shape, (3, ))
        assert_array_equal(inst._values, np.array([42, 0, 1]))
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y[0], 1)
        assert_array_equal(inst._metas,
                           np.array([0, 43, "Foo"], dtype=object))

    def test_init_inst(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        # copy within the same domain
        inst2 = Instance(domain, inst)
        assert_array_equal(inst2._values, np.array([42, 0, 1]))
        assert_array_equal(inst2._x, np.array([42, 0]))
        self.assertEqual(inst2._y[0], 1)
        assert_array_equal(inst2._metas,
                           np.array([0, 43, "Foo"], dtype=object))

        # conversion into a domain that reshuffles the variables and adds
        # two new ones ("z" and "w")
        domain2 = self.create_domain(["z", domain[1], self.metas[1]],
                                     domain.class_vars,
                                     [self.metas[0], "w", domain[0]])
        inst2 = Instance(domain2, inst)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(inst2._values,
                               np.array([Unknown, 0, 43, 1]))
            assert_array_equal(inst2._x, np.array([Unknown, 0, 43]))
            self.assertEqual(inst2._y[0], 1)
            assert_array_equal(inst2._metas,
                               np.array([0, Unknown, 42], dtype=object))

    def test_get_item(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        val = inst[0]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[0], 42)
        self.assertEqual(inst["x"], 42)
        self.assertEqual(inst[domain[0]], 42)

        val = inst[1]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[1], "M")
        self.assertEqual(inst["g"], "M")
        self.assertEqual(inst[domain[1]], "M")

        val = inst[2]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[2], "B")
        self.assertEqual(inst["y"], "B")
        self.assertEqual(inst[domain.class_var], "B")

        val = inst[-2]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[-2], 43)
        self.assertEqual(inst["Meta 2"], 43)
        self.assertEqual(inst[self.metas[1]], 43)

        # NOTE(review): these two checks assign rather than read, although
        # the test is about item access - confirm whether that is intended.
        with self.assertRaises(ValueError):
            inst["asdf"] = 42
        with self.assertRaises(ValueError):
            inst[ContinuousVariable("asdf")] = 42

    def test_set_item(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        inst[0] = 43
        self.assertEqual(inst[0], 43)
        inst["x"] = 44
        self.assertEqual(inst[0], 44)
        inst[domain[0]] = 45
        self.assertEqual(inst[0], 45)

        inst[1] = "F"
        self.assertEqual(inst[1], "F")
        inst["g"] = "M"
        self.assertEqual(inst[1], "M")
        with self.assertRaises(ValueError):
            inst[1] = "N"
        with self.assertRaises(ValueError):
            inst["asdf"] = 42

        inst[2] = "C"
        self.assertEqual(inst[2], "C")
        inst["y"] = "A"
        self.assertEqual(inst[2], "A")
        inst[domain.class_var] = "B"
        self.assertEqual(inst[2], "B")

        inst[-1] = "Y"
        self.assertEqual(inst[-1], "Y")
        inst["Meta 1"] = "Z"
        self.assertEqual(inst[-1], "Z")
        inst[domain.metas[0]] = "X"
        self.assertEqual(inst[-1], "X")

    def test_str(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        inst = Instance(domain, [42, 0])
        self.assertEqual(str(inst), "[42.000, M]")

        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        inst = Instance(domain, [42, "M", "B"])
        self.assertEqual(str(inst), "[42.000, M | B]")

        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        inst = Instance(domain, [42, "M", "B", "X", 43, "Foo"])
        self.assertEqual(str(inst), "[42.000, M | B] {X, 43.000, Foo}")

        domain = self.create_domain([],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        inst = Instance(domain, ["B", "X", 43, "Foo"])
        self.assertEqual(str(inst), "[ | B] {X, 43.000, Foo}")

        domain = self.create_domain([],
                                    [],
                                    self.metas)
        inst = Instance(domain, ["X", 43, "Foo"])
        self.assertEqual(str(inst), "[] {X, 43.000, Foo}")

        domain = self.create_domain(self.attributes)
        inst = Instance(domain, range(len(self.attributes)))
        self.assertEqual(
            str(inst), "[0.000, 1.000, 2.000, 3.000, 4.000, ...]")

        for attr in domain:
            attr.number_of_decimals = 0
        self.assertEqual(str(inst), "[0, 1, 2, 3, 4, ...]")

    def test_eq(self):
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        inst2 = Instance(domain, vals)
        self.assertTrue(inst == inst2)
        self.assertTrue(inst2 == inst)

        # a differing attribute, then a missing attribute
        inst2[0] = 43
        self.assertFalse(inst == inst2)

        inst2[0] = Unknown
        self.assertFalse(inst == inst2)

        # a differing class value
        inst2 = Instance(domain, vals)
        inst2[2] = "C"
        self.assertFalse(inst == inst2)

        # a difference in each of the three metas
        inst2 = Instance(domain, vals)
        inst2[-1] = "Y"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-2] = "33"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-3] = "Bar"
        self.assertFalse(inst == inst2)
def _get_projection_variables(self):
    """
    Return two continuous variables for the projection axes.

    Their names are `self.embedding_variables_names` passed through
    `get_unique_names` together with the domain of `self.data`.
    """
    domain = self.data.domain
    unique = get_unique_names(domain, self.embedding_variables_names)
    x_var = ContinuousVariable(unique[0])
    y_var = ContinuousVariable(unique[1])
    return x_var, y_var
stream.close = lambda: None # HACK: Prevent closing of streams table = CSVReader(input_csv).read() self.assertIsInstance(table.domain['Date'], TimeVariable) self.assertEqual(table[0, 'Date'], '1920-12-12') # Dates before 1970 are negative self.assertTrue(all(inst['Date'] < 0 for inst in table)) CSVReader.write_file(output_csv, table) self.assertEqual(input_csv.getvalue().splitlines(), output_csv.getvalue().splitlines()) PickleContinuousVariable = create_pickling_tests( "PickleContinuousVariable", ("with_name", lambda: ContinuousVariable(name="Feature 0")), ) PickleDiscreteVariable = create_pickling_tests( "PickleDiscreteVariable", ("with_name", lambda: DiscreteVariable(name="Feature 0")), ("with_int_values", lambda: DiscreteVariable(name="Feature 0", values=[1, 2, 3])), ("with_str_value", lambda: DiscreteVariable(name="Feature 0", values=["F", "M"])), ("ordered", lambda: DiscreteVariable( name="Feature 0", values=["F", "M"], ordered=True)), ("with_base_value", lambda: DiscreteVariable( name="Feature 0", values=["F", "M"], base_value=0))) PickleStringVariable = create_pickling_tests(
def test_strange_eq(self):
    """
    A variable created without a name equals itself, but neither another
    such variable nor a string.
    """
    first, second = ContinuousVariable(), ContinuousVariable()
    self.assertEqual(first, first)
    for other in (second, "somestring"):
        self.assertNotEqual(first, other)
def test_match_attr_name(self):
    """
    Check in which situations choosing an entry in the data combo also
    changes the current entry of the extra-data combo.
    """
    widget = self.widget
    first_row = widget.attr_boxes.rows[0]
    data_combo = first_row.left_combo
    extra_combo = first_row.right_combo

    domain_a = Domain(
        [DiscreteVariable("dA1", ("a", "b", "c", "d")),
         DiscreteVariable("dA2", ("aa", "bb")),
         DiscreteVariable("dA3", ("aa", "bb"))],
        DiscreteVariable("cls", ("aaa", "bbb", "ccc")),
        [DiscreteVariable("mA1", ("cc", "dd")), StringVariable("mA2")])
    x_a = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 0], [3, 1, 0]])
    y_a = np.array([0, 1, 2, np.nan])
    metas_a = np.array(
        [[0.0, "m1"], [1.0, "m2"], [np.nan, "m3"], [0.0, "m4"]]
    ).astype(object)

    domain_b = Domain(
        [DiscreteVariable("dB1", values=("a", "b", "c")),
         ContinuousVariable("dA2")],
        None,
        [StringVariable("cls"), DiscreteVariable("dA1", ("m4", "m5"))])
    x_b = np.array([[0, 0], [1, 1], [2, np.nan]])
    y_b = np.empty((3, 0))
    metas_b = np.array([[np.nan, np.nan], [1, 1], [0, 0]]).astype(object)

    table_a = Table(domain_a, x_a, y_a, metas_a)
    table_a.name = 'dataA'
    table_a.attributes = 'dataA attributes'
    table_b = Table(domain_b, x_b, y_b, metas_b)
    table_b.name = 'dataB'
    table_b.attributes = 'dataB attributes'

    self.send_signal(widget.Inputs.data, table_a)
    self.send_signal(widget.Inputs.extra_data, table_b)

    # (index set in the extra combo, index then chosen in the data combo,
    #  index expected in the extra combo afterwards)
    scenarios = (
        # match variable if available and the other combo is Row Index
        (0, 2, 5),
        # match variable if available and the other combo is ID
        (1, 2, 5),
        # don't match variable if other combo is set
        (4, 2, 4),
        # don't match if nothing to match to
        (0, 4, 0),
        # don't match numeric with non-numeric
        (0, 3, 0),
        # allow matching string with discrete
        (0, 5, 4),
    )
    for extra_index, data_index, expected in scenarios:
        extra_combo.setCurrentIndex(extra_index)
        extra_combo.activated.emit(extra_index)
        data_combo.setCurrentIndex(data_index)
        data_combo.activated.emit(data_index)
        self.assertEqual(extra_combo.currentIndex(), expected)
def test_nonunique(self):
    """
    Check when the `nonunique_left` / `nonunique_right` errors are shown
    and whether the widget produces output, for different merge types and
    different pairs of matching variables.
    """
    widget = self.widget
    x = ContinuousVariable("x")
    d = DiscreteVariable("d", values=tuple("abc"))
    domain = Domain([x, d], [])
    left = Table.from_numpy(
        domain, np.array([[1.0, 0], [1, 1], [2, 1]]))
    right = Table.from_numpy(
        domain, np.array([[1.0, 0], [2, 1], [3, 1]]))
    right.ids = left.ids

    def assert_errors(left_shown, right_shown):
        # left error is checked first, then the right one
        check = {True: self.assertTrue, False: self.assertFalse}
        check[left_shown](widget.Error.nonunique_left.is_shown())
        check[right_shown](widget.Error.nonunique_right.is_shown())

    def output():
        return self.get_output(widget.Outputs.data)

    def match_on(*pairs):
        widget.attr_boxes.set_state(list(pairs))
        widget.commit.now()

    def merge_as(merge_type):
        widget.merging = merge_type
        widget.commit.now()

    self.send_signal(widget.Inputs.data, left)
    self.send_signal(widget.Inputs.extra_data, right)
    widget.merging = widget.InnerJoin
    assert_errors(False, False)

    # matching by instance id
    match_on((INSTANCEID, INSTANCEID))
    assert_errors(False, False)
    self.assertIsNotNone(output())

    # matching by row index
    match_on((INDEX, INDEX))
    assert_errors(False, False)
    self.assertIsNotNone(output())

    # matching by x: the left table has x == 1 twice
    match_on((x, x))
    assert_errors(True, False)
    self.assertIsNone(output())

    merge_as(widget.LeftJoin)
    assert_errors(False, False)
    self.assertIsNotNone(output())

    # matching by the pair (x, d)
    widget.merging = widget.InnerJoin
    match_on((x, x), (d, d))
    assert_errors(False, False)
    self.assertIsNotNone(output())

    # matching by d: both tables have d == 1 twice
    match_on((d, d))
    assert_errors(True, True)
    self.assertIsNone(output())

    merge_as(widget.LeftJoin)
    assert_errors(False, True)
    self.assertIsNone(output())

    merge_as(widget.InnerJoin)
    assert_errors(True, True)
    self.assertIsNone(output())

    # removing both inputs clears the errors
    self.send_signal(widget.Inputs.data, None)
    self.send_signal(widget.Inputs.extra_data, None)
    assert_errors(False, False)
    self.assertIsNone(output())
def test_select_data(self):
    """
    Test `select_data` with no input, with iris, with housing, with a
    two-valued class, and with attribute columns containing only None.
    """
    w = self.widget

    # test for none data
    self.send_signal("Data", None)

    self.assertIsNone(w.select_data())  # result is none

    # test on iris: two attributes (attr_x, attr_y) and a two-valued class
    # made of the target class and 'Others'
    self.send_signal("Data", self.iris)
    self.assertEqual(len(w.select_data()), len(self.iris))
    self.assertEqual(len(w.select_data().domain.attributes), 2)
    self.assertEqual(len(w.select_data().domain.class_var.values), 2)
    self.assertEqual(w.select_data().domain.class_var.values[1], 'Others')
    self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
    self.assertEqual(w.select_data().domain.attributes[1].name, w.attr_y)
    self.assertEqual(
        w.select_data().domain.class_var.values[0], w.target_class)

    # test on housing - continuous class; a single attribute (attr_x)
    self.send_signal("Data", self.housing)
    self.assertEqual(len(w.select_data()), len(self.housing))
    self.assertEqual(len(w.select_data().domain.attributes), 1)
    self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
    self.assertTrue(w.select_data().domain.class_var.is_continuous)

    # test with data set for logistic regression - class discrete with
    # exactly two values; both original values are expected to be kept
    # (no 'Others' value)
    domain = Domain([ContinuousVariable('a'), ContinuousVariable('b')],
                    DiscreteVariable('c', values=['a', 'b']))
    data = Table(domain, [[1, 2], [1, 2]], [0, 1])

    self.send_signal("Data", data)
    self.assertEqual(len(w.select_data()), len(data))
    self.assertEqual(len(w.select_data().domain.attributes), 2)
    self.assertEqual(len(w.select_data().domain.class_var.values), 2)
    self.assertEqual(
        w.select_data().domain.class_var.values[1],
        data.domain.class_var.values[1])
    self.assertEqual(
        w.select_data().domain.class_var.values[0],
        data.domain.class_var.values[0])
    self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
    self.assertEqual(w.select_data().domain.attributes[1].name, w.attr_y)
    self.assertEqual(
        w.select_data().domain.class_var.values[0],
        w.target_class)

    # selected data is None when one column contains only None
    data = Table(
        Domain([ContinuousVariable('a'), ContinuousVariable('b')],
               DiscreteVariable('c', values=['a', 'b'])),
        [[1, None], [1, None]], [0, 1])
    self.send_signal("Data", data)
    selected_data = w.select_data()
    self.assertIsNone(selected_data)

    # ... and also when both columns contain only None
    data = Table(
        Domain([ContinuousVariable('a'), ContinuousVariable('b')],
               DiscreteVariable('c', values=['a', 'b'])),
        [[None, None], [None, None]], [0, 1])
    self.send_signal("Data", data)
    selected_data = w.select_data()
    self.assertIsNone(selected_data)
def test_set_data(self):
    """
    Check the widget state after receiving unsuitable data (which must
    leave it empty and, where asserted, show an error) and after receiving
    the iris and housing data sets.
    """
    widget = self.widget

    def assert_empty():
        # no data, empty combo boxes, no learner and no cost grid
        self.assertIsNone(widget.data)
        self.assertEqual(widget.cbx.count(), 0)
        self.assertEqual(widget.cby.count(), 0)
        self.assertEqual(widget.target_class_combobox.count(), 0)
        self.assertIsNone(widget.learner)
        self.assertIsNone(widget.cost_grid)

    # state right after construction
    assert_empty()

    # None on the input
    self.send_signal("Data", None)
    assert_empty()

    # data without a class variable
    without_class = Table(
        Domain([ContinuousVariable("x"), ContinuousVariable("y")]),
        [[1, 2], [2, 3]])
    self.send_signal("Data", without_class)
    assert_empty()
    self.assertTrue(widget.Error.no_class.is_shown())

    # class with a single value
    single_class_value = Table(
        Domain([ContinuousVariable("x"), ContinuousVariable("y")],
               DiscreteVariable("a", values=["k"])),
        [[1, 2], [2, 3]], [0, 0])
    self.send_signal("Data", single_class_value)
    assert_empty()
    self.assertTrue(widget.Error.to_few_values.is_shown())

    # only one continuous attribute with a discrete class
    too_few_continuous = Table(
        Domain(
            [ContinuousVariable("x"),
             DiscreteVariable("y", values=["a", "b"])],
            DiscreteVariable("a", values=['a', 'b'])),
        [[1, 0], [2, 1]], [0, 1])
    self.send_signal("Data", too_few_continuous)
    assert_empty()
    self.assertTrue(widget.Error.to_few_features.is_shown())

    # suitable data with a discrete class (iris) - logistic regression
    iris_domain = self.iris.domain
    class_values = iris_domain.class_var.values
    n_continuous = sum(
        isinstance(var, ContinuousVariable)
        for var in iris_domain.attributes)
    self.send_signal("Data", self.iris)
    self.assertEqual(widget.cbx.count(), n_continuous)
    self.assertEqual(widget.cby.count(), n_continuous)
    self.assertEqual(
        widget.target_class_combobox.count(), len(class_values))
    self.assertEqual(widget.cbx.currentText(), iris_domain[0].name)
    self.assertEqual(widget.cby.currentText(), iris_domain[1].name)
    self.assertEqual(
        widget.target_class_combobox.currentText(), class_values[0])

    self.assertEqual(widget.attr_x, iris_domain[0].name)
    self.assertEqual(widget.attr_y, iris_domain[1].name)
    self.assertEqual(widget.target_class, class_values[0])

    # change the shown attributes and the target class
    widget.attr_x = iris_domain[1].name
    widget.attr_y = iris_domain[2].name
    widget.target_class = class_values[1]

    self.assertEqual(widget.cbx.currentText(), iris_domain[1].name)
    self.assertEqual(widget.cby.currentText(), iris_domain[2].name)
    self.assertEqual(
        widget.target_class_combobox.currentText(), class_values[1])

    self.assertEqual(widget.attr_x, iris_domain[1].name)
    self.assertEqual(widget.attr_y, iris_domain[2].name)
    self.assertEqual(widget.target_class, class_values[1])

    # remove the data
    self.send_signal("Data", None)
    assert_empty()

    # no continuous attribute with a continuous class
    too_few_continuous = Table(
        Domain([DiscreteVariable("y", values=["a", "b"])],
               ContinuousVariable("a")),
        [[1, 0], [2, 1]], [0, 1])
    self.send_signal("Data", too_few_continuous)
    assert_empty()
    self.assertTrue(widget.Error.to_few_features.is_shown())

    # suitable data for linear regression (housing): only the x combo is
    # filled, the y and target-class combos are empty and disabled
    housing_domain = self.housing.domain
    n_continuous = sum(
        isinstance(var, ContinuousVariable)
        for var in housing_domain.attributes)
    self.send_signal("Data", self.housing)
    self.assertEqual(widget.cbx.count(), n_continuous)
    self.assertEqual(widget.cby.count(), 0)
    self.assertEqual(widget.target_class_combobox.count(), 0)
    self.assertFalse(widget.cby.isEnabled())
    self.assertFalse(widget.target_class_combobox.isEnabled())
    self.assertEqual(widget.cbx.currentText(), housing_domain[0].name)

    self.assertEqual(widget.attr_x, housing_domain[0].name)

    # change the shown attribute
    widget.attr_x = housing_domain[1].name

    self.assertEqual(widget.cbx.currentText(), housing_domain[1].name)
    self.assertEqual(widget.attr_x, housing_domain[1].name)
def _add_regression_out_columns(slot, newmetas, newcolumns):
    """
    Append the regression output of `slot` to the two output lists.

    `newmetas` receives a continuous variable named after the slot's
    predictor and `newcolumns` receives the slot's unmapped predictions
    reshaped into a single column. Both lists are modified in place.
    """
    prediction_var = ContinuousVariable(name=slot.predictor.name)
    newmetas.append(prediction_var)
    predictions = slot.results.unmapped_predicted
    newcolumns.append(predictions.reshape((-1, 1)))
def test_bool_raises_warning(self):
    """
    Converting a domain to bool emits `OrangeDeprecationWarning`, both for
    an empty domain and for a domain with one variable.
    """
    for names in ((), ("y",)):
        # the domain is built outside the `with` block so that only the
        # bool() conversion is checked for the warning
        domain = Domain([ContinuousVariable(name) for name in names])
        with self.assertWarns(OrangeDeprecationWarning):
            bool(domain)
def test_decimals(self):
    """
    `str_val` of a variable created with 4 as its second argument prints
    four decimals, and prints "?" for an unknown value.
    """
    var = ContinuousVariable("a", 4)
    expectations = ((4.654321, "4.6543"), (Unknown, "?"))
    for value, text in expectations:
        self.assertEqual(var.str_val(value), text)
def assign_annotations(
    z_values,
    available_annotations,
    data,
    z_threshold=1,
    p_value_fun=PFUN_BINOMIAL,
    scoring=SCORING_EXP_RATIO
):
    """
    Find the annotations (e.g. cell types) that are significant for each item.

    The Orange tables are converted to pandas data frames, handed to
    ``AnnotateSamples.assign_annotations`` and the resulting frames are
    wrapped back into Orange tables.

    Parameters
    ----------
    z_values : Orange.data.Table
        Table with z values for each item.
    available_annotations : Orange.data.Table
        Available annotations (e.g. cell types); its metas must include an
        "Entrez ID" column.
    data : Orange.data.Table
        Tabular data with gene expressions, needed to compute the scores.
        It must carry the ``TAX_ID`` table attribute.
    z_threshold : float
        The threshold for selecting the attribute. For each item the
        attributes with z-value above this value are selected.
    p_value_fun : str, optional (default=PFUN_BINOMIAL)
        Name of the p-value function; forwarded unchanged to
        ``AnnotateSamples.assign_annotations``.
    scoring : str, optional (default=SCORING_EXP_RATIO)
        Type of scoring; forwarded unchanged as well.

    Returns
    -------
    Orange.data.Table
        Annotation scores.
    Orange.data.Table
        Annotation fdrs.
    """
    # sanity checks on the expression data
    assert TAX_ID in data.attributes, (
        "The input table needs to have a "
        "tax_id attribute"
    )
    assert any(
        "Entrez ID" in var.attributes for var in data.domain.attributes
    ), "Input data do not contain gene expression data."

    # number of genes known for the organism
    organism = data.attributes[TAX_ID]
    n_genes = len(GeneInfo(organism))

    # Orange tables -> pandas data frames
    z_frame, _ = AnnotateSamplesMeta._to_pandas(z_values, use_entrez_id=True)
    expression_frame, _ = AnnotateSamplesMeta._to_pandas(
        data, use_entrez_id=True
    )

    # marker genes; the framework recognizes "Gene" instead of "Entrez ID"
    meta_names = [str(meta) for meta in available_annotations.domain.metas]
    meta_names[meta_names.index("Entrez ID")] = "Gene"
    markers = pd.DataFrame(available_annotations.metas, columns=meta_names)
    markers = markers[markers["Gene"] != "?"]

    scores, fdrs = AnnotateSamples.assign_annotations(
        z_frame,
        markers,
        expression_frame,
        n_genes,
        z_threshold=z_threshold,
        p_value_fun=p_value_fun,
        scoring=scoring,
    )

    # both outputs share one domain with a column per annotation
    domain = Domain([ContinuousVariable(ct) for ct in scores.columns.values])
    return Table(domain, scores.values), Table(domain, fdrs.values)
def test_warnings(self):
    """Calling ``Table`` with these argument forms must warn as deprecated."""
    def assert_deprecated(*args):
        self.assertWarns(OrangeDeprecationWarning, Table, *args)

    domain = Domain([ContinuousVariable("x")])
    assert_deprecated(domain)
    assert_deprecated(domain, Table())
    assert_deprecated(domain, [[12]])
    assert_deprecated(np.zeros((5, 5)))
def _regression_output_columns(self):
    """Build output variables and prediction columns for valid predictors.

    Returns a pair ``(newmetas, newcolumns)``: one continuous variable per
    slot, named after the slot, and the slot's first result reshaped into a
    single column.
    """
    slots = self._valid_predictors()

    newmetas = []
    for slot in slots:
        newmetas.append(ContinuousVariable(name=slot.name))

    newcolumns = []
    for slot in slots:
        newcolumns.append(slot.results[0].reshape((-1, 1)))

    return newmetas, newcolumns
def test_continous(self):
    """Run the shared checks on a continuous variable named "X"."""
    self._test_common(ContinuousVariable("X"))
def send_data(self):
    """Send the annotated data and the centroids of the chosen clustering.

    The clustering is looked up by ``k``: the selected row offset by
    ``k_from`` when ``optimize_k`` is set, otherwise ``self.k``. When there
    is no data, no clustering for that ``k``, or the stored entry is a
    string, ``None`` is sent on both outputs.
    """
    if self.optimize_k:
        row = self.selected_row()
        k = None if row is None else self.k_from + row
    else:
        k = self.k

    km = self.clusterings.get(k)
    if self.data is None or km is None or isinstance(km, str):
        self.Outputs.annotated_data.send(None)
        self.Outputs.centroids.send(None)
        return

    domain = self.data.domain
    cluster_var = DiscreteVariable(
        get_unique_names(domain, "Cluster"),
        values=["C%d" % number for number in range(1, km.k + 1)])
    clust_ids = km.labels
    silhouette_var = ContinuousVariable(
        get_unique_names(domain, "Silhouette"))

    if km.silhouette_samples is None:
        self.Warning.no_silhouettes()
        scores = np.nan
        clust_scores = np.full((km.k, 1), np.nan)
    else:
        self.Warning.no_silhouettes.clear()
        # rescale the silhouette samples with arctan(s) / pi + 0.5
        scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
        cluster_means = []
        for cluster in range(km.k):
            members = clust_ids == cluster
            # a cluster without members gets score 0
            cluster_means.append(
                np.mean(scores[members]) if members.any() else 0.)
        clust_scores = np.atleast_2d(cluster_means).T

    new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
    new_table = self.data.transform(new_domain)
    with new_table.unlocked(new_table.metas):
        for meta_var, column in ((cluster_var, clust_ids),
                                 (silhouette_var, scores)):
            new_table.get_column_view(meta_var)[0][:] = column

    # Use the original variable where the clustering domain holds a
    # ReplaceUnknowns-derived copy of an attribute from the input domain.
    original_attributes = set(domain.attributes)
    centroid_attributes = []
    for attr in km.domain.attributes:
        transform = attr.compute_value
        if isinstance(transform, ReplaceUnknowns) \
                and transform.variable in original_attributes:
            centroid_attributes.append(transform.variable)
        else:
            centroid_attributes.append(attr)
    centroid_domain = add_columns(
        Domain(centroid_attributes, [], domain.metas),
        metas=[cluster_var, silhouette_var])

    # The table is built from a copy of the centroids: data stored in the
    # widget can be modified, so the widget should preferably output a copy.
    # There are few centroids, hence copying them is cheap.
    centroid_values = km.centroids.copy()
    centroid_metas = np.hstack((
        np.full((km.k, len(domain.metas)), np.nan),
        np.arange(km.k).reshape(km.k, 1),
        clust_scores))
    centroids = Table(centroid_domain, centroid_values, None, centroid_metas)
    centroids.name = "centroids" if self.data.name == Table.name \
        else f"{self.data.name} centroids"

    self.Outputs.annotated_data.send(new_table)
    self.Outputs.centroids.send(centroids)
def from_numpy(cls, X, Y=None, metas=None):
    """
    Create a domain corresponding to the given numpy arrays. This method
    is usually invoked from :meth:`Orange.data.Table.from_numpy`.

    All attributes are assumed to be continuous and are named
    "Feature <n>". Target variables are discrete if the only two values
    are 0 and 1; otherwise they are continuous. Discrete
    targets are named "Class <n>" and continuous are named "Target <n>".
    Meta attributes are string variables named "Meta <n>". Numbering starts
    at 1 and is zero-padded to the width of the largest index; when there is
    only one variable of a kind, the number is omitted.
    Domain is marked as :attr:`anonymous`, so data from any other domain of
    the same shape can be converted into this one and vice-versa.

    :param `numpy.ndarray` X: 2-dimensional array with data
    :param Y: 1- or 2-dimensional data for target
    :type Y: `numpy.ndarray` or None
    :param `numpy.ndarray` metas: meta attributes
    :type metas: `numpy.ndarray` or None
    :return: a new domain
    :rtype: :class:`Domain`
    :raises ValueError: if `X` is not 2-dimensional or `Y` is neither
        1- nor 2-dimensional
    """
    def get_places(max_index):
        # No number for zero or one variable; otherwise the count of decimal
        # digits of the largest index. Counting digits of the string avoids
        # both the domain error of log(0) for arrays without columns and the
        # float inexactness of log (log(1000, 10) is just under 3).
        return 0 if max_index <= 1 else len(str(max_index))

    def get_name(base, index, places):
        # `index` is zero-based; the shown number is one-based
        return base if not places \
            else "{} {:0{}}".format(base, index + 1, places)

    if X.ndim != 2:
        raise ValueError('X must be a 2-dimensional array')
    n_attrs = X.shape[1]
    places = get_places(n_attrs)
    attr_vars = [
        ContinuousVariable(name=get_name("Feature", a, places))
        for a in range(n_attrs)
    ]

    class_vars = []
    if Y is not None:
        if Y.ndim == 1:
            Y = Y.reshape(len(Y), 1)
        elif Y.ndim != 2:
            raise ValueError('Y has invalid shape')
        n_classes = Y.shape[1]
        places = get_places(n_classes)
        for i, values in enumerate(Y.T):
            if set(values) == {0, 1}:
                name = get_name('Class', i, places)
                class_vars.append(DiscreteVariable(name, ['v1', 'v2']))
            else:
                # pass the zero-based index; get_name adds the 1 itself
                name = get_name('Target', i, places)
                class_vars.append(ContinuousVariable(name))

    if metas is not None:
        n_metas = metas.shape[1]
        places = get_places(n_metas)
        meta_vars = [
            StringVariable(get_name("Meta", m, places))
            for m in range(n_metas)
        ]
    else:
        meta_vars = []

    domain = cls(attr_vars, class_vars, meta_vars)
    domain.anonymous = True
    return domain
def vars_from_df(df, role=None, force_nominal=False):
    """
    Derive Orange variables and data blocks from the columns of a data frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame. It may carry ``orange_role`` and ``orange_variables``
        attributes, which are used when present.
    role : Role, optional
        Role for columns that have an entry in ``df.orange_variables``.
        When None, ``df.orange_role`` is used if the frame has it.
    force_nominal : bool
        Forwarded to ``_is_discrete``. When set, it also prevents merging
        all columns into a single meta block.

    Returns
    -------
    XYM : list
        Three entries, for attributes, class variables and metas. Each is
        an array, a ``csr_matrix`` or None.
    domain : Domain
        Domain built from the collected attributes, class variables and
        metas.
    """
    if role is None and hasattr(df, 'orange_role'):
        _role = df.orange_role
    else:
        _role = role

    # If df index is not a simple RangeIndex (or similar), put it into data
    if not any(str(i).startswith('_o') for i in df.index) \
            and not (df.index.is_integer()
                     and (df.index.is_monotonic_increasing
                          or df.index.is_monotonic_decreasing)):
        df = df.reset_index()

    # Parallel lists per role: column labels (*cols), optional per-column
    # conversion callables (*expr; None means "use the column as is") and
    # the variables themselves.
    Xcols, Ycols, Mcols = [], [], []
    Xexpr, Yexpr, Mexpr = [], [], []
    attrs, class_vars, metas = [], [], []

    contains_strings = _role == Role.Meta

    for column in df.columns:
        s = df[column]
        if hasattr(df, 'orange_variables') and column in df.orange_variables:
            # the frame already knows the variable; reuse a copy of it and
            # file it under the requested role
            original_var = df.orange_variables[column]
            var = original_var.copy(compute_value=None)
            if _role == Role.Attribute:
                Xcols.append(column)
                Xexpr.append(None)
                attrs.append(var)
            elif _role == Role.ClassAttribute:
                Ycols.append(column)
                Yexpr.append(None)
                class_vars.append(var)
            else:  # if role == Role.Meta:
                Mcols.append(column)
                Mexpr.append(None)
                metas.append(var)
        elif _is_discrete(s, force_nominal):
            discrete = s.astype('category').cat
            var = DiscreteVariable(str(column),
                                   discrete.categories.astype(str).tolist())
            attrs.append(var)
            Xcols.append(column)
            # category codes, with the code -1 replaced by NaN
            Xexpr.append(lambda s, _: np.asarray(
                s.astype('category').cat.codes.replace(-1, np.nan)
            ))
        elif _is_datetime(s):
            var = TimeVariable(str(column))
            # NOTE(review): the converted series is only bound to the local
            # `s`; the expression below is later applied to df[col] again.
            s = pd.to_datetime(s, infer_datetime_format=True)
            attrs.append(var)
            Xcols.append(column)
            Xexpr.append(lambda s, v: np.asarray(
                s.astype('str').replace('NaT', np.nan).map(v.parse)
            ))
        elif is_numeric_dtype(s):
            var = ContinuousVariable(
                # set number of decimals to 0 if int else keeps default behaviour
                str(column), number_of_decimals=(0 if is_integer_dtype(s) else None)
            )
            attrs.append(var)
            Xcols.append(column)
            Xexpr.append(None)
        else:
            contains_strings = True
            var = StringVariable(str(column))
            metas.append(var)
            Mcols.append(column)
            Mexpr.append(lambda s, _: np.asarray(s, dtype=object))

    # if role isn't explicitly set, try to
    # export dataframes into one contiguous block.
    # for this all columns must be of the same role
    if isinstance(df, OrangeDataFrame) \
            and not role \
            and contains_strings \
            and not force_nominal:
        # Everything is appended to the attribute lists, which are then
        # rebound as the meta lists; the attribute and class lists are
        # replaced by fresh empty ones afterwards.
        attrs.extend(class_vars)
        attrs.extend(metas)
        metas = attrs
        Xcols.extend(Ycols)
        Xcols.extend(Mcols)
        Mcols = Xcols
        Xexpr.extend(Yexpr)
        Xexpr.extend(Mexpr)
        Mexpr = Xexpr

        attrs, class_vars = [], []
        Xcols, Ycols = [], []
        Xexpr, Yexpr = [], []

    XYM = []
    for Avars, Acols, Aexpr in zip(
            (attrs, class_vars, metas),
            (Xcols, Ycols, Mcols),
            (Xexpr, Yexpr, Mexpr)):
        if not Acols:
            # NOTE(review): this compares lists by value, so whenever Xcols
            # is empty every empty block (not only X) gets the zero-column
            # array instead of None - confirm this is intended.
            A = None if Acols != Xcols else np.empty((df.shape[0], 0))
            XYM.append(A)
            continue
        if not any(Aexpr):
            # no conversions needed: take the columns as one block
            Adf = df if all(c in Acols for c in df.columns) else df[Acols]
            if all(isinstance(a, SparseDtype) for a in Adf.dtypes):
                A = csr_matrix(Adf.sparse.to_coo())
            else:
                A = np.asarray(Adf)
            XYM.append(A)
            continue
        # we'll have to copy the table to resolve any expressions
        # TODO eliminate expr (preprocessing for pandas -> table)
        A = np.array([expr(df[col], var) if expr else np.asarray(df[col])
                      for var, col, expr in zip(Avars, Acols, Aexpr)]).T
        XYM.append(A)

    return XYM, Domain(attrs, class_vars, metas)
def setUp(self):
    """Create descriptors for three continuous variables and a new model."""
    variables = [ContinuousVariable(name) for name in ("z", "w", "u")]
    self.descs = [owcolor.ContAttrDesc(var) for var in variables]
    self.model = owcolor.ContColorTableModel()
def table_from_frame(df, *, force_nominal=False):
    """
    Build an Orange.data.Table from a pandas.DataFrame.

    Categorical, date-time and numeric columns become attributes; all other
    columns become string metas.

    Parameters
    ----------
    df : pandas.DataFrame
    force_nominal : boolean
        If True, interpret ALL object-dtype (string) columns as nominal
        (DiscreteVariable).

    Returns
    -------
    Table
    """

    def _is_discrete(s):
        if is_categorical_dtype(s):
            return True
        if not is_object_dtype(s):
            return False
        # object columns count as discrete when forced or when they have
        # few distinct values relative to their size
        return force_nominal or s.nunique() < s.size**.666

    def _is_datetime(s):
        if is_datetime64_any_dtype(s):
            return True
        try:
            if not is_object_dtype(s):
                return False
            # object columns qualify only if they parse as date-times
            pd.to_datetime(s, infer_datetime_format=True)
        except Exception:  # pylint: disable=broad-except
            return False
        return True

    # If df index is not a simple RangeIndex (or similar), put it into data
    index = df.index
    plain_index = index.is_integer() and (
        index.is_monotonic_increasing or index.is_monotonic_decreasing)
    if not plain_index:
        df = df.reset_index()

    attrs, metas = [], []
    X, M = [], []

    for column, series in df.items():
        name = str(column)
        if _is_discrete(series):
            categorical = series.astype('category').cat
            values = categorical.categories.astype(str).tolist()
            attrs.append(DiscreteVariable(name, values))
            X.append(categorical.codes.replace(-1, np.nan).values)
        elif _is_datetime(series):
            tvar = TimeVariable(name)
            attrs.append(tvar)
            parsed = pd.to_datetime(series, infer_datetime_format=True)
            as_text = parsed.astype('str').replace('NaT', np.nan)
            X.append(as_text.map(tvar.parse).values)
        elif is_numeric_dtype(series):
            attrs.append(ContinuousVariable(name))
            X.append(series.values)
        else:
            metas.append(StringVariable(name))
            M.append(series.values.astype(object))

    domain = Domain(attrs, None, metas)
    x_block = np.column_stack(X) if X else np.empty((df.shape[0], 0))
    m_block = np.column_stack(M) if M else None
    return Table.from_numpy(domain, x_block, None, m_block)
def test_invalid_input_colors(self):
    """Send data whose variable has a non-sensical "colors" attribute."""
    var = ContinuousVariable("a")
    var.attributes["colors"] = "invalid"
    domain = Domain([var])
    table = Table.from_domain(domain)
    self.send_signal(self.widget.Inputs.data, table)
def test_var_key(self):
    """``variable_key`` pairs the name with a flag that is True for time."""
    continuous_key = variable_key(ContinuousVariable("foo"))
    self.assertEqual(continuous_key, ("foo", False))

    time_key = variable_key(TimeVariable("bar"))
    self.assertEqual(time_key, ("bar", True))
def setUp(self):
    """Create a descriptor for a continuous variable named "x"."""
    self.desc = owcolor.ContAttrDesc(ContinuousVariable("x"))
def setUp(self):
    """Create a 3x2 table of zeros with the second column set to 7 from row 1."""
    variables = [ContinuousVariable(name) for name in ("a", "b")]
    domain = Domain(variables)
    zeros = np.zeros((3, 2))
    self.data = Table.from_numpy(domain, zeros)
    self.data[1:, 1] = 7
from functools import wraps from itertools import chain from typing import Callable import numpy as np from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \ DiscreteVariable, TimeVariable from Orange.widgets.tests.base import WidgetTest from Orange.widgets.tests.utils import simulate from orangecontrib.prototypes.widgets.owfeaturestatistics import \ OWFeatureStatistics # Continuous variable variations continuous_full = [ ContinuousVariable('continuous_full'), np.array([0, 1, 2, 3, 4], dtype=float), ] continuous_missing = [ ContinuousVariable('continuous_missing'), np.array([0, 1, 2, np.nan, 4], dtype=float), ] continuous_all_missing = [ ContinuousVariable('continuous_all_missing'), np.array([np.nan] * 5, dtype=float), ] continuous_same = [ ContinuousVariable('continuous_same'), np.array([3] * 5, dtype=float), ] continuous = [