コード例 #1
0
    def __get_pivot_tab_domain(self, val_var, X, X_h, X_v, X_t, agg_funs):
        """
        Build the four domains that describe the pivot table output.

        The type of the value columns is chosen from ``val_var`` and
        ``agg_funs``: time variables, continuous variables, or (otherwise)
        discrete variables whose values are read from the data arrays.

        Side effects:
            * in the discrete branch, the mapped columns of ``X``, ``X_h``,
              ``X_v`` and ``X_t`` are overwritten in place with value indices;
            * names that had to be changed to become unique are appended to
              ``self.renamed``;
            * ``self._row_var`` (and ``self._col_var`` when it is the same
              object) may be replaced by a renamed copy.

        Args:
            val_var: the aggregated variable; may be falsy.
            X, X_h: arrays whose columns from index 2 on hold the values of
                the per-column variables (used in the discrete branch only).
            X_v, X_t: arrays whose column 0 holds the values of the "Total"
                variables (used in the discrete branch only).
            agg_funs: aggregation functions; ``str(f)`` of each becomes a
                value of the 'Aggregate' variable.

        Returns:
            A tuple of four ``Domain`` objects, built from
            ``[row_var, aggr_attr] + attrs[0]``,
            ``[row_var_h, aggr_attr] + attrs[1]``, ``attrs[2]`` and
            ``attrs[3]`` respectively.
        """
        def map_values(index, _X):
            """Replace values in column ``index`` of ``_X`` by their rank
            among the unique values (in place) and return those unique
            values, with the string "nan" excluded."""
            values = np.unique(_X[:, index])
            values = np.delete(values, np.where(values == "nan")[0])
            for j, value in enumerate(values):
                _X[:, index][_X[:, index] == value] = j
            return values

        # Time variables are kept only if every aggregation is listed in
        # self.TimeVarFunctions.
        create_time_var = \
            isinstance(val_var, TimeVariable) and \
            all(fun in self.TimeVarFunctions for fun in agg_funs)
        # `and` binds tighter than `or`: a falsy val_var alone is sufficient;
        # otherwise val_var must be continuous and, if it is a TimeVariable,
        # every aggregation must be listed in self.FloatFunctions.
        create_cont_var = \
            not val_var or val_var.is_continuous and \
            (not isinstance(val_var, TimeVariable) or
             all(fun in self.FloatFunctions for fun in agg_funs))

        # Names of the value columns: column-variable values selected by
        # the group indices.
        vals = np.array(self._col_var.values)[self._col_var_groups.astype(int)]
        if create_time_var:
            kwargs = {
                "have_date": val_var.have_date,
                "have_time": val_var.have_time
            }
            # NOTE: `[...] * 2` repeats a reference, so attrs[0] and attrs[1]
            # are the same list object (likewise attrs[2] and attrs[3]).
            attrs = [[TimeVariable(f"{v}", **kwargs) for v in vals]] * 2
            attrs.extend([[TimeVariable("Total", **kwargs)]] * 2)
        elif create_cont_var:
            # Same aliasing as in the time branch above.
            attrs = [[ContinuousVariable(f"{v}", 1) for v in vals]] * 2
            attrs.extend([[ContinuousVariable("Total", 1)]] * 2)
        else:
            attrs = []
            # Here each of the four lists is built separately; map_values
            # rewrites the corresponding array columns in place.
            for x in (X, X_h):
                attrs.append([
                    DiscreteVariable(f"{v}", map_values(i, x))
                    for i, v in enumerate(vals, 2)
                ])
            for x in (X_v, X_t):
                attrs.append([DiscreteVariable("Total", map_values(0, x))])
        row_var_h = DiscreteVariable(self._row_var.name, values=["Total"])
        aggr_attr = DiscreteVariable('Aggregate', [str(f) for f in agg_funs])

        # Remember identity now; self._row_var may be replaced below.
        same_row_col = self._col_var is self._row_var

        extra_vars = [self._row_var, aggr_attr]
        uniq_a = get_unique_names_duplicates([v.name for v in extra_vars] +
                                             [atr.name for atr in attrs[0]])
        # idx 0 is the row variable, idx 1 the aggregate variable, and
        # idx >= 2 the value columns (offset by 2 into attrs[0]/attrs[1]).
        for (idx, var), u in zip(enumerate(chain(extra_vars, attrs[0])),
                                 uniq_a):
            if var.name == u:
                continue
            if idx == 0:
                self.renamed.append(self._row_var.name)
                self._row_var = self._row_var.copy(name=u)
                if same_row_col:
                    self._col_var = self._row_var
                row_var_h = row_var_h.copy(name=u)
            elif idx == 1:
                self.renamed.append(aggr_attr.name)
                aggr_attr = aggr_attr.copy(name=u)
            else:
                self.renamed.append(var.name)
                # In the time/continuous branches both assignments hit the
                # same (aliased) list, so the second copy is the one kept.
                attrs[0][idx - 2] = var.copy(name=u)
                attrs[1][idx - 2] = var.copy(name=u)

        if same_row_col:
            # NOTE(review): the return values of make() are discarded here;
            # presumably the calls are made for a side effect — confirm.
            vals = tuple(v.name for v in attrs[0])
            self._row_var.make(self._row_var.name, values=vals)
            vals = tuple(v.name for v in attrs[2])
            row_var_h.make(row_var_h.name, vals)

        return (Domain([self._row_var, aggr_attr] + attrs[0]),
                Domain([row_var_h, aggr_attr] + attrs[1]), Domain(attrs[2]),
                Domain(attrs[3]))
コード例 #2
0
# Test methods with descriptive names can omit docstrings
# pylint: disable=missing-docstring

from unittest import TestCase
from unittest.mock import Mock

from Orange.data import ContinuousVariable, DiscreteVariable, Domain
from Orange.widgets.settings import ContextSetting, PerfectDomainContextHandler, Context, Setting
from Orange.widgets.utils import vartype

# Values returned by `vartype` for a continuous and for a discrete variable;
# the tests below pair them with variable names to describe an encoded domain.
Continuous = vartype(ContinuousVariable("x"))
Discrete = vartype(DiscreteVariable("x"))


class TestPerfectDomainContextHandler(TestCase):
    def setUp(self):
        self.domain = Domain(attributes=[
            ContinuousVariable('c1'),
            DiscreteVariable('d1', values='abc'),
            DiscreteVariable('d2', values='def')
        ],
                             class_vars=[DiscreteVariable('d3', values='ghi')],
                             metas=[
                                 ContinuousVariable('c2'),
                                 DiscreteVariable('d4', values='jkl')
                             ])
        self.args = (self.domain, (('c1', Continuous), ('d1', Discrete),
                                   ('d2', Discrete)), (('d3', Discrete), ),
                     (('c2', Continuous), ('d4', Discrete)))
        self.args_match_all = (self.domain,
                               (('c1', Continuous), ('d1', list('abc')),
コード例 #3
0
 def constr_vars(inds):
     """
     Create continuous variables for the names in ``inds``.

     ``inds`` maps utf-8 encoded names to their positions. The variables are
     returned in order of increasing position (ties broken by name). A falsy
     ``inds`` (e.g. None or an empty mapping) yields None.
     """
     if not inds:
         return None
     # Sort by (position, name), exactly as sorting (ind, name) tuples would.
     by_position = sorted(inds.items(), key=lambda item: (item[1], item[0]))
     return [ContinuousVariable(raw_name.decode("utf-8"))
             for raw_name, _ in by_position]
コード例 #4
0
    def test_different_metas(self):
        """
        Test whether the widget does not show an error when the data and the
        reference have domains that differ only in metas (or in the order of
        attributes), and does show it when the attributes differ.
        """
        w = self.widget

        # 15 rows, two attributes and one meta, all random.
        domain = Domain([ContinuousVariable("a"),
                         ContinuousVariable("b")],
                        metas=[ContinuousVariable("c")])
        data = Table(domain,
                     np.random.rand(15, len(domain.attributes)),
                     metas=np.random.rand(15, len(domain.metas)))

        # same domain with same metas no error
        self.send_signal(w.Inputs.data, data)
        self.send_signal(w.Inputs.reference, data[:1])
        self.assertFalse(w.Error.diff_domains.is_shown())
        output = self.get_output(w.Outputs.data)
        # NOTE(review): 10 is presumably the widget's default number of
        # output rows — confirm against the widget settings.
        self.assertEqual(10, len(output))

        # same domain with different metas no error
        domain_ref = Domain(domain.attributes, metas=[ContinuousVariable("d")])
        # The meta width is taken from `domain`; it has the same number of
        # metas (one) as `domain_ref`.
        reference = Table(domain_ref,
                          np.random.rand(1, len(domain_ref.attributes)),
                          metas=np.random.rand(1, len(domain.metas)))
        self.send_signal(w.Inputs.data, data)
        self.send_signal(w.Inputs.reference, reference)
        self.assertFalse(w.Error.diff_domains.is_shown())
        output = self.get_output(w.Outputs.data)
        self.assertEqual(10, len(output))

        # same domain with different order - no error
        domain_ref = Domain(domain.attributes[::-1])
        reference = Table(domain_ref,
                          np.random.rand(1, len(domain_ref.attributes)))
        self.send_signal(w.Inputs.data, data)
        self.send_signal(w.Inputs.reference, reference)
        self.assertFalse(w.Error.diff_domains.is_shown())
        output = self.get_output(w.Outputs.data)
        self.assertEqual(10, len(output))

        # same domain with different number of metas no error
        domain_ref = Domain(
            domain.attributes,
            metas=[ContinuousVariable("d"),
                   ContinuousVariable("e")])
        reference = Table(domain_ref,
                          np.random.rand(1, len(domain_ref.attributes)),
                          metas=np.random.rand(1, len(domain_ref.metas)))
        self.send_signal(w.Inputs.data, data)
        self.send_signal(w.Inputs.reference, reference)
        self.assertFalse(w.Error.diff_domains.is_shown())
        output = self.get_output(w.Outputs.data)
        self.assertEqual(10, len(output))

        # different domain (one extra attribute) with same metas - error
        # shown and no output
        domain_ref = Domain(domain.attributes + (ContinuousVariable("e"), ),
                            metas=[ContinuousVariable("c")])
        reference = Table(domain_ref,
                          np.random.rand(1, len(domain_ref.attributes)),
                          metas=np.random.rand(1, len(domain_ref.metas)))
        self.send_signal(w.Inputs.data, data)
        self.send_signal(w.Inputs.reference, reference)
        self.assertTrue(w.Error.diff_domains.is_shown())
        output = self.get_output(w.Outputs.data)
        self.assertIsNone(output)
コード例 #5
0
 def _new_var(self):
     """
     Create the output variable under the name from ``_new_var_name()``.

     A TimeVariable is returned when the selected operation is listed in
     ``self.TimePreserving`` and every input variable is a TimeVariable;
     in all other cases a ContinuousVariable is returned.
     """
     new_name = self._new_var_name()
     keeps_time = self.operation in self.TimePreserving and all(
         isinstance(variable, TimeVariable) for variable in self.variables)
     var_class = TimeVariable if keeps_time else ContinuousVariable
     return var_class(new_name)
コード例 #6
0
class TestInstance(unittest.TestCase):
    """Tests for constructing, indexing, printing and comparing Instance."""

    # Plain names; create_domain turns strings into variables on demand.
    attributes = ["Feature %i" % i for i in range(10)]
    class_vars = ["Class %i" % i for i in range(1)]
    metas = [DiscreteVariable("Meta 1", values="XYZ"),
             ContinuousVariable("Meta 2"),
             StringVariable("Meta 3")]

    def mock_domain(self, with_classes=False, with_metas=False):
        """Return a MagicMock specced on Domain.

        The mock exposes ``attributes``, ``class_vars``, ``metas`` and
        ``variables``; class variables and metas are empty unless requested.
        """
        attributes = self.attributes
        class_vars = self.class_vars if with_classes else []
        metas = self.metas if with_metas else []
        variables = attributes + class_vars
        return MagicMock(Domain,
                         attributes=attributes,
                         class_vars=class_vars,
                         metas=metas,
                         variables=variables)

    def create_domain(self, attributes=(), classes=(), metas=()):
        """Return a real Domain.

        String items become ContinuousVariable (attributes and classes) or a
        five-valued DiscreteVariable (metas); other items are used as given.
        """
        attr_vars = [ContinuousVariable(name=a) if isinstance(a, str) else a
                     for a in attributes]
        class_vars = [ContinuousVariable(name=c) if isinstance(c, str) else c
                      for c in classes]
        meta_vars = [DiscreteVariable(name=m, values=map(str, range(5)))
                     if isinstance(m, str) else m
                     for m in metas]
        domain = Domain(attr_vars, class_vars, meta_vars)
        return domain

    def test_init_x_no_data(self):
        """Without data, an attributes-only instance is filled with NaN."""
        domain = self.mock_domain()
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape, (len(self.attributes), ))
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))
        self.assertTrue(all(isnan(x) for x in inst._values))
        self.assertTrue(all(isnan(x) for x in inst._x))

    def test_init_xy_no_data(self):
        """Without data, attributes and class values are all NaN."""
        domain = self.mock_domain(with_classes=True)
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape,
                         (len(self.attributes) + len(self.class_vars), ))
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (len(self.class_vars), ))
        self.assertEqual(inst._metas.shape, (0, ))
        self.assertTrue(all(isnan(x) for x in inst._values))
        self.assertTrue(all(isnan(x) for x in inst._x))
        self.assertTrue(all(isnan(x) for x in inst._y))

    def test_init_xym_no_data(self):
        """Without data, metas are Unknown except the string meta (None)."""
        domain = self.mock_domain(with_classes=True, with_metas=True)
        inst = Instance(domain)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape,
                         (len(self.attributes) + len(self.class_vars), ))
        self.assertEqual(inst._x.shape, (len(self.attributes), ))
        self.assertEqual(inst._y.shape, (len(self.class_vars), ))
        self.assertEqual(inst._metas.shape, (3, ))
        self.assertTrue(all(isnan(x) for x in inst._values))
        self.assertTrue(all(isnan(x) for x in inst._x))
        self.assertTrue(all(isnan(x) for x in inst._y))
        # FutureWarning is silenced for this array comparison only.
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(inst._metas, np.array([Unknown, Unknown, None]))

    def test_init_x_arr(self):
        """Attributes-only instance built from a numpy array."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        vals = np.array([42, 0])
        inst = Instance(domain, vals)
        assert_array_equal(inst._values, vals)
        assert_array_equal(inst._x, vals)
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        # Empty domain with an empty array.
        domain = self.create_domain()
        inst = Instance(domain, np.empty((0,)))
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_x_list(self):
        """Attributes-only instance built from a plain list."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        lst = [42, 0]
        vals = np.array(lst)
        # Pass the list itself so that the list code path is exercised;
        # `vals` is only the expected result.
        inst = Instance(domain, lst)
        assert_array_equal(inst._values, vals)
        assert_array_equal(inst._x, vals)
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

        # Empty domain with an empty list.
        domain = self.create_domain()
        inst = Instance(domain, [])
        self.assertEqual(inst._x.shape, (0, ))
        self.assertEqual(inst._y.shape, (0, ))
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_arr(self):
        """Instance with a class variable built from a numpy array."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        vals = np.array([42, 0, 1])
        inst = Instance(domain, vals)
        assert_array_equal(inst._values, vals)
        assert_array_equal(inst._x, vals[:2])
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 1)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xy_list(self):
        """Instance with a class variable built from a list of raw values."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        lst = [42, "M", "C"]
        # Expected encoding of `lst`: "M" -> 0 in "MF", "C" -> 2 in "ABC".
        vals = np.array([42, 0, 2])
        inst = Instance(domain, lst)
        assert_array_equal(inst._values, vals)
        assert_array_equal(inst._x, vals[:2])
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._y[0], 2)
        self.assertEqual(inst._metas.shape, (0, ))

    def test_init_xym_arr(self):
        """Instance with class and metas built from an object array."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = np.array([42, "M", "B", "X", 43, "Foo"], dtype=object)
        inst = Instance(domain, vals)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape, (3, ))
        self.assertEqual(inst._x.shape, (2, ))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._metas.shape, (3, ))
        assert_array_equal(inst._values, np.array([42, 0, 1]))
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y[0], 1)
        assert_array_equal(inst._metas, np.array([0, 43, "Foo"], dtype=object))

    def test_init_xym_list(self):
        """Instance with class and metas built from a plain list."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        self.assertIsInstance(inst, Instance)
        self.assertIs(inst.domain, domain)
        self.assertEqual(inst._values.shape, (3, ))
        self.assertEqual(inst._x.shape, (2, ))
        self.assertEqual(inst._y.shape, (1, ))
        self.assertEqual(inst._metas.shape, (3, ))
        assert_array_equal(inst._values, np.array([42, 0, 1]))
        assert_array_equal(inst._x, np.array([42, 0]))
        self.assertEqual(inst._y[0], 1)
        assert_array_equal(inst._metas, np.array([0, 43, "Foo"], dtype=object))

    def test_init_inst(self):
        """Instance built from another instance, same or different domain."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        # Same domain: values are carried over unchanged.
        inst2 = Instance(domain, inst)
        assert_array_equal(inst2._values, np.array([42, 0, 1]))
        assert_array_equal(inst2._x, np.array([42, 0]))
        self.assertEqual(inst2._y[0], 1)
        assert_array_equal(inst2._metas, np.array([0, 43, "Foo"], dtype=object))

        # Different domain: variables are shuffled between attributes and
        # metas; "z" and "w" do not exist in the source and become Unknown.
        domain2 = self.create_domain(["z", domain[1], self.metas[1]],
                                     domain.class_vars,
                                     [self.metas[0], "w", domain[0]])
        inst2 = Instance(domain2, inst)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", FutureWarning)
            assert_array_equal(inst2._values, np.array([Unknown, 0, 43, 1]))
            assert_array_equal(inst2._x, np.array([Unknown, 0, 43]))
            self.assertEqual(inst2._y[0], 1)
            assert_array_equal(inst2._metas, np.array([0, Unknown, 42],
                                                      dtype=object))

    def test_get_item(self):
        """Values can be read by index, by name and by variable."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        val = inst[0]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[0], 42)
        self.assertEqual(inst["x"], 42)
        self.assertEqual(inst[domain[0]], 42)

        val = inst[1]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[1], "M")
        self.assertEqual(inst["g"], "M")
        self.assertEqual(inst[domain[1]], "M")

        val = inst[2]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[2], "B")
        self.assertEqual(inst["y"], "B")
        self.assertEqual(inst[domain.class_var], "B")

        # Negative indices address metas.
        val = inst[-2]
        self.assertIsInstance(val, Value)
        self.assertEqual(inst[-2], 43)
        self.assertEqual(inst["Meta 2"], 43)
        self.assertEqual(inst[self.metas[1]], 43)

        # NOTE(review): these two checks assign rather than read, although
        # this is the get-item test — confirm whether reads were intended.
        with self.assertRaises(ValueError):
            inst["asdf"] = 42
        with self.assertRaises(ValueError):
            inst[ContinuousVariable("asdf")] = 42

    def test_set_item(self):
        """Values can be set by index, by name and by variable."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)

        inst[0] = 43
        self.assertEqual(inst[0], 43)
        inst["x"] = 44
        self.assertEqual(inst[0], 44)
        inst[domain[0]] = 45
        self.assertEqual(inst[0], 45)

        inst[1] = "F"
        self.assertEqual(inst[1], "F")
        inst["g"] = "M"
        self.assertEqual(inst[1], "M")
        # "N" is not among the values of "g"; "asdf" is not in the domain.
        with self.assertRaises(ValueError):
            inst[1] = "N"
        with self.assertRaises(ValueError):
            inst["asdf"] = 42

        inst[2] = "C"
        self.assertEqual(inst[2], "C")
        inst["y"] = "A"
        self.assertEqual(inst[2], "A")
        inst[domain.class_var] = "B"
        self.assertEqual(inst[2], "B")

        inst[-1] = "Y"
        self.assertEqual(inst[-1], "Y")
        inst["Meta 1"] = "Z"
        self.assertEqual(inst[-1], "Z")
        inst[domain.metas[0]] = "X"
        self.assertEqual(inst[-1], "X")

    def test_str(self):
        """str() shows attributes, ``| classes`` and ``{metas}``."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")])
        inst = Instance(domain, [42, 0])
        self.assertEqual(str(inst), "[42.000, M]")

        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")])
        inst = Instance(domain, [42, "M", "B"])
        self.assertEqual(str(inst), "[42.000, M | B]")

        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        inst = Instance(domain, [42, "M", "B", "X", 43, "Foo"])
        self.assertEqual(str(inst), "[42.000, M | B] {X, 43.000, Foo}")

        domain = self.create_domain([],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        inst = Instance(domain, ["B", "X", 43, "Foo"])
        self.assertEqual(str(inst), "[ | B] {X, 43.000, Foo}")

        domain = self.create_domain([],
                                    [],
                                    self.metas)
        inst = Instance(domain, ["X", 43, "Foo"])
        self.assertEqual(str(inst), "[] {X, 43.000, Foo}")

        # With ten attributes only the first five are printed.
        domain = self.create_domain(self.attributes)
        inst = Instance(domain, range(len(self.attributes)))
        self.assertEqual(str(inst), "[0.000, 1.000, 2.000, 3.000, 4.000, ...]")

        # The number of decimals is taken from each variable.
        for attr in domain:
            attr.number_of_decimals = 0
        self.assertEqual(str(inst), "[0, 1, 2, 3, 4, ...]")

    def test_eq(self):
        """Instances are equal only if all values and metas match."""
        domain = self.create_domain(["x", DiscreteVariable("g", values="MF")],
                                    [DiscreteVariable("y", values="ABC")],
                                    self.metas)
        vals = [42, "M", "B", "X", 43, "Foo"]
        inst = Instance(domain, vals)
        inst2 = Instance(domain, vals)
        self.assertTrue(inst == inst2)
        self.assertTrue(inst2 == inst)

        # A different or unknown attribute value.
        inst2[0] = 43
        self.assertFalse(inst == inst2)

        inst2[0] = Unknown
        self.assertFalse(inst == inst2)

        # A different class value.
        inst2 = Instance(domain, vals)
        inst2[2] = "C"
        self.assertFalse(inst == inst2)

        # A difference in any one of the three metas.
        inst2 = Instance(domain, vals)
        inst2[-1] = "Y"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-2] = "33"
        self.assertFalse(inst == inst2)

        inst2 = Instance(domain, vals)
        inst2[-3] = "Bar"
        self.assertFalse(inst == inst2)
コード例 #7
0
 def _get_projection_variables(self):
     """
     Return a pair of continuous variables for the two projection axes.

     Their names come from ``self.embedding_variables_names`` after being
     passed through ``get_unique_names`` together with the data's domain.
     """
     unique_names = get_unique_names(self.data.domain,
                                     self.embedding_variables_names)
     return tuple(ContinuousVariable(unique_names[axis]) for axis in (0, 1))
コード例 #8
0
ファイル: test_variable.py プロジェクト: tojojames/orange3
            stream.close = lambda: None  # HACK: Prevent closing of streams

        table = CSVReader(input_csv).read()
        self.assertIsInstance(table.domain['Date'], TimeVariable)
        self.assertEqual(table[0, 'Date'], '1920-12-12')
        # Dates before 1970 are negative
        self.assertTrue(all(inst['Date'] < 0 for inst in table))

        CSVReader.write_file(output_csv, table)
        self.assertEqual(input_csv.getvalue().splitlines(),
                         output_csv.getvalue().splitlines())


# Pickling tests for ContinuousVariable. Each entry passed to
# create_pickling_tests is a (label, zero-argument factory) pair.
# NOTE(review): presumably one pickling test is generated per pair —
# confirm against create_pickling_tests.
PickleContinuousVariable = create_pickling_tests(
    "PickleContinuousVariable",
    ("with_name", lambda: ContinuousVariable(name="Feature 0")),
)

# Pickling tests for DiscreteVariable: without values, with int and str
# values, ordered, and with a base value.
PickleDiscreteVariable = create_pickling_tests(
    "PickleDiscreteVariable",
    ("with_name", lambda: DiscreteVariable(name="Feature 0")),
    ("with_int_values",
     lambda: DiscreteVariable(name="Feature 0", values=[1, 2, 3])),
    ("with_str_value",
     lambda: DiscreteVariable(name="Feature 0", values=["F", "M"])),
    ("ordered", lambda: DiscreteVariable(
        name="Feature 0", values=["F", "M"], ordered=True)),
    ("with_base_value", lambda: DiscreteVariable(
        name="Feature 0", values=["F", "M"], base_value=0)))

PickleStringVariable = create_pickling_tests(
コード例 #9
0
ファイル: test_variable.py プロジェクト: tojojames/orange3
 def test_strange_eq(self):
     """An unnamed variable equals itself, but neither another unnamed
     variable nor an unrelated object such as a string."""
     first = ContinuousVariable()
     second = ContinuousVariable()
     self.assertEqual(first, first)
     for other in (second, "somestring"):
         self.assertNotEqual(first, other)
コード例 #10
0
ファイル: test_owmergedata.py プロジェクト: sckevmit/orange3
    def test_match_attr_name(self):
        """
        Check when choosing a variable in the left (data) combo
        automatically selects a matching entry in the right (extra data)
        combo of the first row.
        """
        widget = self.widget
        row = widget.attr_boxes.rows[0]
        data_combo, extra_combo = row.left_combo, row.right_combo

        # dataA: three discrete attributes, a discrete class "cls" and two
        # metas (discrete "mA1", string "mA2").
        domainA = Domain([DiscreteVariable("dA1", ("a", "b", "c", "d")),
                          DiscreteVariable("dA2", ("aa", "bb")),
                          DiscreteVariable("dA3", ("aa", "bb"))],
                         DiscreteVariable("cls", ("aaa", "bbb", "ccc")),
                         [DiscreteVariable("mA1", ("cc", "dd")),
                          StringVariable("mA2")])
        XA = np.array([[0, 0, 0], [1, 1, 0], [2, 0, 0], [3, 1, 0]])
        yA = np.array([0, 1, 2, np.nan])
        metasA = np.array([[0.0, "m1"], [1.0, "m2"], [np.nan, "m3"],
                           [0.0, "m4"]]).astype(object)

        # dataB reuses the names "dA2", "cls" and "dA1" from dataA, but with
        # other variable types or roles (continuous "dA2", string meta
        # "cls", discrete meta "dA1").
        domainB = Domain([DiscreteVariable("dB1", values=("a", "b", "c")),
                          ContinuousVariable("dA2")],
                         None,
                         [StringVariable("cls"),
                          DiscreteVariable("dA1", ("m4", "m5"))])
        XB = np.array([[0, 0], [1, 1], [2, np.nan]])
        yB = np.empty((3, 0))
        metasB = np.array([[np.nan, np.nan], [1, 1], [0, 0]]).astype(object)
        dataA = Table(domainA, XA, yA, metasA)
        dataA.name = 'dataA'
        dataA.attributes = 'dataA attributes'
        dataB = Table(domainB, XB, yB, metasB)
        dataB.name = 'dataB'
        dataB.attributes = 'dataB attributes'

        self.send_signal(widget.Inputs.data, dataA)
        self.send_signal(widget.Inputs.extra_data, dataB)

        # NOTE(review): the combo indices below depend on the order in which
        # the widget lists its entries (per the comments, 0 is Row Index and
        # 1 is ID); the meaning of the other indices is not visible here.

        # match variable if available and the other combo is Row Index
        extra_combo.setCurrentIndex(0)
        extra_combo.activated.emit(0)
        data_combo.setCurrentIndex(2)
        data_combo.activated.emit(2)
        self.assertEqual(extra_combo.currentIndex(), 5)

        # match variable if available and the other combo is ID
        extra_combo.setCurrentIndex(1)
        extra_combo.activated.emit(1)
        data_combo.setCurrentIndex(2)
        data_combo.activated.emit(2)
        self.assertEqual(extra_combo.currentIndex(), 5)

        # don't match variable if other combo is set
        extra_combo.setCurrentIndex(4)
        extra_combo.activated.emit(4)
        data_combo.setCurrentIndex(2)
        data_combo.activated.emit(2)
        self.assertEqual(extra_combo.currentIndex(), 4)

        # don't match if nothing to match to
        extra_combo.setCurrentIndex(0)
        extra_combo.activated.emit(0)
        data_combo.setCurrentIndex(4)
        data_combo.activated.emit(4)
        self.assertEqual(extra_combo.currentIndex(), 0)

        # don't match numeric with non-numeric
        extra_combo.setCurrentIndex(0)
        extra_combo.activated.emit(0)
        data_combo.setCurrentIndex(3)
        data_combo.activated.emit(3)
        self.assertEqual(extra_combo.currentIndex(), 0)

        # allow matching string with discrete
        extra_combo.setCurrentIndex(0)
        extra_combo.activated.emit(0)
        data_combo.setCurrentIndex(5)
        data_combo.activated.emit(5)
        self.assertEqual(extra_combo.currentIndex(), 4)
コード例 #11
0
ファイル: test_owmergedata.py プロジェクト: sckevmit/orange3
    def test_nonunique(self):
        """
        Check the nonunique_left / nonunique_right errors and the presence
        of output for different matching keys and join types.
        """
        widget = self.widget
        x = ContinuousVariable("x")
        d = DiscreteVariable("d", values=tuple("abc"))
        domain = Domain([x, d], [])
        # dataA: x = 1, 1, 2 (repeated value) and d = 0, 1, 1 (repeated).
        dataA = Table.from_numpy(
            domain, np.array([[1.0, 0], [1, 1], [2, 1]]))
        # dataB: x = 1, 2, 3 (all distinct) and d = 0, 1, 1 (repeated).
        dataB = Table.from_numpy(
            domain, np.array([[1.0, 0], [2, 1], [3, 1]]))
        # Give both tables the same instance ids so they can be matched by id.
        dataB.ids = dataA.ids
        self.send_signal(widget.Inputs.data, dataA)
        self.send_signal(widget.Inputs.extra_data, dataB)
        widget.merging = widget.InnerJoin

        self.assertFalse(widget.Error.nonunique_left.is_shown())
        self.assertFalse(widget.Error.nonunique_right.is_shown())

        # Matching by instance id: no error, output produced.
        widget.attr_boxes.set_state([(INSTANCEID, INSTANCEID)])
        widget.commit.now()
        self.assertFalse(widget.Error.nonunique_left.is_shown())
        self.assertFalse(widget.Error.nonunique_right.is_shown())
        self.assertIsNotNone(self.get_output(widget.Outputs.data))

        # Matching by row index: no error, output produced.
        widget.attr_boxes.set_state([(INDEX, INDEX)])
        widget.commit.now()
        self.assertFalse(widget.Error.nonunique_left.is_shown())
        self.assertFalse(widget.Error.nonunique_right.is_shown())
        self.assertIsNotNone(self.get_output(widget.Outputs.data))

        # Inner join on x: x repeats in dataA only, so only the left error
        # is shown and there is no output.
        widget.attr_boxes.set_state([(x, x)])
        widget.commit.now()
        self.assertTrue(widget.Error.nonunique_left.is_shown())
        self.assertFalse(widget.Error.nonunique_right.is_shown())
        self.assertIsNone(self.get_output(widget.Outputs.data))

        # Left join on x: the repeated values on the left are not an error.
        widget.merging = widget.LeftJoin
        widget.commit.now()
        self.assertFalse(widget.Error.nonunique_left.is_shown())
        self.assertFalse(widget.Error.nonunique_right.is_shown())
        self.assertIsNotNone(self.get_output(widget.Outputs.data))

        # Inner join on (x, d): the pairs are distinct in both tables.
        widget.merging = widget.InnerJoin
        widget.attr_boxes.set_state([(x, x), (d, d)])
        widget.commit.now()
        self.assertFalse(widget.Error.nonunique_left.is_shown())
        self.assertFalse(widget.Error.nonunique_right.is_shown())
        self.assertIsNotNone(self.get_output(widget.Outputs.data))

        # Inner join on d: d repeats in both tables, so both errors show.
        widget.attr_boxes.set_state([(d, d)])
        widget.commit.now()
        self.assertTrue(widget.Error.nonunique_left.is_shown())
        self.assertTrue(widget.Error.nonunique_right.is_shown())
        self.assertIsNone(self.get_output(widget.Outputs.data))

        # Left join on d: only the right-hand error remains.
        widget.merging = widget.LeftJoin
        widget.commit.now()
        self.assertFalse(widget.Error.nonunique_left.is_shown())
        self.assertTrue(widget.Error.nonunique_right.is_shown())
        self.assertIsNone(self.get_output(widget.Outputs.data))

        # Switching back to inner join brings the left error back.
        widget.merging = widget.InnerJoin
        widget.commit.now()
        self.assertTrue(widget.Error.nonunique_left.is_shown())
        self.assertTrue(widget.Error.nonunique_right.is_shown())
        self.assertIsNone(self.get_output(widget.Outputs.data))

        # Removing both inputs clears the errors and the output.
        self.send_signal(widget.Inputs.data, None)
        self.send_signal(widget.Inputs.extra_data, None)
        self.assertFalse(widget.Error.nonunique_left.is_shown())
        self.assertFalse(widget.Error.nonunique_right.is_shown())
        self.assertIsNone(self.get_output(widget.Outputs.data))
コード例 #12
0
    def test_select_data(self):
        """
        Test the widget's select_data function on missing data, on a
        discrete-class and a continuous-class data set, on a two-valued
        class, and on data with columns that contain only missing values.
        """
        w = self.widget

        # test for none data
        self.send_signal("Data", None)

        self.assertIsNone(w.select_data())  # result is none

        # test on iris: two attributes (attr_x, attr_y) and a two-valued
        # class whose values are the target class and 'Others'
        self.send_signal("Data", self.iris)
        self.assertEqual(len(w.select_data()), len(self.iris))
        self.assertEqual(len(w.select_data().domain.attributes), 2)
        self.assertEqual(len(w.select_data().domain.class_var.values), 2)
        self.assertEqual(w.select_data().domain.class_var.values[1], 'Others')
        self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
        self.assertEqual(w.select_data().domain.attributes[1].name, w.attr_y)
        self.assertEqual(w.select_data().domain.class_var.values[0],
                         w.target_class)

        # test on housing - continuous class, a single attribute (attr_x)
        self.send_signal("Data", self.housing)
        self.assertEqual(len(w.select_data()), len(self.housing))
        self.assertEqual(len(w.select_data().domain.attributes), 1)
        self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
        self.assertTrue(w.select_data().domain.class_var.is_continuous)

        # test with a data set for logistic regression - discrete class with
        # exactly two values, so the original class values are kept and no
        # 'Others' value is added
        domain = Domain([ContinuousVariable('a'),
                         ContinuousVariable('b')],
                        DiscreteVariable('c', values=['a', 'b']))
        data = Table(domain, [[1, 2], [1, 2]], [0, 1])

        self.send_signal("Data", data)
        self.assertEqual(len(w.select_data()), len(data))
        self.assertEqual(len(w.select_data().domain.attributes), 2)
        self.assertEqual(len(w.select_data().domain.class_var.values), 2)
        self.assertEqual(w.select_data().domain.class_var.values[1],
                         data.domain.class_var.values[1])
        self.assertEqual(w.select_data().domain.class_var.values[0],
                         data.domain.class_var.values[0])
        self.assertEqual(w.select_data().domain.attributes[0].name, w.attr_x)
        self.assertEqual(w.select_data().domain.attributes[1].name, w.attr_y)
        self.assertEqual(w.select_data().domain.class_var.values[0],
                         w.target_class)

        # selected data is None when one column contains only Nones
        data = Table(
            Domain([ContinuousVariable('a'),
                    ContinuousVariable('b')],
                   DiscreteVariable('c', values=['a', 'b'])),
            [[1, None], [1, None]], [0, 1])
        self.send_signal("Data", data)
        selected_data = w.select_data()
        self.assertIsNone(selected_data)

        # ... and also when both columns contain only Nones
        data = Table(
            Domain([ContinuousVariable('a'),
                    ContinuousVariable('b')],
                   DiscreteVariable('c', values=['a', 'b'])),
            [[None, None], [None, None]], [0, 1])
        self.send_signal("Data", data)
        selected_data = w.select_data()
        self.assertIsNone(selected_data)
コード例 #13
0
    def test_set_data(self):
        """
        Check the widget state after missing, invalid and valid input data
        """
        w = self.widget

        def assert_widget_cleared():
            # state expected whenever the widget holds no usable data
            self.assertIsNone(w.data)
            self.assertEqual(w.cbx.count(), 0)
            self.assertEqual(w.cby.count(), 0)
            self.assertEqual(w.target_class_combobox.count(), 0)
            self.assertIsNone(w.learner)
            self.assertIsNone(w.cost_grid)

        # freshly created widget
        assert_widget_cleared()

        # explicit None on the input
        self.send_signal("Data", None)
        assert_widget_cleared()

        # data without a class variable
        table_no_class = Table(
            Domain([ContinuousVariable("x"), ContinuousVariable("y")]),
            [[1, 2], [2, 3]])
        self.send_signal("Data", table_no_class)
        assert_widget_cleared()
        self.assertTrue(w.Error.no_class.is_shown())

        # class variable with a single value
        table_one_class = Table(
            Domain([ContinuousVariable("x"), ContinuousVariable("y")],
                   DiscreteVariable("a", values=["k"])),
            [[1, 2], [2, 3]], [0, 0])
        self.send_signal("Data", table_one_class)
        assert_widget_cleared()
        self.assertTrue(w.Error.to_few_values.is_shown())

        # discrete class, but only one continuous attribute
        table_no_enough_cont = Table(
            Domain([ContinuousVariable("x"),
                    DiscreteVariable("y", values=["a", "b"])],
                   DiscreteVariable("a", values=['a', 'b'])),
            [[1, 0], [2, 1]], [0, 1])
        self.send_signal("Data", table_no_enough_cont)
        assert_widget_cleared()
        self.assertTrue(w.Error.to_few_features.is_shown())

        # valid data with a discrete class - logistic regression
        iris_domain = self.iris.domain
        iris_classes = iris_domain.class_var.values
        num_continuous_attributes = sum(
            1 for var in iris_domain.attributes
            if isinstance(var, ContinuousVariable))

        self.send_signal("Data", self.iris)
        self.assertEqual(w.cbx.count(), num_continuous_attributes)
        self.assertEqual(w.cby.count(), num_continuous_attributes)
        self.assertEqual(w.target_class_combobox.count(), len(iris_classes))
        self.assertEqual(w.cbx.currentText(), iris_domain[0].name)
        self.assertEqual(w.cby.currentText(), iris_domain[1].name)
        self.assertEqual(w.target_class_combobox.currentText(),
                         iris_classes[0])

        self.assertEqual(w.attr_x, iris_domain[0].name)
        self.assertEqual(w.attr_y, iris_domain[1].name)
        self.assertEqual(w.target_class, iris_classes[0])

        # switching the shown attributes and the target class must be
        # reflected in the combo boxes
        w.attr_x = iris_domain[1].name
        w.attr_y = iris_domain[2].name
        w.target_class = iris_classes[1]

        self.assertEqual(w.cbx.currentText(), iris_domain[1].name)
        self.assertEqual(w.cby.currentText(), iris_domain[2].name)
        self.assertEqual(w.target_class_combobox.currentText(),
                         iris_classes[1])

        self.assertEqual(w.attr_x, iris_domain[1].name)
        self.assertEqual(w.attr_y, iris_domain[2].name)
        self.assertEqual(w.target_class, iris_classes[1])

        # removing the data resets the widget
        self.send_signal("Data", None)
        assert_widget_cleared()

        # continuous class, but no continuous attribute
        table_no_enough_cont = Table(
            Domain([DiscreteVariable("y", values=["a", "b"])],
                   ContinuousVariable("a")),
            [[1, 0], [2, 1]], [0, 1])
        self.send_signal("Data", table_no_enough_cont)
        assert_widget_cleared()
        self.assertTrue(w.Error.to_few_features.is_shown())

        # valid data with a continuous class - linear regression
        housing_domain = self.housing.domain
        num_continuous_attributes = sum(
            1 for var in housing_domain.attributes
            if isinstance(var, ContinuousVariable))

        self.send_signal("Data", self.housing)
        self.assertEqual(w.cbx.count(), num_continuous_attributes)
        self.assertEqual(w.cby.count(), 0)
        self.assertEqual(w.target_class_combobox.count(), 0)
        self.assertFalse(w.cby.isEnabled())
        self.assertFalse(w.target_class_combobox.isEnabled())
        self.assertEqual(w.cbx.currentText(), housing_domain[0].name)

        self.assertEqual(w.attr_x, housing_domain[0].name)

        # switching the shown attribute
        w.attr_x = housing_domain[1].name

        self.assertEqual(w.cbx.currentText(), housing_domain[1].name)

        self.assertEqual(w.attr_x, housing_domain[1].name)
コード例 #14
0
ファイル: owpredictions.py プロジェクト: zyblx/orange3
 def _add_regression_out_columns(slot, newmetas, newcolumns):
     """
     Append the regression output of one predictor slot to the given lists.

     A continuous variable named after ``slot.predictor`` is appended to
     ``newmetas`` and the slot's unmapped predictions, reshaped into a
     single column, are appended to ``newcolumns``. Both lists are
     modified in place.
     """
     meta_var = ContinuousVariable(name=slot.predictor.name)
     newmetas.append(meta_var)
     predicted = slot.results.unmapped_predicted
     newcolumns.append(predicted.reshape((-1, 1)))
コード例 #15
0
 def test_bool_raises_warning(self):
     """Truth-testing a domain must emit OrangeDeprecationWarning."""
     # domains are built outside the context so that only bool() is checked
     empty_domain = Domain([])
     with self.assertWarns(OrangeDeprecationWarning):
         bool(empty_domain)

     one_var_domain = Domain([ContinuousVariable("y")])
     with self.assertWarns(OrangeDeprecationWarning):
         bool(one_var_domain)
コード例 #16
0
ファイル: test_variable.py プロジェクト: tojojames/orange3
 def test_decimals(self):
     """``str_val`` honours the number of decimals and prints "?" for Unknown."""
     var = ContinuousVariable("a", 4)
     expectations = ((4.654321, "4.6543"), (Unknown, "?"))
     for value, text in expectations:
         self.assertEqual(var.str_val(value), text)
コード例 #17
0
    def assign_annotations(
        z_values, available_annotations, data, z_threshold=1, p_value_fun=PFUN_BINOMIAL, scoring=SCORING_EXP_RATIO
    ):
        """
        Find the annotations that are significant for each item (cell).

        The Orange tables are converted to pandas data frames, handed to
        ``AnnotateSamples.assign_annotations`` and the resulting frames are
        wrapped back into Orange tables.

        Parameters
        ----------
        z_values : Orange.data.Table
            Table which show z values for each item
        available_annotations : Orange.data.Table
            Available annotations (e.g. cell types). Its metas must include
            an "Entrez ID" column; rows whose value there is "?" are dropped.
        data : Orange.data.Table
            Tabular data with gene expressions - we need that to compute
            scores. It must carry a tax id in its table attributes and at
            least one attribute with an "Entrez ID" entry.
        z_threshold : float
            The threshold for selecting the attribute. For each item the
            attributes with z-value above this value are selected.
        p_value_fun : str, optional (default=PFUN_BINOMIAL)
            A function that calculates p-value. It can be either
            PFUN_BINOMIAL that uses statistics.Binomial().p_value or
            PFUN_HYPERGEOMETRIC that uses hypergeom.sf.
        scoring : str, optional (default=SCORING_EXP_RATIO)
            Type of scoring

        Returns
        -------
        Orange.data.Table
            Annotation probabilities
        Orange.data.Table
            Annotation fdrs
        """
        # sanity checks on the expression table
        assert TAX_ID in data.attributes, "The input table needs to have a tax_id attribute"
        assert any(
            "Entrez ID" in var.attributes for var in data.domain.attributes
        ), "Input data do not contain gene expression data."

        # number of genes known for the organism
        n_genes = len(GeneInfo(data.attributes[TAX_ID]))

        # Orange tables -> pandas data frames
        z_frame, _ = AnnotateSamplesMeta._to_pandas(z_values, use_entrez_id=True)
        expression_frame, _ = AnnotateSamplesMeta._to_pandas(data, use_entrez_id=True)

        # marker genes; the framework expects the column to be called "Gene"
        # instead of "Entrez ID"
        meta_names = [str(meta) for meta in available_annotations.domain.metas]
        meta_names[meta_names.index("Entrez ID")] = "Gene"
        markers = pd.DataFrame(available_annotations.metas, columns=meta_names)
        known_gene = markers["Gene"] != "?"
        markers = markers[known_gene]

        scores, fdrs = AnnotateSamples.assign_annotations(
            z_frame,
            markers,
            expression_frame,
            n_genes,
            z_threshold=z_threshold,
            p_value_fun=p_value_fun,
            scoring=scoring,
        )

        # both results share one domain with a continuous variable per
        # column of the score frame
        domain = Domain([ContinuousVariable(name) for name in scores.columns.values])
        return Table(domain, scores.values), Table(domain, fdrs.values)
コード例 #18
0
 def test_warnings(self):
     """Calling ``Table`` as a constructor must emit OrangeDeprecationWarning."""
     domain = Domain([ContinuousVariable("x")])
     with self.assertWarns(OrangeDeprecationWarning):
         Table(domain)

     # the argument is created outside the context, only the outer call
     # is checked
     empty_table = Table()
     with self.assertWarns(OrangeDeprecationWarning):
         Table(domain, empty_table)

     with self.assertWarns(OrangeDeprecationWarning):
         Table(domain, [[12]])

     zeros = np.zeros((5, 5))
     with self.assertWarns(OrangeDeprecationWarning):
         Table(zeros)
コード例 #19
0
ファイル: owpredictions.py プロジェクト: haojia632/orange3
 def _regression_output_columns(self):
     """
     Build the output meta variables and columns for regression.

     Returns a pair ``(newmetas, newcolumns)``: one continuous variable per
     valid predictor (named after it) and, for each of them, the first
     element of its results reshaped into a single column.
     """
     slots = self._valid_predictors()

     newmetas = []
     for slot in slots:
         newmetas.append(ContinuousVariable(name=slot.name))

     newcolumns = []
     for slot in slots:
         newcolumns.append(slot.results[0].reshape((-1, 1)))

     return newmetas, newcolumns
コード例 #20
0
 def test_continous(self):
     """Run the shared checks on a continuous variable."""
     self._test_common(ContinuousVariable("X"))
コード例 #21
0
    def send_data(self):
        """
        Send the annotated data and the centroid table to the outputs.

        The clustering is looked up in ``self.clusterings`` by k: the k of
        the selected row when ``optimize_k`` is set, ``self.k`` otherwise.
        If there is no data, no clustering for that k, or the stored entry
        is a string, ``None`` is sent on both outputs.
        """
        if self.optimize_k:
            row = self.selected_row()
            k = None if row is None else self.k_from + row
        else:
            k = self.k

        km = self.clusterings.get(k)
        nothing_to_send = \
            self.data is None or km is None or isinstance(km, str)
        if nothing_to_send:
            self.Outputs.annotated_data.send(None)
            self.Outputs.centroids.send(None)
            return

        domain = self.data.domain
        cluster_var = DiscreteVariable(
            get_unique_names(domain, "Cluster"),
            values=["C%d" % number for number in range(1, km.k + 1)])
        clust_ids = km.labels
        silhouette_var = ContinuousVariable(
            get_unique_names(domain, "Silhouette"))

        if km.silhouette_samples is None:
            # no silhouettes available: warn and fill everything with nan
            self.Warning.no_silhouettes()
            scores = np.nan
            clust_scores = np.full((km.k, 1), np.nan)
        else:
            self.Warning.no_silhouettes.clear()
            # arctan(x) / pi + 0.5 maps the silhouette values into (0, 1)
            scores = np.arctan(km.silhouette_samples) / np.pi + 0.5
            memberships = (clust_ids == index for index in range(km.k))
            # mean score of each cluster, 0 for clusters without members
            clust_scores = np.atleast_2d([
                np.mean(scores[members]) if members.any() else 0.
                for members in memberships
            ]).T

        new_domain = add_columns(domain, metas=[cluster_var, silhouette_var])
        new_table = self.data.transform(new_domain)
        with new_table.unlocked(new_table.metas):
            new_table.get_column_view(cluster_var)[0][:] = clust_ids
            new_table.get_column_view(silhouette_var)[0][:] = scores

        # where an attribute of the clustering domain is a ReplaceUnknowns
        # of an attribute of the input domain, use that input attribute
        domain_attributes = set(domain.attributes)
        centroid_attributes = []
        for attr in km.domain.attributes:
            transform = attr.compute_value
            if isinstance(transform, ReplaceUnknowns) \
                    and transform.variable in domain_attributes:
                attr = transform.variable
            centroid_attributes.append(attr)

        centroid_domain = add_columns(
            Domain(centroid_attributes, [], domain.metas),
            metas=[cluster_var, silhouette_var])

        # The table is built from a copy of the centroids: data stored in
        # the widget can be modified, so the widget should preferably output
        # a copy. There are few centroids, so copying them is cheap.
        centroid_x = km.centroids.copy()
        # metas: nan for the original metas, then cluster index and score
        centroid_metas = np.hstack((
            np.full((km.k, len(domain.metas)), np.nan),
            np.arange(km.k).reshape(km.k, 1),
            clust_scores))
        centroids = Table(centroid_domain, centroid_x, None, centroid_metas)

        unnamed = self.data.name == Table.name
        centroids.name = \
            "centroids" if unnamed else f"{self.data.name} centroids"

        self.Outputs.annotated_data.send(new_table)
        self.Outputs.centroids.send(centroids)
コード例 #22
0
    def from_numpy(cls, X, Y=None, metas=None):
        """
        Create a domain corresponding to the given numpy arrays. This method
        is usually invoked from :meth:`Orange.data.Table.from_numpy`.

        All attributes are assumed to be continuous and are named
        "Feature <n>". Target variables are discrete if the only two values
        are 0 and 1; otherwise they are continuous. Discrete
        targets are named "Class <n>" and continuous are named "Target <n>".
        Meta attributes are string variables named "Meta <n>". In all cases
        <n> starts at 1 and is zero-padded to the width of the largest
        index; when there is a single column, the number is omitted.
        Domain is marked as :attr:`anonymous`, so data from any other domain of
        the same shape can be converted into this one and vice-versa.

        :param `numpy.ndarray` X: 2-dimensional array with data
        :param Y: 1- or 2-dimensional data for target
        :type Y: `numpy.ndarray` or None
        :param `numpy.ndarray` metas: meta attributes
        :type metas: `numpy.ndarray` or None
        :return: a new domain
        :rtype: :class:`Domain`
        :raises ValueError: if `X` is not 2-dimensional or `Y` is neither
            1- nor 2-dimensional
        """
        def get_places(max_index):
            # Number of digits used for the index; 0 means "no index".
            # Counting digits of the decimal representation is exact, while
            # int(log(n, 10)) + 1 fails for n == 0 and is off by one for
            # some powers of ten (e.g. log(1000, 10) < 3 in floating point).
            return 0 if max_index <= 1 else len(str(max_index))

        def get_name(base, index, places):
            # `index` is zero-based; the shown number is one-based
            return base if not places \
                else "{} {:0{}}".format(base, index + 1, places)

        if X.ndim != 2:
            raise ValueError('X must be a 2-dimensional array')
        n_attrs = X.shape[1]
        places = get_places(n_attrs)
        attr_vars = [
            ContinuousVariable(name=get_name("Feature", a, places))
            for a in range(n_attrs)
        ]
        class_vars = []
        if Y is not None:
            if Y.ndim == 1:
                Y = Y.reshape(len(Y), 1)
            elif Y.ndim != 2:
                raise ValueError('Y has invalid shape')
            n_classes = Y.shape[1]
            places = get_places(n_classes)
            for i, values in enumerate(Y.T):
                if set(values) == {0, 1}:
                    name = get_name('Class', i, places)
                    values = ['v1', 'v2']
                    class_vars.append(DiscreteVariable(name, values))
                else:
                    # get_name already converts to a one-based number, so
                    # the zero-based index is passed, as for 'Class'
                    name = get_name('Target', i, places)
                    class_vars.append(ContinuousVariable(name))
        if metas is not None:
            n_metas = metas.shape[1]
            places = get_places(n_metas)
            meta_vars = [
                StringVariable(get_name("Meta", m, places))
                for m in range(n_metas)
            ]
        else:
            meta_vars = []

        domain = cls(attr_vars, class_vars, meta_vars)
        domain.anonymous = True
        return domain
コード例 #23
0
ファイル: pandas_compat.py プロジェクト: matejklemen/orange3
def vars_from_df(df, role=None, force_nominal=False):
    """
    Derive Orange variables and the matching data arrays from a data frame.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to convert. If it has an ``orange_variables`` mapping,
        columns found there reuse a copy of the stored variable; if it has
        an ``orange_role`` and `role` is None, that role is used.
    role : Role, optional
        Role used for columns that come from ``df.orange_variables``.
    force_nominal : bool
        Passed to ``_is_discrete``; when set, the "move everything to
        metas" step below is also skipped.

    Returns
    -------
    XYM : list
        Three entries, for attributes, class variables and metas, in that
        order. Each is an array (or a sparse matrix when all the selected
        columns are sparse) or None.
    domain : Domain
        ``Domain(attrs, class_vars, metas)`` built from the variables.
    """
    if role is None and hasattr(df, 'orange_role'):
        _role = df.orange_role
    else:
        _role = role

    # If df index is not a simple RangeIndex (or similar), put it into data
    if not any(str(i).startswith('_o') for i in df.index) \
            and not (df.index.is_integer() and (df.index.is_monotonic_increasing
                                                or df.index.is_monotonic_decreasing)):
        df = df.reset_index()

    # Parallel lists per role: column labels (?cols), optional conversion
    # callables (?expr; None means "use the column as it is") and variables
    Xcols, Ycols, Mcols = [], [], []
    Xexpr, Yexpr, Mexpr = [], [], []
    attrs, class_vars, metas = [], [], []

    contains_strings = _role == Role.Meta
    for column in df.columns:
        s = df[column]
        if hasattr(df, 'orange_variables') and column in df.orange_variables:
            # known Orange variable: copy it without compute_value and put
            # it where the role says (anything else than Attribute or
            # ClassAttribute ends up in metas)
            original_var = df.orange_variables[column]
            var = original_var.copy(compute_value=None)
            if _role == Role.Attribute:
                Xcols.append(column)
                Xexpr.append(None)
                attrs.append(var)
            elif _role == Role.ClassAttribute:
                Ycols.append(column)
                Yexpr.append(None)
                class_vars.append(var)
            else:  # if role == Role.Meta:
                Mcols.append(column)
                Mexpr.append(None)
                metas.append(var)
        elif _is_discrete(s, force_nominal):
            # categorical data becomes a discrete attribute; the values are
            # the category codes, with -1 (missing) replaced by nan
            discrete = s.astype('category').cat
            var = DiscreteVariable(str(column),
                                   discrete.categories.astype(str).tolist())
            attrs.append(var)
            Xcols.append(column)
            Xexpr.append(lambda s, _: np.asarray(
                s.astype('category').cat.codes.replace(-1, np.nan)
            ))
        elif _is_datetime(s):
            # date/time data becomes a time attribute; values are parsed by
            # the variable from their string form
            var = TimeVariable(str(column))
            # NOTE(review): this rebinding of `s` is not used afterwards;
            # the conversion callable receives the column from `df` again
            s = pd.to_datetime(s, infer_datetime_format=True)
            attrs.append(var)
            Xcols.append(column)
            Xexpr.append(lambda s, v: np.asarray(
                s.astype('str').replace('NaT', np.nan).map(v.parse)
            ))
        elif is_numeric_dtype(s):
            var = ContinuousVariable(
                # set number of decimals to 0 if int else keeps default behaviour
                str(column), number_of_decimals=(0 if is_integer_dtype(s) else None)
            )
            attrs.append(var)
            Xcols.append(column)
            Xexpr.append(None)
        else:
            # everything else is stored as a string meta
            contains_strings = True
            var = StringVariable(str(column))
            metas.append(var)
            Mcols.append(column)
            Mexpr.append(lambda s, _: np.asarray(s, dtype=object))

    # if role isn't explicitly set, try to
    # export dataframes into one contiguous block.
    # for this all columns must be of the same role
    if isinstance(df, OrangeDataFrame) \
            and not role \
            and contains_strings \
            and not force_nominal:
        # append class variables and metas to the attribute lists, let the
        # meta names refer to these merged lists ...
        attrs.extend(class_vars)
        attrs.extend(metas)
        metas = attrs
        Xcols.extend(Ycols)
        Xcols.extend(Mcols)
        Mcols = Xcols
        Xexpr.extend(Yexpr)
        Xexpr.extend(Mexpr)
        Mexpr = Xexpr

        # ... and rebind the attribute and class names to new empty lists
        # (the merged lists stay reachable through the meta names)
        attrs, class_vars = [], []
        Xcols, Ycols = [], []
        Xexpr, Yexpr = [], []

    XYM = []
    for Avars, Acols, Aexpr in zip(
            (attrs, class_vars, metas),
            (Xcols, Ycols, Mcols),
            (Xexpr, Yexpr, Mexpr)):
        if not Acols:
            # no columns for this role: an empty (rows x 0) array for X,
            # None otherwise.
            # NOTE(review): `!=` compares the lists by value, so when Xcols
            # is empty, an empty Ycols/Mcols also yields the empty array
            # rather than None - confirm whether identity was intended
            A = None if Acols != Xcols else np.empty((df.shape[0], 0))
            XYM.append(A)
            continue
        if not any(Aexpr):
            # no conversion needed: take the columns in one piece, using
            # the whole frame when every frame column is in this role
            Adf = df if all(c in Acols
                            for c in df.columns) else df[Acols]
            if all(isinstance(a, SparseDtype) for a in Adf.dtypes):
                A = csr_matrix(Adf.sparse.to_coo())
            else:
                A = np.asarray(Adf)
            XYM.append(A)
            continue
        # we'll have to copy the table to resolve any expressions
        # TODO eliminate expr (preprocessing for pandas -> table)
        A = np.array([expr(df[col], var) if expr else np.asarray(df[col])
                      for var, col, expr in zip(Avars, Acols, Aexpr)]).T
        XYM.append(A)

    return XYM, Domain(attrs, class_vars, metas)
コード例 #24
0
 def setUp(self):
     """Create descriptors for three continuous variables and an empty model."""
     variables = [ContinuousVariable(name) for name in ("z", "w", "u")]
     self.descs = list(map(owcolor.ContAttrDesc, variables))
     self.model = owcolor.ContColorTableModel()
コード例 #25
0
def table_from_frame(df, *, force_nominal=False):
    """
    Convert pandas.DataFrame to Orange.data.Table

    Categorical columns (and object columns that look nominal) become
    discrete attributes, date/time columns become time attributes, other
    numeric columns become continuous attributes and everything else is
    stored as string metas.

    Parameters
    ----------
    df : pandas.DataFrame
    force_nominal : boolean
        If True, interpret ALL string columns as nominal (DiscreteVariable).

    Returns
    -------
    Table
    """
    def _is_discrete(s):
        if is_categorical_dtype(s):
            return True
        if not is_object_dtype(s):
            return False
        # object columns are nominal on request, or when the number of
        # distinct values is small compared to the column size
        return force_nominal or s.nunique() < s.size**.666

    def _is_datetime(s):
        if is_datetime64_any_dtype(s):
            return True
        # object columns count as date/time if pandas can parse them
        try:
            parseable = is_object_dtype(s)
            if parseable:
                pd.to_datetime(s, infer_datetime_format=True)
        except Exception:  # pylint: disable=broad-except
            parseable = False
        return parseable

    # If df index is not a simple RangeIndex (or similar), put it into data
    index = df.index
    simple_index = index.is_integer() and (index.is_monotonic_increasing
                                           or index.is_monotonic_decreasing)
    if not simple_index:
        df = df.reset_index()

    attrs, metas = [], []
    attr_columns, meta_columns = [], []

    for label, series in df.items():
        name = str(label)
        if _is_discrete(series):
            categorical = series.astype('category').cat
            values = categorical.categories.astype(str).tolist()
            attrs.append(DiscreteVariable(name, values))
            # code -1 marks a missing value
            attr_columns.append(categorical.codes.replace(-1, np.nan).values)
        elif _is_datetime(series):
            time_var = TimeVariable(name)
            attrs.append(time_var)
            as_datetime = pd.to_datetime(series, infer_datetime_format=True)
            as_text = as_datetime.astype('str').replace('NaT', np.nan)
            attr_columns.append(as_text.map(time_var.parse).values)
        elif is_numeric_dtype(series):
            attrs.append(ContinuousVariable(name))
            attr_columns.append(series.values)
        else:
            metas.append(StringVariable(name))
            meta_columns.append(series.values.astype(object))

    domain = Domain(attrs, None, metas)
    if attr_columns:
        X = np.column_stack(attr_columns)
    else:
        X = np.empty((df.shape[0], 0))
    M = np.column_stack(meta_columns) if meta_columns else None
    return Table.from_numpy(domain, X, None, M)
コード例 #26
0
    def test_invalid_input_colors(self):
        """Send data whose variable carries an invalid "colors" attribute."""
        var = ContinuousVariable("a")
        var.attributes["colors"] = "invalid"
        table = Table.from_domain(Domain([var]))

        # there are no assertions: the signal is only sent to the widget
        self.send_signal(self.widget.Inputs.data, table)
コード例 #27
0
 def test_var_key(self):
     """``variable_key`` pairs the name with a flag that is True for time variables."""
     continuous_key = variable_key(ContinuousVariable("foo"))
     self.assertEqual(continuous_key, ("foo", False))

     time_key = variable_key(TimeVariable("bar"))
     self.assertEqual(time_key, ("bar", True))
コード例 #28
0
 def setUp(self):
     """Create a descriptor for a single continuous variable."""
     self.desc = owcolor.ContAttrDesc(ContinuousVariable("x"))
コード例 #29
0
 def setUp(self):
     """Create a 3 x 2 table of zeros with 7 in the last two rows of column 'b'."""
     attributes = [ContinuousVariable(name) for name in ('a', 'b')]
     self.data = table = Table.from_numpy(Domain(attributes),
                                          np.zeros((3, 2)))
     table[1:, 1] = 7
コード例 #30
0
from functools import wraps
from itertools import chain
from typing import Callable

import numpy as np

from Orange.data import Table, Domain, StringVariable, ContinuousVariable, \
    DiscreteVariable, TimeVariable
from Orange.widgets.tests.base import WidgetTest
from Orange.widgets.tests.utils import simulate
from orangecontrib.prototypes.widgets.owfeaturestatistics import \
    OWFeatureStatistics

# Continuous variable variations: each fixture is a [variable, values] pair
# with five float values
continuous_full = [
    ContinuousVariable('continuous_full'),
    np.arange(5, dtype=float),
]
continuous_missing = [
    ContinuousVariable('continuous_missing'),
    np.array([0., 1., 2., np.nan, 4.]),
]
continuous_all_missing = [
    ContinuousVariable('continuous_all_missing'),
    np.full(5, np.nan),
]
continuous_same = [
    ContinuousVariable('continuous_same'),
    np.full(5, 3.),
]
continuous = [