def test_domain_conversion_sparsity(self):
        """Check the sparsity flags of a DomainConversion into a fixed destination.

        The destination has three continuous attributes, one discrete class
        variable and one string meta.  The conversion is rebuilt from an empty
        source domain three times: with no variable flagged sparse, with only
        the attributes flagged, and with every destination variable flagged.
        """
        target = Domain(
            attributes=[ContinuousVariable(name=label) for label in ("a", "b", "c")],
            class_vars=[DiscreteVariable("d", values=["e"])],
            metas=[StringVariable("f")],
        )

        def convert_from_empty():
            # Every stage converts from a brand-new, attribute-less source.
            empty_source = Domain(attributes=[])
            return DomainConversion(empty_source, target)

        def mark_sparse(variables):
            for variable in variables:
                variable.sparse = True

        # Stage 1: nothing is flagged, so all three parts are dense.
        result = convert_from_empty()
        self.assertFalse(result.sparse_X)
        self.assertFalse(result.sparse_Y)
        self.assertFalse(result.sparse_metas)

        # Stage 2: only the destination attributes are flagged sparse.
        mark_sparse(target.attributes)
        result = convert_from_empty()
        self.assertTrue(result.sparse_X)
        self.assertFalse(result.sparse_Y)
        self.assertFalse(result.sparse_metas)

        # Stage 3: attributes, class variables and metas are all flagged.
        # The metas are still expected to be reported as dense here
        # (presumably because the only meta is a StringVariable -- confirm
        # against DomainConversion).
        mark_sparse(chain(target.variables, target.metas))
        result = convert_from_empty()
        self.assertTrue(result.sparse_X)
        self.assertTrue(result.sparse_Y)
        self.assertFalse(result.sparse_metas)
    def test_domain_conversion_is_fast_enough(self):
        """Check that converting out of a 10000-attribute domain stays within 1 s.

        The source domain holds 10000 attributes, 10 class variables and
        10 metas.  Three destination domains reuse the first 1000 attributes,
        the class variables and the metas in rotated roles; for each one the
        resulting index lists are verified, and the three conversions together
        must finish within one second.
        """
        # Function-local import keeps this block self-contained.  perf_counter
        # is monotonic, so a system clock adjustment during the test cannot
        # distort the measured duration the way wall-clock time() can.
        from time import perf_counter

        attrs = [ContinuousVariable("f%i" % i) for i in range(10000)]
        class_vars = [ContinuousVariable("c%i" % i) for i in range(10)]
        metas = [ContinuousVariable("m%i" % i) for i in range(10)]
        source = Domain(attrs, class_vars, metas)

        # Indices the conversion is expected to report for each group of
        # source variables: attributes 0..999, class variables 10000..10009,
        # metas -1..-10.
        attr_indices = list(range(1000))
        class_indices = list(range(10000, 10010))
        meta_indices = list(range(-1, -11, -1))

        # (destination Domain arguments, expected attributes / class_vars / metas)
        cases = (
            (
                (attrs[:1000], class_vars, metas),
                attr_indices,
                class_indices,
                meta_indices,
            ),
            (
                (metas, attrs[:1000], class_vars),
                meta_indices,
                attr_indices,
                class_indices,
            ),
            (
                (class_vars, metas, attrs[:1000]),
                class_indices,
                meta_indices,
                attr_indices,
            ),
        )

        start = perf_counter()
        # Loop names are distinct from the variable lists above so the
        # expected indices never shadow the source variables.
        for domain_args, expected_attrs, expected_class_vars, expected_metas in cases:
            conversion = DomainConversion(source, Domain(*domain_args))
            self.assertEqual(conversion.attributes, expected_attrs)
            self.assertEqual(conversion.class_vars, expected_class_vars)
            self.assertEqual(conversion.metas, expected_metas)

        self.assertLessEqual(perf_counter() - start, 1)
Ejemplo n.º 3
0
    def test_domain_conversion_is_fast_enough(self):
        """Check that converting out of a 10000-attribute domain stays within 1 s.

        The source domain holds 10000 attributes, 10 class variables and
        10 metas.  Three conversions place the first 1000 attributes, the
        class variables and the metas in rotated roles; each resulting index
        list is verified and the whole sequence must take at most one second.
        """
        # Function-local import keeps this block self-contained.  perf_counter
        # is monotonic, so a system clock adjustment during the test cannot
        # distort the measured duration the way wall-clock time() can.
        from time import perf_counter

        attrs = [ContinuousVariable("f%i" % i) for i in range(10000)]
        class_vars = [ContinuousVariable("c%i" % i) for i in range(10)]
        metas = [ContinuousVariable("m%i" % i) for i in range(10)]
        source = Domain(attrs, class_vars, metas)

        start = perf_counter()

        # Same roles as in the source domain.
        c1 = DomainConversion(source, Domain(attrs[:1000], class_vars, metas))
        self.assertEqual(c1.attributes, list(range(1000)))
        self.assertEqual(c1.class_vars, list(range(10000, 10010)))
        self.assertEqual(c1.metas, list(range(-1, -11, -1)))

        # Source metas become attributes, attributes become class variables,
        # class variables become metas.
        c2 = DomainConversion(source, Domain(metas, attrs[:1000], class_vars))
        self.assertEqual(c2.attributes, list(range(-1, -11, -1)))
        self.assertEqual(c2.class_vars, list(range(1000)))
        self.assertEqual(c2.metas, list(range(10000, 10010)))

        # Source class variables become attributes, metas become class
        # variables, attributes become metas.
        c3 = DomainConversion(source, Domain(class_vars, metas, attrs[:1000]))
        self.assertEqual(c3.attributes, list(range(10000, 10010)))
        self.assertEqual(c3.class_vars, list(range(-1, -11, -1)))
        self.assertEqual(c3.metas, list(range(1000)))

        self.assertLessEqual(perf_counter() - start, 1)
Ejemplo n.º 4
0
    def test_get_conversion(self):
        """Check the index lists DomainConversion builds between small domains.

        Each case converts from one domain to another and compares the
        conversion's ``attributes``, ``class_vars`` and ``metas`` with the
        expected lists.  In the expected values used here, a non-negative
        number is a position among the source's attributes/class variables,
        a negative number ``-1 - i`` is the i-th source meta, ``None`` marks a
        destination variable with no counterpart in the source, and a
        variable carrying ``compute_value`` is represented by that callable.
        """
        compute_value = lambda: 42
        new_income = income.copy(compute_value=compute_value, name='new_income')

        # age, gender, income, ssn, race and incomeA come from outside this
        # block (presumably module-level fixtures -- confirm).
        d = Domain((age, gender, income), metas=(ssn, race))
        e = Domain((gender, race), None, metas=(age, income, ssn))
        f = Domain((gender,), (race, income), metas=(age, incomeA, ssn))
        g = Domain((), metas=(age, gender, ssn))
        h = Domain((gender,), (race, income), metas=(age, new_income, ssn))

        # (source, destination, expected attributes, class_vars, metas)
        expectations = (
            (d, e, [1, -2], [], [0, 2, -1]),
            (d, f, [1], [-2, 2], [0, None, -1]),
            (f, g, [], [], [-1, 0, -3]),
            (g, h, [-2], [None, None], [-1, compute_value, -3]),
        )

        for origin, destination, want_attrs, want_class_vars, want_metas in expectations:
            conversion = DomainConversion(origin, destination)
            # The conversion must keep a reference to the very same source domain.
            self.assertIs(conversion.source, origin)
            self.assertEqual(conversion.attributes, want_attrs)
            self.assertEqual(conversion.class_vars, want_class_vars)
            self.assertEqual(conversion.metas, want_metas)
Ejemplo n.º 5
0
 def bench_selection(self):
     """Build a DomainConversion from ``self.domain`` and ``self.single``.

     Both attributes are prepared outside this block (presumably in the
     benchmark's setup -- confirm).  The result is discarded; only the
     construction itself is exercised.
     """
     DomainConversion(self.domain, self.single)
Ejemplo n.º 6
0
 def bench_full(self):
     """Build a DomainConversion from ``self.domain`` and ``self.domain_x``.

     Both attributes are prepared outside this block (presumably in the
     benchmark's setup -- confirm).  The result is discarded; only the
     construction itself is exercised.
     """
     DomainConversion(self.domain, self.domain_x)