Ejemplo n.º 1
0
    def test_vector_sketch(self):
        """Check sketch summaries computed over an SArray of integer lists.

        Covers the top-level sketch, the element-length and flattened-element
        summaries, frequent items, and sub sketches requested with a single
        key, with a list of keys, and with a range of keys.
        """
        vector_data = [[], [1, 2], [3], [4, 5, 6, 7], [8, 9, 10], None]
        sa = SArray(data=vector_data)

        sketch = sa.sketch_summary()
        self.__validate_sketch_result(sketch, sa)
        # Expected element lengths are computed with the missing row dropped.
        self.__validate_sketch_result(sketch.element_length_summary(),
                                      sa.dropna().item_length())

        # Expected element summary: every value of every non-missing row,
        # flattened into a single SArray.
        flattened = list(itertools.chain.from_iterable(list(sa.dropna())))
        self.__validate_sketch_result(sketch.element_summary(),
                                      SArray(flattened))

        # Five entries are expected: the input holds five distinct
        # non-missing rows. Items are looked up by the string keys below.
        fi = sketch.frequent_items()
        self.assertEqual(len(fi), 5)
        self.assertEqual(fi['[1 2]'], 1)
        self.assertEqual(fi['[4 5 6 7]'], 1)

        # sub sketch with one key
        s = sa.sketch_summary(sub_sketch_keys=1).element_sub_sketch(1)
        expected = sa.vector_slice(1)
        self.__validate_sketch_result(s, expected)

        # sub sketch with multiple keys: the result is used as a mapping
        # from each requested key to its sketch.
        keys = [1, 3]
        s = sa.sketch_summary(sub_sketch_keys=keys).element_sub_sketch(keys)
        self.assertEqual(len(s), len(keys))
        for key in keys:
            # dict.has_key() no longer exists on Python 3; a membership
            # test behaves the same on Python 2 and 3.
            self.assertIn(key, s)
            expected = sa.vector_slice(key)
            self.__validate_sketch_result(s[key], expected)

        # Calling element_sub_sketch() with no argument is expected to
        # yield one entry per key passed to sketch_summary().
        indexes = range(0, 10)
        s = sa.sketch_summary(sub_sketch_keys=indexes).element_sub_sketch()
        self.assertEqual(len(s), len(indexes))
Ejemplo n.º 2
0
    def test_vector_sketch(self):
        """Exercise sketch_summary() on an SArray whose rows are integer lists.

        The input contains an empty row and a missing (None) row. The test
        validates the overall sketch, the per-row length summary, the
        flattened element summary, the frequent items, and the sub sketches
        obtained for one key, a list of keys, and a range of keys.
        """
        vector_data = [[], [1, 2], [3], [4, 5, 6, 7], [8, 9, 10], None]
        sa = SArray(data=vector_data)

        sketch = sa.sketch_summary()
        self.__validate_sketch_result(sketch, sa)
        # The missing row is dropped before the expected lengths are taken.
        self.__validate_sketch_result(sketch.element_length_summary(),
                                      sa.dropna().item_length())

        # Flatten all non-missing rows to build the expected element data.
        flattened = list(itertools.chain.from_iterable(list(sa.dropna())))
        self.__validate_sketch_result(sketch.element_summary(),
                                      SArray(flattened))

        # The five non-missing rows are all different, so five frequent
        # items are expected; each is addressed by a string key.
        fi = sketch.frequent_items()
        self.assertEqual(len(fi), 5)
        self.assertEqual(fi['[1 2]'], 1)
        self.assertEqual(fi['[4 5 6 7]'], 1)

        # sub sketch with one key
        s = sa.sketch_summary(sub_sketch_keys=1).element_sub_sketch(1)
        expected = sa.vector_slice(1)
        self.__validate_sketch_result(s, expected)

        # sub sketch with multiple keys
        keys = [1, 3]
        s = sa.sketch_summary(sub_sketch_keys=keys).element_sub_sketch(keys)
        self.assertEqual(len(s), len(keys))
        for key in keys:
            # Use a membership test rather than has_key(): the latter was
            # removed from dict in Python 3 and would raise AttributeError.
            self.assertIn(key, s)
            expected = sa.vector_slice(key)
            self.__validate_sketch_result(s[key], expected)

        # With no argument, element_sub_sketch() should return an entry for
        # every key that was requested from sketch_summary().
        indexes = range(0, 10)
        s = sa.sketch_summary(sub_sketch_keys=indexes).element_sub_sketch()
        self.assertEqual(len(s), len(indexes))
Ejemplo n.º 3
0
    def test_list_sketch(self):
        """Check the sketch summary of an SArray whose rows are lists.

        The rows mix integer and string elements, repeat one list, and
        include both an empty list and a missing value.
        """
        rows = [
            [],
            [1, 2],
            [1, 2],
            ['a', 'a', 'a', 'b'],
            [1, 1, 2],
            None,
        ]
        sa = SArray(rows)
        self.__validate_nested_sketch_result(sa)

        summary = sa.sketch_summary()
        # Four distinct non-missing rows: [], [1, 2], the string list and
        # [1, 1, 2].
        self.assertEqual(summary.num_unique(), 4)

        # Flatten the non-missing rows into one list of elements; the
        # expected SArray is built from it with str as the second argument.
        element_sketch = summary.element_summary()
        non_missing_rows = list(sa.dropna())
        flat_elements = list(itertools.chain(*non_missing_rows))
        self.__validate_sketch_result(element_sketch,
                                      SArray(flat_elements, str))

        frequent = summary.frequent_items()
        self.assertEqual(len(frequent), 4)
        # Frequent items are keyed by the string form of each row.
        expected_counts = (
            ('[1,2]', 2),
            ('["a","a","a","b"]', 1),
        )
        for item, count in expected_counts:
            self.assertEqual(frequent[item], count)
Ejemplo n.º 4
0
    def test_list_sketch(self):
        """Validate sketch results for an SArray of list-valued rows.

        Checks the nested sketch, the number of unique rows, the summary of
        the flattened elements, and the frequent-item counts.
        """
        sa = SArray([[], [1, 2], [1, 2],
                     ['a', 'a', 'a', 'b'], [1, 1, 2], None])
        self.__validate_nested_sketch_result(sa)

        sk = sa.sketch_summary()
        # [1, 2] appears twice, leaving four distinct non-missing rows.
        self.assertEqual(sk.num_unique(), 4)

        elem_sketch = sk.element_summary()
        # Chain the elements of every non-missing row into one flat list,
        # then compare against an SArray built from it with str passed as
        # the second argument.
        chained = itertools.chain.from_iterable(list(sa.dropna()))
        flat_sa = SArray(list(chained), str)
        self.__validate_sketch_result(elem_sketch, flat_sa)

        counts = sk.frequent_items()
        self.assertEqual(len(counts), 4)
        # Rows are looked up by their string representation.
        repeated_row_count = counts['[1,2]']
        string_row_count = counts['["a","a","a","b"]']
        self.assertEqual(repeated_row_count, 2)
        self.assertEqual(string_row_count, 1)