예제 #1
0
    def test_batch_apply_items_a(self) -> None:
        '''
        ``Batch.apply_items`` hands each label and Frame to the function; the serial and the threaded run are expected to produce the same pairs.
        '''
        frames = (
                Frame.from_dict(dict(a=(1,2), b=(3,4)), index=('x', 'y'), name='f1'),
                Frame.from_dict(dict(c=(1,2,3), b=(4,5,6)), index=('x', 'y', 'z'), name='f2'),
                Frame.from_dict(dict(d=(10,20), b=(50,60)), index=('x', 'q'), name='f3'),
                )
        # each row holds the (label, mean of column b) tuple returned for that Frame
        expected = ((None, (('f1', ('f1', 3.5)), ('f2', ('f2', 5.0)), ('f3', ('f3', 55.0)))),)

        serial = Batch.from_frames(frames).apply_items(
                lambda k, x: (k, x['b'].mean()))
        self.assertEqual(serial.to_frame().to_pairs(0), expected)

        threaded = Batch.from_frames(frames, use_threads=True, max_workers=8).apply_items(
                lambda k, x: (k, x['b'].mean()))
        self.assertEqual(threaded.to_frame().to_pairs(0), expected)
예제 #2
0
    def test_batch_to_zip_pickle_a(self) -> None:
        '''
        Write three Frames to a zip pickle archive, read them back, and compare each loaded Frame with its source.
        '''
        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True
                )
        sources = (
                Frame.from_dict(dict(a=(1,2), b=(3,4)), index=('x', 'y'), name='f1'),
                Frame.from_dict(dict(a=(1,2,3), b=(4,5,6)), index=('x', 'y', 'z'), name='f2'),
                Frame.from_dict(dict(a=(10,20), b=(50,60)), index=('p', 'q'), name='f3'),
                )
        batch = Batch.from_frames(sources)

        with temp_file('.zip') as fp:
            batch.to_zip_pickle(fp, config=config)
            # materialize while the temporary file still exists
            loaded = dict(Batch.from_zip_pickle(fp, config=config).items())

        for source in sources:
            # dtypes are excluded from this comparison
            self.assertEqualFrames(source, loaded[source.name], compare_dtype=False)
예제 #3
0
    def test_batch_f(self) -> None:
        '''
        Selecting a single column with ``loc`` over grouped Frames and summing gives one value per group label.
        '''
        source = Frame.from_dict({'a':[1,2,3], 'b':[2,4,6], 'group': ['x','z','z']})

        selected = Batch(source.iter_group_items('group')).loc[:, 'b']
        totals = selected.sum().to_frame()

        expected = ((None, (('x', 2), ('z', 10))),)
        self.assertEqual(totals.to_pairs(0), expected)
예제 #4
0
    def test_batch_apply_b(self) -> None:
        '''
        ``Batch.apply`` with a function returning a shape tuple (threaded), then with a function returning a single element.
        '''
        fa = Frame.from_dict(dict(a=(1,2), b=(3,4)), index=('x', 'y'), name='f1')
        fb = Frame.from_dict(dict(c=(1,2,3), b=(4,5,6)), index=('x', 'y', 'z'), name='f2')
        fc = Frame.from_dict(dict(d=(10,20), b=(50,60)), index=('x', 'q'), name='f3')

        shapes = Batch.from_frames(
                (fa, fb, fc),
                use_threads=True,
                max_workers=8,
                ).apply(lambda x: x.shape)
        self.assertEqual(
                shapes.to_frame().to_pairs(0),
                ((None, (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2)))),),
                )

        # a second Frame named 'f2', built directly from an array
        fd = Frame(np.arange(4).reshape(2, 2), name='f2')
        corners = Batch.from_frames((fa, fd)).apply(lambda f: f.iloc[1, 1])
        post = corners.to_frame(fill_value=0.0)

        self.assertEqual(post.to_pairs(0), ((None, (('f1', 4), ('f2', 3))),))
예제 #5
0
    def test_batch_apply_items_b(self) -> None:
        '''
        ``Batch.apply_items`` can choose a different selection per group label: three rows for the ``(True, True)`` group, one row for every other group.
        '''
        source = ff.parse('s(20,4)|v(bool,bool,int,float)|c(I,str)|i(I,str)')

        groups = Batch(source.iter_group_items(['zZbu', 'ztsv']))
        post = groups.apply_items(
                lambda k, f: f.iloc[:1] if k != (True, True) else f.iloc[:3]
                ).to_frame()

        # every column shares the same (group label, source index label) row labels
        rows = (
            ((False, False), 'zZbu'),
            ((False, True), 'zr4u'),
            ((True, False), 'zkuW'),
            ((True, True), 'zIA5'),
            ((True, True), 'zGDJ'),
            ((True, True), 'zo2Q'),
            )
        columns = (
            ('zZbu', (False, False, True, True, True, True)),
            ('ztsv', (False, True, False, True, True, True)),
            ('zUvW', (-3648, 197228, 54020, 194224, 172133, -88017)),
            ('zkuW', (1080.4, 3884.48, 3338.48, -1760.34, 1857.34, 268.96)),
            )
        expected = tuple(
            (label, tuple(zip(rows, values))) for label, values in columns
            )

        self.assertEqual(post.to_pairs(0), expected)
예제 #6
0
    def test_batch_g(self) -> None:
        '''
        Reducing each Frame to a single value (a double ``sum``, or an element selection via ``apply``) yields one row per Frame under a column labelled None.
        '''
        labels = dict(index=('a', 'b'), columns=('x', 'y', 'z'))
        ints = Frame(np.arange(6).reshape(2,3), name='f1', **labels)
        floats = Frame(np.arange(6).reshape(2,3) * 30.5, name='f2', **labels)

        totals = Batch.from_frames((ints, floats)).sum().sum().to_frame()
        self.assertEqual(
                totals.to_pairs(0),
                ((None, (('f1', 15.0), ('f2', 457.5))),),
                )

        firsts = Batch.from_frames((ints, floats)).apply(lambda f: f.iloc[0, 0]).to_frame()
        self.assertEqual(
                firsts.to_pairs(0),
                ((None, (('f1', 0.0), ('f2', 0.0))),),
                )
예제 #7
0
    def test_batch_to_npz(self) -> None:
        '''
        Round-trip two distinctly named Frames through a zip NPZ archive and require that each loaded Frame equals its source in name, dtype, and class.
        '''
        f1 = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)').rename('a')
        f2 = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)').rename('b')

        b1 = Batch.from_frames((f1, f2))
        with temp_file('.zip') as fp:
            b1.to_zip_npz(fp)
            b2 = Batch.from_zip_npz(fp)
            frames = dict(b2.items())

            # check every Frame that was written, not only the first
            for source in (f1, f2):
                self.assertTrue(frames[source.name].equals(
                        source,
                        compare_name=True,
                        compare_dtype=True,
                        compare_class=True,
                        ))
예제 #8
0
    def test_batch_e(self) -> None:
        '''
        Column selection on a Batch of groups, followed by a reduction and, separately, by unary negation.
        '''
        source = Frame.from_dict({'a':[1,2,3], 'b':[2,4,6], 'group': ['x','z','z']})
        selection = ['a', 'b']

        summed = Batch(source.iter_group_items('group'))[selection].sum().to_frame()
        self.assertEqual(
                summed.to_pairs(0),
                (('a', (('x', 1), ('z', 5))), ('b', (('x', 2), ('z', 10)))),
                )

        # a new group iterator is created for the second Batch
        negated = -Batch(source.iter_group_items('group'))[selection]
        combined = Frame.from_concat(negated.values)
        self.assertEqual(
                combined.to_pairs(0),
                (('a', ((0, -1), (1, -2), (2, -3))), ('b', ((0, -2), (1, -4), (2, -6)))),
                )
예제 #9
0
    def test_batch_display_a(self) -> None:
        '''
        The display of a Batch shows each label beside a ``<Frame>`` placeholder.
        '''
        source = Frame.from_dict({'a':[1,2,3], 'b':[2,4,6], 'group': ['x','z','z']})
        config = DisplayConfig.from_default(type_color=False)

        batch = Batch(source.iter_group_items('group'))[['a', 'b']]
        rows = batch.display(config).to_rows()

        expected = [
            '<Batch>',
            '<Index>',
            'x       <Frame>',
            'z       <Frame>',
            '<<U1>   <object>',
            ]
        self.assertEqual(rows, expected)
예제 #10
0
 def b2() -> None:
     '''
     Apply ``func_b`` across eight Frames with ``use_threads=False``, eight workers, and a chunksize of two, consuming every resulting item.
     '''
     frames = (f1, f2, f3, f4, f5, f6, f7, f8)
     source = Batch.from_frames(
             frames,
             max_workers=8,
             use_threads=False,
             chunksize=2,
             )
     # materializing the items forces the deferred work to run
     _ = tuple(source.apply(func_b).items())
예제 #11
0
    def test_batch_drop_c(self) -> None:
        '''
        ``Batch.drop.loc`` with a row slice and a column label; check the columns and the index labels that remain in each Frame.
        '''
        f_unnamed = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)')
        f_named = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)').rename('b')

        # a Batch is built anew for each of the two checks
        dropped = Batch.from_frames((f_unnamed, f_named)).drop.loc[-3648:, 'zZbu']
        remaining_columns = [list(v.columns) for _, v in dropped.items()] #type: ignore
        self.assertEqual(
            remaining_columns,
            [['ztsv'], ['ztsv', 'zUvW', 'zkuW', 'zmVj']],
            )

        dropped = Batch.from_frames((f_unnamed, f_named)).drop.loc[-3648:, 'zZbu']
        remaining_index = [list(v.index) for _, v in dropped.items()]
        self.assertEqual(remaining_index, [[34715], [34715]])
예제 #12
0
 def get_instance(cls, target: tp.Type[ContainerBase]) -> ContainerBase:
     '''
     Return a sample instance of ``target``. The instance is built on the first request and stored in ``_CLS_TO_INSTANCE_CACHE``; later requests for the same class return the stored instance.
     '''
     cache = cls._CLS_TO_INSTANCE_CACHE
     if target in cache:
         return cache[target]

     # the order of these checks is significant: exact-class matches and more specific subclass checks come before the general fallbacks
     if target is TypeBlocks:
         instance = target.from_blocks(np.array((0,))) #type: ignore
     elif target is Bus:
         frame = Frame.from_elements((0,), name='frame')
         instance = target.from_frames((frame,)) #type: ignore
     elif target is Batch:
         instance = Batch(iter(()))
     elif target in (DisplayConfig, StoreFilter, StoreConfig):
         instance = target()
     elif issubclass(target, IndexHierarchy):
         instance = target.from_labels(((0,0),))
     elif issubclass(target, (IndexYearMonth, IndexYear, IndexDate)):
         instance = target(np.array((0,), dtype=DT64_S))
     elif target in (ContainerOperand, ContainerBase, IndexBase):
         instance = target()
     elif issubclass(target, Frame):
         instance = target.from_elements((0,))
     else:
         # remaining classes are constructed from a one-element tuple
         instance = target((0,)) #type: ignore

     cache[target] = instance
     return instance
예제 #13
0
    def test_batch_iloc_cov_a(self) -> None:
        '''
        ``Batch.cov`` with its default axis and with ``axis=0``; results are concatenated with ``to_frame``.
        '''
        frames = (
                Frame.from_dict(dict(b=(1,2,3), a=(4,5,6)), index=('z', 'y', 'x'), name='f1'),
                Frame.from_dict(dict(b=(1,10,100), a=(1,2,3)), index=('y', 'z', 'x'), name='f2'),
                )

        # default axis: rows are labelled (Frame name, column label)
        by_column = Batch.from_frames(frames).cov().to_frame()
        column_rows = (('f1', 'b'), ('f1', 'a'), ('f2', 'b'), ('f2', 'a'))
        self.assertEqual(by_column.to_pairs(), (
                ('b', tuple(zip(column_rows, (1.0, 1.0, 2997.0, 49.5)))),
                ('a', tuple(zip(column_rows, (1.0, 1.0, 49.5, 1.0)))),
                ))

        # axis 0: rows are labelled (Frame name, index label)
        by_index = Batch.from_frames(frames).cov(axis=0).to_frame()
        index_rows = (
                ('f1', 'z'), ('f1', 'y'), ('f1', 'x'),
                ('f2', 'y'), ('f2', 'z'), ('f2', 'x'),
                )
        self.assertEqual(by_index.to_pairs(), (
                ('x', tuple(zip(index_rows, (4.5, 4.5, 4.5, 0.0, 388.0, 4704.5)))),
                ('y', tuple(zip(index_rows, (4.5, 4.5, 4.5, 0.0, 0.0, 0.0)))),
                ('z', tuple(zip(index_rows, (4.5, 4.5, 4.5, 0.0, 32.0, 388.0)))),
                ))
예제 #14
0
    def test_batch_count_a(self) -> None:
        '''
        ``Batch.count`` along each axis for Frames that contain NaN.
        '''
        frames = (
                Frame.from_dict(dict(b=(20,20,0), a=(20,20,np.nan)), index=('z', 'y', 'x'), name='f1'),
                Frame.from_dict(dict(b=(1,np.nan,1), a=(1,50,1)), index=('y', 'z', 'x'), name='f2'),
                )

        per_column = Batch.from_frames(frames).count(axis=0).to_frame()
        self.assertEqual(
            per_column.to_pairs(0),
            (('b', (('f1', 3), ('f2', 2))), ('a', (('f1', 2), ('f2', 3)))),
            )

        per_row = Batch.from_frames(frames).count(axis=1).to_frame()
        self.assertEqual(
            per_row.to_pairs(0),
            (('x', (('f1', 1), ('f2', 2))), ('y', (('f1', 2), ('f2', 2))), ('z', (('f1', 2), ('f2', 1)))),
            )
예제 #15
0
    def test_batch_b(self) -> None:
        '''
        Selecting one column from a Batch of groups and summing gives a single total per group.
        '''
        source = Frame.from_dict(
                {'a':[1,49,2,3], 'b':[2,4,381, 6], 'group': ['x', 'x','z','z']},
                index=('r', 's', 't', 'u'),
                )

        groups = Batch(source.iter_group_items('group'))
        totals = groups['b'].sum().to_frame()

        self.assertEqual(totals.to_pairs(0), ((None, (('x', 6), ('z', 387))),))
예제 #16
0
    def test_batch_i(self) -> None:
        '''
        ``Batch.drop`` by column label over two Frames that were not given distinct names.
        '''
        narrow = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)')
        wide = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)')

        dropped = Batch.from_frames((narrow, wide)).drop['zZbu']
        remaining = [list(v.columns) for _, v in dropped.items()]

        self.assertEqual(
            remaining,
            [['ztsv'], ['ztsv', 'zUvW', 'zkuW', 'zmVj']],
            )
예제 #17
0
    def test_batch_shapes_a(self) -> None:
        '''
        ``Batch.shapes`` reports the shape of every contained Frame, keyed by the Batch label.
        '''
        f1 = Frame.from_dict({'a':[1,2,3], 'b':[2,4,6], 'group': ['x','z','z']})

        # select two columns from each group so every Frame has two columns
        b1 = Batch(f1.iter_group_items('group'))[['a', 'b']]

        self.assertEqual(b1.shapes.to_pairs(),
                (('x', (1, 2)), ('z', (2, 2)))
                )
예제 #18
0
    def test_batch_iloc_max_a(self) -> None:
        '''
        ``Batch.iloc_max`` gives, per Frame and column, the integer position of the maximum value.
        '''
        frames = (
                Frame.from_dict(dict(b=(20, 20, 0), a=(20, 20, 50)), index=('z', 'y', 'x'), name='f1'),
                Frame.from_dict(dict(b=(1, 20, 1), a=(1, 50, 1)), index=('y', 'z', 'x'), name='f2'),
                )

        positions = Batch.from_frames(frames).iloc_max().to_frame()

        expected = (
                ('b', (('f1', 0), ('f2', 1))),
                ('a', (('f1', 2), ('f2', 1))),
                )
        self.assertEqual(positions.to_pairs(0), expected)
예제 #19
0
    def test_batch_iter_a(self) -> None:
        '''
        Iterating a Batch yields its labels.
        '''
        frames = (
                Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
                Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
                )

        accumulated = Batch.from_frames(frames, name='foo').cumsum()
        labels = list(accumulated)

        self.assertEqual(labels, ['f1', 'f2'])
예제 #20
0
    def test_batch_slotted_a(self) -> None:
        '''
        A Batch rejects assignment of a new attribute and raises when ``__dict__`` is accessed.
        '''
        source = Frame.from_dict(
                dict(a=(1, 2), b=(3, 4)),
                index=('x', 'y'),
                name='foo',
                )
        batch = Batch.from_frames((source,))

        with self.assertRaises(AttributeError):
            batch.g = 30  # type: ignore #pylint: disable=E0237

        with self.assertRaises(AttributeError):
            batch.__dict__  #pylint: disable=W0104
예제 #21
0
    def test_batch_bloc_a(self) -> None:
        '''
        ``Batch.bloc`` applies one Boolean Frame selector to every Frame in the Batch.
        '''
        first = Frame.from_dict(
                dict(a=(10, 20, 0), b=(30, 40, 50)),
                index=('x', 'y', 'z'),
                name='f1',
                )
        second = Frame.from_dict(
                dict(c=(1, 2, 3), b=(4, 5, 6)),
                index=('x', 'y', 'z'),
                name='f2',
                )

        # the selector is derived from the second Frame only
        selected = Batch.from_frames((first, second)).bloc[second >= 2]
        post = [s.values.tolist() for s in selected.values]

        self.assertEqual(post, [[30, 40, 50], [4, 2, 5, 3, 6]])
예제 #22
0
    def test_batch_iloc_b(self) -> None:
        '''
        ``Batch.iloc`` with a (row, column) position pair, processed with threads.
        '''
        frames = (
                Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
                Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
                )

        batch = Batch.from_frames(frames, max_workers=8, use_threads=True)
        picked = batch.iloc[1, 1]
        post = [s.values.tolist() for s in picked.values]

        self.assertEqual(post, [[4], [5]])
예제 #23
0
    def test_batch_drop_duplicated_a(self) -> None:
        '''
        ``Batch.drop_duplicated`` leaves, in each Frame, only the row that has no duplicate.
        '''
        frames = (
                Frame.from_dict(dict(b=(20, 20, 0), a=(20, 20, 50)), index=('z', 'y', 'x'), name='f1'),
                Frame.from_dict(dict(b=(1, 20, 1), a=(1, 50, 1)), index=('y', 'z', 'x'), name='f2'),
                )

        unique_rows = Batch.from_frames(frames).drop_duplicated().to_frame()

        survivors = (('f1', 'x'), ('f2', 'z'))
        expected = (
                ('b', tuple(zip(survivors, (0, 20)))),
                ('a', tuple(zip(survivors, (50, 50)))),
                )
        self.assertEqual(unique_rows.to_pairs(0), expected)
예제 #24
0
    def test_batch_to_zip_parquet_a(self) -> None:
        '''
        Round-trip two Frames through a zip archive of parquet files, then scale and sum each loaded Frame and compare the totals.
        '''
        f1 = Frame.from_dict(dict(a=(1, 2), b=(3, 4)),
                             index=('x', 'y'),
                             name='f1')
        f2 = Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)),
                             index=('x', 'y', 'z'),
                             name='f2')

        config = StoreConfig(index_depth=1,
                             columns_depth=1,
                             include_columns=True,
                             include_index=True)

        b1 = Batch.from_frames((f1, f2), config=config)

        # use the zip-parquet writer and reader that this test is named for
        with temp_file('.zip') as fp:
            b1.to_zip_parquet(fp)
            b2 = (Batch.from_zip_parquet(fp, config=config) * 20).sum()

            # evaluate inside the context so the archive still exists when the Batch is consumed
            self.assertEqual(b2.to_frame().to_pairs(0),
                             (('a', (('f1', 60), ('f2', 120))),
                              ('b', (('f1', 140), ('f2', 300)))))
예제 #25
0
    def test_batch_sort_values_a(self) -> None:
        '''
        ``Batch.sort_values`` orders the rows of each Frame by the given column.
        '''
        frames = (
                Frame.from_dict(dict(b=(50,20,0), a=(30,40,50)), index=('z', 'y', 'x'), name='f1'),
                Frame.from_dict(dict(b=(3,2,1), a=(4,5,6)), index=('y', 'z', 'x'), name='f2'),
                )

        ordered = Batch.from_frames(frames).sort_values('b').to_frame()

        # rows are labelled (Frame name, index label), in sorted order within each Frame
        rows = (
                ('f1', 'x'), ('f1', 'y'), ('f1', 'z'),
                ('f2', 'x'), ('f2', 'z'), ('f2', 'y'),
                )
        expected = (
                ('b', tuple(zip(rows, (0, 20, 50, 1, 2, 3)))),
                ('a', tuple(zip(rows, (50, 40, 30, 6, 5, 4)))),
                )
        self.assertEqual(ordered.to_pairs(0), expected)
예제 #26
0
    def test_batch_a(self) -> None:
        '''
        Multiplying a Batch by a scalar multiplies every contained Frame; string values are repeated.
        '''
        source = Frame.from_dict(
                {'a':[1,49,2,3], 'b':[2,4,381, 6], 'group': ['x', 'x','z','z']},
                index=('r', 's', 't', 'u'),
                )

        tripled = Batch(source.iter_group_items('group')) * 3
        items = tuple(tripled.items())

        # inspect the Frame of the first group only
        _, first_frame = items[0]
        self.assertEqual(
                first_frame.to_pairs(0),
                (('a', (('r', 3), ('s', 147))), ('b', (('r', 6), ('s', 12))), ('group', (('r', 'xxx'), ('s', 'xxx')))),
                )
예제 #27
0
    def test_batch_to_frame_a(self) -> None:
        '''
        ``Batch.to_frame`` with ``fill_value`` when the contained Frames do not share all columns.
        '''
        frames = (
                Frame.from_dict(dict(a=(10,20,0), b=(30,40,50)), index=('x', 'y', 'z'), name='f1'),
                Frame.from_dict(dict(c=(1,2,3), b=(4,5,6)), index=('x', 'y', 'z'), name='f2'),
                )

        batch = Batch.from_frames(frames)
        combined = batch.loc['y':].to_frame(fill_value=0) #type: ignore

        rows = (('f1', 'y'), ('f1', 'z'), ('f2', 'y'), ('f2', 'z'))
        expected = (
                ('a', tuple(zip(rows, (20, 0, 0, 0)))),
                ('b', tuple(zip(rows, (40, 50, 5, 6)))),
                ('c', tuple(zip(rows, (0, 0, 2, 3)))),
                )
        self.assertEqual(combined.to_pairs(0), expected)
예제 #28
0
    def test_batch_round_a(self) -> None:
        '''
        The built-in ``round`` applied to a Batch rounds the values of every contained Frame.
        '''
        frames = (
                Frame.from_dict(
                        dict(b=(20, 20.234, 0), a=(20.234, 20.234, 50.828)),
                        index=('z', 'y', 'x'),
                        name='f1'),
                Frame.from_dict(
                        dict(b=(1, 20.234, 1.043), a=(1.043, 50.828, 1.043)),
                        index=('y', 'z', 'x'),
                        name='f2'),
                )

        rounded = round(Batch.from_frames(frames), 1).to_frame() #type: ignore

        rows = (
                ('f1', 'z'), ('f1', 'y'), ('f1', 'x'),
                ('f2', 'y'), ('f2', 'z'), ('f2', 'x'),
                )
        expected = (
                ('b', tuple(zip(rows, (20.0, 20.2, 0.0, 1.0, 20.2, 1.0)))),
                ('a', tuple(zip(rows, (20.2, 20.2, 50.8, 1.0, 50.8, 1.0)))),
                )
        self.assertEqual(rounded.to_pairs(0), expected)
예제 #29
0
    def test_batch_transpose_a(self) -> None:
        '''
        ``Batch.transpose`` swaps the index and columns of every contained Frame.
        '''
        frames = (
                Frame.from_dict(dict(b=(10,20,0), a=(30,40,50)), index=('z', 'y', 'x'), name='f1'),
                Frame.from_dict(dict(b=(1,20,3), a=(4,50,6)), index=('y', 'z', 'x'), name='f2'),
                )

        flipped = Batch.from_frames(frames).transpose().to_frame()

        # after transposition the original column labels appear in the row labels
        rows = (('f1', 'b'), ('f1', 'a'), ('f2', 'b'), ('f2', 'a'))
        expected = (
                ('x', tuple(zip(rows, (0, 50, 3, 6)))),
                ('y', tuple(zip(rows, (20, 40, 1, 4)))),
                ('z', tuple(zip(rows, (10, 30, 20, 50)))),
                )
        self.assertEqual(flipped.to_pairs(0), expected)
예제 #30
0
    def test_batch_clip_a(self) -> None:
        '''
        ``Batch.clip`` bounds the values of every contained Frame between ``lower`` and ``upper``.
        '''
        frames = (
                Frame.from_dict(dict(b=(10,20,0), a=(30,40,50)), index=('z', 'y', 'x'), name='f1'),
                Frame.from_dict(dict(b=(1,20,3), a=(4,50,6)), index=('y', 'z', 'x'), name='f2'),
                )

        clipped = Batch.from_frames(frames).clip(upper=22, lower=20).to_frame()

        rows = (
                ('f1', 'z'), ('f1', 'y'), ('f1', 'x'),
                ('f2', 'y'), ('f2', 'z'), ('f2', 'x'),
                )
        expected = (
                ('b', tuple(zip(rows, (20, 20, 20, 20, 20, 20)))),
                ('a', tuple(zip(rows, (22, 22, 22, 20, 22, 20)))),
                )
        self.assertEqual(clipped.to_pairs(0), expected)