def test_batch_apply_items_a(self) -> None:
        # apply_items() passes both the label and the Frame to the callable;
        # the same result is required with and without threaded workers.
        frame_a = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        frame_b = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')
        frame_c = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('x', 'q'),
                name='f3')
        sources = (frame_a, frame_b, frame_c)

        # each value is the (label, mean of column 'b') pair returned by the lambda
        expected = ((None, (('f1', ('f1', 3.5)), ('f2', ('f2', 5.0)), ('f3', ('f3', 55.0)))),)

        plain = Batch.from_frames(sources).apply_items(
                lambda k, x: (k, x['b'].mean()))
        self.assertEqual(plain.to_frame().to_pairs(0), expected)

        threaded = Batch.from_frames(
                sources,
                use_threads=True,
                max_workers=8,
                ).apply_items(lambda k, x: (k, x['b'].mean()))
        self.assertEqual(threaded.to_frame().to_pairs(0), expected)
    def test_batch_apply_b(self) -> None:
        # apply() with a callable returning a tuple (shape), then one returning
        # a single element (iloc[1, 1]).
        first = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        second = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')
        third = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('x', 'q'),
                name='f3')

        threaded = Batch.from_frames(
                (first, second, third),
                use_threads=True,
                max_workers=8)
        shapes = threaded.apply(lambda x: x.shape).to_frame()
        self.assertEqual(
                shapes.to_pairs(0),
                ((None, (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2)))),))

        # a different Frame, also named 'f2', built from a 2x2 integer array
        square = Frame(np.arange(4).reshape(2, 2), name='f2')
        post = Batch.from_frames((first, square)).apply(
                lambda f: f.iloc[1, 1]).to_frame(fill_value=0.0)

        self.assertEqual(post.to_pairs(0), ((None, (('f1', 4), ('f2', 3))),))
    def test_batch_g(self) -> None:
        # Reductions and element selection that yield one value per Frame.
        ints = Frame(
                np.arange(6).reshape(2, 3),
                index=('a', 'b'),
                columns=('x', 'y', 'z'),
                name='f1')
        floats = Frame(
                np.arange(6).reshape(2, 3) * 30.5,
                index=('a', 'b'),
                columns=('x', 'y', 'z'),
                name='f2')

        # two successive sums give two rows under a single column labelled None
        totals = Batch.from_frames((ints, floats)).sum().sum().to_frame()
        self.assertEqual(
                totals.to_pairs(0),
                ((None, (('f1', 15.0), ('f2', 457.5))),))

        corners = Batch.from_frames((ints, floats)).apply(
                lambda f: f.iloc[0, 0]).to_frame()
        self.assertEqual(
                corners.to_pairs(0),
                ((None, (('f1', 0.0), ('f2', 0.0))),))
Beispiel #4
0
 def b2() -> None:
     # Build a Batch over eight Frames from the enclosing scope (8 workers,
     # use_threads=False, chunksize=2), apply func_b, and collect every item
     # into a tuple; the tuple itself is discarded.
     source = Batch.from_frames(
             (f1, f2, f3, f4, f5, f6, f7, f8),
             max_workers=8,
             use_threads=False,
             chunksize=2,
             )
     _ = tuple(source.apply(func_b).items())
    def test_batch_to_zip_pickle_a(self) -> None:
        # Write three Frames to a zipped pickle archive, read them back, and
        # compare each restored Frame with its source.
        originals = (
                Frame.from_dict(
                        {'a': (1, 2), 'b': (3, 4)},
                        index=('x', 'y'),
                        name='f1'),
                Frame.from_dict(
                        {'a': (1, 2, 3), 'b': (4, 5, 6)},
                        index=('x', 'y', 'z'),
                        name='f2'),
                Frame.from_dict(
                        {'a': (10, 20), 'b': (50, 60)},
                        index=('p', 'q'),
                        name='f3'),
                )

        config = StoreConfig(
                index_depth=1,
                columns_depth=1,
                include_columns=True,
                include_index=True,
                )

        writer = Batch.from_frames(originals)

        with temp_file('.zip') as fp:
            writer.to_zip_pickle(fp, config=config)
            reader = Batch.from_zip_pickle(fp, config=config)
            restored = dict(reader.items())

        for source in originals:
            # dtypes are deliberately excluded from the comparison
            self.assertEqualFrames(source, restored[source.name], compare_dtype=False)
    def test_batch_drop_c(self) -> None:
        # drop.loc with a row slice and a column label: check the surviving
        # columns first, then the surviving index labels.
        narrow = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)')
        wide = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)').rename('b')

        by_columns = Batch.from_frames((narrow, wide)).drop.loc[-3648:, 'zZbu']
        self.assertEqual(
            [list(frame.columns) for _, frame in by_columns.items()], #type: ignore
            [['ztsv'], ['ztsv', 'zUvW', 'zkuW', 'zmVj']],
            )

        # the same selection is rebuilt from scratch for the index check
        by_index = Batch.from_frames((narrow, wide)).drop.loc[-3648:, 'zZbu']
        self.assertEqual(
            [list(frame.index) for _, frame in by_index.items()],
            [[34715], [34715]],
            )
    def test_batch_iloc_cov_a(self) -> None:
        # cov() with its default arguments and with axis=0.
        left = Frame.from_dict(
                {'b': (1, 2, 3), 'a': (4, 5, 6)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, 10, 100), 'a': (1, 2, 3)},
                index=('y', 'z', 'x'),
                name='f2')

        expected_default = (
                ('b', ((('f1', 'b'), 1.0), (('f1', 'a'), 1.0), (('f2', 'b'), 2997.0), (('f2', 'a'), 49.5))),
                ('a', ((('f1', 'b'), 1.0), (('f1', 'a'), 1.0), (('f2', 'b'), 49.5), (('f2', 'a'), 1.0))),
                )
        by_default = Batch.from_frames((left, right)).cov().to_frame()
        self.assertEqual(by_default.to_pairs(), expected_default)

        expected_axis0 = (
                ('x', ((('f1', 'z'), 4.5), (('f1', 'y'), 4.5), (('f1', 'x'), 4.5), (('f2', 'y'), 0.0), (('f2', 'z'), 388.0), (('f2', 'x'), 4704.5))),
                ('y', ((('f1', 'z'), 4.5), (('f1', 'y'), 4.5), (('f1', 'x'), 4.5), (('f2', 'y'), 0.0), (('f2', 'z'), 0.0), (('f2', 'x'), 0.0))),
                ('z', ((('f1', 'z'), 4.5), (('f1', 'y'), 4.5), (('f1', 'x'), 4.5), (('f2', 'y'), 0.0), (('f2', 'z'), 32.0), (('f2', 'x'), 388.0))),
                )
        by_axis0 = Batch.from_frames((left, right)).cov(axis=0).to_frame()
        self.assertEqual(by_axis0.to_pairs(), expected_axis0)
    def test_batch_count_a(self) -> None:
        # count() along each axis for Frames that each contain one NaN.
        left = Frame.from_dict(
                {'b': (20, 20, 0), 'a': (20, 20, np.nan)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, np.nan, 1), 'a': (1, 50, 1)},
                index=('y', 'z', 'x'),
                name='f2')

        per_column = Batch.from_frames((left, right)).count(axis=0).to_frame()
        self.assertEqual(
                per_column.to_pairs(0),
                (
                    ('b', (('f1', 3), ('f2', 2))),
                    ('a', (('f1', 2), ('f2', 3))),
                ))

        per_row = Batch.from_frames((left, right)).count(axis=1).to_frame()
        self.assertEqual(
                per_row.to_pairs(0),
                (
                    ('x', (('f1', 1), ('f2', 2))),
                    ('y', (('f1', 2), ('f2', 2))),
                    ('z', (('f1', 2), ('f2', 1))),
                ))
    def test_batch_i(self) -> None:
        # Neither parsed Frame is renamed here, so this covers a Batch whose
        # Frames share the same name.
        narrow = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)')
        wide = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)')

        remaining = Batch.from_frames((narrow, wide)).drop['zZbu']
        observed = [list(frame.columns) for _, frame in remaining.items()]
        self.assertEqual(
            observed,
            [['ztsv'], ['ztsv', 'zUvW', 'zkuW', 'zmVj']],
            )
Beispiel #10
0
    def test_batch_iter_a(self) -> None:
        # Iterating a Batch is expected to produce the Frame labels.
        left = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        right = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        accumulated = Batch.from_frames((left, right), name='foo').cumsum()
        labels = list(accumulated)
        self.assertEqual(labels, ['f1', 'f2'])
Beispiel #11
0
    def test_batch_iloc_max_a(self) -> None:
        # iloc_max() with default arguments, collected into one Frame.
        left = Frame.from_dict(
                {'b': (20, 20, 0), 'a': (20, 20, 50)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, 20, 1), 'a': (1, 50, 1)},
                index=('y', 'z', 'x'),
                name='f2')

        positions = Batch.from_frames((left, right)).iloc_max().to_frame()
        expected = (
                ('b', (('f1', 0), ('f2', 1))),
                ('a', (('f1', 2), ('f2', 1))),
                )
        self.assertEqual(positions.to_pairs(0), expected)
    def test_batch_to_npz(self) -> None:
        # Round-trip two distinctly named Frames ('a' and 'b') through a zipped
        # NPZ archive and require every restored Frame to equal its source,
        # including name, dtypes and class.
        f1 = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)').rename('a')
        f2 = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)').rename('b')

        b1 = Batch.from_frames((f1, f2))
        with temp_file('.zip') as fp:
            b1.to_zip_npz(fp)
            b2 = Batch.from_zip_npz(fp)
            frames = dict(b2.items())

            # check both Frames: verifying only 'a' would let a missing or
            # corrupted 'b' pass unnoticed
            for source in (f1, f2):
                self.assertTrue(frames[source.name].equals(
                        source,
                        compare_name=True,
                        compare_dtype=True,
                        compare_class=True))
Beispiel #13
0
    def test_batch_slotted_a(self) -> None:
        # A Batch must reject new attributes and must not expose __dict__.
        only = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='foo')

        batch = Batch.from_frames((only,))

        # assigning an unknown attribute
        with self.assertRaises(AttributeError):
            batch.g = 30  # type: ignore #pylint: disable=E0237

        # reading the instance dictionary
        with self.assertRaises(AttributeError):
            batch.__dict__  #pylint: disable=W0104
Beispiel #14
0
    def test_batch_iloc_b(self) -> None:
        # iloc[1, 1] on a threaded Batch, read back through Batch.values.
        left = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        right = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        source = Batch.from_frames((left, right), max_workers=8, use_threads=True)
        selected = source.iloc[1, 1]
        observed = [item.values.tolist() for item in selected.values]
        self.assertEqual(observed, [[4], [5]])
Beispiel #15
0
    def test_batch_bloc_a(self) -> None:
        # bloc[] with a Boolean Frame derived from the second Frame, applied to
        # every Frame in the Batch.
        left = Frame.from_dict(
                {'a': (10, 20, 0), 'b': (30, 40, 50)},
                index=('x', 'y', 'z'),
                name='f1')
        right = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        source = Batch.from_frames((left, right))
        selected = source.bloc[right >= 2]
        observed = [item.values.tolist() for item in selected.values]
        self.assertEqual(observed, [[30, 40, 50], [4, 2, 5, 3, 6]])
Beispiel #16
0
    def test_batch_drop_duplicated_a(self) -> None:
        # drop_duplicated() with default arguments; one row per Frame is
        # expected to remain.
        left = Frame.from_dict(
                {'b': (20, 20, 0), 'a': (20, 20, 50)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, 20, 1), 'a': (1, 50, 1)},
                index=('y', 'z', 'x'),
                name='f2')

        survivors = Batch.from_frames((left, right)).drop_duplicated().to_frame()

        expected = (
                ('b', ((('f1', 'x'), 0), (('f2', 'z'), 20))),
                ('a', ((('f1', 'x'), 50), (('f2', 'z'), 50))),
                )
        self.assertEqual(survivors.to_pairs(0), expected)
    def test_batch_sort_values_a(self) -> None:
        # sort_values('b') orders the rows of each Frame independently.
        left = Frame.from_dict(
                {'b': (50, 20, 0), 'a': (30, 40, 50)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (3, 2, 1), 'a': (4, 5, 6)},
                index=('y', 'z', 'x'),
                name='f2')

        ordered = Batch.from_frames((left, right)).sort_values('b').to_frame()
        expected = (
                ('b', ((('f1', 'x'), 0), (('f1', 'y'), 20), (('f1', 'z'), 50), (('f2', 'x'), 1), (('f2', 'z'), 2), (('f2', 'y'), 3))),
                ('a', ((('f1', 'x'), 50), (('f1', 'y'), 40), (('f1', 'z'), 30), (('f2', 'x'), 6), (('f2', 'z'), 5), (('f2', 'y'), 4))),
                )
        self.assertEqual(ordered.to_pairs(0), expected)
    def test_batch_apply_except_b(self) -> None:
        # apply_except() with KeyError and three workers: only the result for
        # 'f3' is expected. Adding chunksize=2 must raise NotImplementedError.
        first = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        second = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')
        third = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('x', 'q'),
                name='f3')
        sources = (first, second, third)

        pooled = Batch.from_frames(sources, max_workers=3)
        post = pooled.apply_except(func1, KeyError).to_frame()
        self.assertEqual(
                post.to_pairs(),
                (('d', (('f3', 20),)), ('b', (('f3', 60),))))

        with self.assertRaises(NotImplementedError):
            chunked = Batch.from_frames(sources, max_workers=3, chunksize=2)
            _ = chunked.apply_except(func1, KeyError).to_frame()
    def test_batch_round_a(self) -> None:
        # The built-in round() applied to a Batch, with one decimal place.
        left = Frame.from_dict(
                {'b': (20, 20.234, 0), 'a': (20.234, 20.234, 50.828)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, 20.234, 1.043), 'a': (1.043, 50.828, 1.043)},
                index=('y', 'z', 'x'),
                name='f2')

        rounded = round(Batch.from_frames((left, right)), 1).to_frame() #type: ignore
        expected = (
                ('b', ((('f1', 'z'), 20.0), (('f1', 'y'), 20.2), (('f1', 'x'), 0.0), (('f2', 'y'), 1.0), (('f2', 'z'), 20.2), (('f2', 'x'), 1.0))),
                ('a', ((('f1', 'z'), 20.2), (('f1', 'y'), 20.2), (('f1', 'x'), 50.8), (('f2', 'y'), 1.0), (('f2', 'z'), 50.8), (('f2', 'x'), 1.0))),
                )
        self.assertEqual(rounded.to_pairs(0), expected)
    def test_batch_transpose_a(self) -> None:
        # transpose(): the former index labels become the columns of the result.
        left = Frame.from_dict(
                {'b': (10, 20, 0), 'a': (30, 40, 50)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, 20, 3), 'a': (4, 50, 6)},
                index=('y', 'z', 'x'),
                name='f2')

        flipped = Batch.from_frames((left, right)).transpose().to_frame()
        expected = (
                ('x', ((('f1', 'b'), 0), (('f1', 'a'), 50), (('f2', 'b'), 3), (('f2', 'a'), 6))),
                ('y', ((('f1', 'b'), 20), (('f1', 'a'), 40), (('f2', 'b'), 1), (('f2', 'a'), 4))),
                ('z', ((('f1', 'b'), 10), (('f1', 'a'), 30), (('f2', 'b'), 20), (('f2', 'a'), 50))),
                )
        self.assertEqual(flipped.to_pairs(0), expected)
    def test_batch_to_frame_a(self) -> None:
        # to_frame(fill_value=0) after a loc row slice; columns absent from a
        # Frame are expected to be filled with 0.
        left = Frame.from_dict(
                {'a': (10, 20, 0), 'b': (30, 40, 50)},
                index=('x', 'y', 'z'),
                name='f1')
        right = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        source = Batch.from_frames((left, right))
        combined = source.loc['y':].to_frame(fill_value=0) #type: ignore
        expected = (
                ('a', ((('f1', 'y'), 20), (('f1', 'z'), 0), (('f2', 'y'), 0), (('f2', 'z'), 0))),
                ('b', ((('f1', 'y'), 40), (('f1', 'z'), 50), (('f2', 'y'), 5), (('f2', 'z'), 6))),
                ('c', ((('f1', 'y'), 0), (('f1', 'z'), 0), (('f2', 'y'), 2), (('f2', 'z'), 3))),
                )
        self.assertEqual(combined.to_pairs(0), expected)
    def test_batch_clip_a(self) -> None:
        # clip() with lower=20 and upper=22: every expected value is 20 or 22.
        left = Frame.from_dict(
                {'b': (10, 20, 0), 'a': (30, 40, 50)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, 20, 3), 'a': (4, 50, 6)},
                index=('y', 'z', 'x'),
                name='f2')

        clipped = Batch.from_frames((left, right)).clip(upper=22, lower=20).to_frame()
        expected = (
                ('b', ((('f1', 'z'), 20), (('f1', 'y'), 20), (('f1', 'x'), 20), (('f2', 'y'), 20), (('f2', 'z'), 20), (('f2', 'x'), 20))),
                ('a', ((('f1', 'z'), 22), (('f1', 'y'), 22), (('f1', 'x'), 22), (('f2', 'y'), 20), (('f2', 'z'), 22), (('f2', 'x'), 20))),
                )
        self.assertEqual(clipped.to_pairs(0), expected)
    def test_batch_repr_a(self) -> None:
        # repr() prefix for an unnamed Batch and for one renamed to 'foo'.
        left = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        right = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        unnamed = Batch.from_frames((left, right))
        unnamed_repr = repr(unnamed)
        self.assertTrue(unnamed_repr.startswith('<Batch at '))

        named = unnamed.rename('foo')
        named_repr = repr(named)
        self.assertTrue(named_repr.startswith('<Batch: foo at '))
    def test_batch_sample_a(self) -> None:
        # sample(1, 1, seed=22): the fixed seed makes the selection repeatable.
        left = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        right = Frame.from_dict(
                {'a': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        sampled = Batch.from_frames((left, right)).sample(1, 1, seed=22).to_frame()
        self.assertEqual(
                sampled.to_pairs(0),
                (('a', ((('f1', 'x'), 1), (('f2', 'z'), 3))),))
    def test_batch_shift_a(self) -> None:
        # shift() by index=1 and columns=-1, with vacated positions set to 0.
        left = Frame.from_dict(
                {'b': (20, 20, 0), 'a': (20, 20, 50)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, 20, 1), 'a': (1, 50, 1)},
                index=('y', 'z', 'x'),
                name='f2')

        shifted = Batch.from_frames((left, right)).shift(
                index=1,
                columns=-1,
                fill_value=0,
                ).to_frame()

        expected = (
                ('b', ((('f1', 'z'), 0), (('f1', 'y'), 20), (('f1', 'x'), 20), (('f2', 'y'), 0), (('f2', 'z'), 1), (('f2', 'x'), 50))),
                ('a', ((('f1', 'z'), 0), (('f1', 'y'), 0), (('f1', 'x'), 0), (('f2', 'y'), 0), (('f2', 'z'), 0), (('f2', 'x'), 0))),
                )
        self.assertEqual(shifted.to_pairs(0), expected)
    def test_batch_isin_a(self) -> None:
        # isin((20, 50)) yields a Boolean result per element of each Frame.
        left = Frame.from_dict(
                {'b': (10, 20, 0), 'a': (30, 40, 50)},
                index=('z', 'y', 'x'),
                name='f1')
        right = Frame.from_dict(
                {'b': (1, 20, 3), 'a': (4, 50, 6)},
                index=('y', 'z', 'x'),
                name='f2')

        membership = Batch.from_frames((left, right)).isin((20, 50)).to_frame()

        expected = (
                ('b', ((('f1', 'z'), False), (('f1', 'y'), True), (('f1', 'x'), False), (('f2', 'y'), False), (('f2', 'z'), True), (('f2', 'x'), False))),
                ('a', ((('f1', 'z'), False), (('f1', 'y'), False), (('f1', 'x'), True), (('f2', 'y'), False), (('f2', 'z'), True), (('f2', 'x'), False))),
                )
        self.assertEqual(membership.to_pairs(0), expected)
Beispiel #27
0
    def test_batch_name_a(self) -> None:
        # The Batch name is set at construction and replaced by rename(); the
        # Frame labels are still reported by keys() afterwards.
        left = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        right = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        original = Batch.from_frames((left, right), name='foo')
        self.assertEqual(original.name, 'foo')

        renamed = original.rename('bar')
        self.assertEqual(renamed.name, 'bar')
        labels = tuple(renamed.keys())
        self.assertEqual(labels, ('f1', 'f2'))
Beispiel #28
0
    def test_batch_c1(self) -> None:
        # Batch.shapes pairs each Frame label with that Frame's shape.
        first = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        second = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')
        third = Frame.from_dict(
                {'d': (10, 20), 'b': (50, 60)},
                index=('x', 'q'),
                name='f3')

        source = Batch.from_frames((first, second, third))

        expected = (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2)))
        self.assertEqual(source.shapes.to_pairs(), expected)
Beispiel #29
0
    def test_batch_drop_a(self) -> None:
        # drop.iloc[1, 1] removes row position 1 and column position 1 from
        # each Frame; to_frame(fill_value=0) fills columns a Frame lacks.
        left = Frame.from_dict(
                {'a': (10, 20, 0), 'b': (30, 40, 50)},
                index=('x', 'y', 'z'),
                name='f1')
        right = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        source = Batch.from_frames((left, right))
        remaining = source.drop.iloc[1, 1].to_frame(fill_value=0)

        expected = (
                ('a', ((('f1', 'x'), 10), (('f1', 'z'), 0), (('f2', 'x'), 0), (('f2', 'z'), 0))),
                ('c', ((('f1', 'x'), 0), (('f1', 'z'), 0), (('f2', 'x'), 1), (('f2', 'z'), 3))),
                )
        self.assertEqual(remaining.to_pairs(0), expected)
    def test_batch_to_bus_a(self) -> None:
        # to_bus(): the resulting items are concatenated into one Frame
        # (fill_value=0) and compared against the source data.
        left = Frame.from_dict(
                {'a': (1, 2), 'b': (3, 4)},
                index=('x', 'y'),
                name='f1')
        right = Frame.from_dict(
                {'c': (1, 2, 3), 'b': (4, 5, 6)},
                index=('x', 'y', 'z'),
                name='f2')

        source = Batch.from_frames((left, right))
        bus = source.to_bus()

        combined = Frame.from_concat_items(bus.items(), fill_value=0)
        expected = (
                ('a', ((('f1', 'x'), 1), (('f1', 'y'), 2), (('f2', 'x'), 0), (('f2', 'y'), 0), (('f2', 'z'), 0))),
                ('b', ((('f1', 'x'), 3), (('f1', 'y'), 4), (('f2', 'x'), 4), (('f2', 'y'), 5), (('f2', 'z'), 6))),
                ('c', ((('f1', 'x'), 0), (('f1', 'y'), 0), (('f2', 'x'), 1), (('f2', 'y'), 2), (('f2', 'z'), 3))),
                )
        self.assertEqual(combined.to_pairs(0), expected)