def test_batch_apply_items_a(self) -> None:
    # apply_items() hands each (label, frame) pair to the callable; here the
    # callable returns the label together with the mean of column 'b'. The
    # same result is expected with default settings and with a thread pool.
    frames = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('x', 'q'), name='f3'),
    )
    expected = (
        (None, (('f1', ('f1', 3.5)), ('f2', ('f2', 5.0)), ('f3', ('f3', 55.0)))),
    )

    plain = Batch.from_frames(frames).apply_items(
        lambda label, frame: (label, frame['b'].mean()))
    self.assertEqual(plain.to_frame().to_pairs(0), expected)

    threaded = Batch.from_frames(frames, use_threads=True, max_workers=8).apply_items(
        lambda label, frame: (label, frame['b'].mean()))
    self.assertEqual(threaded.to_frame().to_pairs(0), expected)
def test_batch_to_zip_pickle_a(self) -> None:
    # Round trip three named frames through a zip of pickles and compare each
    # restored frame with the frame it came from.
    originals = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'a': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'a': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    )
    config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )
    batch_out = Batch.from_frames(originals)

    with temp_file('.zip') as fp:
        batch_out.to_zip_pickle(fp, config=config)
        batch_in = Batch.from_zip_pickle(fp, config=config)
        restored = dict(batch_in.items())

    # NOTE(review): dtypes are excluded from the comparison; whether a pickle
    # round trip actually needs that relaxation is not shown here -- confirm.
    for original in originals:
        self.assertEqualFrames(original, restored[original.name], compare_dtype=False)
def test_batch_f(self) -> None:
    # Select column 'b' in every group via loc, then sum within each group.
    source = Frame.from_dict({'a': [1, 2, 3], 'b': [2, 4, 6], 'group': ['x', 'z', 'z']})
    grouped = Batch(source.iter_group_items('group'))
    totals = grouped.loc[:, 'b'].sum().to_frame()

    expected = ((None, (('x', 2), ('z', 10))),)
    self.assertEqual(totals.to_pairs(0), expected)
def test_batch_apply_b(self) -> None:
    # Part one: apply() with a thread pool, mapping every frame to its shape.
    f1 = Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1')
    f2 = Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2')
    f3 = Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('x', 'q'), name='f3')

    threaded = Batch.from_frames((f1, f2, f3), use_threads=True, max_workers=8)
    shapes = threaded.apply(lambda frame: frame.shape).to_frame()
    self.assertEqual(
        shapes.to_pairs(0),
        ((None, (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2)))),),
    )

    # Part two: apply() returning one element per frame, using a second frame
    # (also named 'f2') built from a 2x2 array.
    f2_from_array = Frame(np.arange(4).reshape(2, 2), name='f2')
    elements = Batch.from_frames((f1, f2_from_array)).apply(lambda frame: frame.iloc[1, 1])
    post = elements.to_frame(fill_value=0.0)
    self.assertEqual(
        post.to_pairs(0),
        ((None, (('f1', 4), ('f2', 3))),),
    )
def test_batch_apply_items_b(self) -> None:
    # Group by two boolean columns; keep the first three rows of the
    # (True, True) group and only the first row of every other group.
    source = ff.parse('s(20,4)|v(bool,bool,int,float)|c(I,str)|i(I,str)')
    grouped = Batch(source.iter_group_items(['zZbu', 'ztsv']))
    result = grouped.apply_items(
        lambda label, frame: frame.iloc[:3] if label == (True, True) else frame.iloc[:1]
    ).to_frame()

    # Every column of the result shares the same (group label, row label) keys.
    rows = (
        ((False, False), 'zZbu'),
        ((False, True), 'zr4u'),
        ((True, False), 'zkuW'),
        ((True, True), 'zIA5'),
        ((True, True), 'zGDJ'),
        ((True, True), 'zo2Q'),
    )
    expected = (
        ('zZbu', tuple(zip(rows, (False, False, True, True, True, True)))),
        ('ztsv', tuple(zip(rows, (False, True, False, True, True, True)))),
        ('zUvW', tuple(zip(rows, (-3648, 197228, 54020, 194224, 172133, -88017)))),
        ('zkuW', tuple(zip(rows, (1080.4, 3884.48, 3338.48, -1760.34, 1857.34, 268.96)))),
    )
    self.assertEqual(result.to_pairs(0), expected)
def test_batch_g(self) -> None:
    # Two frames with identical labels; the second holds the same values
    # scaled by 30.5.
    index = ('a', 'b')
    columns = ('x', 'y', 'z')
    f1 = Frame(np.arange(6).reshape(2, 3), index=index, columns=columns, name='f1')
    f2 = Frame(np.arange(6).reshape(2, 3) * 30.5, index=index, columns=columns, name='f2')

    # Summing twice leaves one value per frame: two rows under a single
    # column labelled None.
    totals = Batch.from_frames((f1, f2)).sum().sum().to_frame()
    self.assertEqual(
        totals.to_pairs(0),
        ((None, (('f1', 15.0), ('f2', 457.5))),),
    )

    # Extracting a single element per frame gives the same layout.
    corners = Batch.from_frames((f1, f2)).apply(lambda frame: frame.iloc[0, 0]).to_frame()
    self.assertEqual(
        corners.to_pairs(0),
        ((None, (('f1', 0.0), ('f2', 0.0))),),
    )
def test_batch_to_npz(self) -> None:
    # Round trip two distinctly named frames ('a' and 'b') through a zip of
    # NPZ archives and check that each one is restored with the same name,
    # dtypes and class.
    f1 = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)').rename('a')
    f2 = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)').rename('b')

    b1 = Batch.from_frames((f1, f2))
    with temp_file('.zip') as fp:
        b1.to_zip_npz(fp)
        b2 = Batch.from_zip_npz(fp)
        frames = dict(b2.items())

    # Both frames are verified; checking only 'a' would let a defect in how
    # the second frame is stored or restored go unnoticed.
    for source in (f1, f2):
        self.assertTrue(frames[source.name].equals(
            source,
            compare_name=True,
            compare_dtype=True,
            compare_class=True,
        ))
def test_batch_e(self) -> None:
    source = Frame.from_dict({'a': [1, 2, 3], 'b': [2, 4, 6], 'group': ['x', 'z', 'z']})

    # Column selection followed by a per-group sum.
    selected = Batch(source.iter_group_items('group'))[['a', 'b']]
    sums = selected.sum().to_frame()
    self.assertEqual(
        sums.to_pairs(0),
        (('a', (('x', 1), ('z', 5))), ('b', (('x', 2), ('z', 10)))),
    )

    # Unary negation of a freshly built selection; the negated group frames
    # are concatenated back into one frame.
    negated = -Batch(source.iter_group_items('group'))[['a', 'b']]
    combined = Frame.from_concat(negated.values)
    self.assertEqual(
        combined.to_pairs(0),
        (('a', ((0, -1), (1, -2), (2, -3))), ('b', ((0, -2), (1, -4), (2, -6)))),
    )
def test_batch_display_a(self) -> None:
    # Render a column-selected Batch with type colouring switched off and
    # compare the rendered rows.
    config = DisplayConfig.from_default(type_color=False)
    source = Frame.from_dict({'a': [1, 2, 3], 'b': [2, 4, 6], 'group': ['x', 'z', 'z']})

    batch = Batch(source.iter_group_items('group'))[['a', 'b']]
    rows = batch.display(config).to_rows()

    expected = ['<Batch>', '<Index>', 'x <Frame>', 'z <Frame>', '<<U1> <object>']
    self.assertEqual(rows, expected)
def b2() -> None:
    # Apply func_b to eight frames taken from the enclosing scope, using
    # max_workers=8, use_threads=False and two frames per chunk.
    # NOTE(review): use_threads=False presumably selects a process pool --
    # confirm against Batch.
    frames = (f1, f2, f3, f4, f5, f6, f7, f8)
    source = Batch.from_frames(frames, max_workers=8, use_threads=False, chunksize=2)
    applied = source.apply(func_b)
    # Exhaust the items; the collected result itself is discarded.
    tuple(applied.items())
def test_batch_drop_c(self) -> None:
    # drop.loc with a row slice and a column label, applied to two frames of
    # different widths. The selection is rebuilt for each assertion.
    narrow = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)')
    wide = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)').rename('b')

    dropped = Batch.from_frames((narrow, wide)).drop.loc[-3648:, 'zZbu']
    remaining_columns = [list(frame.columns) for _, frame in dropped.items()] #type: ignore
    self.assertEqual(
        remaining_columns,
        [['ztsv'], ['ztsv', 'zUvW', 'zkuW', 'zmVj']],
    )

    dropped = Batch.from_frames((narrow, wide)).drop.loc[-3648:, 'zZbu']
    remaining_index = [list(frame.index) for _, frame in dropped.items()]
    self.assertEqual(
        remaining_index,
        [[34715], [34715]],
    )
def get_instance(cls, target: tp.Type[ContainerBase]) -> ContainerBase:
    '''
    Return a sample instance of ``target``. The instance is built on the first request and stored in ``cls._CLS_TO_INSTANCE_CACHE``; later requests for the same class return the stored instance.
    '''
    cache = cls._CLS_TO_INSTANCE_CACHE
    if target in cache:
        return cache[target]

    # The order of these checks is significant: identity and membership
    # tests and the more specific issubclass() tests come before the
    # general fallbacks.
    if target is TypeBlocks:
        instance = target.from_blocks(np.array((0,))) #type: ignore
    elif target is Bus:
        f = Frame.from_elements((0,), name='frame')
        instance = target.from_frames((f,)) #type: ignore
    elif target is Batch:
        instance = Batch(iter(()))
    elif target in (DisplayConfig, StoreFilter, StoreConfig):
        instance = target()
    elif issubclass(target, IndexHierarchy):
        instance = target.from_labels(((0,0),))
    elif issubclass(target, (IndexYearMonth, IndexYear, IndexDate)):
        instance = target(np.array((0,), dtype=DT64_S))
    elif target in (ContainerOperand, ContainerBase, IndexBase):
        instance = target()
    elif issubclass(target, Frame):
        instance = target.from_elements((0,))
    else:
        instance = target((0,)) #type: ignore

    cache[target] = instance
    return instance
def test_batch_iloc_cov_a(self) -> None:
    f1 = Frame.from_dict({'b': (1, 2, 3), 'a': (4, 5, 6)}, index=('z', 'y', 'x'), name='f1')
    f2 = Frame.from_dict({'b': (1, 10, 100), 'a': (1, 2, 3)}, index=('y', 'z', 'x'), name='f2')

    # Default cov(): the result is labelled by the frames' column labels.
    by_column = Batch.from_frames((f1, f2)).cov().to_frame()
    column_rows = (('f1', 'b'), ('f1', 'a'), ('f2', 'b'), ('f2', 'a'))
    self.assertEqual(
        by_column.to_pairs(),
        (
            ('b', tuple(zip(column_rows, (1.0, 1.0, 2997.0, 49.5)))),
            ('a', tuple(zip(column_rows, (1.0, 1.0, 49.5, 1.0)))),
        ),
    )

    # cov(axis=0): the result is labelled by the frames' index labels.
    by_label = Batch.from_frames((f1, f2)).cov(axis=0).to_frame()
    label_rows = (
        ('f1', 'z'), ('f1', 'y'), ('f1', 'x'),
        ('f2', 'y'), ('f2', 'z'), ('f2', 'x'),
    )
    self.assertEqual(
        by_label.to_pairs(),
        (
            ('x', tuple(zip(label_rows, (4.5, 4.5, 4.5, 0.0, 388.0, 4704.5)))),
            ('y', tuple(zip(label_rows, (4.5, 4.5, 4.5, 0.0, 0.0, 0.0)))),
            ('z', tuple(zip(label_rows, (4.5, 4.5, 4.5, 0.0, 32.0, 388.0)))),
        ),
    )
def test_batch_count_a(self) -> None:
    # Each frame holds exactly one NaN; the expected counts are lower
    # wherever a NaN is present.
    f1 = Frame.from_dict(
        {'b': (20, 20, 0), 'a': (20, 20, np.nan)},
        index=('z', 'y', 'x'),
        name='f1',
    )
    f2 = Frame.from_dict(
        {'b': (1, np.nan, 1), 'a': (1, 50, 1)},
        index=('y', 'z', 'x'),
        name='f2',
    )

    per_column = Batch.from_frames((f1, f2)).count(axis=0).to_frame()
    self.assertEqual(
        per_column.to_pairs(0),
        (('b', (('f1', 3), ('f2', 2))), ('a', (('f1', 2), ('f2', 3)))),
    )

    per_row = Batch.from_frames((f1, f2)).count(axis=1).to_frame()
    self.assertEqual(
        per_row.to_pairs(0),
        (
            ('x', (('f1', 1), ('f2', 2))),
            ('y', (('f1', 2), ('f2', 2))),
            ('z', (('f1', 2), ('f2', 1))),
        ),
    )
def test_batch_b(self) -> None:
    # Select a single column from every group and sum it per group.
    source = Frame.from_dict(
        {'a': [1, 49, 2, 3], 'b': [2, 4, 381, 6], 'group': ['x', 'x', 'z', 'z']},
        index=('r', 's', 't', 'u'),
    )
    grouped = Batch(source.iter_group_items('group'))
    totals = grouped['b'].sum().to_frame()

    expected = ((None, (('x', 6), ('z', 387))),)
    self.assertEqual(totals.to_pairs(0), expected)
def test_batch_i(self) -> None:
    # Neither parsed frame is renamed, so both carry the same name; drop
    # must still process each of them.
    narrow = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)')
    wide = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)')

    dropped = Batch.from_frames((narrow, wide)).drop['zZbu']
    remaining_columns = [list(frame.columns) for _, frame in dropped.items()]

    self.assertEqual(
        remaining_columns,
        [['ztsv'], ['ztsv', 'zUvW', 'zkuW', 'zmVj']],
    )
def test_batch_shapes_a(self) -> None:
    # `shapes` yields one shape per contained frame, keyed by the group
    # label: group 'x' has one row, group 'z' has two, and both keep the two
    # selected columns.
    f1 = Frame.from_dict({'a':[1,2,3], 'b':[2,4,6], 'group': ['x','z','z']})

    b1 = Batch(f1.iter_group_items('group'))[['a', 'b']]
    self.assertEqual(
        b1.shapes.to_pairs(),
        (('x', (1, 2)), ('z', (2, 2))),
    )
def test_batch_iloc_max_a(self) -> None:
    # iloc_max() reports, per column, the integer position of the maximum;
    # where the maximum is repeated (f1 column 'b') position 0 is expected.
    first = Frame.from_dict(
        {'b': (20, 20, 0), 'a': (20, 20, 50)},
        index=('z', 'y', 'x'),
        name='f1',
    )
    second = Frame.from_dict(
        {'b': (1, 20, 1), 'a': (1, 50, 1)},
        index=('y', 'z', 'x'),
        name='f2',
    )

    positions = Batch.from_frames((first, second)).iloc_max().to_frame()

    expected = (
        ('b', (('f1', 0), ('f2', 1))),
        ('a', (('f1', 2), ('f2', 1))),
    )
    self.assertEqual(positions.to_pairs(0), expected)
def test_batch_iter_a(self) -> None:
    # Iterating a Batch directly is expected to yield the frame labels.
    first = Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1')
    second = Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2')

    batch = Batch.from_frames((first, second), name='foo').cumsum()
    labels = list(batch)

    self.assertEqual(labels, ['f1', 'f2'])
def test_batch_slotted_a(self) -> None:
    # A Batch must reject new attributes and must not expose __dict__.
    frame = Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='foo')
    batch = Batch.from_frames((frame,))

    with self.assertRaises(AttributeError):
        batch.g = 30 # type: ignore #pylint: disable=E0237

    with self.assertRaises(AttributeError):
        batch.__dict__ #pylint: disable=W0104
def test_batch_bloc_a(self) -> None:
    # The boolean selector is derived from f2 only, yet it is applied to
    # every frame in the Batch through bloc.
    first = Frame.from_dict(
        {'a': (10, 20, 0), 'b': (30, 40, 50)},
        index=('x', 'y', 'z'),
        name='f1',
    )
    second = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2',
    )

    selected = Batch.from_frames((first, second)).bloc[second >= 2]
    extracted = [series.values.tolist() for series in selected.values]

    self.assertEqual(extracted, [[30, 40, 50], [4, 2, 5, 3, 6]])
def test_batch_iloc_b(self) -> None:
    # Select position [1, 1] in every frame, using a thread pool.
    first = Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1')
    second = Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2')

    batch = Batch.from_frames((first, second), max_workers=8, use_threads=True)
    selected = batch.iloc[1, 1]
    extracted = [item.values.tolist() for item in selected.values]

    self.assertEqual(extracted, [[4], [5]])
def test_batch_drop_duplicated_a(self) -> None:
    # In each frame two rows are identical; only the remaining row is
    # expected after drop_duplicated().
    first = Frame.from_dict(
        {'b': (20, 20, 0), 'a': (20, 20, 50)},
        index=('z', 'y', 'x'),
        name='f1',
    )
    second = Frame.from_dict(
        {'b': (1, 20, 1), 'a': (1, 50, 1)},
        index=('y', 'z', 'x'),
        name='f2',
    )

    unique = Batch.from_frames((first, second)).drop_duplicated().to_frame()

    expected = (
        ('b', ((('f1', 'x'), 0), (('f2', 'z'), 20))),
        ('a', ((('f1', 'x'), 50), (('f2', 'z'), 50))),
    )
    self.assertEqual(unique.to_pairs(0), expected)
def test_batch_to_zip_parquet_a(self) -> None:
    # NOTE(review): despite its name, this test writes and reads an XLSX
    # workbook (to_xlsx / from_xlsx); no zip or parquet call is made here.
    # Confirm whether the name or the body is the intended one.
    first = Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1')
    second = Frame.from_dict({'a': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2')
    config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )
    batch_out = Batch.from_frames((first, second), config=config)

    with temp_file('.xlsx') as fp:
        batch_out.to_xlsx(fp)
        # Scale every restored frame by 20, then sum each column.
        scaled = Batch.from_xlsx(fp, config=config) * 20
        totals = scaled.sum()
        self.assertEqual(
            totals.to_frame().to_pairs(0),
            (('a', (('f1', 60), ('f2', 120))), ('b', (('f1', 140), ('f2', 300)))),
        )
def test_batch_sort_values_a(self) -> None:
    # Sort every frame by column 'b'; rows are expected in ascending order
    # of 'b' within each frame.
    first = Frame.from_dict(
        {'b': (50, 20, 0), 'a': (30, 40, 50)},
        index=('z', 'y', 'x'),
        name='f1',
    )
    second = Frame.from_dict(
        {'b': (3, 2, 1), 'a': (4, 5, 6)},
        index=('y', 'z', 'x'),
        name='f2',
    )

    ordered = Batch.from_frames((first, second)).sort_values('b').to_frame()

    rows = (
        ('f1', 'x'), ('f1', 'y'), ('f1', 'z'),
        ('f2', 'x'), ('f2', 'z'), ('f2', 'y'),
    )
    expected = (
        ('b', tuple(zip(rows, (0, 20, 50, 1, 2, 3)))),
        ('a', tuple(zip(rows, (50, 40, 30, 6, 5, 4)))),
    )
    self.assertEqual(ordered.to_pairs(0), expected)
def test_batch_a(self) -> None:
    # Multiplying a Batch by 3 multiplies every group frame; integers are
    # tripled and the string column is repeated three times.
    source = Frame.from_dict(
        {'a': [1, 49, 2, 3], 'b': [2, 4, 381, 6], 'group': ['x', 'x', 'z', 'z']},
        index=('r', 's', 't', 'u'),
    )
    tripled = Batch(source.iter_group_items('group')) * 3
    items = tuple(tripled.items())

    # Only the first group is inspected.
    _, first_frame = items[0]
    expected = (
        ('a', (('r', 3), ('s', 147))),
        ('b', (('r', 6), ('s', 12))),
        ('group', (('r', 'xxx'), ('s', 'xxx'))),
    )
    self.assertEqual(first_frame.to_pairs(0), expected)
def test_batch_to_frame_a(self) -> None:
    # The two frames share only column 'b'; to_frame(fill_value=0) is
    # expected to fill the columns a frame lacks with 0.
    first = Frame.from_dict(
        {'a': (10, 20, 0), 'b': (30, 40, 50)},
        index=('x', 'y', 'z'),
        name='f1',
    )
    second = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2',
    )

    batch = Batch.from_frames((first, second))
    combined = batch.loc['y':].to_frame(fill_value=0) #type: ignore

    rows = (('f1', 'y'), ('f1', 'z'), ('f2', 'y'), ('f2', 'z'))
    expected = (
        ('a', tuple(zip(rows, (20, 0, 0, 0)))),
        ('b', tuple(zip(rows, (40, 50, 5, 6)))),
        ('c', tuple(zip(rows, (0, 0, 2, 3)))),
    )
    self.assertEqual(combined.to_pairs(0), expected)
def test_batch_round_a(self) -> None:
    # The built-in round() applied to a Batch, keeping one decimal place.
    first = Frame.from_dict(
        {'b': (20, 20.234, 0), 'a': (20.234, 20.234, 50.828)},
        index=('z', 'y', 'x'),
        name='f1',
    )
    second = Frame.from_dict(
        {'b': (1, 20.234, 1.043), 'a': (1.043, 50.828, 1.043)},
        index=('y', 'z', 'x'),
        name='f2',
    )

    rounded = round(Batch.from_frames((first, second)), 1).to_frame() #type: ignore

    rows = (
        ('f1', 'z'), ('f1', 'y'), ('f1', 'x'),
        ('f2', 'y'), ('f2', 'z'), ('f2', 'x'),
    )
    expected = (
        ('b', tuple(zip(rows, (20.0, 20.2, 0.0, 1.0, 20.2, 1.0)))),
        ('a', tuple(zip(rows, (20.2, 20.2, 50.8, 1.0, 50.8, 1.0)))),
    )
    self.assertEqual(rounded.to_pairs(0), expected)
def test_batch_transpose_a(self) -> None:
    # After transpose() the original index labels become the columns and
    # the original column labels become the (inner) row labels.
    first = Frame.from_dict(
        {'b': (10, 20, 0), 'a': (30, 40, 50)},
        index=('z', 'y', 'x'),
        name='f1',
    )
    second = Frame.from_dict(
        {'b': (1, 20, 3), 'a': (4, 50, 6)},
        index=('y', 'z', 'x'),
        name='f2',
    )

    transposed = Batch.from_frames((first, second)).transpose().to_frame()

    rows = (('f1', 'b'), ('f1', 'a'), ('f2', 'b'), ('f2', 'a'))
    expected = (
        ('x', tuple(zip(rows, (0, 50, 3, 6)))),
        ('y', tuple(zip(rows, (20, 40, 1, 4)))),
        ('z', tuple(zip(rows, (10, 30, 20, 50)))),
    )
    self.assertEqual(transposed.to_pairs(0), expected)
def test_batch_clip_a(self) -> None:
    # Clip every value into the range [20, 22].
    first = Frame.from_dict(
        {'b': (10, 20, 0), 'a': (30, 40, 50)},
        index=('z', 'y', 'x'),
        name='f1',
    )
    second = Frame.from_dict(
        {'b': (1, 20, 3), 'a': (4, 50, 6)},
        index=('y', 'z', 'x'),
        name='f2',
    )

    clipped = Batch.from_frames((first, second)).clip(upper=22, lower=20).to_frame()

    rows = (
        ('f1', 'z'), ('f1', 'y'), ('f1', 'x'),
        ('f2', 'y'), ('f2', 'z'), ('f2', 'x'),
    )
    expected = (
        ('b', tuple(zip(rows, (20, 20, 20, 20, 20, 20)))),
        ('a', tuple(zip(rows, (22, 22, 22, 20, 22, 20)))),
    )
    self.assertEqual(clipped.to_pairs(0), expected)