def test_batch_apply_items_a(self) -> None:
    # apply_items calls the function with (label, frame); the same result is
    # expected from a default Batch and from one using threads with 8 workers.
    frames = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('x', 'q'), name='f3'),
    )

    # one column labelled None; each value is the (label, mean of 'b') tuple
    expected = (
        (None, (
            ('f1', ('f1', 3.5)),
            ('f2', ('f2', 5.0)),
            ('f3', ('f3', 55.0)),
        )),
    )

    default_batch = Batch.from_frames(frames).apply_items(
        lambda label, frame: (label, frame['b'].mean()))
    self.assertEqual(default_batch.to_frame().to_pairs(0), expected)

    threaded_batch = Batch.from_frames(
        frames,
        use_threads=True,
        max_workers=8,
    ).apply_items(lambda label, frame: (label, frame['b'].mean()))
    self.assertEqual(threaded_batch.to_frame().to_pairs(0), expected)
def test_batch_apply_b(self) -> None:
    # apply maps a function over every Frame; to_frame collects the results.
    frame_a = Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1')
    frame_b = Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2')
    frame_c = Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('x', 'q'), name='f3')

    shapes = Batch.from_frames(
        (frame_a, frame_b, frame_c),
        use_threads=True,
        max_workers=8,
    ).apply(lambda frame: frame.shape)
    self.assertEqual(
        shapes.to_frame().to_pairs(0),
        ((None, (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2)))),),
    )

    # a second Frame named 'f2', this time built directly from a 2x2 array
    frame_d = Frame(np.arange(4).reshape(2, 2), name='f2')
    corners = Batch.from_frames((frame_a, frame_d)).apply(lambda f: f.iloc[1, 1])
    post = corners.to_frame(fill_value=0.0)
    self.assertEqual(
        post.to_pairs(0),
        ((None, (('f1', 4), ('f2', 3))),),
    )
def test_batch_g(self) -> None:
    # Reductions and element selection that yield one scalar per Frame.
    row_labels = ('a', 'b')
    column_labels = ('x', 'y', 'z')
    frame_int = Frame(
        np.arange(6).reshape(2, 3),
        index=row_labels,
        columns=column_labels,
        name='f1',
    )
    frame_float = Frame(
        np.arange(6).reshape(2, 3) * 30.5,
        index=row_labels,
        columns=column_labels,
        name='f2',
    )

    # summing twice leaves two rows (one per Frame) under one column labelled None
    totals = Batch.from_frames((frame_int, frame_float)).sum().sum().to_frame()
    self.assertEqual(
        totals.to_pairs(0),
        ((None, (('f1', 15.0), ('f2', 457.5))),),
    )

    first_cells = Batch.from_frames((frame_int, frame_float)).apply(
        lambda f: f.iloc[0, 0]).to_frame()
    self.assertEqual(
        first_cells.to_pairs(0),
        ((None, (('f1', 0.0), ('f2', 0.0))),),
    )
def b2() -> None:
    # Apply func_b across the eight frames from the enclosing scope, using a
    # Batch configured with 8 workers, no threads, and a chunksize of 2.
    source = Batch.from_frames(
        (f1, f2, f3, f4, f5, f6, f7, f8),
        max_workers=8,
        use_threads=False,
        chunksize=2,
    )
    applied = source.apply(func_b)
    # consume every item; presumably Batch work is deferred until iteration
    _ = tuple(applied.items())
def test_batch_to_zip_pickle_a(self) -> None:
    # Round trip three Frames through a zip-pickle archive and compare each
    # restored Frame against its source, looked up by name.
    originals = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'a': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'a': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    )

    config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )

    source = Batch.from_frames(originals)
    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp, config=config)
        restored = Batch.from_zip_pickle(fp, config=config)
        # read everything back while the temporary file is still available
        frames = dict(restored.items())

    for frame in originals:
        # NOTE(review): dtypes are not compared here; that relaxation looks
        # inherited from the parquet variant of this test -- confirm whether
        # the pickle round trip actually needs it.
        self.assertEqualFrames(frame, frames[frame.name], compare_dtype=False)
def test_batch_drop_c(self) -> None:
    # drop.loc with a row slice and a column label, checked first on the
    # remaining columns and then on the remaining index of each Frame.
    narrow = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)')
    wide = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)').rename('b')

    # a fresh Batch is built for each check, as in the original test
    post = Batch.from_frames((narrow, wide)).drop.loc[-3648:, 'zZbu']
    remaining_columns = [list(v.columns) for _, v in post.items()] #type: ignore
    self.assertEqual(
        remaining_columns,
        [['ztsv'], ['ztsv', 'zUvW', 'zkuW', 'zmVj']],
    )

    post = Batch.from_frames((narrow, wide)).drop.loc[-3648:, 'zZbu']
    remaining_index = [list(v.index) for _, v in post.items()]
    self.assertEqual(
        remaining_index,
        [[34715], [34715]],
    )
def test_batch_iloc_cov_a(self) -> None:
    # cov() with its default axis and with axis=0, each concatenated via to_frame.
    frame_a = Frame.from_dict(
        {'b': (1, 2, 3), 'a': (4, 5, 6)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, 10, 100), 'a': (1, 2, 3)},
        index=('y', 'z', 'x'),
        name='f2')

    by_default = Batch.from_frames((frame_a, frame_b)).cov().to_frame()
    expected_default = (
        ('b', (
            (('f1', 'b'), 1.0),
            (('f1', 'a'), 1.0),
            (('f2', 'b'), 2997.0),
            (('f2', 'a'), 49.5),
        )),
        ('a', (
            (('f1', 'b'), 1.0),
            (('f1', 'a'), 1.0),
            (('f2', 'b'), 49.5),
            (('f2', 'a'), 1.0),
        )),
    )
    self.assertEqual(by_default.to_pairs(), expected_default)

    by_axis_0 = Batch.from_frames((frame_a, frame_b)).cov(axis=0).to_frame()
    expected_axis_0 = (
        ('x', (
            (('f1', 'z'), 4.5),
            (('f1', 'y'), 4.5),
            (('f1', 'x'), 4.5),
            (('f2', 'y'), 0.0),
            (('f2', 'z'), 388.0),
            (('f2', 'x'), 4704.5),
        )),
        ('y', (
            (('f1', 'z'), 4.5),
            (('f1', 'y'), 4.5),
            (('f1', 'x'), 4.5),
            (('f2', 'y'), 0.0),
            (('f2', 'z'), 0.0),
            (('f2', 'x'), 0.0),
        )),
        ('z', (
            (('f1', 'z'), 4.5),
            (('f1', 'y'), 4.5),
            (('f1', 'x'), 4.5),
            (('f2', 'y'), 0.0),
            (('f2', 'z'), 32.0),
            (('f2', 'x'), 388.0),
        )),
    )
    self.assertEqual(by_axis_0.to_pairs(), expected_axis_0)
def test_batch_count_a(self) -> None:
    # count() along each axis for Frames that each contain one NaN.
    frame_a = Frame.from_dict(
        {'b': (20, 20, 0), 'a': (20, 20, np.nan)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, np.nan, 1), 'a': (1, 50, 1)},
        index=('y', 'z', 'x'),
        name='f2')

    per_column = Batch.from_frames((frame_a, frame_b)).count(axis=0).to_frame()
    self.assertEqual(
        per_column.to_pairs(0),
        (
            ('b', (('f1', 3), ('f2', 2))),
            ('a', (('f1', 2), ('f2', 3))),
        ),
    )

    per_row = Batch.from_frames((frame_a, frame_b)).count(axis=1).to_frame()
    self.assertEqual(
        per_row.to_pairs(0),
        (
            ('x', (('f1', 1), ('f2', 2))),
            ('y', (('f1', 2), ('f2', 2))),
            ('z', (('f1', 2), ('f2', 1))),
        ),
    )
def test_batch_i(self) -> None:
    # Neither parsed Frame is renamed, so both are processed under the same name.
    narrow = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)')
    wide = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)')

    post = Batch.from_frames((narrow, wide)).drop['zZbu']
    remaining_columns = [list(v.columns) for _, v in post.items()]
    self.assertEqual(
        remaining_columns,
        [['ztsv'], ['ztsv', 'zUvW', 'zkuW', 'zmVj']],
    )
def test_batch_iter_a(self) -> None:
    # Iterating a Batch yields the labels of its Frames.
    frame_a = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1')
    frame_b = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    named = Batch.from_frames((frame_a, frame_b), name='foo')
    accumulated = named.cumsum()
    self.assertEqual(list(accumulated), ['f1', 'f2'])
def test_batch_iloc_max_a(self) -> None:
    # iloc_max() gives, per Frame and column, the integer position of the maximum.
    frame_a = Frame.from_dict(
        {'b': (20, 20, 0), 'a': (20, 20, 50)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, 20, 1), 'a': (1, 50, 1)},
        index=('y', 'z', 'x'),
        name='f2')

    positions = Batch.from_frames((frame_a, frame_b)).iloc_max().to_frame()
    expected = (
        ('b', (('f1', 0), ('f2', 1))),
        ('a', (('f1', 2), ('f2', 1))),
    )
    self.assertEqual(positions.to_pairs(0), expected)
def test_batch_to_npz(self) -> None:
    # Round trip two distinctly named Frames ('a' and 'b') through a zip-npz
    # archive and require each restored Frame to equal its source in name,
    # dtype and class.
    f1 = ff.parse('s(3,2)|v(bool)|c(I,str)|i(I,int)').rename('a')
    f2 = ff.parse('s(3,5)|v(bool)|c(I,str)|i(I,int)').rename('b')

    b1 = Batch.from_frames((f1, f2))
    with temp_file('.zip') as fp:
        b1.to_zip_npz(fp)
        b2 = Batch.from_zip_npz(fp)
        # read everything back while the temporary file is still available
        frames = dict(b2.items())

    # verify every Frame written, not only the first one
    for source in (f1, f2):
        self.assertTrue(frames[source.name].equals(
                source,
                compare_name=True,
                compare_dtype=True,
                compare_class=True,
                ))
def test_batch_slotted_a(self) -> None:
    # A Batch must reject new attributes and must not expose __dict__.
    frame = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='foo')
    batch = Batch.from_frames((frame,))

    with self.assertRaises(AttributeError):
        batch.g = 30 # type: ignore #pylint: disable=E0237

    with self.assertRaises(AttributeError):
        batch.__dict__ #pylint: disable=W0104
def test_batch_iloc_b(self) -> None:
    # Selecting one element with iloc[1, 1] through a threaded Batch.
    frame_a = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1')
    frame_b = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    source = Batch.from_frames((frame_a, frame_b), max_workers=8, use_threads=True)
    selected = source.iloc[1, 1]
    post = [s.values.tolist() for s in selected.values]
    self.assertEqual(post, [[4], [5]])
def test_batch_bloc_a(self) -> None:
    # bloc selection on every Frame, using a Boolean Frame derived from the second.
    frame_a = Frame.from_dict(
        {'a': (10, 20, 0), 'b': (30, 40, 50)},
        index=('x', 'y', 'z'),
        name='f1')
    frame_b = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    source = Batch.from_frames((frame_a, frame_b))
    selected = source.bloc[frame_b >= 2]
    post = [s.values.tolist() for s in selected.values]
    self.assertEqual(post, [[30, 40, 50], [4, 2, 5, 3, 6]])
def test_batch_drop_duplicated_a(self) -> None:
    # drop_duplicated() leaves only the one non-repeated row of each Frame.
    frame_a = Frame.from_dict(
        {'b': (20, 20, 0), 'a': (20, 20, 50)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, 20, 1), 'a': (1, 50, 1)},
        index=('y', 'z', 'x'),
        name='f2')

    post = Batch.from_frames((frame_a, frame_b)).drop_duplicated().to_frame()
    expected = (
        ('b', ((('f1', 'x'), 0), (('f2', 'z'), 20))),
        ('a', ((('f1', 'x'), 50), (('f2', 'z'), 50))),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_sort_values_a(self) -> None:
    # sort_values('b') orders the rows of each Frame by column 'b'.
    frame_a = Frame.from_dict(
        {'b': (50, 20, 0), 'a': (30, 40, 50)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (3, 2, 1), 'a': (4, 5, 6)},
        index=('y', 'z', 'x'),
        name='f2')

    post = Batch.from_frames((frame_a, frame_b)).sort_values('b').to_frame()
    expected = (
        ('b', (
            (('f1', 'x'), 0),
            (('f1', 'y'), 20),
            (('f1', 'z'), 50),
            (('f2', 'x'), 1),
            (('f2', 'z'), 2),
            (('f2', 'y'), 3),
        )),
        ('a', (
            (('f1', 'x'), 50),
            (('f1', 'y'), 40),
            (('f1', 'z'), 30),
            (('f2', 'x'), 6),
            (('f2', 'z'), 5),
            (('f2', 'y'), 4),
        )),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_apply_except_b(self) -> None:
    # apply_except(func1, KeyError) with three workers keeps only 'f3' here;
    # the same pipeline with a chunksize is expected to raise NotImplementedError.
    frames = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('x', 'q'), name='f3'),
    )

    survivors = Batch.from_frames(frames, max_workers=3).apply_except(func1, KeyError)
    post = survivors.to_frame()
    self.assertEqual(
        post.to_pairs(),
        (('d', (('f3', 20),)), ('b', (('f3', 60),))),
    )

    # the whole chain stays inside the context manager, as in the original
    with self.assertRaises(NotImplementedError):
        _ = Batch.from_frames(
                frames,
                max_workers=3,
                chunksize=2,
                ).apply_except(func1, KeyError).to_frame()
def test_batch_round_a(self) -> None:
    # The built-in round() applied to a Batch, rounding to one decimal place.
    frame_a = Frame.from_dict(
        {'b': (20, 20.234, 0), 'a': (20.234, 20.234, 50.828)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, 20.234, 1.043), 'a': (1.043, 50.828, 1.043)},
        index=('y', 'z', 'x'),
        name='f2')

    post = round(Batch.from_frames((frame_a, frame_b)), 1).to_frame() #type: ignore
    expected = (
        ('b', (
            (('f1', 'z'), 20.0),
            (('f1', 'y'), 20.2),
            (('f1', 'x'), 0.0),
            (('f2', 'y'), 1.0),
            (('f2', 'z'), 20.2),
            (('f2', 'x'), 1.0),
        )),
        ('a', (
            (('f1', 'z'), 20.2),
            (('f1', 'y'), 20.2),
            (('f1', 'x'), 50.8),
            (('f2', 'y'), 1.0),
            (('f2', 'z'), 50.8),
            (('f2', 'x'), 1.0),
        )),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_transpose_a(self) -> None:
    # transpose() swaps index and columns of each Frame before concatenation.
    frame_a = Frame.from_dict(
        {'b': (10, 20, 0), 'a': (30, 40, 50)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, 20, 3), 'a': (4, 50, 6)},
        index=('y', 'z', 'x'),
        name='f2')

    post = Batch.from_frames((frame_a, frame_b)).transpose().to_frame()
    expected = (
        ('x', (
            (('f1', 'b'), 0),
            (('f1', 'a'), 50),
            (('f2', 'b'), 3),
            (('f2', 'a'), 6),
        )),
        ('y', (
            (('f1', 'b'), 20),
            (('f1', 'a'), 40),
            (('f2', 'b'), 1),
            (('f2', 'a'), 4),
        )),
        ('z', (
            (('f1', 'b'), 10),
            (('f1', 'a'), 30),
            (('f2', 'b'), 20),
            (('f2', 'a'), 50),
        )),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_to_frame_a(self) -> None:
    # to_frame(fill_value=0) fills positions where a Frame lacks a column.
    frame_a = Frame.from_dict(
        {'a': (10, 20, 0), 'b': (30, 40, 50)},
        index=('x', 'y', 'z'),
        name='f1')
    frame_b = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    source = Batch.from_frames((frame_a, frame_b))
    post = source.loc['y':].to_frame(fill_value=0) #type: ignore
    expected = (
        ('a', (
            (('f1', 'y'), 20),
            (('f1', 'z'), 0),
            (('f2', 'y'), 0),
            (('f2', 'z'), 0),
        )),
        ('b', (
            (('f1', 'y'), 40),
            (('f1', 'z'), 50),
            (('f2', 'y'), 5),
            (('f2', 'z'), 6),
        )),
        ('c', (
            (('f1', 'y'), 0),
            (('f1', 'z'), 0),
            (('f2', 'y'), 2),
            (('f2', 'z'), 3),
        )),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_clip_a(self) -> None:
    # clip(upper=22, lower=20) bounds every value to the 20..22 range.
    frame_a = Frame.from_dict(
        {'b': (10, 20, 0), 'a': (30, 40, 50)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, 20, 3), 'a': (4, 50, 6)},
        index=('y', 'z', 'x'),
        name='f2')

    post = Batch.from_frames((frame_a, frame_b)).clip(upper=22, lower=20).to_frame()
    expected = (
        ('b', (
            (('f1', 'z'), 20),
            (('f1', 'y'), 20),
            (('f1', 'x'), 20),
            (('f2', 'y'), 20),
            (('f2', 'z'), 20),
            (('f2', 'x'), 20),
        )),
        ('a', (
            (('f1', 'z'), 22),
            (('f1', 'y'), 22),
            (('f1', 'x'), 22),
            (('f2', 'y'), 20),
            (('f2', 'z'), 22),
            (('f2', 'x'), 20),
        )),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_repr_a(self) -> None:
    # The repr prefix omits the name for an unnamed Batch and includes it after rename.
    frame_a = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1')
    frame_b = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    unnamed = Batch.from_frames((frame_a, frame_b))
    unnamed_repr = repr(unnamed)
    self.assertTrue(unnamed_repr.startswith('<Batch at '))

    named = unnamed.rename('foo')
    named_repr = repr(named)
    self.assertTrue(named_repr.startswith('<Batch: foo at '))
def test_batch_sample_a(self) -> None:
    # sample(1, 1, seed=22) picks one row and one column from each Frame.
    frame_a = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1')
    frame_b = Frame.from_dict(
        {'a': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    sampled = Batch.from_frames((frame_a, frame_b)).sample(1, 1, seed=22)
    post = sampled.to_frame()
    self.assertEqual(
        post.to_pairs(0),
        (('a', ((('f1', 'x'), 1), (('f2', 'z'), 3))),),
    )
def test_batch_shift_a(self) -> None:
    # shift(index=1, columns=-1, fill_value=0) applied to each Frame.
    frame_a = Frame.from_dict(
        {'b': (20, 20, 0), 'a': (20, 20, 50)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, 20, 1), 'a': (1, 50, 1)},
        index=('y', 'z', 'x'),
        name='f2')

    shifted = Batch.from_frames((frame_a, frame_b)).shift(
        index=1,
        columns=-1,
        fill_value=0,
    )
    post = shifted.to_frame()
    expected = (
        ('b', (
            (('f1', 'z'), 0),
            (('f1', 'y'), 20),
            (('f1', 'x'), 20),
            (('f2', 'y'), 0),
            (('f2', 'z'), 1),
            (('f2', 'x'), 50),
        )),
        ('a', (
            (('f1', 'z'), 0),
            (('f1', 'y'), 0),
            (('f1', 'x'), 0),
            (('f2', 'y'), 0),
            (('f2', 'z'), 0),
            (('f2', 'x'), 0),
        )),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_isin_a(self) -> None:
    # isin((20, 50)) marks the positions whose value is 20 or 50.
    frame_a = Frame.from_dict(
        {'b': (10, 20, 0), 'a': (30, 40, 50)},
        index=('z', 'y', 'x'),
        name='f1')
    frame_b = Frame.from_dict(
        {'b': (1, 20, 3), 'a': (4, 50, 6)},
        index=('y', 'z', 'x'),
        name='f2')

    post = Batch.from_frames((frame_a, frame_b)).isin((20, 50)).to_frame()
    expected = (
        ('b', (
            (('f1', 'z'), False),
            (('f1', 'y'), True),
            (('f1', 'x'), False),
            (('f2', 'y'), False),
            (('f2', 'z'), True),
            (('f2', 'x'), False),
        )),
        ('a', (
            (('f1', 'z'), False),
            (('f1', 'y'), False),
            (('f1', 'x'), True),
            (('f2', 'y'), False),
            (('f2', 'z'), True),
            (('f2', 'x'), False),
        )),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_name_a(self) -> None:
    # The Batch name is set at construction and replaced by rename();
    # the renamed Batch still yields the Frame labels as keys.
    frame_a = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1')
    frame_b = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    original = Batch.from_frames((frame_a, frame_b), name='foo')
    self.assertEqual(original.name, 'foo')

    renamed = original.rename('bar')
    self.assertEqual(renamed.name, 'bar')

    labels = tuple(renamed.keys())
    self.assertEqual(labels, ('f1', 'f2'))
def test_batch_c1(self) -> None:
    # The shapes attribute pairs each Frame label with that Frame's shape.
    frames = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('x', 'q'), name='f3'),
    )

    batch = Batch.from_frames(frames)
    shape_pairs = batch.shapes.to_pairs()
    self.assertEqual(
        shape_pairs,
        (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2))),
    )
def test_batch_drop_a(self) -> None:
    # drop.iloc[1, 1] removes the second row and second column of each Frame.
    frame_a = Frame.from_dict(
        {'a': (10, 20, 0), 'b': (30, 40, 50)},
        index=('x', 'y', 'z'),
        name='f1')
    frame_b = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    source = Batch.from_frames((frame_a, frame_b))
    post = source.drop.iloc[1, 1].to_frame(fill_value=0)
    expected = (
        ('a', (
            (('f1', 'x'), 10),
            (('f1', 'z'), 0),
            (('f2', 'x'), 0),
            (('f2', 'z'), 0),
        )),
        ('c', (
            (('f1', 'x'), 0),
            (('f1', 'z'), 0),
            (('f2', 'x'), 1),
            (('f2', 'z'), 3),
        )),
    )
    self.assertEqual(post.to_pairs(0), expected)
def test_batch_to_bus_a(self) -> None:
    # to_bus() result, checked by concatenating the Bus items into one Frame.
    frame_a = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1')
    frame_b = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2')

    bus = Batch.from_frames((frame_a, frame_b)).to_bus()
    combined = Frame.from_concat_items(bus.items(), fill_value=0)
    expected = (
        ('a', (
            (('f1', 'x'), 1),
            (('f1', 'y'), 2),
            (('f2', 'x'), 0),
            (('f2', 'y'), 0),
            (('f2', 'z'), 0),
        )),
        ('b', (
            (('f1', 'x'), 3),
            (('f1', 'y'), 4),
            (('f2', 'x'), 4),
            (('f2', 'y'), 5),
            (('f2', 'z'), 6),
        )),
        ('c', (
            (('f1', 'x'), 0),
            (('f1', 'y'), 0),
            (('f2', 'x'), 1),
            (('f2', 'y'), 2),
            (('f2', 'z'), 3),
        )),
    )
    self.assertEqual(combined.to_pairs(0), expected)