def test_yarn_get_a(self) -> None:
    """Check ``Yarn.get`` for a label that is present and one that is absent."""
    frames_a = (
        ff.parse('s(4,4)|v(int,float)').rename('f1'),
        ff.parse('s(4,4)|v(str)').rename('f2'),
        ff.parse('s(4,4)|v(bool)').rename('f3'),
    )
    bus_a = Bus.from_frames(frames_a, name='a')
    bus_b = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ),
        name='b',
    )
    bus_c = Bus.from_frames(
        (
            ff.parse('s(2,4)|v(int,float)').rename('f6'),
            ff.parse('s(4,2)|v(str)').rename('f7'),
        ),
        name='c',
    )
    yarn = Yarn.from_buses((bus_a, bus_b, bus_c), retain_labels=False, name='foo')

    # A known label returns a Frame equal to the one stored in the first Bus.
    expected_f2 = frames_a[1]
    self.assertTrue(yarn.get('f2').equals(expected_f2))
    # An unknown label yields None.
    self.assertEqual(yarn.get('f99'), None)
def test_yarn_tail_a(self) -> None:
    """Check that ``Yarn.tail(2)`` keeps the final two labels."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f1'),
            ff.parse('s(4,4)|v(str)').rename('f2'),
            ff.parse('s(4,4)|v(bool)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ),
        name='b',
    )
    bus_c = Bus.from_frames(
        (
            ff.parse('s(2,4)|v(int,float)').rename('f6'),
            ff.parse('s(4,2)|v(str)').rename('f7'),
        ),
        name='c',
    )
    yarn = Yarn.from_buses((bus_a, bus_b, bus_c), retain_labels=False, name='foo')

    self.assertEqual(yarn.tail(2).shape, (2, ))
    trailing_labels = tuple(yarn.tail(2).keys())
    self.assertEqual(trailing_labels, ('f6', 'f7'))
def test_yarn_loc_c(self) -> None:
    """Select from a label-retaining Yarn using a list of two-element tuple labels."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f1'),
            ff.parse('s(4,4)|v(str)').rename('f2'),
            ff.parse('s(4,4)|v(bool)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ),
        name='b',
    )
    bus_c = Bus.from_frames(
        (
            ff.parse('s(2,4)|v(int,float)').rename('f6'),
            ff.parse('s(4,2)|v(str)').rename('f7'),
        ),
        name='c',
    )
    yarn = Yarn.from_buses((bus_a, bus_b, bus_c), retain_labels=True, name='foo')

    wanted = [('a', 'f3'), ('b', 'f5'), ('c', 'f6')]
    selection = yarn.loc[wanted]

    expected_shapes = (
        (('a', 'f3'), (4, 4)),
        (('b', 'f5'), (4, 4)),
        (('c', 'f6'), (2, 4)),
    )
    self.assertEqual(selection.shapes.to_pairs(), expected_shapes)
def test_yarn_from_buses_b(self) -> None:
    """Check shape and length of a Yarn built from three buses with labels retained."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f1'),
            ff.parse('s(4,4)|v(str)').rename('f2'),
            ff.parse('s(4,4)|v(bool)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ),
        name='b',
    )
    bus_c = Bus.from_frames(
        (
            ff.parse('s(2,4)|v(int,float)').rename('f6'),
            ff.parse('s(4,2)|v(str)').rename('f7'),
        ),
        name='c',
    )
    yarn = Yarn.from_buses((bus_a, bus_b, bus_c), retain_labels=True)

    # Three + two + two frames across the buses.
    self.assertEqual(yarn.shape, (7, ))
    self.assertEqual(len(yarn), 7)
def test_pivot_items_to_frame_b(self) -> None:
    """Call ``pivot_items_to_frame`` with two group fields and a hierarchical index."""
    frame = ff.parse('s(6,4)|v(int)').assign[0](
        range(6)
    )

    result = pivot_items_to_frame(
        blocks=frame._blocks,
        group_fields_iloc=[0, 1],
        group_depth=2,
        data_field_iloc=3,
        func_single=None,
        frame_cls=Frame,
        name='foo',
        dtype=np.dtype(int),
        index_constructor=IndexHierarchy.from_labels,
        columns_constructor=Index,
        kind='mergesort',
    )

    expected = (
        ('foo', (
            ((0, 162197), 129017),
            ((1, -41157), 35021),
            ((2, 5729), 166924),
            ((3, -168387), 122246),
            ((4, 140627), 197228),
            ((5, 66269), 105269),
        )),
    )
    self.assertEqual(result.to_pairs(), expected)
def test_pivot_core_a(self) -> None:
    """Pivot on two index fields, with and without an explicit index constructor.

    Both pivots are expected to produce the same labelled values; only the
    class of the resulting index differs (``Index`` of tuples by default,
    ``IndexHierarchy`` when ``IndexHierarchy.from_labels`` is supplied).
    """
    frame = (
        ff.parse('s(20,4)|v(int)')
        .assign[0].apply(lambda s: s % 4)
        .assign[1].apply(lambda s: s % 3)
    )

    expected = (
        (2, (
            ((0, 0), 463099), ((0, 1), -88017), ((0, 2), 35021),
            ((1, 0), 92867), ((1, 2), 96520),
            ((2, 0), 172133), ((2, 1), 279191), ((2, 2), 13448),
            ((3, 0), 255338), ((3, 1), 372807), ((3, 2), 155574),
        )),
        (3, (
            ((0, 0), 348362), ((0, 1), 175579), ((0, 2), 105269),
            ((1, 0), 58768), ((1, 2), 13448),
            ((2, 0), 84967), ((2, 1), 239151), ((2, 2), 170440),
            ((3, 0), 269300), ((3, 1), 204528), ((3, 2), 493169),
        )),
    )

    # by default we get a tuple index
    post1 = frame.pivot([0, 1])
    self.assertEqual(post1.index.name, (0, 1))
    self.assertIs(post1.index.__class__, Index)
    # assertEqual, not assertTrue: assertTrue takes its second argument as the
    # failure message, so the expected pairs would never be compared.
    self.assertEqual(post1.to_pairs(), expected)

    # can provide index constructor
    post2 = frame.pivot([0, 1], index_constructor=IndexHierarchy.from_labels)
    self.assertEqual(post2.index.name, (0, 1))
    self.assertIs(post2.index.__class__, IndexHierarchy)
    self.assertEqual(post2.to_pairs(), expected)
def __init__(self) -> None:
    """Build the wide fixture frame and write it to temporary NPZ and Parquet files.

    Sets ``self.sff1`` (the fixture frame), ``self.fp_npz`` and
    ``self.fp_parquet`` (paths of the written files), and ``self.meta``
    (per-test performance metadata).
    """
    import os

    from static_frame.core.archive_npy import NPYConverter

    super().__init__()

    self.sff1 = ff.parse(
            's(10,10_000)|v(int,bool,float)|i(I,str)|c(I,str)')

    # mkstemp returns an open OS-level descriptor along with the path; only
    # the path is needed here, so close the descriptor instead of leaking it.
    fd_npz, self.fp_npz = tempfile.mkstemp(suffix='.zip')
    os.close(fd_npz)
    self.sff1.to_npz(self.fp_npz)

    fd_parquet, self.fp_parquet = tempfile.mkstemp(suffix='.parquet')
    os.close(fd_parquet)
    self.sff1.to_parquet(self.fp_parquet)

    self.meta = {
        'wide_mixed_index_str': FunctionMetaData(
            perf_status=PerfStatus.EXPLAINED_LOSS,
            line_target=NPYConverter._header_decode,
            ),
        }
def test_pivot_items_to_frame_a(self) -> None:
    """Call ``pivot_items_to_frame`` with one group field and a custom ``func_single``."""
    frame = ff.parse('s(6,4)|v(int)').assign[0](
        range(6)
    )

    pivot_kwargs = dict(
        blocks=frame._blocks,
        group_fields_iloc=[0],
        group_depth=1,
        data_field_iloc=3,
        func_single=lambda x: str(x) if x % 2 else sum(x),
        frame_cls=Frame,
        name='foo',
        dtype=None,
        index_constructor=Index,
        columns_constructor=Index,
        kind='mergesort',
    )
    result = pivot_items_to_frame(**pivot_kwargs)

    # The expected output mixes string and integer results from func_single.
    expected = (
        ('foo', (
            (0, '[129017]'),
            (1, '[35021]'),
            (2, 166924),
            (3, 122246),
            (4, 197228),
            (5, '[105269]'),
        )),
    )
    self.assertEqual(result.to_pairs(), expected)
def test_yarn_iloc_a(self) -> None:
    """Exercise ``Yarn.iloc`` with an integer, a slice, a list and a Boolean array."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f1'),
            ff.parse('s(4,4)|v(str)').rename('f2'),
            ff.parse('s(4,4)|v(bool)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ),
        name='b',
    )
    bus_c = Bus.from_frames(
        (
            ff.parse('s(2,4)|v(int,float)').rename('f6'),
            ff.parse('s(4,2)|v(str)').rename('f7'),
        ),
        name='c',
    )
    yarn = Yarn.from_buses((bus_a, bus_b, bus_c), retain_labels=False, name='foo')

    # A single integer returns the contained Frame itself.
    self.assertEqual(yarn.iloc[3].shape, (4, 4))
    # Slice and list selections return one-dimensional containers.
    self.assertEqual(yarn.iloc[3:].shape, (4,))
    self.assertEqual(yarn.iloc[[1, 6]].shape, (2,))
    # Boolean selection derived from the index labels.
    is_f3 = yarn.index.via_str.startswith('f3')
    self.assertEqual(yarn.iloc[is_f3].shape, (1,))
def test_yarn_from_concat_a(self) -> None:
    """Concatenate two label-retaining Yarns and check the combined index."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f1'),
            ff.parse('s(4,4)|v(str)').rename('f2'),
            ff.parse('s(4,4)|v(bool)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ),
        name='b',
    )
    bus_c = Bus.from_frames(
        (
            ff.parse('s(2,4)|v(int,float)').rename('f6'),
            ff.parse('s(4,2)|v(str)').rename('f7'),
        ),
        name='c',
    )

    yarn_head = Yarn.from_buses((bus_a,), retain_labels=True)
    yarn_rest = Yarn.from_buses((bus_b, bus_c), retain_labels=True)
    combined = Yarn.from_concat((yarn_head, yarn_rest))

    self.assertEqual(combined.shape, (7,))
    expected_labels = [
        ['a', 'f1'],
        ['a', 'f2'],
        ['a', 'f3'],
        ['b', 'f4'],
        ['b', 'f5'],
        ['c', 'f6'],
        ['c', 'f7'],
    ]
    self.assertEqual(combined.index.values.tolist(), expected_labels)
def test_pivot_records_items_b(self) -> None:
    """Call ``pivot_records_items`` with a per-field ``func_map`` and one group field."""
    frame = ff.parse('s(3,6)|v(int,str,bool)|c(I,str)|i(I,int)')
    to_iloc = frame.columns.loc_to_iloc

    # The group fields must be a valid loc selection on the columns.
    group_iloc = to_iloc(['zUvW',])
    data_iloc = to_iloc(['zkuW', 'z2Oo'])

    records = pivot_records_items(
        blocks=frame._blocks,
        group_fields_iloc=group_iloc,
        group_depth=1,
        data_fields_iloc=data_iloc,
        func_single=None,
        func_map=(('zkuW', sum), ('z2Oo', min)),
    )

    expected = (
        (False, [201945, 35021, 1, False]),
        (True, [129017, 129017, False, False]),
    )
    self.assertEqual(tuple(records), expected)
def test_yarn_display_a(self) -> None:
    """Check the rows produced by ``Yarn.display`` with types shown and no color."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ),
        name='b',
    )
    yarn = Yarn.from_buses((bus_a, bus_b), retain_labels=False)

    config = DisplayConfig(type_show=True, type_color=False)
    rendered = yarn.display(config)

    expected_rows = [
        '<Yarn>',
        '<Index>',
        'f1 Frame',
        'f2 Frame',
        'f3 Frame',
        'f4 Frame',
        'f5 Frame',
        'f6 Frame',
        '<<U2> <object>',
    ]
    self.assertEqual(rendered.to_rows(), expected_rows)
def test_yarn_loc_b(self) -> None:
    """Select from a Yarn with a regex-derived Boolean array and check the name is kept."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f1'),
            ff.parse('s(4,4)|v(str)').rename('f2'),
            ff.parse('s(4,4)|v(bool)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ),
        name='b',
    )
    bus_c = Bus.from_frames(
        (
            ff.parse('s(2,4)|v(int,float)').rename('f6'),
            ff.parse('s(4,2)|v(str)').rename('f7'),
        ),
        name='c',
    )
    yarn = Yarn.from_buses((bus_a, bus_b, bus_c), retain_labels=False, name='foo')

    matches = yarn.index.via_re('[26]').search()
    selection = yarn.loc[matches]

    self.assertEqual(selection.index.values.tolist(), ['f2', 'f6'])
    self.assertEqual(
        selection.shapes.to_pairs(),
        (('f2', (4, 4)), ('f6', (2, 4))),
    )
    self.assertEqual(selection.name, 'foo')
def test_yarn_loc_e(self) -> None:
    """Slice a Yarn by label with ``__getitem__`` and read a Frame from the result."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f1'),
            ff.parse('s(4,4)|v(str)').rename('f2'),
            ff.parse('s(4,4)|v(bool)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ),
        name='b',
    )
    bus_c = Bus.from_frames(
        (
            ff.parse('s(2,4)|v(int,float)').rename('f6'),
            ff.parse('s(4,2)|v(str)').rename('f7'),
        ),
        name='c',
    )
    yarn = Yarn.from_buses((bus_a, bus_b, bus_c), retain_labels=False, name='foo')

    # The expected shapes show both slice endpoints are included.
    window = yarn['f2':'f6'] #type: ignore

    expected_shapes = (
        ('f2', (4, 4)),
        ('f3', (4, 4)),
        ('f4', (4, 4)),
        ('f5', (4, 4)),
        ('f6', (2, 4)),
    )
    self.assertEqual(window.shapes.to_pairs(), expected_shapes)

    expected_f5 = (
        (0, ((0, 'zjZQ'), (1, 'zO5l'), (2, 'zEdH'), (3, 'zB7E'))),
        (1, ((0, 'zaji'), (1, 'zJnC'), (2, 'zDdR'), (3, 'zuVU'))),
        (2, ((0, 'ztsv'), (1, 'zUvW'), (2, 'zkuW'), (3, 'zmVj'))),
        (3, ((0, 'z2Oo'), (1, 'z5l6'), (2, 'zCE3'), (3, 'zr4u'))),
    )
    self.assertEqual(window['f5'].to_pairs(), expected_f5)
def test_frame_group_c(self) -> None:
    """Group rows on column 0 with ``drop=True`` via ``iter_group`` and ``iter_group_items``."""
    frame = ff.parse('s(10,3)|v(int,str,bool)').assign[0].apply(
            lambda s: s % 4)

    # iter_group: two groups are expected, each without the grouping column.
    groups = tuple(frame.iter_group(0, axis=0, drop=True))
    self.assertEqual(len(groups), 2)

    expected_first = (
        (1, ((3, 'zuVU'), (5, 'zJXD'), (6, 'zPAQ'), (7, 'zyps'))),
        (2, ((3, True), (5, False), (6, True), (7, True))),
    )
    self.assertEqual(groups[0].to_pairs(), expected_first)

    expected_second = (
        (1, ((0, 'zaji'), (1, 'zJnC'), (2, 'zDdR'), (4, 'zKka'), (8, 'zyG5'), (9, 'zvMu'))),
        (2, ((0, True), (1, False), (2, False), (4, False), (8, True), (9, False))),
    )
    self.assertEqual(groups[1].to_pairs(), expected_second)

    # iter_group_items: (group label, frame) pairs.
    group_items = tuple(frame.iter_group_items(0, axis=0, drop=True))
    self.assertEqual(len(group_items), 2)

    first_label, first_frame = group_items[0][0], group_items[0][1]
    self.assertEqual(first_label, 0)
    self.assertEqual(first_frame.shape, (4, 2))

    second_label, second_frame = group_items[1][0], group_items[1][1]
    self.assertEqual(second_label, 3)
    self.assertEqual(second_frame.shape, (6, 2))
def test_pivot_items_to_block_a(self) -> None:
    """Call ``pivot_items_to_block`` with one group field and an integer fill value."""
    frame = ff.parse('s(6,4)|v(int)').assign[0](
        range(6)
    )
    int_dtype = np.dtype(int)
    outer = Index(frame[0].values.tolist())

    block = pivot_items_to_block(
        blocks=frame._blocks,
        group_fields_iloc=[0],
        group_depth=1,
        data_field_iloc=3,
        func_single=None,
        dtype=int_dtype,
        fill_value=0,
        fill_value_dtype=np.dtype(int),
        index_outer=outer,
        kind='mergesort',
    )

    expected = [129017, 35021, 166924, 122246, 197228, 105269]
    self.assertEqual(block.tolist(), expected)
def test_yarn_values_a(self) -> None:
    """Check that ``Yarn.values`` yields every contained Frame in order."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ),
        name='a',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ),
        name='b',
    )
    yarn = Yarn.from_buses((bus_a, bus_b), retain_labels=False)

    values = yarn.values
    self.assertEqual(len(values), len(yarn))

    observed_shapes = [frame.shape for frame in yarn.values]
    expected_shapes = [(4, 2), (4, 5), (2, 2), (2, 8), (4, 4), (6, 4)]
    self.assertEqual(observed_shapes, expected_shapes)
def test_yarn_rehierarch_a(self) -> None:
    """Swap the two index levels of a Yarn selection with ``rehierarch``."""
    bus_first = Bus.from_frames(
        (
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        )
    )
    bus_second = Bus.from_frames(
        (
            ff.parse('s(2,8)').rename('f4'),
        )
    )
    bus_third = Bus.from_frames(
        (
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        )
    )

    hierarchy = IndexHierarchy.from_product(('a', 'b'), (1, 2, 3))
    yarn = Yarn((bus_first, bus_second, bus_third), index=hierarchy)

    reordered = yarn.iloc[[0, 2, 4]].rehierarch((1, 0))

    expected = (
        ((1, 'a'), (4, 2)),
        ((3, 'a'), (2, 2)),
        ((2, 'b'), (4, 4)),
    )
    self.assertEqual(reordered.status['shape'].to_pairs(), expected)
def test_frame_iter_series_b(self) -> None:
    """Check that the ``dtype`` passed to iterator ``apply`` sets the result dtype."""
    frame = ff.parse('s(10,4)|i(I,str)|c(I,str)|v(float)')
    n_rows = (10,)

    # float
    as_float = frame.iter_series(axis=1).apply(lambda s: s.sum(), dtype=float)
    self.assertEqual(as_float.dtype, float)
    self.assertEqual(as_float.shape, n_rows)

    # bool, from an element-wise comparison on the first column
    first_column = frame[sf.ILoc[0]]
    as_bool = first_column.iter_element().apply(lambda e: e == 647.9, dtype=bool)
    self.assertEqual(as_bool.dtype, bool)
    self.assertEqual(as_bool.shape, n_rows)
    self.assertEqual(as_bool.sum(), 1)

    # str
    as_str = frame.iter_series(axis=1).apply(lambda s: str(s.sum()), dtype=str)
    self.assertEqual(as_str.dtype, np.dtype('<U18'))
    self.assertEqual(as_str.shape, n_rows)

    # int
    as_int = frame.iter_series(axis=1).apply(lambda s: int(s.sum()), dtype=int)
    self.assertEqual(as_int.dtype, np.dtype(int))
    self.assertEqual(as_int.shape, n_rows)

    # object
    as_object = frame.iter_series(axis=1).apply(lambda s: int(s.sum()), dtype=object)
    self.assertEqual(as_object.dtype, object)
    self.assertEqual(as_object.shape, n_rows)
def test_yarn_relabel_flat_a(self) -> None:
    """Flatten a hierarchical Yarn index and slice from a tuple label onward."""
    bus_first = Bus.from_frames(
        (
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        )
    )
    bus_second = Bus.from_frames(
        (
            ff.parse('s(2,8)').rename('f4'),
        )
    )
    bus_third = Bus.from_frames(
        (
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        )
    )

    hierarchy = IndexHierarchy.from_product(('a', 'b'), (1, 2, 3))
    yarn = Yarn((bus_first, bus_second, bus_third), index=hierarchy)

    flat_tail = yarn.relabel_flat()[('a', 3):]

    expected = (
        (('a', 3), (2, 2)),
        (('b', 1), (2, 8)),
        (('b', 2), (4, 4)),
        (('b', 3), (6, 4)),
    )
    self.assertEqual(flat_tail.status['shape'].to_pairs(), expected)
def test_yarn_iter_element_a(self) -> None:
    """Use ``Yarn.iter_element`` both through ``apply`` and as a plain iterator."""
    bus_a = Bus.from_frames(
        (
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ),
        name='b1',
    )
    bus_b = Bus.from_frames(
        (
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ),
        name='b2',
    )
    yarn = Yarn.from_buses((bus_a, bus_b), retain_labels=False)

    # apply: map every Frame to its shape, keyed by label.
    shapes = yarn.iter_element().apply(lambda f: f.shape)
    expected_shapes = (
        ('f1', (4, 2)),
        ('f2', (4, 5)),
        ('f3', (2, 2)),
        ('f4', (2, 8)),
        ('f5', (4, 4)),
        ('f6', (6, 4)),
    )
    self.assertEqual(shapes.to_pairs(), expected_shapes)

    # direct iteration: names of the Frames with more than two rows.
    tall_names = [f.name for f in yarn.iter_element() if f.shape[0] > 2]  #type: ignore
    self.assertEqual(tall_names, ['f1', 'f2', 'f5', 'f6'])
def test_yarn_to_zip_pickle_a(self) -> None:
    """Write a Yarn of two file-backed buses to a zip pickle and reload it as a Bus."""
    source_a = Bus.from_frames(
        (
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        )
    )
    source_b = Bus.from_frames(
        (
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        )
    )

    with temp_file('.zip') as path_a, temp_file('.zip') as path_b, temp_file('.zip') as path_out:
        source_a.to_zip_pickle(path_a)
        source_b.to_zip_pickle(path_b)

        # Reload both buses from disk with max_persist=1.
        loaded_a = Bus.from_zip_pickle(path_a, max_persist=1).rename('a')
        loaded_b = Bus.from_zip_pickle(path_b, max_persist=1).rename('b')

        yarn = Yarn.from_buses((loaded_a, loaded_b), retain_labels=False)
        yarn.to_zip_pickle(path_out)

        # The reloaded Bus must carry the same index as the Yarn.
        reloaded = Bus.from_zip_pickle(path_out)
        self.assertTrue(reloaded.index.equals(yarn.index))
def test_frame_via_re_e(self) -> None:
    """Select columns with a Boolean array from a regex search on column labels."""
    frame = ff.parse('s(2,5)|c(I,str)|i(I,str)|v(int,bool,bool,float,str)')

    column_mask = frame.columns.via_re('[uU][vW]').search()
    selected = frame[column_mask]

    expected = (
        ('zUvW', (('zZbu', True), ('ztsv', False))),
        ('zkuW', (('zZbu', 1080.4), ('ztsv', 2580.34))),
    )
    self.assertEqual(selected.to_pairs(), expected)
def test_frame_he_hash_d(self) -> None:
    """Check a FrameHE and its ``astype(object)`` counterpart are equal and collapse in a set."""
    spec = 's(10,2)|i(I,str)|v(str)'
    original = ff.parse(spec).to_frame_he()
    as_object = ff.parse(spec).to_frame_he().astype(object)

    self.assertTrue(original == as_object)
    unique = set((original, as_object))
    self.assertEqual(len(unique), 1)
def test_frame_he_hash_c(self) -> None:
    """Check that a FrameHE and a renamed copy built from the same spec compare unequal."""
    spec = 's(10,1)|i(I,str)'
    unnamed = ff.parse(spec).to_frame_he()
    named = ff.parse(spec).to_frame_he().rename('foo')

    self.assertFalse(unnamed == named)
def test_frame_he_hash_b(self) -> None:
    """Check ``_hash`` is absent on a new FrameHE and matches ``hash()`` once computed."""
    frame = ff.parse('s(10,1)|i(I,str)').to_frame_he()

    # No cached hash attribute before hash() has been called.
    self.assertFalse(hasattr(frame, '_hash'))
    # hash() is evaluated first; the attribute is then read and compared.
    self.assertEqual(hash(frame), frame._hash)
def test_frame_he_ne_a(self) -> None:
    """Check ``!=`` between two FrameHE instances parsed with different shapes."""
    three_columns = ff.parse('s(5,3)|i(I,str)|c(I,int)|v(int,str,bool)').to_frame_he()
    two_columns = ff.parse('s(5,2)|i(I,str)|c(I,int)|v(int,str,bool)').to_frame_he()

    self.assertTrue(three_columns != two_columns)
def get_test_framesB() -> tp.Tuple[Frame, Frame]:
    """Return two frames parsed from the same fixture string, named 'a' and 'b'."""
    frame_a = ff.parse('s(4,4)|i(ID,dtD)|v(int)').rename('a')
    frame_b = ff.parse('s(4,4)|i(ID,dtD)|v(int)').rename('b')
    return frame_a, frame_b
def test_frame_iter_group_array_b(self) -> None:
    """Check ``iter_group_array`` with ``drop=True`` yields NumPy arrays of the expected shapes."""
    frame = ff.parse('s(7,3)|v(int)').assign[1].apply(lambda s: s % 3)

    arrays = tuple(frame.iter_group_array(1, drop=True))

    observed_shapes = [array.shape for array in arrays]
    self.assertEqual(observed_shapes, [(3, 2), (4, 2)])

    observed_types = [array.__class__ for array in arrays]
    self.assertEqual(observed_types, [np.ndarray, np.ndarray])
def __init__(self) -> None:
    """Reserve a temporary ``.zip`` path and build the wide fixture frame.

    Sets ``self.fp`` (path of the temporary file) and ``self.sff1`` (the
    fixture frame).
    """
    import os

    super().__init__()

    # mkstemp returns an open OS-level descriptor along with the path; only
    # the path is needed here, so close the descriptor instead of leaking it.
    fd, self.fp = tempfile.mkstemp(suffix='.zip')
    os.close(fd)

    self.sff1 = ff.parse('s(10,10_000)|v(int,bool,float)|i(I,str)|c(I,str)')