def test_yarn_items_a(self) -> None:
    # Iterating Yarn.items() over two Buses read back with max_persist=1
    # must yield Frame instances labelled in index order; afterwards exactly
    # two frames ('f3' and 'f6') are reported as loaded.
    f1 = ff.parse('s(4,2)').rename('f1')
    f2 = ff.parse('s(4,5)').rename('f2')
    f3 = ff.parse('s(2,2)').rename('f3')
    f4 = ff.parse('s(2,8)').rename('f4')
    f5 = ff.parse('s(4,4)').rename('f5')
    f6 = ff.parse('s(6,4)').rename('f6')

    b1 = Bus.from_frames((f1, f2, f3))
    b2 = Bus.from_frames((f4, f5, f6))

    with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
        b1.to_zip_pickle(fp1)
        b2.to_zip_pickle(fp2)

        bus_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
        bus_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')

        y1 = Yarn.from_buses((bus_a, bus_b), retain_labels=False)

        labels = []
        for label, frame in y1.items():
            # assertIs reports both operands on failure, unlike
            # assertTrue(a is b) which only reports "False is not true".
            self.assertIs(frame.__class__, Frame)
            labels.append(label)

        self.assertEqual(labels, list(y1.index))
        self.assertEqual(y1.status['loaded'].sum(), 2)
        self.assertEqual(
            y1.status.loc[y1.status['loaded']].index.values.tolist(),
            ['f3', 'f6'],
        )
def test_yarn_max_persist(self) -> None:
    # A Yarn over two lazily loaded Buses (max_persist=1 each) starts with
    # nothing loaded; after touching 'f2' and 'f6' only those two frames
    # contribute to nbytes, shapes, mloc and dtypes.
    src_a = Bus.from_frames((
        ff.parse('s(4,2)').rename('f1'),
        ff.parse('s(4,5)').rename('f2'),
        ff.parse('s(2,2)').rename('f3'),
    ))
    src_b = Bus.from_frames((
        ff.parse('s(2,8)').rename('f4'),
        ff.parse('s(4,4)').rename('f5'),
        ff.parse('s(6,4)').rename('f6'),
    ))

    with temp_file('.zip') as path_a, temp_file('.zip') as path_b:
        src_a.to_zip_pickle(path_a)
        src_b.to_zip_pickle(path_b)

        lazy_a = Bus.from_zip_pickle(path_a, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(path_b, max_persist=1).rename('b')
        yarn = Yarn.from_buses((lazy_a, lazy_b), retain_labels=False)

        # Before any access nothing is held in memory.
        self.assertEqual(yarn.nbytes, 0)
        self.assertEqual(yarn.status['loaded'].sum(), 0)

        # Load one frame from each underlying Bus.
        self.assertEqual(yarn['f2'].shape, (4, 5))
        self.assertEqual(yarn['f6'].shape, (6, 4))

        self.assertEqual(yarn.nbytes, 352)
        self.assertEqual(yarn.status['loaded'].sum(), 2)

        expected_shapes = (
            ('f1', None),
            ('f2', (4, 5)),
            ('f3', None),
            ('f4', None),
            ('f5', None),
            ('f6', (6, 4)),
        )
        self.assertEqual(yarn.shapes.to_pairs(), expected_shapes)
        self.assertEqual(yarn.mloc.isna().sum(), 4)
        self.assertEqual((yarn.dtypes == float).sum().sum(), 9)
def test_yarn_from_concat_b(self) -> None:
    # Concatenating single-Bus Yarns, then concatenating the result with
    # itself under an auto-generated index, must support integer lookup and
    # iloc slicing.
    src_a = Bus.from_frames((
        ff.parse('s(4,2)').rename('f1'),
        ff.parse('s(4,5)').rename('f2'),
        ff.parse('s(2,2)').rename('f3'),
    ))
    src_b = Bus.from_frames((
        ff.parse('s(2,8)').rename('f4'),
        ff.parse('s(4,4)').rename('f5'),
        ff.parse('s(6,4)').rename('f6'),
    ))

    with temp_file('.zip') as path_a, temp_file('.zip') as path_b:
        src_a.to_zip_pickle(path_a)
        src_b.to_zip_pickle(path_b)

        lazy_a = Bus.from_zip_pickle(path_a, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(path_b, max_persist=1).rename('b')

        yarn_a = Yarn.from_buses((lazy_a,), retain_labels=True)
        yarn_b = Yarn.from_buses((lazy_b,), retain_labels=True)
        combined = Yarn.from_concat((yarn_a, yarn_b))

        doubled = Yarn.from_concat((combined, combined), index=IndexAutoFactory)
        self.assertEqual(doubled[3].shape, (2, 8))
        self.assertEqual(doubled[0].shape, (4, 2))
        self.assertEqual(doubled[5].shape, (6, 4))

        tail = doubled.iloc[4:]
        self.assertEqual(tail.shape, (8,))
def test_yarn_unpersist_a(self) -> None:
    # After a full iteration two frames are reported loaded; unpersist()
    # drops the loaded count to zero, and a second full iteration brings it
    # back to two.
    src_a = Bus.from_frames((
        ff.parse('s(4,2)').rename('f1'),
        ff.parse('s(4,5)').rename('f2'),
        ff.parse('s(2,2)').rename('f3'),
    ))
    src_b = Bus.from_frames((
        ff.parse('s(2,8)').rename('f4'),
        ff.parse('s(4,4)').rename('f5'),
        ff.parse('s(6,4)').rename('f6'),
    ))

    with temp_file('.zip') as path_a, temp_file('.zip') as path_b:
        src_a.to_zip_pickle(path_a)
        src_b.to_zip_pickle(path_b)

        lazy_a = Bus.from_zip_pickle(path_a, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(path_b, max_persist=1).rename('b')
        yarn = Yarn.from_buses((lazy_a, lazy_b), retain_labels=False)

        first_pass = tuple(yarn.items())
        self.assertEqual(len(first_pass), 6)
        self.assertEqual(yarn.status['loaded'].sum(), 2)

        yarn.unpersist()
        self.assertEqual(yarn.status['loaded'].sum(), 0)

        second_pass = tuple(yarn.items())
        self.assertEqual(len(second_pass), 6)
        self.assertEqual(yarn.status['loaded'].sum(), 2)
def test_yarn_equals_e(self) -> None:
    # A Yarn built from in-memory Buses must compare equal to a Yarn built
    # from the same Buses read back lazily from disk; the comparison leaves
    # two frames reported as loaded in the lazy Yarn.
    src_a = Bus.from_frames(
        (
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ),
        name='a',
    )
    src_b = Bus.from_frames(
        (
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ),
        name='b',
    )
    yarn_memory = Yarn.from_buses((src_a, src_b), retain_labels=True)

    with temp_file('.zip') as path_a, temp_file('.zip') as path_b:
        src_a.to_zip_pickle(path_a)
        src_b.to_zip_pickle(path_b)

        lazy_a = Bus.from_zip_pickle(path_a, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(path_b, max_persist=1).rename('b')
        yarn_lazy = Yarn.from_buses((lazy_a, lazy_b), retain_labels=True)

        self.assertEqual(yarn_lazy.status['loaded'].sum(), 0)
        self.assertTrue(yarn_memory.equals(yarn_lazy))
        self.assertEqual(yarn_lazy.status['loaded'].sum(), 2)
def test_yarn_items_b(self) -> None:
    # Converting a lazily backed Yarn to a Series must expose every frame,
    # in order, with its original shape.
    src_a = Bus.from_frames((
        ff.parse('s(4,2)').rename('f1'),
        ff.parse('s(4,5)').rename('f2'),
        ff.parse('s(2,2)').rename('f3'),
    ))
    src_b = Bus.from_frames((
        ff.parse('s(2,8)').rename('f4'),
        ff.parse('s(4,4)').rename('f5'),
        ff.parse('s(6,4)').rename('f6'),
    ))

    with temp_file('.zip') as path_a, temp_file('.zip') as path_b:
        src_a.to_zip_pickle(path_a)
        src_b.to_zip_pickle(path_b)

        lazy_a = Bus.from_zip_pickle(path_a, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(path_b, max_persist=1).rename('b')
        yarn = Yarn.from_buses((lazy_a, lazy_b), retain_labels=False)

        as_series = yarn.to_series()
        observed = [(label, f.shape) for label, f in as_series.items()]
        expected = [
            ('f1', (4, 2)),
            ('f2', (4, 5)),
            ('f3', (2, 2)),
            ('f4', (2, 8)),
            ('f5', (4, 4)),
            ('f6', (6, 4)),
        ]
        self.assertEqual(observed, expected)
def test_bus_max_persist_a(self) -> None:
    # With max_persist=3, sequentially accessing all 20 frames must never
    # leave more than three loaded, and must end with only the final three
    # loaded.
    def items() -> tp.Iterator[tp.Tuple[str, Frame]]:
        for i in range(20):
            yield str(i), Frame(np.arange(i, i + 10).reshape(2, 5))

    s = Series.from_items(items(), dtype=object)
    b1 = Bus(s)

    config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )

    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp, config=config, max_persist=3)

        for i in b2.index:
            _ = b2[i]
            # assertLessEqual reports the offending count on failure,
            # where assertTrue(a <= b) would only say "False is not true".
            self.assertLessEqual(b2._loaded.sum(), 3)

        # after iteration only the last three are loaded
        self.assertEqual(
            b2._loaded.tolist(),
            [False] * 17 + [True] * 3,
        )
def test_bus_max_persist_3(self) -> None:
    # With max_persist equal to the Bus length, every frame stays loaded;
    # the assertions pin the key order of the _last_accessed mapping after
    # each list, scalar and slice iloc access.
    frame_pairs = (
        (str(n), Frame(np.arange(n, n + 10).reshape(2, 5)))
        for n in range(4)
    )
    source = Bus(Series.from_items(frame_pairs, dtype=object))

    store_config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )

    with temp_file('.zip') as path:
        source.to_zip_pickle(path)
        lazy = Bus.from_zip_pickle(path, config=store_config, max_persist=4)

        # Two list selections together touch all four frames.
        _ = lazy.iloc[[0, 1]]
        _ = lazy.iloc[[2, 3]]
        self.assertTrue(lazy._loaded_all)

        _ = lazy.iloc[[1, 0]]
        self.assertEqual(
            list(lazy._last_accessed.keys()),
            ['2', '3', '1', '0'],
        )

        _ = lazy.iloc[3]
        self.assertEqual(
            list(lazy._last_accessed.keys()),
            ['2', '1', '0', '3'],
        )

        _ = lazy.iloc[:3]
        self.assertEqual(
            list(lazy._last_accessed.keys()),
            ['3', '0', '1', '2'],
        )
def test_bus_nbytes_a(self) -> None:
    # Bus.nbytes must grow as frames are lazily loaded from the archive:
    # 48 after 'f2', 80 after 'f3', 112 once 'f1' is loaded as well.
    f1 = Frame.from_dict(
        dict(a=(1, 2), b=(3, 4)),
        index=('x', 'y'),
        name='f1',
    )
    f2 = Frame.from_dict(
        dict(a=(1, 2, 3), b=(4, 5, 6)),
        index=('x', 'y', 'z'),
        name='f2',
    )
    f3 = Frame.from_dict(
        dict(a=(10, 20), b=(50, 60)),
        index=('p', 'q'),
        name='f3',
    )

    b1 = Bus.from_frames((f1, f2, f3))

    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp)

        # Each lookup is done only to trigger loading; the returned Frame
        # is not needed, so it is bound to `_` rather than an unused name.
        _ = b2['f2']
        self.assertEqual(b2.nbytes, 48)

        _ = b2['f3']
        self.assertEqual(b2.nbytes, 80)

        _ = b2['f1']
        self.assertEqual(b2.nbytes, 112)
def test_bus_max_persist_b(self) -> None:
    # With max_persist=1, an iloc slice of a lazy Bus reports exactly one
    # loaded frame, and each later scalar access moves that single loaded
    # slot to the frame just accessed.
    def frame_pairs() -> tp.Iterator[tp.Tuple[str, Frame]]:
        for n in range(20):
            yield str(n), Frame(np.arange(n, n + 10).reshape(2, 5))

    source = Bus(Series.from_items(frame_pairs(), dtype=object))

    store_config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )

    with temp_file('.zip') as path:
        source.to_zip_pickle(path)
        lazy = Bus.from_zip_pickle(path, config=store_config, max_persist=1)

        upper_half = lazy.iloc[10:]
        self.assertEqual(upper_half._loaded.sum(), 1)

        # only the last one is loaded
        self.assertEqual(
            upper_half._loaded.tolist(),
            [False, False, False, False, False,
             False, False, False, False, True],
        )

        self.assertEqual(upper_half.iloc[0].sum().sum(), 145)
        self.assertEqual(
            upper_half._loaded.tolist(),
            [True, False, False, False, False,
             False, False, False, False, False],
        )

        self.assertEqual(upper_half.iloc[4].sum().sum(), 185)
        self.assertEqual(
            upper_half._loaded.tolist(),
            [False, False, False, False, True,
             False, False, False, False, False],
        )
def test_bus_keys_a(self) -> None:
    # keys() on a lazy Bus must not load any frame, whereas reading .values
    # loads every frame.
    frame_1 = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1',
    )
    frame_2 = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2',
    )
    frame_3 = Frame.from_dict(
        {'d': (10, 20), 'b': (50, 60)},
        index=('p', 'q'),
        name='f3',
    )
    frame_4 = Frame.from_dict(
        {'q': (None, None), 'r': (np.nan, np.nan)},
        index=(1000, 1001),
        name='f4',
    )

    source = Bus.from_frames((frame_1, frame_2, frame_3, frame_4))
    expected_keys = ['f1', 'f2', 'f3', 'f4']

    self.assertEqual(source.keys().values.tolist(), expected_keys)
    self.assertEqual(source.values[2].name, 'f3')

    with temp_file('.zip') as path:
        source.to_zip_pickle(path)
        lazy = Bus.from_zip_pickle(path)

        self.assertFalse(lazy._loaded_all)
        self.assertEqual(lazy.keys().values.tolist(), expected_keys)
        self.assertFalse(lazy._loaded.any())

        # accessing values forces loading all
        self.assertEqual(lazy.values[2].name, 'f3')
        self.assertTrue(lazy._loaded_all)
def test_bus_shapes_a(self) -> None:
    # Bus.shapes must report None for frames not yet loaded and the real
    # shape for each frame once it has been accessed.
    f1 = Frame.from_dict(
        dict(a=(1, 2), b=(3, 4)),
        index=('x', 'y'),
        name='f1',
    )
    f2 = Frame.from_dict(
        dict(a=(1, 2, 3), b=(4, 5, 6)),
        index=('x', 'y', 'z'),
        name='f2',
    )
    f3 = Frame.from_dict(
        dict(a=(10, 20), b=(50, 60)),
        index=('p', 'q'),
        name='f3',
    )

    b1 = Bus.from_frames((f1, f2, f3))

    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp)

        # The lookups exist only to trigger loading; the result is bound
        # to `_` instead of a named-but-unused local.
        _ = b2['f2']
        self.assertEqual(
            b2.shapes.to_pairs(),
            (('f1', None), ('f2', (3, 2)), ('f3', None)),
        )

        _ = b2['f3']
        self.assertEqual(
            b2.shapes.to_pairs(),
            (('f1', None), ('f2', (3, 2)), ('f3', (2, 2))),
        )
def test_bus_status_a(self) -> None:
    # Bus.status is a (3, 4) table for three frames; once every frame has
    # been loaded it reports loaded/size/nbytes/shape for each label.
    frame_1 = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1',
    )
    frame_2 = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2',
    )
    frame_3 = Frame.from_dict(
        {'d': (10, 20), 'b': (50, 60)},
        index=('p', 'q'),
        name='f3',
    )

    source = Bus.from_frames((frame_1, frame_2, frame_3))

    with temp_file('.zip') as path:
        source.to_zip_pickle(path)
        lazy = Bus.from_zip_pickle(path)

        initial_status = lazy.status
        self.assertEqual(initial_status.shape, (3, 4))

        # force load all
        tuple(lazy.items())

        expected = (
            ('loaded', (('f1', True), ('f2', True), ('f3', True))),
            ('size', (('f1', 4.0), ('f2', 6.0), ('f3', 4.0))),
            ('nbytes', (('f1', 32.0), ('f2', 48.0), ('f3', 32.0))),
            ('shape', (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2)))),
        )
        self.assertEqual(lazy.status.to_pairs(0), expected)
def test_bus_mloc_c(self) -> None:
    # The mloc entry recorded for 'f2' right after it is loaded must match
    # the entry reported after the remaining frames have been loaded too.
    f1 = Frame.from_dict(
        dict(a=(1, 2), b=(3, 4)),
        index=('x', 'y'),
        name='f1',
    )
    f2 = Frame.from_dict(
        dict(c=(1, 2, 3), b=(4, 5, 6)),
        index=('x', 'y', 'z'),
        name='f2',
    )
    f3 = Frame.from_dict(
        dict(d=(10, 20), b=(50, 60)),
        index=('p', 'q'),
        name='f3',
    )

    b1 = Bus.from_frames((f1, f2, f3))

    with temp_file('.zip') as fp:
        b1.to_zip_pickle(fp)
        b2 = Bus.from_zip_pickle(fp)

        # Lookups are performed only for their loading side effect, so the
        # results are bound to `_` rather than unused named locals.
        # NOTE(review): assumes the Bus itself keeps loaded frames alive
        # (no max_persist is given) -- confirm.
        _ = b2['f2']
        mloc1 = b2.mloc

        _ = b2['f3']
        _ = b2['f1']

        self.assertEqual(mloc1['f2'], b2.mloc.loc['f2'])
def test_bus_max_persist_c(self) -> None:
    # With max_persist equal to the Bus length, a full iteration leaves all
    # frames loaded. With max_persist=3 on a four-frame Bus, each
    # three-element iloc selection leaves exactly the selected frames loaded.
    frame_pairs = (
        (str(n), Frame(np.arange(n, n + 10).reshape(2, 5)))
        for n in range(4)
    )
    source = Bus(Series.from_items(frame_pairs, dtype=object))

    store_config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )

    with temp_file('.zip') as path:
        source.to_zip_pickle(path)

        unbounded = Bus.from_zip_pickle(path, config=store_config, max_persist=4)
        for _ in unbounded.items():
            pass
        self.assertTrue(unbounded._loaded.all())

        bounded = Bus.from_zip_pickle(path, config=store_config, max_persist=3)

        _ = bounded.iloc[[0, 2, 3]]
        self.assertEqual(bounded._loaded.tolist(), [True, False, True, True])

        _ = bounded.iloc[[0, 1, 3]]
        self.assertEqual(bounded._loaded.tolist(), [True, True, False, True])

        _ = bounded.iloc[[1, 2, 3]]
        self.assertEqual(bounded._loaded.tolist(), [False, True, True, True])

        _ = bounded.iloc[[0, 1, 2]]
        self.assertEqual(bounded._loaded.tolist(), [True, True, True, False])

        _ = bounded.iloc[[0, 2, 3]]
        self.assertEqual(bounded._loaded.tolist(), [True, False, True, True])
def test_yarn_to_zip_pickle_a(self) -> None:
    # Writing a Yarn to a zip-pickle archive and reading it back as a Bus
    # must preserve the Yarn's index.
    src_a = Bus.from_frames((
        ff.parse('s(4,2)').rename('f1'),
        ff.parse('s(4,5)').rename('f2'),
        ff.parse('s(2,2)').rename('f3'),
    ))
    src_b = Bus.from_frames((
        ff.parse('s(2,8)').rename('f4'),
        ff.parse('s(4,4)').rename('f5'),
        ff.parse('s(6,4)').rename('f6'),
    ))

    with temp_file('.zip') as path_a, \
            temp_file('.zip') as path_b, \
            temp_file('.zip') as path_out:
        src_a.to_zip_pickle(path_a)
        src_b.to_zip_pickle(path_b)

        lazy_a = Bus.from_zip_pickle(path_a, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(path_b, max_persist=1).rename('b')
        yarn = Yarn.from_buses((lazy_a, lazy_b), retain_labels=False)

        yarn.to_zip_pickle(path_out)
        round_tripped = Bus.from_zip_pickle(path_out)

        self.assertTrue(round_tripped.index.equals(yarn.index))
def test_bus_iloc_a(self) -> None:
    # Selecting two positions with iloc from a lazy Bus yields a Bus whose
    # status reports both selected frames as loaded.
    frame_1 = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='f1',
    )
    frame_2 = Frame.from_dict(
        {'c': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='f2',
    )
    frame_3 = Frame.from_dict(
        {'d': (10, 20), 'b': (50, 60)},
        index=('p', 'q'),
        name='f3',
    )

    source = Bus.from_frames((frame_1, frame_2, frame_3))

    with temp_file('.zip') as path:
        source.to_zip_pickle(path)
        lazy = Bus.from_zip_pickle(path)

        selection = lazy.iloc[[0, 2]]
        self.assertEqual(
            selection.status['loaded'].to_pairs(),
            (('f1', True), ('f3', True)),
        )