def test_yarn_init_b(self) -> None:
    """Build Yarns over two Buses with explicit, default, and date labels.

    The final check expects ``ErrorInitYarn`` when ``range(5)`` is given as
    the index of a Yarn over a single two-frame Bus.
    """
    bus_of_three = Bus.from_frames((
        ff.parse('s(4,4)|v(int,float)').rename('f1'),
        ff.parse('s(4,4)|v(str)').rename('f2'),
        ff.parse('s(4,4)|v(bool)').rename('f3'),
    ))
    bus_of_two = Bus.from_frames((
        ff.parse('s(4,4)|v(int,float)').rename('f4'),
        ff.parse('s(4,4)|v(str)').rename('f5'),
    ))
    both = (bus_of_three, bus_of_two)

    # Explicit single-character labels.
    lettered = Yarn(both, index=tuple('abcde'))
    self.assertEqual(lettered.index.values.tolist(), list('abcde'))
    self.assertEqual(lettered[['a', 'c', 'e']].shape, (3, ))

    # No index supplied: labels are expected to be 0..4.
    unlabelled = Yarn(both)
    self.assertEqual(unlabelled.index.values.tolist(), list(range(5)))
    self.assertEqual(unlabelled[2:].shape, (3, ))

    # Date strings converted through an explicit index constructor.
    dated = Yarn(
        (bus_of_two, ),
        index=('2021-01-01', '2021-02-15'),
        index_constructor=IndexDate,
    )
    self.assertEqual(dated.index.__class__, IndexDate)
    expected_dates = [datetime.date(2021, 1, 1), datetime.date(2021, 2, 15)]
    self.assertEqual(dated.index.values.tolist(), expected_dates)

    with self.assertRaises(ErrorInitYarn):
        Yarn((bus_of_two, ), index=range(5))
def test_yarn_loc_e(self) -> None:
    """Slice a three-Bus Yarn by frame label; check shapes and one frame's values."""
    # (bus name, ((frame name, fixture spec), ...)) in construction order.
    layout = (
        ('a', (
            ('f1', 's(4,4)|v(int,float)'),
            ('f2', 's(4,4)|v(str)'),
            ('f3', 's(4,4)|v(bool)'),
        )),
        ('b', (
            ('f4', 's(4,4)|v(int,float)'),
            ('f5', 's(4,4)|v(str)'),
        )),
        ('c', (
            ('f6', 's(2,4)|v(int,float)'),
            ('f7', 's(4,2)|v(str)'),
        )),
    )
    buses = tuple(
        Bus.from_frames(
            tuple(ff.parse(spec).rename(label) for label, spec in members),
            name=bus_name,
        )
        for bus_name, members in layout
    )
    yarn = Yarn.from_buses(buses, retain_labels=False, name='foo')

    window = yarn['f2':'f6'] #type: ignore

    expected_shapes = (
        ('f2', (4, 4)),
        ('f3', (4, 4)),
        ('f4', (4, 4)),
        ('f5', (4, 4)),
        ('f6', (2, 4)),
    )
    self.assertEqual(window.shapes.to_pairs(), expected_shapes)

    expected_f5 = (
        (0, ((0, 'zjZQ'), (1, 'zO5l'), (2, 'zEdH'), (3, 'zB7E'))),
        (1, ((0, 'zaji'), (1, 'zJnC'), (2, 'zDdR'), (3, 'zuVU'))),
        (2, ((0, 'ztsv'), (1, 'zUvW'), (2, 'zkuW'), (3, 'zmVj'))),
        (3, ((0, 'z2Oo'), (1, 'z5l6'), (2, 'zCE3'), (3, 'zr4u'))),
    )
    self.assertEqual(window['f5'].to_pairs(), expected_f5)
def test_yarn_from_concat_b(self) -> None:
    """Concatenate Yarns backed by zip-pickle Buses and select by auto-generated labels."""
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    src_a = Bus.from_frames(frames[:3])
    src_b = Bus.from_frames(frames[3:])

    with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
        src_a.to_zip_pickle(fp1)
        src_b.to_zip_pickle(fp2)

        bus_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
        bus_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')

        yarn_a = Yarn.from_buses((bus_a, ), retain_labels=True)
        yarn_b = Yarn.from_buses((bus_b, ), retain_labels=True)
        combined = Yarn.from_concat((yarn_a, yarn_b))

        # Concatenating the combined Yarn with itself gives twelve entries,
        # relabelled through IndexAutoFactory.
        doubled = Yarn.from_concat((combined, combined), index=IndexAutoFactory)
        for position, shape in ((3, (2, 8)), (0, (4, 2)), (5, (6, 4))):
            self.assertEqual(doubled[position].shape, shape)

        tail = doubled.iloc[4:]
        self.assertEqual(tail.shape, (8, ))
def test_yarn_max_persist(self) -> None:
    """Check Yarn load accounting when each backing Bus uses ``max_persist=1``."""
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    src_a = Bus.from_frames(frames[:3])
    src_b = Bus.from_frames(frames[3:])

    with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
        src_a.to_zip_pickle(fp1)
        src_b.to_zip_pickle(fp2)

        lazy_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')
        yarn = Yarn.from_buses((lazy_a, lazy_b), retain_labels=False)

        # Before any access, nothing is reported as loaded.
        self.assertEqual(yarn.nbytes, 0)
        self.assertEqual(yarn.status['loaded'].sum(), 0)

        # Read one frame from each Bus.
        self.assertEqual(yarn['f2'].shape, (4, 5))
        self.assertEqual(yarn['f6'].shape, (6, 4))

        self.assertEqual(yarn.nbytes, 352)
        self.assertEqual(yarn.status['loaded'].sum(), 2)

        expected_shapes = (
            ('f1', None),
            ('f2', (4, 5)),
            ('f3', None),
            ('f4', None),
            ('f5', None),
            ('f6', (6, 4)),
        )
        self.assertEqual(yarn.shapes.to_pairs(), expected_shapes)
        self.assertEqual(yarn.mloc.isna().sum(), 4)
        self.assertEqual((yarn.dtypes == float).sum().sum(), 9)
def test_bus_max_persist_a(self) -> None:
    """Walk a 20-frame Bus with ``max_persist=3`` and check at most three stay loaded."""

    def labelled_frames() -> tp.Iterator[tp.Tuple[str, Frame]]:
        # Twenty 2x5 frames labelled '0'..'19'.
        for start in range(20):
            yield str(start), Frame(np.arange(start, start+10).reshape(2, 5))

    source = Bus(Series.from_items(labelled_frames(), dtype=object))

    config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )

    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp)
        lazy = Bus.from_zip_pickle(fp, config=config, max_persist=3)

        for label in lazy.index:
            _ = lazy[label]
            self.assertLessEqual(lazy._loaded.sum(), 3)

        # After the full pass only the last three entries remain loaded.
        expected = [position >= 17 for position in range(20)]
        self.assertEqual(lazy._loaded.tolist(), expected)
def test_yarn_items_a(self) -> None:
    """Iterate ``Yarn.items()`` over two ``max_persist=1`` Buses and check what stays loaded."""
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    src_a = Bus.from_frames(frames[:3])
    src_b = Bus.from_frames(frames[3:])

    with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
        src_a.to_zip_pickle(fp1)
        src_b.to_zip_pickle(fp2)

        lazy_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')
        yarn = Yarn.from_buses((lazy_a, lazy_b), retain_labels=False)

        seen = []
        for label, frame in yarn.items():
            self.assertIs(frame.__class__, Frame)
            seen.append(label)

        self.assertEqual(seen, list(yarn.index))

        # One frame per Bus is expected to remain loaded: the last of each.
        self.assertEqual(yarn.status['loaded'].sum(), 2)
        still_loaded = yarn.status.loc[yarn.status['loaded']]
        self.assertEqual(still_loaded.index.values.tolist(), ['f3', 'f6'])
def test_yarn_relabel_a(self) -> None:
    """Relabel a Yarn with a callable, ``None``, a set, a tuple, and ``IndexAutoFactory``."""
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    buses = (
        Bus.from_frames(frames[:3]),
        Bus.from_frames(frames[3:4]),
        Bus.from_frames(frames[4:]),
    )
    yarn = Yarn(buses)

    def dashed(label: object) -> str:
        return f'--{label}--'

    self.assertEqual(yarn.relabel(dashed).loc['--4--'].shape, (4, 4))

    # None is a no-op: the integer label 4 still resolves.
    self.assertEqual(yarn.relabel(None).loc[4].shape, (4, 4))

    # A set argument is expected to be rejected.
    with self.assertRaises(RelabelInvalid):
        yarn.relabel({3, 4, 5})

    lettered_tail = yarn.relabel(tuple('abcdef'))['d':]
    self.assertEqual(
        lettered_tail.status['shape'].to_pairs(),
        (('d', (2, 8)), ('e', (4, 4)), ('f', (6, 4))),
    )

    lettered = Yarn(buses, index=tuple('abcdef'))
    self.assertEqual(
        lettered.index.values.tolist(),
        ['a', 'b', 'c', 'd', 'e', 'f'],
    )
    self.assertEqual(
        lettered.relabel(IndexAutoFactory).index.values.tolist(),
        [0, 1, 2, 3, 4, 5],
    )
def test_bus_nbytes_a(self) -> None:
    """Check that ``Bus.nbytes`` grows as frames are read from a zip-pickle store."""
    source = Bus.from_frames((
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'a': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'a': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    ))

    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp)
        lazy = Bus.from_zip_pickle(fp)

        # Read order and the running total expected after each read.
        held = []
        for label, running_total in (('f2', 48), ('f3', 80), ('f1', 112)):
            held.append(lazy[label])
            self.assertEqual(lazy.nbytes, running_total)
def test_bus_init_a(self) -> None:
    """Round-trip two frames through a zip of TSV files, via Bus and via StoreZipTSV."""
    # NOTE(review): a second definition named test_bus_init_a appears in this
    # file; if both live in the same class only the later one will run.
    foo = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='foo',
    )
    bar = Frame.from_dict(
        {'a': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='bar',
    )

    config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
    source = Bus.from_frames((foo, bar), config=config)
    self.assertEqual(source.keys().values.tolist(), ['foo', 'bar'])

    with temp_file('.zip') as fp:
        source.to_zip_tsv(fp)
        reloaded = Bus.from_zip_tsv(fp)

        # Access both labels through the Bus before the archive is rewritten.
        held = [reloaded[label] for label in ('bar', 'foo')]

        store = StoreZipTSV(fp)
        store.write(source.items())

        # Open question from the original author:
        # how to show that this derived getitem has derived type?
        from_store = store.read('foo', config=config['foo'])
        self.assertEqual(
            from_store.to_pairs(0),
            (('a', (('x', 1), ('y', 2))), ('b', (('x', 3), ('y', 4)))),
        )
def test_bus_init_c(self) -> None:
    """Round-trip two frames through a zip of CSV files using a shared config."""
    foo = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='foo',
    )
    bar = Frame.from_dict(
        {'a': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='bar',
    )

    config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
    source = Bus.from_frames((foo, bar), config=config)
    self.assertEqual(source.keys().values.tolist(), ['foo', 'bar'])

    with temp_file('.zip') as fp:
        source.to_zip_csv(fp)
        reloaded = Bus.from_zip_csv(fp, config=config)

        # Read both frames first, then compare each with its source.
        foo_back = reloaded['foo']
        bar_back = reloaded['bar']
        for expected, actual in ((foo, foo_back), (bar, bar_back)):
            self.assertEqualFrames(expected, actual)
def test_bus_to_xlsx_a(self) -> None:
    """Round-trip three frames through an XLSX workbook and compare each one."""
    originals = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    )

    store_config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )
    config = StoreConfigMap.from_config(store_config)
    source = Bus.from_frames(originals, config=config)

    with temp_file('.xlsx') as fp:
        source.to_xlsx(fp)
        reloaded = Bus.from_xlsx(fp, config=config)
        tuple(reloaded.items()) # force loading all

        for expected in originals:
            self.assertEqualFrames(expected, reloaded[expected.name])
def test_yarn_items_b(self) -> None:
    """Convert a Yarn over two ``max_persist=1`` Buses to a Series and check every frame."""
    shape_by_label = {
        'f1': (4, 2),
        'f2': (4, 5),
        'f3': (2, 2),
        'f4': (2, 8),
        'f5': (4, 4),
        'f6': (6, 4),
    }
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    src_a = Bus.from_frames(frames[:3])
    src_b = Bus.from_frames(frames[3:])

    with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
        src_a.to_zip_pickle(fp1)
        src_b.to_zip_pickle(fp2)

        lazy_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')
        yarn = Yarn.from_buses((lazy_a, lazy_b), retain_labels=False)

        as_series = yarn.to_series()
        observed = [(label, frame.shape) for label, frame in as_series.items()]
        self.assertEqual(observed, list(shape_by_label.items()))
def test_bus_to_parquet_a(self) -> None:
    """Round-trip three frames through a zip of Parquet files, ignoring dtype differences."""
    originals = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    )

    config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )
    source = Bus.from_frames(originals, config=config)

    with temp_file('.zip') as fp:
        source.to_zip_parquet(fp)
        reloaded = Bus.from_zip_parquet(fp, config=config)
        tuple(reloaded.items()) # force loading all

        for expected in originals:
            # Parquet brings characters back as objects, so dtypes differ
            # and are excluded from the comparison.
            self.assertEqualFrames(
                expected,
                reloaded[expected.name],
                compare_dtype=False,
            )
def test_bus_keys_a(self) -> None:
    """Check ``keys()`` and ``values`` on an in-memory Bus and on one read from zip-pickle."""
    labels = ['f1', 'f2', 'f3', 'f4']
    source = Bus.from_frames((
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
        Frame.from_dict(
            {'q': (None, None), 'r': (np.nan, np.nan)},
            index=(1000, 1001),
            name='f4',
        ),
    ))

    self.assertEqual(source.keys().values.tolist(), labels)
    self.assertEqual(source.values[2].name, 'f3')

    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp)
        lazy = Bus.from_zip_pickle(fp)

        # Listing the keys must not load any frame.
        self.assertFalse(lazy._loaded_all)
        self.assertEqual(lazy.keys().values.tolist(), labels)
        self.assertFalse(lazy._loaded.any())

        # Accessing values forces loading all.
        self.assertEqual(lazy.values[2].name, 'f3')
        self.assertTrue(lazy._loaded_all)
def test_bus_shapes_a(self) -> None:
    """Check that ``Bus.shapes`` reports ``None`` for frames that have not been read."""
    source = Bus.from_frames((
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'a': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'a': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    ))

    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp)
        lazy = Bus.from_zip_pickle(fp)

        held = [lazy['f2']]
        self.assertEqual(
            lazy.shapes.to_pairs(),
            (('f1', None), ('f2', (3, 2)), ('f3', None)),
        )

        held.append(lazy['f3'])
        self.assertEqual(
            lazy.shapes.to_pairs(),
            (('f1', None), ('f2', (3, 2)), ('f3', (2, 2))),
        )
def test_bus_max_persist_3(self) -> None:
    """Track the order of ``_last_accessed`` keys when ``max_persist`` equals the Bus length."""

    def labelled_frames() -> tp.Iterator[tp.Tuple[str, Frame]]:
        # Four 2x5 frames labelled '0'..'3'.
        for start in range(4):
            yield str(start), Frame(np.arange(start, start+10).reshape(2, 5))

    source = Bus(Series.from_items(labelled_frames(), dtype=object))

    config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )

    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp)
        lazy = Bus.from_zip_pickle(fp, config=config, max_persist=4)

        # Two list selections together touch every frame.
        _ = lazy.iloc[[0, 1]]
        _ = lazy.iloc[[2, 3]]
        self.assertTrue(lazy._loaded_all)

        # Each later selection moves its labels to the end of the record.
        _ = lazy.iloc[[1, 0]]
        self.assertEqual(
            list(lazy._last_accessed.keys()),
            ['2', '3', '1', '0'],
        )

        _ = lazy.iloc[3]
        self.assertEqual(
            list(lazy._last_accessed.keys()),
            ['2', '1', '0', '3'],
        )

        _ = lazy.iloc[:3]
        self.assertEqual(
            list(lazy._last_accessed.keys()),
            ['3', '0', '1', '2'],
        )
def test_bus_to_xlsx_c(self) -> None:
    '''
    Test manipulating a file behind the Bus: overwriting the workbook after
    the Bus was created from it must raise StoreFileMutation on read.
    '''
    original = Frame.from_dict(
        {'a': (1, 2, 3)},
        index=('x', 'y', 'z'),
        name='f1',
    )
    replacement = Frame.from_dict(
        {'x': (10, 20, 30)},
        index=('q', 'r', 's'),
        name='f2',
    )
    source = Bus.from_frames((original, ))

    with temp_file('.xlsx') as fp:
        source.to_xlsx(fp)
        lazy = Bus.from_xlsx(fp)

        # Overwrite the workbook with an unrelated frame.
        replacement.to_xlsx(fp)

        with self.assertRaises(StoreFileMutation):
            tuple(lazy.items())
def test_yarn_drop_b(self) -> None:
    """Drop entries from a three-Bus Yarn by position: slice, Boolean array, integer, list."""
    shape_by_label = {
        'f1': (4, 2),
        'f2': (4, 5),
        'f3': (2, 2),
        'f4': (2, 8),
        'f5': (4, 4),
        'f6': (6, 4),
    }
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    buses = (
        Bus.from_frames(frames[:3], name='b1'),
        Bus.from_frames(frames[3:4], name='b2'),
        Bus.from_frames(frames[4:], name='b3'),
    )
    yarn = Yarn.from_buses(buses, retain_labels=False)

    def summarize(selection):
        # (name, shape) for every frame left in the selection.
        return [(frame.name, frame.shape) for frame in selection.values]

    def expected(*labels):
        return [(label, shape_by_label[label]) for label in labels]

    sliced = yarn.drop.iloc[2:5]
    self.assertEqual(len(sliced._series), 2) # 2 buses remain
    self.assertEqual(summarize(sliced), expected('f1', 'f2', 'f6'))

    mask = np.array([True, False, False, False, False, True])
    masked = yarn.drop.iloc[mask]
    self.assertEqual(summarize(masked), expected('f2', 'f3', 'f4', 'f5'))

    single = yarn.drop.iloc[3]
    self.assertEqual(
        summarize(single),
        expected('f1', 'f2', 'f3', 'f5', 'f6'),
    )

    listed = yarn.drop.iloc[[3, 4, 5]]
    self.assertEqual(summarize(listed), expected('f1', 'f2', 'f3'))
def test_yarn_unpersist_a(self) -> None:
    """Check that ``Yarn.unpersist()`` clears loaded frames and that they can be read again."""
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    src_a = Bus.from_frames(frames[:3])
    src_b = Bus.from_frames(frames[3:])

    with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
        src_a.to_zip_pickle(fp1)
        src_b.to_zip_pickle(fp2)

        lazy_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')
        yarn = Yarn.from_buses((lazy_a, lazy_b), retain_labels=False)

        # A full pass leaves two frames loaded (max_persist=1 on each Bus).
        self.assertEqual(len(tuple(yarn.items())), 6)
        self.assertEqual(yarn.status['loaded'].sum(), 2)

        yarn.unpersist()
        self.assertEqual(yarn.status['loaded'].sum(), 0)

        # A second full pass gives the same counts as the first.
        self.assertEqual(len(tuple(yarn.items())), 6)
        self.assertEqual(yarn.status['loaded'].sum(), 2)
def test_yarn_drop_a(self) -> None:
    """Drop entries from a three-Bus Yarn by label: slice, Boolean mask, single label, list."""
    shape_by_label = {
        'f1': (4, 2),
        'f2': (4, 5),
        'f3': (2, 2),
        'f4': (2, 8),
        'f5': (4, 4),
        'f6': (6, 4),
    }
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    buses = (
        Bus.from_frames(frames[:3], name='b1'),
        Bus.from_frames(frames[3:4], name='b2'),
        Bus.from_frames(frames[4:], name='b3'),
    )
    yarn = Yarn.from_buses(buses, retain_labels=False)

    def summarize(selection):
        # (name, shape) for every frame left in the selection.
        return [(frame.name, frame.shape) for frame in selection.values]

    def expected(*labels):
        return [(label, shape_by_label[label]) for label in labels]

    sliced = yarn.drop['f3':'f5'] #type: ignore
    self.assertEqual(len(sliced._series), 2) # 2 buses remain
    self.assertEqual(summarize(sliced), expected('f1', 'f2', 'f6'))

    masked = yarn.drop[yarn.index.isin(('f1', 'f6'))]
    self.assertEqual(summarize(masked), expected('f2', 'f3', 'f4', 'f5'))

    single = yarn.drop['f4']
    self.assertEqual(
        summarize(single),
        expected('f1', 'f2', 'f3', 'f5', 'f6'),
    )

    listed = yarn.drop[['f4', 'f5', 'f6']]
    self.assertEqual(summarize(listed), expected('f1', 'f2', 'f3'))
def test_yarn_from_buses_a(self) -> None:
    """Check basic Yarn attributes for both settings of ``retain_labels``.

    With ``retain_labels=True`` the index is two levels deep (shape ``(5, 2)``);
    with ``retain_labels=False`` it is flat (shape ``(5,)``). The container
    attributes (shape, size, dtype, ndim) are checked on each Yarn.
    """
    f1 = ff.parse('s(4,4)|v(int,float)').rename('f1')
    f2 = ff.parse('s(4,4)|v(str)').rename('f2')
    f3 = ff.parse('s(4,4)|v(bool)').rename('f3')
    b1 = Bus.from_frames((f1, f2, f3), name='a')

    f4 = ff.parse('s(4,4)|v(int,float)').rename('f4')
    f5 = ff.parse('s(4,4)|v(str)').rename('f5')
    b2 = Bus.from_frames((f4, f5), name='b')

    y1 = Yarn.from_buses((b1, b2), retain_labels=True)
    self.assertEqual(len(y1), 5)
    self.assertEqual(y1.index.shape, (5, 2))
    self.assertEqual(y1.shape, (5, ))
    self.assertEqual(y1.size, 5)
    self.assertEqual(y1.dtype, object)
    self.assertEqual(y1.ndim, 1)

    # Slicing from a (bus name, frame name) label keeps the last four entries.
    y3 = y1[('a', 'f2'):] #type: ignore
    self.assertEqual(y3.shape, (4, ))

    y2 = Yarn.from_buses((b1, b2), retain_labels=False)
    self.assertEqual(len(y2), 5)
    self.assertEqual(y2.index.shape, (5, ))
    # These four checks previously repeated the y1 assertions, leaving the
    # flat-index Yarn unverified.
    self.assertEqual(y2.shape, (5, ))
    self.assertEqual(y2.size, 5)
    self.assertEqual(y2.dtype, object)
    self.assertEqual(y2.ndim, 1)
def test_yarn_equals_e(self) -> None:
    """Compare an in-memory Yarn with one backed by ``max_persist=1`` zip-pickle Buses."""
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
        ('f5', 's(4,4)'),
        ('f6', 's(6,4)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    src_a = Bus.from_frames(frames[:3], name='a')
    src_b = Bus.from_frames(frames[3:], name='b')
    in_memory = Yarn.from_buses((src_a, src_b), retain_labels=True)

    with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
        src_a.to_zip_pickle(fp1)
        src_b.to_zip_pickle(fp2)

        lazy_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
        lazy_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')
        on_disk = Yarn.from_buses((lazy_a, lazy_b), retain_labels=True)

        # Nothing is loaded before the comparison; afterwards two frames are
        # expected to be loaded.
        self.assertEqual(on_disk.status['loaded'].sum(), 0)
        self.assertTrue(in_memory.equals(on_disk))
        self.assertEqual(on_disk.status['loaded'].sum(), 2)
def test_bus_status_a(self) -> None:
    """Check the shape and, once everything is loaded, the content of ``Bus.status``."""
    source = Bus.from_frames((
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    ))

    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp)
        lazy = Bus.from_zip_pickle(fp)

        # Three frames by four status columns, even before any load.
        initial = lazy.status
        self.assertEqual(initial.shape, (3, 4))

        # force load all
        tuple(lazy.items())

        expected = (
            ('loaded', (('f1', True), ('f2', True), ('f3', True))),
            ('size', (('f1', 4.0), ('f2', 6.0), ('f3', 4.0))),
            ('nbytes', (('f1', 32.0), ('f2', 48.0), ('f3', 32.0))),
            ('shape', (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2)))),
        )
        self.assertEqual(lazy.status.to_pairs(0), expected)
def test_bus_to_xlsx_f(self) -> None:
    """Round-trip a frame with datetime index and values through XLSX.

    The frame is written with date column labels, a datetime index, and
    datetime64 values; after reading back with ``include_index=True`` and
    ``index_depth=1`` the index and the values are compared as
    ``datetime`` objects.
    """
    f = Frame.from_records(
        [
            [np.datetime64('1983-02-20 05:34:18.763'), np.datetime64('2020-08-01')],
            [np.datetime64('1975-03-20 05:20:18.001'), np.datetime64('2020-07-31')],
        ],
        columns=(date(2020, 7, 31), date(2020, 8, 1)),
        index=(datetime(2020, 7, 31, 14, 20, 8), datetime(2017, 4, 28, 2, 30, 2)),
        name='frame',
    )
    b1 = Bus.from_frames([f])

    with temp_file('.xlsx') as fp:
        b1.to_xlsx(fp)

        config = StoreConfig(include_index=True, index_depth=1)
        b2 = Bus.from_xlsx(fp, config=config)
        tuple(b2.items()) # force loading all

        # The index assertion used to appear twice verbatim; the redundant
        # copy was removed.
        # NOTE(review): the duplicate may have been meant to check the
        # columns, which are not asserted here; confirm the expected
        # round-trip column labels before adding that check.
        self.assertEqual(
            b2['frame'].index.values.tolist(),
            [datetime(2020, 7, 31, 14, 20, 8), datetime(2017, 4, 28, 2, 30, 2)],
        )

        self.assertEqual(
            b2['frame'].values.tolist(),
            [
                [datetime(1983, 2, 20, 5, 34, 18, 763000), datetime(2020, 8, 1, 0, 0)],
                [datetime(1975, 3, 20, 5, 20, 18, 1000), datetime(2020, 7, 31, 0, 0)],
            ],
        )
def test_bus_init_a(self) -> None:
    """Round-trip two frames through a zip of TSV files, via Bus and via StoreZipTSV."""
    # NOTE(review): a second definition named test_bus_init_a appears in this
    # file; if both live in the same class only the later one will run.
    foo = Frame.from_dict(
        {'a': (1, 2), 'b': (3, 4)},
        index=('x', 'y'),
        name='foo',
    )
    bar = Frame.from_dict(
        {'a': (1, 2, 3), 'b': (4, 5, 6)},
        index=('x', 'y', 'z'),
        name='bar',
    )

    source = Bus.from_frames((foo, bar))
    self.assertEqual(source.keys().values.tolist(), ['foo', 'bar'])

    with temp_file('.zip') as fp:
        source.to_zip_tsv(fp)
        reloaded = Bus.from_zip_tsv(fp)

        # Access both labels through the Bus before the archive is rewritten.
        held = [reloaded[label] for label in ('bar', 'foo')]

        store = StoreZipTSV(fp)
        store.write(source.items())

        from_store = store.read('foo')
        self.assertEqual(
            from_store.to_pairs(0),
            (('a', (('x', 1), ('y', 2))), ('b', (('x', 3), ('y', 4)))),
        )
def test_bus_to_hdf5_a(self) -> None:
    """Round-trip three frames through an HDF5 file using a config derived from the frames."""
    originals = (
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    )
    config = StoreConfigMap.from_frames(originals)
    source = Bus.from_frames(originals, config=config)

    with temp_file('.h5') as fp:
        source.to_hdf5(fp)
        reloaded = Bus.from_hdf5(fp, config=config)
        tuple(reloaded.items()) # force loading all

        for expected in originals:
            self.assertEqualFrames(expected, reloaded[expected.name])
def test_bus_init_b(self) -> None:
    """Check that ``Bus`` raises ``ErrorInitBus`` for Series that do not hold frames.

    Two inputs are tried: a Series of integers with its default dtype, and an
    object-dtype Series whose values are integers.
    """
    invalid_inputs = (
        Series([1, 2, 3]),
        Series([3, 4], dtype=object),
    )
    for candidate in invalid_inputs:
        with self.assertRaises(ErrorInitBus):
            Bus(candidate)
def test_bus_mloc_c(self) -> None:
    """Check that a frame's ``mloc`` entry is unchanged after further frames are loaded."""
    source = Bus.from_frames((
        Frame.from_dict({'a': (1, 2), 'b': (3, 4)}, index=('x', 'y'), name='f1'),
        Frame.from_dict({'c': (1, 2, 3), 'b': (4, 5, 6)}, index=('x', 'y', 'z'), name='f2'),
        Frame.from_dict({'d': (10, 20), 'b': (50, 60)}, index=('p', 'q'), name='f3'),
    ))

    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp)
        lazy = Bus.from_zip_pickle(fp)

        # Snapshot mloc after the first load, then load the remaining frames.
        held = [lazy['f2']]
        snapshot = lazy.mloc
        held.extend(lazy[label] for label in ('f3', 'f1'))

        self.assertEqual(snapshot['f2'], lazy.mloc.loc['f2'])
def test_bus_max_persist_b(self) -> None:
    """Check that a slice of a ``max_persist=1`` Bus keeps only one frame loaded at a time."""

    def labelled_frames() -> tp.Iterator[tp.Tuple[str, Frame]]:
        # Twenty 2x5 frames labelled '0'..'19'.
        for start in range(20):
            yield str(start), Frame(np.arange(start, start+10).reshape(2, 5))

    def only_loaded(position: int) -> tp.List[bool]:
        # Ten flags, True only at the given position.
        return [slot == position for slot in range(10)]

    source = Bus(Series.from_items(labelled_frames(), dtype=object))

    config = StoreConfig(
        index_depth=1,
        columns_depth=1,
        include_columns=True,
        include_index=True,
    )

    with temp_file('.zip') as fp:
        source.to_zip_pickle(fp)
        lazy = Bus.from_zip_pickle(fp, config=config, max_persist=1)

        upper_half = lazy.iloc[10:]
        self.assertEqual(upper_half._loaded.sum(), 1)
        # Only the last one is loaded.
        self.assertEqual(upper_half._loaded.tolist(), only_loaded(9))

        # Reading another position makes it the single loaded entry.
        self.assertEqual(upper_half.iloc[0].sum().sum(), 145)
        self.assertEqual(upper_half._loaded.tolist(), only_loaded(0))

        self.assertEqual(upper_half.iloc[4].sum().sum(), 185)
        self.assertEqual(upper_half._loaded.tolist(), only_loaded(4))
def test_yarn_dtypes_a(self) -> None:
    """Check the shape of ``Yarn.dtypes`` for four frames with up to eight columns."""
    specs = (
        ('f1', 's(4,2)'),
        ('f2', 's(4,5)'),
        ('f3', 's(2,2)'),
        ('f4', 's(2,8)'),
    )
    frames = tuple(ff.parse(spec).rename(label) for label, spec in specs)
    bus_a = Bus.from_frames(frames[:2], name='a')
    bus_b = Bus.from_frames(frames[2:], name='b')

    yarn = Yarn.from_buses((bus_a, bus_b), retain_labels=False)

    # One row per frame, one column per column label seen across the frames.
    self.assertEqual(yarn.dtypes.shape, (4, 8))