Ejemplo n.º 1
0
    def test_yarn_init_b(self) -> None:
        '''
        Build Yarns directly from two Buses using explicit labels, default
        labels, and a date index constructor; an index whose length does not
        match the contained frames is expected to raise ErrorInitYarn.
        '''
        bus_of_three = Bus.from_frames((
            ff.parse('s(4,4)|v(int,float)').rename('f1'),
            ff.parse('s(4,4)|v(str)').rename('f2'),
            ff.parse('s(4,4)|v(bool)').rename('f3'),
        ))
        bus_of_two = Bus.from_frames((
            ff.parse('s(4,4)|v(int,float)').rename('f4'),
            ff.parse('s(4,4)|v(str)').rename('f5'),
        ))

        # explicit labels: one per frame across both buses
        labeled = Yarn((bus_of_three, bus_of_two), index=tuple('abcde'))
        self.assertEqual(labeled.index.values.tolist(), list('abcde'))
        subset = labeled[['a', 'c', 'e']]
        self.assertEqual(subset.shape, (3, ))

        # no index given: labels are expected to be the integers 0 through 4
        unlabeled = Yarn((bus_of_three, bus_of_two))
        self.assertEqual(unlabeled.index.values.tolist(), list(range(5)))
        tail = unlabeled[2:]
        self.assertEqual(tail.shape, (3, ))

        # string labels converted through the supplied index constructor
        dated = Yarn(
            (bus_of_two, ),
            index=('2021-01-01', '2021-02-15'),
            index_constructor=IndexDate,
        )
        self.assertEqual(dated.index.__class__, IndexDate)
        expected_dates = [
            datetime.date(2021, 1, 1),
            datetime.date(2021, 2, 15),
        ]
        self.assertEqual(dated.index.values.tolist(), expected_dates)

        # five labels for a bus holding two frames
        with self.assertRaises(ErrorInitYarn):
            Yarn((bus_of_two, ), index=range(5))
Ejemplo n.º 2
0
    def test_yarn_loc_e(self) -> None:
        '''
        Slice a Yarn built from three named Buses by label, across bus
        boundaries, and check the shapes and one frame's values in the result.
        '''
        bus_a = Bus.from_frames(
            (
                ff.parse('s(4,4)|v(int,float)').rename('f1'),
                ff.parse('s(4,4)|v(str)').rename('f2'),
                ff.parse('s(4,4)|v(bool)').rename('f3'),
            ),
            name='a',
        )
        bus_b = Bus.from_frames(
            (
                ff.parse('s(4,4)|v(int,float)').rename('f4'),
                ff.parse('s(4,4)|v(str)').rename('f5'),
            ),
            name='b',
        )
        bus_c = Bus.from_frames(
            (
                ff.parse('s(2,4)|v(int,float)').rename('f6'),
                ff.parse('s(4,2)|v(str)').rename('f7'),
            ),
            name='c',
        )

        full = Yarn.from_buses((bus_a, bus_b, bus_c), retain_labels=False, name='foo')
        # label slice spanning all three buses; both end labels are expected in the result
        window = full['f2':'f6']  #type: ignore

        expected_shapes = (
            ('f2', (4, 4)),
            ('f3', (4, 4)),
            ('f4', (4, 4)),
            ('f5', (4, 4)),
            ('f6', (2, 4)),
        )
        self.assertEqual(window.shapes.to_pairs(), expected_shapes)

        expected_f5 = (
            (0, ((0, 'zjZQ'), (1, 'zO5l'), (2, 'zEdH'), (3, 'zB7E'))),
            (1, ((0, 'zaji'), (1, 'zJnC'), (2, 'zDdR'), (3, 'zuVU'))),
            (2, ((0, 'ztsv'), (1, 'zUvW'), (2, 'zkuW'), (3, 'zmVj'))),
            (3, ((0, 'z2Oo'), (1, 'z5l6'), (2, 'zCE3'), (3, 'zr4u'))),
        )
        self.assertEqual(window['f5'].to_pairs(), expected_f5)
Ejemplo n.º 3
0
    def test_yarn_from_concat_b(self) -> None:
        '''
        Concatenate Yarns over zip-pickle backed Buses (each opened with
        max_persist=1), re-index with IndexAutoFactory, and check integer
        selection and iloc slicing on the result.
        '''
        source_a = Bus.from_frames((
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ))
        source_b = Bus.from_frames((
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ))

        with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
            source_a.to_zip_pickle(fp1)
            source_b.to_zip_pickle(fp2)

            bus_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
            bus_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')

            yarn_a = Yarn.from_buses((bus_a, ), retain_labels=True)
            yarn_b = Yarn.from_buses((bus_b, ), retain_labels=True)
            combined = Yarn.from_concat((yarn_a, yarn_b))

            # the same Yarn twice, relabeled with auto-generated integers
            doubled = Yarn.from_concat((combined, combined), index=IndexAutoFactory)

            # access order is kept as-is: with max_persist=1 it affects what stays loaded
            for position, expected_shape in (
                    (3, (2, 8)),
                    (0, (4, 2)),
                    (5, (6, 4)),
                    ):
                self.assertEqual(doubled[position].shape, expected_shape)

            remainder = doubled.iloc[4:]
            self.assertEqual(remainder.shape, (8, ))
Ejemplo n.º 4
0
    def test_yarn_max_persist(self) -> None:
        '''
        Track the loaded-state bookkeeping (nbytes, status, shapes, mloc,
        dtypes) of a Yarn over two zip-pickle backed Buses opened with
        max_persist=1, before and after reading one frame from each Bus.
        '''
        source_a = Bus.from_frames((
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ))
        source_b = Bus.from_frames((
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ))

        with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
            source_a.to_zip_pickle(fp1)
            source_b.to_zip_pickle(fp2)

            bus_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
            bus_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')

            yarn = Yarn.from_buses((bus_a, bus_b), retain_labels=False)

            # nothing has been read yet
            self.assertEqual(yarn.nbytes, 0)
            self.assertEqual(yarn.status['loaded'].sum(), 0)

            # read one frame out of each bus
            self.assertEqual(yarn['f2'].shape, (4, 5))
            self.assertEqual(yarn['f6'].shape, (6, 4))
            self.assertEqual(yarn.nbytes, 352)
            self.assertEqual(yarn.status['loaded'].sum(), 2)

            # frames that are not loaded report None for their shape
            expected_shapes = (
                ('f1', None),
                ('f2', (4, 5)),
                ('f3', None),
                ('f4', None),
                ('f5', None),
                ('f6', (6, 4)),
            )
            self.assertEqual(yarn.shapes.to_pairs(), expected_shapes)
            self.assertEqual(yarn.mloc.isna().sum(), 4)
            float_count = (yarn.dtypes == float).sum().sum()
            self.assertEqual(float_count, 9)
Ejemplo n.º 5
0
    def test_bus_max_persist_a(self) -> None:
        '''
        Read all twenty frames of a zip-pickle backed Bus opened with
        max_persist=3, one at a time, checking that no more than three are
        ever flagged as loaded and that the last three remain at the end.
        '''
        frame_pairs = (
            (str(i), Frame(np.arange(i, i + 10).reshape(2, 5)))
            for i in range(20)
        )
        source = Bus(Series.from_items(frame_pairs, dtype=object))

        config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
        )

        with temp_file('.zip') as fp:
            source.to_zip_pickle(fp)

            lazy = Bus.from_zip_pickle(fp, config=config, max_persist=3)
            for label in lazy.index:
                _ = lazy[label]
                self.assertLessEqual(lazy._loaded.sum(), 3)

            # after iteration only the last three are loaded
            expected_flags = [False] * 17 + [True] * 3
            self.assertEqual(lazy._loaded.tolist(), expected_flags)
Ejemplo n.º 6
0
    def test_yarn_items_a(self) -> None:
        '''
        Iterate ``Yarn.items()`` over two zip-pickle backed Buses opened with
        max_persist=1: every value is a Frame, labels come back in index
        order, and afterwards only 'f3' and 'f6' are reported as loaded.
        '''
        source_a = Bus.from_frames((
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ))
        source_b = Bus.from_frames((
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ))

        with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
            source_a.to_zip_pickle(fp1)
            source_b.to_zip_pickle(fp2)

            bus_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
            bus_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')

            yarn = Yarn.from_buses((bus_a, bus_b), retain_labels=False)

            seen = []
            for key, value in yarn.items():
                self.assertIs(value.__class__, Frame)
                seen.append(key)
            self.assertEqual(seen, list(yarn.index))

            # one frame per bus is still loaded once iteration has finished
            self.assertEqual(yarn.status['loaded'].sum(), 2)
            loaded_rows = yarn.status.loc[yarn.status['loaded']]
            self.assertEqual(loaded_rows.index.values.tolist(), ['f3', 'f6'])
Ejemplo n.º 7
0
    def test_yarn_relabel_a(self) -> None:
        '''
        Relabel a Yarn with a function, None, an invalid set, a tuple of
        labels, and IndexAutoFactory.
        '''
        b1 = Bus.from_frames((
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ))
        b2 = Bus.from_frames((ff.parse('s(2,8)').rename('f4'), ))
        b3 = Bus.from_frames((
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ))

        auto_labeled = Yarn((b1, b2, b3))

        # a callable is applied to each existing label
        decorated = auto_labeled.relabel(lambda x: f'--{x}--')
        self.assertEqual(decorated.loc['--4--'].shape, (4, 4))

        # None is a no-op
        unchanged = auto_labeled.relabel(None)
        self.assertEqual(unchanged.loc[4].shape, (4, 4))

        # a set is not accepted as a relabel argument
        with self.assertRaises(RelabelInvalid):
            auto_labeled.relabel({3, 4, 5})

        lettered = auto_labeled.relabel(tuple('abcdef'))
        self.assertEqual(
            lettered['d':].status['shape'].to_pairs(),
            (('d', (2, 8)), ('e', (4, 4)), ('f', (6, 4))),
        )

        # start from explicit letters and go back to auto-generated integers
        explicit = Yarn((b1, b2, b3), index=tuple('abcdef'))
        self.assertEqual(
            explicit.index.values.tolist(),
            ['a', 'b', 'c', 'd', 'e', 'f'],
        )
        renumbered = explicit.relabel(IndexAutoFactory)
        self.assertEqual(
            renumbered.index.values.tolist(),
            [0, 1, 2, 3, 4, 5],
        )
Ejemplo n.º 8
0
    def test_bus_nbytes_a(self) -> None:
        '''
        Check that ``Bus.nbytes`` grows as frames of a zip-pickle backed Bus
        are read one by one.
        '''
        frames = (
            Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
            Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
            Frame.from_dict(dict(a=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3'),
        )
        source = Bus.from_frames(frames)

        with temp_file('.zip') as fp:
            source.to_zip_pickle(fp)
            lazy = Bus.from_zip_pickle(fp)

            # (label to read, expected cumulative nbytes after reading it)
            for label, running_total in (
                    ('f2', 48),
                    ('f3', 80),
                    ('f1', 112),
                    ):
                _ = lazy[label]
                self.assertEqual(lazy.nbytes, running_total)
Ejemplo n.º 9
0
    def test_bus_init_a(self) -> None:
        '''
        Write a two-frame Bus to a zip of TSV files, read it back through Bus,
        then write and read the same file directly with StoreZipTSV.
        '''
        foo = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='foo',
        )
        bar = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='bar',
        )

        config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
        source = Bus.from_frames((foo, bar), config=config)

        self.assertEqual(source.keys().values.tolist(), ['foo', 'bar'])

        with temp_file('.zip') as fp:
            source.to_zip_tsv(fp)
            reloaded = Bus.from_zip_tsv(fp)

            # read both frames through the Bus before the store rewrites the file
            _ = reloaded['bar']
            _ = reloaded['foo']

            store = StoreZipTSV(fp)
            store.write(source.items())

            # how to show that this derived getitem has derived type?
            from_store = store.read('foo', config=config['foo'])
            expected = (
                ('a', (('x', 1), ('y', 2))),
                ('b', (('x', 3), ('y', 4))),
            )
            self.assertEqual(from_store.to_pairs(0), expected)
Ejemplo n.º 10
0
    def test_bus_init_c(self) -> None:
        '''
        Round-trip a two-frame Bus through a zip of CSV files and compare the
        loaded frames to the originals.
        '''
        foo = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='foo',
        )
        bar = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='bar',
        )

        config = StoreConfigMap.from_config(StoreConfig(index_depth=1))
        source = Bus.from_frames((foo, bar), config=config)

        self.assertEqual(source.keys().values.tolist(), ['foo', 'bar'])

        with temp_file('.zip') as fp:
            source.to_zip_csv(fp)
            reloaded = Bus.from_zip_csv(fp, config=config)

            # read both frames first, then compare, keeping the original access order
            foo_loaded = reloaded['foo']
            bar_loaded = reloaded['bar']

            for original, loaded in ((foo, foo_loaded), (bar, bar_loaded)):
                self.assertEqualFrames(original, loaded)
Ejemplo n.º 11
0
    def test_bus_to_xlsx_a(self) -> None:
        '''
        Round-trip three frames through an XLSX file via Bus and compare each
        loaded frame to its source.
        '''
        originals = (
            Frame.from_dict(
                dict(a=(1, 2), b=(3, 4)),
                index=('x', 'y'),
                name='f1'),
            Frame.from_dict(
                dict(c=(1, 2, 3), b=(4, 5, 6)),
                index=('x', 'y', 'z'),
                name='f2'),
            Frame.from_dict(
                dict(d=(10, 20), b=(50, 60)),
                index=('p', 'q'),
                name='f3'),
        )

        base_config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
        )
        config = StoreConfigMap.from_config(base_config)
        source = Bus.from_frames(originals, config=config)

        with temp_file('.xlsx') as fp:
            source.to_xlsx(fp)

            reloaded = Bus.from_xlsx(fp, config=config)
            # force loading all frames before leaving the temp_file context
            tuple(reloaded.items())

        for original in originals:
            self.assertEqualFrames(original, reloaded[original.name])
Ejemplo n.º 12
0
    def test_yarn_items_b(self) -> None:
        '''
        Convert a Yarn over two zip-pickle backed Buses (max_persist=1) to a
        Series and check the label and shape of every frame in it.
        '''
        source_a = Bus.from_frames((
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ))
        source_b = Bus.from_frames((
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ))

        with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
            source_a.to_zip_pickle(fp1)
            source_b.to_zip_pickle(fp2)

            bus_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
            bus_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')

            yarn = Yarn.from_buses((bus_a, bus_b), retain_labels=False)
            as_series = yarn.to_series()

            observed = [(key, frame.shape) for key, frame in as_series.items()]
            expected = [
                ('f1', (4, 2)),
                ('f2', (4, 5)),
                ('f3', (2, 2)),
                ('f4', (2, 8)),
                ('f5', (4, 4)),
                ('f6', (6, 4)),
            ]
            self.assertEqual(observed, expected)
Ejemplo n.º 13
0
    def test_bus_to_parquet_a(self) -> None:
        '''
        Round-trip three frames through a zip of parquet files via Bus and
        compare each loaded frame to its source, ignoring dtype differences.
        '''
        originals = (
            Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
            Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
            Frame.from_dict(dict(d=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3'),
        )

        config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
        )
        source = Bus.from_frames(originals, config=config)

        with temp_file('.zip') as fp:
            source.to_zip_parquet(fp)

            reloaded = Bus.from_zip_parquet(fp, config=config)
            # force loading all frames before leaving the temp_file context
            tuple(reloaded.items())

        for original in originals:
            # parquet brings in characters as objects, thus forcing different dtypes
            self.assertEqualFrames(
                original,
                reloaded[original.name],
                compare_dtype=False,
            )
Ejemplo n.º 14
0
    def test_bus_keys_a(self) -> None:
        '''
        Check ``Bus.keys()`` and ``Bus.values`` on an in-memory Bus and on a
        zip-pickle backed Bus, where reading ``values`` is expected to load
        every frame.
        '''
        frames = (
            Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
            Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
            Frame.from_dict(dict(d=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3'),
            Frame.from_dict(
                dict(q=(None, None), r=(np.nan, np.nan)),
                index=(1000, 1001),
                name='f4'),
        )
        labels = ['f1', 'f2', 'f3', 'f4']

        in_memory = Bus.from_frames(frames)
        self.assertEqual(in_memory.keys().values.tolist(), labels)
        self.assertEqual(in_memory.values[2].name, 'f3')

        with temp_file('.zip') as fp:
            in_memory.to_zip_pickle(fp)
            lazy = Bus.from_zip_pickle(fp)
            self.assertFalse(lazy._loaded_all)

            # asking for the keys leaves every frame unloaded
            self.assertEqual(lazy.keys().values.tolist(), labels)
            self.assertFalse(lazy._loaded.any())

            # accessing values forces loading all
            third = lazy.values[2]
            self.assertEqual(third.name, 'f3')
            self.assertTrue(lazy._loaded_all)
Ejemplo n.º 15
0
    def test_bus_shapes_a(self) -> None:
        '''
        Check that ``Bus.shapes`` reports None for frames not yet read from a
        zip-pickle backed Bus and the real shape once they have been read.
        '''
        frames = (
            Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
            Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
            Frame.from_dict(dict(a=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3'),
        )
        source = Bus.from_frames(frames)

        with temp_file('.zip') as fp:
            source.to_zip_pickle(fp)
            lazy = Bus.from_zip_pickle(fp)

            # (label to read, expected shapes after reading it)
            steps = (
                ('f2', (('f1', None), ('f2', (3, 2)), ('f3', None))),
                ('f3', (('f1', None), ('f2', (3, 2)), ('f3', (2, 2)))),
            )
            for label, expected_shapes in steps:
                _ = lazy[label]
                self.assertEqual(lazy.shapes.to_pairs(), expected_shapes)
Ejemplo n.º 16
0
    def test_bus_max_persist_3(self) -> None:
        '''
        With max_persist equal to the number of frames, check that every frame
        ends up loaded and that the key order of ``_last_accessed`` follows
        the most recent iloc selections.
        '''
        frame_pairs = (
            (str(i), Frame(np.arange(i, i + 10).reshape(2, 5)))
            for i in range(4)
        )
        source = Bus(Series.from_items(frame_pairs, dtype=object))

        config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
        )

        with temp_file('.zip') as fp:
            source.to_zip_pickle(fp)
            lazy = Bus.from_zip_pickle(fp, config=config, max_persist=4)

            # two selections that together touch all four frames
            _ = lazy.iloc[[0, 1]]
            _ = lazy.iloc[[2, 3]]
            self.assertTrue(lazy._loaded_all)

            # (iloc selection, expected key order of _last_accessed afterwards)
            steps = (
                ([1, 0], ['2', '3', '1', '0']),
                (3, ['2', '1', '0', '3']),
                (slice(None, 3), ['3', '0', '1', '2']),
            )
            for selection, expected_order in steps:
                _ = lazy.iloc[selection]
                self.assertEqual(list(lazy._last_accessed.keys()), expected_order)
Ejemplo n.º 17
0
    def test_bus_to_xlsx_c(self) -> None:
        '''
        Test manipulating a file behind the Bus: overwriting the XLSX file
        after the Bus was opened is expected to raise StoreFileMutation when
        the Bus is read.
        '''
        stored = Frame.from_dict(
            dict(a=(1, 2, 3)),
            index=('x', 'y', 'z'),
            name='f1',
        )
        intruder = Frame.from_dict(
            dict(x=(10, 20, 30)),
            index=('q', 'r', 's'),
            name='f2',
        )

        source = Bus.from_frames((stored, ))

        with temp_file('.xlsx') as fp:
            source.to_xlsx(fp)
            lazy = Bus.from_xlsx(fp)

            # replace the file contents behind the already-opened Bus
            intruder.to_xlsx(fp)

            with self.assertRaises(StoreFileMutation):
                tuple(lazy.items())
Ejemplo n.º 18
0
    def test_yarn_drop_b(self) -> None:
        '''
        Drop frames from a Yarn by position via ``drop.iloc`` using a slice, a
        Boolean array, a single integer, and a list of integers.
        '''
        b1 = Bus.from_frames(
            (
                ff.parse('s(4,2)').rename('f1'),
                ff.parse('s(4,5)').rename('f2'),
                ff.parse('s(2,2)').rename('f3'),
            ),
            name='b1',
        )
        b2 = Bus.from_frames((ff.parse('s(2,8)').rename('f4'), ), name='b2')
        b3 = Bus.from_frames(
            (
                ff.parse('s(4,4)').rename('f5'),
                ff.parse('s(6,4)').rename('f6'),
            ),
            name='b3',
        )

        yarn = Yarn.from_buses((b1, b2, b3), retain_labels=False)

        def summarize(y):  # type: ignore
            # name and shape of every frame left in the Yarn
            return [(f.name, f.shape) for f in y.values]

        by_slice = yarn.drop.iloc[2:5]
        self.assertEqual(len(by_slice._series), 2)  # 2 buses remain
        self.assertEqual(
            summarize(by_slice),
            [('f1', (4, 2)), ('f2', (4, 5)), ('f6', (6, 4))],
        )

        mask = np.array([True, False, False, False, False, True])
        by_mask = yarn.drop.iloc[mask]
        self.assertEqual(
            summarize(by_mask),
            [('f2', (4, 5)), ('f3', (2, 2)), ('f4', (2, 8)), ('f5', (4, 4))],
        )

        by_integer = yarn.drop.iloc[3]
        self.assertEqual(
            summarize(by_integer),
            [('f1', (4, 2)), ('f2', (4, 5)), ('f3', (2, 2)),
             ('f5', (4, 4)), ('f6', (6, 4))],
        )

        by_list = yarn.drop.iloc[[3, 4, 5]]
        self.assertEqual(
            summarize(by_list),
            [('f1', (4, 2)), ('f2', (4, 5)), ('f3', (2, 2))],
        )
Ejemplo n.º 19
0
    def test_yarn_unpersist_a(self) -> None:
        '''
        Check that ``Yarn.unpersist()`` resets the loaded count to zero and
        that the Yarn can be fully iterated again afterwards.
        '''
        source_a = Bus.from_frames((
            ff.parse('s(4,2)').rename('f1'),
            ff.parse('s(4,5)').rename('f2'),
            ff.parse('s(2,2)').rename('f3'),
        ))
        source_b = Bus.from_frames((
            ff.parse('s(2,8)').rename('f4'),
            ff.parse('s(4,4)').rename('f5'),
            ff.parse('s(6,4)').rename('f6'),
        ))

        with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
            source_a.to_zip_pickle(fp1)
            source_b.to_zip_pickle(fp2)

            bus_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
            bus_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')

            yarn = Yarn.from_buses((bus_a, bus_b), retain_labels=False)

            # first full pass: two frames are expected to stay loaded
            first_pass = tuple(yarn.items())
            self.assertEqual(len(first_pass), 6)
            self.assertEqual(yarn.status['loaded'].sum(), 2)

            yarn.unpersist()
            self.assertEqual(yarn.status['loaded'].sum(), 0)

            # second full pass after unpersisting gives the same counts
            second_pass = tuple(yarn.items())
            self.assertEqual(len(second_pass), 6)
            self.assertEqual(yarn.status['loaded'].sum(), 2)
Ejemplo n.º 20
0
    def test_yarn_drop_a(self) -> None:
        '''
        Drop frames from a Yarn by label via ``drop[...]`` using a label
        slice, a Boolean array, a single label, and a list of labels.
        '''
        b1 = Bus.from_frames(
            (
                ff.parse('s(4,2)').rename('f1'),
                ff.parse('s(4,5)').rename('f2'),
                ff.parse('s(2,2)').rename('f3'),
            ),
            name='b1',
        )
        b2 = Bus.from_frames((ff.parse('s(2,8)').rename('f4'), ), name='b2')
        b3 = Bus.from_frames(
            (
                ff.parse('s(4,4)').rename('f5'),
                ff.parse('s(6,4)').rename('f6'),
            ),
            name='b3',
        )

        yarn = Yarn.from_buses((b1, b2, b3), retain_labels=False)

        def summarize(y):  # type: ignore
            # name and shape of every frame left in the Yarn
            return [(f.name, f.shape) for f in y.values]

        by_slice = yarn.drop['f3':'f5']  #type: ignore
        self.assertEqual(len(by_slice._series), 2)  # 2 buses remain
        self.assertEqual(
            summarize(by_slice),
            [('f1', (4, 2)), ('f2', (4, 5)), ('f6', (6, 4))],
        )

        by_mask = yarn.drop[yarn.index.isin(('f1', 'f6'))]
        self.assertEqual(
            summarize(by_mask),
            [('f2', (4, 5)), ('f3', (2, 2)), ('f4', (2, 8)), ('f5', (4, 4))],
        )

        by_label = yarn.drop['f4']
        self.assertEqual(
            summarize(by_label),
            [('f1', (4, 2)), ('f2', (4, 5)), ('f3', (2, 2)),
             ('f5', (4, 4)), ('f6', (6, 4))],
        )

        by_list = yarn.drop[['f4', 'f5', 'f6']]
        self.assertEqual(
            summarize(by_list),
            [('f1', (4, 2)), ('f2', (4, 5)), ('f3', (2, 2))],
        )
Ejemplo n.º 21
0
    def test_yarn_from_buses_a(self) -> None:
        '''
        Check the basic size attributes of Yarns built by ``Yarn.from_buses``
        with and without retained bus labels.
        '''
        f1 = ff.parse('s(4,4)|v(int,float)').rename('f1')
        f2 = ff.parse('s(4,4)|v(str)').rename('f2')
        f3 = ff.parse('s(4,4)|v(bool)').rename('f3')
        b1 = Bus.from_frames((f1, f2, f3), name='a')

        f4 = ff.parse('s(4,4)|v(int,float)').rename('f4')
        f5 = ff.parse('s(4,4)|v(str)').rename('f5')
        b2 = Bus.from_frames((f4, f5), name='b')

        # retain_labels=True: the index is two labels deep
        y1 = Yarn.from_buses((b1, b2), retain_labels=True)
        self.assertEqual(len(y1), 5)
        self.assertEqual(y1.index.shape, (5, 2))
        self.assertEqual(y1.shape, (5, ))
        self.assertEqual(y1.size, 5)
        self.assertEqual(y1.dtype, object)
        self.assertEqual(y1.ndim, 1)

        # slicing from a (bus label, frame label) pair drops only the first frame
        y3 = y1[('a', 'f2'):]  #type: ignore
        self.assertEqual(y3.shape, (4, ))

        # retain_labels=False: the index is flat, and the Yarn itself must
        # report the same one-dimensional attributes
        y2 = Yarn.from_buses((b1, b2), retain_labels=False)
        self.assertEqual(len(y2), 5)
        self.assertEqual(y2.index.shape, (5, ))
        self.assertEqual(y2.shape, (5, ))
        self.assertEqual(y2.size, 5)
        self.assertEqual(y2.dtype, object)
        self.assertEqual(y2.ndim, 1)
Ejemplo n.º 22
0
    def test_yarn_equals_e(self) -> None:
        '''
        Compare an in-memory Yarn with an equivalent Yarn over zip-pickle
        backed Buses (max_persist=1); after ``equals`` the lazy Yarn is
        expected to report two loaded frames.
        '''
        source_a = Bus.from_frames(
            (
                ff.parse('s(4,2)').rename('f1'),
                ff.parse('s(4,5)').rename('f2'),
                ff.parse('s(2,2)').rename('f3'),
            ),
            name='a',
        )
        source_b = Bus.from_frames(
            (
                ff.parse('s(2,8)').rename('f4'),
                ff.parse('s(4,4)').rename('f5'),
                ff.parse('s(6,4)').rename('f6'),
            ),
            name='b',
        )

        in_memory = Yarn.from_buses((source_a, source_b), retain_labels=True)

        with temp_file('.zip') as fp1, temp_file('.zip') as fp2:
            source_a.to_zip_pickle(fp1)
            source_b.to_zip_pickle(fp2)

            bus_a = Bus.from_zip_pickle(fp1, max_persist=1).rename('a')
            bus_b = Bus.from_zip_pickle(fp2, max_persist=1).rename('b')

            lazy = Yarn.from_buses((bus_a, bus_b), retain_labels=True)
            self.assertEqual(lazy.status['loaded'].sum(), 0)

            # the comparison has to read frames from the lazy side
            self.assertTrue(in_memory.equals(lazy))
            self.assertEqual(lazy.status['loaded'].sum(), 2)
Ejemplo n.º 23
0
    def test_bus_status_a(self) -> None:
        '''
        Check the shape of ``Bus.status`` before loading and its full contents
        after every frame of a zip-pickle backed Bus has been loaded.
        '''
        frames = (
            Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
            Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
            Frame.from_dict(dict(d=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3'),
        )
        source = Bus.from_frames(frames)

        with temp_file('.zip') as fp:
            source.to_zip_pickle(fp)
            lazy = Bus.from_zip_pickle(fp)

            # three frames by four status columns
            initial_status = lazy.status
            self.assertEqual(initial_status.shape, (3, 4))

            # force load all
            tuple(lazy.items())

            expected = (
                ('loaded', (('f1', True), ('f2', True), ('f3', True))),
                ('size', (('f1', 4.0), ('f2', 6.0), ('f3', 4.0))),
                ('nbytes', (('f1', 32.0), ('f2', 48.0), ('f3', 32.0))),
                ('shape', (('f1', (2, 2)), ('f2', (3, 2)), ('f3', (2, 2)))),
            )
            self.assertEqual(lazy.status.to_pairs(0), expected)
Ejemplo n.º 24
0
    def test_bus_to_xlsx_f(self) -> None:
        '''
        Round-trip a frame of datetime64 values, with datetime index labels
        and date column labels, through an XLSX file via Bus and check the
        index labels and values that come back.
        '''
        f = Frame.from_records([
                [np.datetime64('1983-02-20 05:34:18.763'), np.datetime64('2020-08-01')],
                [np.datetime64('1975-03-20 05:20:18.001'), np.datetime64('2020-07-31')]
                ],
                columns=(date(2020, 7, 31), date(2020, 8, 1)),
                index=(datetime(2020, 7, 31, 14, 20, 8), datetime(2017, 4, 28, 2, 30, 2)),
                name='frame')
        b1 = Bus.from_frames([f])

        with temp_file('.xlsx') as fp:
            b1.to_xlsx(fp)

            config = StoreConfig(include_index=True, index_depth=1)
            b2 = Bus.from_xlsx(fp, config=config)
            tuple(b2.items()) # force loading all

        # index labels come back as datetime objects; this is asserted once
        # (a verbatim duplicate of this assertion was removed)
        self.assertEqual(b2['frame'].index.values.tolist(),
                [datetime(2020, 7, 31, 14, 20, 8),
                datetime(2017, 4, 28, 2, 30, 2)])

        # values come back as datetime objects, including date-only inputs
        self.assertEqual(b2['frame'].values.tolist(),
                [[datetime(1983, 2, 20, 5, 34, 18, 763000), datetime(2020, 8, 1, 0, 0)],
                 [datetime(1975, 3, 20, 5, 20, 18, 1000), datetime(2020, 7, 31, 0, 0)]])
Ejemplo n.º 25
0
    def test_bus_init_a(self) -> None:
        '''
        Write a two-frame Bus to a zip of TSV files, read it back through Bus,
        then write and read the same file directly with StoreZipTSV.
        '''
        foo = Frame.from_dict(
            dict(a=(1, 2), b=(3, 4)),
            index=('x', 'y'),
            name='foo',
        )
        bar = Frame.from_dict(
            dict(a=(1, 2, 3), b=(4, 5, 6)),
            index=('x', 'y', 'z'),
            name='bar',
        )

        source = Bus.from_frames((foo, bar))
        self.assertEqual(source.keys().values.tolist(), ['foo', 'bar'])

        with temp_file('.zip') as fp:
            source.to_zip_tsv(fp)
            reloaded = Bus.from_zip_tsv(fp)

            # read both frames through the Bus before the store rewrites the file
            _ = reloaded['bar']
            _ = reloaded['foo']

            store = StoreZipTSV(fp)
            store.write(source.items())

            from_store = store.read('foo')
            expected = (
                ('a', (('x', 1), ('y', 2))),
                ('b', (('x', 3), ('y', 4))),
            )
            self.assertEqual(from_store.to_pairs(0), expected)
Ejemplo n.º 26
0
    def test_bus_to_hdf5_a(self) -> None:
        '''
        Round-trip three frames through an HDF5 file via Bus, using a config
        map derived from the frames, and compare each loaded frame to its source.
        '''
        originals = (
            Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
            Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
            Frame.from_dict(dict(d=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3'),
        )

        config = StoreConfigMap.from_frames(originals)
        source = Bus.from_frames(originals, config=config)

        with temp_file('.h5') as fp:
            source.to_hdf5(fp)
            reloaded = Bus.from_hdf5(fp, config=config)
            # force loading all frames before leaving the temp_file context
            tuple(reloaded.items())

        for original in originals:
            self.assertEqualFrames(original, reloaded[original.name])
Ejemplo n.º 27
0
    def test_bus_init_b(self) -> None:
        '''
        Bus construction is expected to raise ErrorInitBus for a Series of
        integers, both without an explicit dtype and with dtype=object.
        '''
        inferred_dtype = Series([1, 2, 3])
        with self.assertRaises(ErrorInitBus):
            Bus(inferred_dtype)

        object_dtype = Series([3, 4], dtype=object)
        with self.assertRaises(ErrorInitBus):
            Bus(object_dtype)
Ejemplo n.º 28
0
    def test_bus_mloc_c(self) -> None:
        '''
        Check that the ``mloc`` entry recorded for 'f2' stays the same after
        the other frames of a zip-pickle backed Bus have been loaded.
        '''
        frames = (
            Frame.from_dict(dict(a=(1, 2), b=(3, 4)), index=('x', 'y'), name='f1'),
            Frame.from_dict(dict(c=(1, 2, 3), b=(4, 5, 6)), index=('x', 'y', 'z'), name='f2'),
            Frame.from_dict(dict(d=(10, 20), b=(50, 60)), index=('p', 'q'), name='f3'),
        )
        source = Bus.from_frames(frames)

        with temp_file('.zip') as fp:
            source.to_zip_pickle(fp)
            lazy = Bus.from_zip_pickle(fp)

            # snapshot mloc while only 'f2' has been read
            _ = lazy['f2']
            snapshot = lazy.mloc

            # read the remaining frames
            for label in ('f3', 'f1'):
                _ = lazy[label]

            self.assertEqual(snapshot['f2'], lazy.mloc.loc['f2'])
Ejemplo n.º 29
0
    def test_bus_max_persist_b(self) -> None:
        '''
        With max_persist=1, slice the last ten frames out of a twenty-frame
        zip-pickle backed Bus and check that exactly one frame is flagged as
        loaded after the slice and after each single-frame read.
        '''
        frame_pairs = (
            (str(i), Frame(np.arange(i, i + 10).reshape(2, 5)))
            for i in range(20)
        )
        source = Bus(Series.from_items(frame_pairs, dtype=object))

        config = StoreConfig(
            index_depth=1,
            columns_depth=1,
            include_columns=True,
            include_index=True,
        )

        def only_loaded_at(position: int) -> tp.List[bool]:
            # ten flags with a single True at the given position
            return [i == position for i in range(10)]

        with temp_file('.zip') as fp:
            source.to_zip_pickle(fp)

            lazy = Bus.from_zip_pickle(fp, config=config, max_persist=1)
            tail = lazy.iloc[10:]
            self.assertEqual(tail._loaded.sum(), 1)
            # only the last one is loaded
            self.assertEqual(tail._loaded.tolist(), only_loaded_at(9))

            # (position to read, expected sum of that frame's values)
            for position, expected_total in ((0, 145), (4, 185)):
                self.assertEqual(tail.iloc[position].sum().sum(), expected_total)
                self.assertEqual(tail._loaded.tolist(), only_loaded_at(position))
Ejemplo n.º 30
0
    def test_yarn_dtypes_a(self) -> None:
        '''
        Check the shape of ``Yarn.dtypes`` for a Yarn built from two named
        Buses that together hold four frames of differing widths.
        '''
        f1 = ff.parse('s(4,2)').rename('f1')
        f2 = ff.parse('s(4,5)').rename('f2')
        f3 = ff.parse('s(2,2)').rename('f3')
        f4 = ff.parse('s(2,8)').rename('f4')

        b1 = Bus.from_frames((f1, f2), name='a')
        b2 = Bus.from_frames((f3, f4), name='b')

        y1 = Yarn.from_buses((b1, b2), retain_labels=False)
        # presumably one row per frame (4) and one column per distinct column
        # label across frames (the widest frame, f4, has 8) -- TODO confirm
        self.assertEqual(y1.dtypes.shape, (4, 8))