コード例 #1
0
    def test_store_sqlite_read_many_a(self) -> None:
        """Round-trip several Frames through StoreSQLite via read_many."""
        # Fixtures cover combinations of flat and hierarchical index/columns.
        f1 = Frame.from_dict(
                dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
                index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f1')
        f2 = Frame.from_dict(
                dict(a=(1, 2, 3), b=(4, 5, 6)),
                index=('x', 'y', 'z'),
                name='f2')
        f3 = Frame.from_records(
                ((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
                index=('p', 'q'),
                columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f3')
        # f4 repeats the same four records twice over an 8x8 grid
        row_a = (10, 20, 50, False, 10, 20, 50, False)
        row_b = (50.0, 60.4, -50, True, 50.0, 60.4, -50, True)
        row_c = (234, 44452, 0, False, 234, 44452, 0, False)
        row_d = (4, -4, 2000, True, 4, -4, 2000, True)
        f4 = Frame.from_records(
                (row_a, row_b, row_c, row_d) * 2,
                index=IndexHierarchy.from_product(
                        ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
                columns=IndexHierarchy.from_product(
                        ('I', 'II'), ('a', 'b'), (1, 2)),
                name='f4')

        frames = (f1, f2, f3, f4)
        config_map_write = StoreConfigMap.from_config(
                StoreConfig(include_index=True, include_columns=True))

        with temp_file('.sqlite') as fp:

            st1 = StoreSQLite(fp)
            st1.write(((f.name, f) for f in frames), config=config_map_write)

            # labels() reads from the file, not from memory
            labels = tuple(st1.labels())
            self.assertEqual(tuple(f.name for f in frames), labels)

            # per-label read configs matching each source Frame's depths
            config_map_read: tp.Dict[tp.Hashable, StoreConfig] = {
                    label: StoreConfig(
                            index_depth=f_src.index.depth,
                            columns_depth=f_src.columns.depth)
                    for label, f_src in zip(labels, frames)
                    }

            for f_src, f_loaded in zip(
                    frames,
                    st1.read_many(labels, config=config_map_read)):
                self.assertEqualFrames(f_src, f_loaded, compare_dtype=False)
コード例 #2
0
    def test_store_hdf5_write_a(self) -> None:
        """Write Frames to an HDF5 store; confirm labels and that each
        Frame can be read back with its source depths.

        NOTE(review): loaded Frames are not compared for equality here;
        round-trip equality for HDF5 appears not yet enabled for these
        fixtures -- confirm before adding assertEqualFrames.
        """
        f1 = Frame.from_dict(dict(x=(1, 2, -5, 200), y=(3, 4, -5, -3000)),
                             index=IndexHierarchy.from_product(('I', 'II'),
                                                               ('a', 'b')),
                             name='f1')
        f2 = Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)),
                             index=('x', 'y', 'z'),
                             name='f2')
        f3 = Frame.from_records(((10, 20, 50, 60), (50.0, 60.4, -50, -60)),
                                index=('p', 'q'),
                                columns=IndexHierarchy.from_product(
                                    ('I', 'II'), ('a', 'b')),
                                name='f3')
        f4 = Frame.from_records(
            (
                (10, 20, 50, False, 10, 20, 50, False),
                (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                (234, 44452, 0, False, 234, 44452, 0, False),
                (4, -4, 2000, True, 4, -4, 2000, True),
                (10, 20, 50, False, 10, 20, 50, False),
                (50.0, 60.4, -50, True, 50.0, 60.4, -50, True),
                (234, 44452, 0, False, 234, 44452, 0, False),
                (4, -4, 2000, True, 4, -4, 2000, True),
            ),
            index=IndexHierarchy.from_product(
                ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
            columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b'),
                                                (1, 2)),
            name='f4')

        frames = (f1, f2, f3, f4)

        with temp_file('.hdf5') as fp:

            st1 = StoreHDF5(fp)
            st1.write((f.name, f) for f in frames)

            # labels() reads from the file, not from memory
            sheet_names = tuple(st1.labels())
            self.assertEqual(tuple(f.name for f in frames), sheet_names)

            # reading validates that index/columns depths can be applied
            for f_src, name in zip(frames, sheet_names):
                st1.read(name,
                         index_depth=f_src.index.depth,
                         columns_depth=f_src.columns.depth)
コード例 #3
0
    def test_bus_to_hierarchy_b(self) -> None:
        """bus_to_hierarchy succeeds on the aligned axis and raises on the other."""

        class CustomError(Exception):
            pass

        tree1 = dict(a_I=Index((1, 2, 3)), a_II=Index((1, 2, 3)))
        tree2 = dict(b_I=Index((1, 2, 3)), b_II=Index((1, 2, 3)))
        tree3 = dict(c_I=Index((1, 2, 3)), c_II=Index((1, 2, 3)))
        index1 = IndexHierarchy.from_tree(tree1)
        index2 = IndexHierarchy.from_tree(tree2)
        index3 = IndexHierarchy.from_tree(tree3)
        values = np.arange(36).reshape(6, 6)

        def test_assertions(hierarchy: IndexHierarchy, opposite: Index) -> None:
            # derived hierarchy must match the per-frame trees; the shared
            # (opposite) axis must equal index1
            expected_tree = dict(f1=tree1, f2=tree2, f3=tree3)
            self.compare_trees(hierarchy.to_tree(), expected_tree)
            self.assertTrue(index1.equals(opposite))

        # Align all the frames on columns!
        b1 = Bus.from_frames((
                Frame(values, index=index1, columns=index1, name='f1'),
                Frame(values, index=index2, columns=index1, name='f2'),
                Frame(values, index=index3, columns=index1, name='f3'),
                ))

        for deepcopy in (False, True):
            test_assertions(*bus_to_hierarchy(
                    b1,
                    axis=0,
                    deepcopy_from_bus=deepcopy,
                    init_exception_cls=CustomError))
            # cannot use axis 1: the frames do not share the same index
            with self.assertRaises(CustomError):
                bus_to_hierarchy(b1,
                        axis=1,
                        deepcopy_from_bus=deepcopy,
                        init_exception_cls=CustomError)

        # Align all the frames on index!
        b1 = Bus.from_frames((
                Frame(values, index=index1, columns=index1, name='f1'),
                Frame(values, index=index1, columns=index2, name='f2'),
                Frame(values, index=index1, columns=index3, name='f3'),
                ))

        for deepcopy in (False, True):
            test_assertions(*bus_to_hierarchy(
                    b1,
                    axis=1,
                    deepcopy_from_bus=deepcopy,
                    init_exception_cls=CustomError))
            # cannot use axis 0: the frames do not share the same columns
            with self.assertRaises(CustomError):
                bus_to_hierarchy(b1,
                        axis=0,
                        deepcopy_from_bus=deepcopy,
                        init_exception_cls=CustomError)
コード例 #4
0
    def test_store_sqlite_write_a(self) -> None:
        """Round-trip Frames (including None/inf values) through StoreSQLite."""
        f1 = Frame.from_dict(
                dict(x=(None, -np.inf, np.inf, None), y=(3, 4, -5, -3000)),
                index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f1')
        f2 = Frame.from_dict(
                dict(a=(1, 2, 3), b=(4, 5, 6)),
                index=('x', 'y', 'z'),
                name='f2')
        f3 = Frame.from_records(
                ((10.4, 20.1, 50, 60), (50.1, 60.4, -50, -60)),
                index=('p', 'q'),
                columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f3')
        # f4 repeats the same four records twice over an 8x8 grid
        row_a = (10, 20, 50, False, 10, 20, 50, False)
        row_b = (50.0, 60.4, -50, True, 50.0, 60.4, -50, True)
        row_c = (234, 44452, 0, False, 234, 44452, 0, False)
        row_d = (4, -4, 2000, True, 4, -4, 2000, True)
        f4 = Frame.from_records(
                (row_a, row_b, row_c, row_d) * 2,
                index=IndexHierarchy.from_product(
                        ('top', 'bottom'), ('far', 'near'), ('left', 'right')),
                columns=IndexHierarchy.from_product(
                        ('I', 'II'), ('a', 'b'), (1, 2)),
                name='f4')

        frames = (f1, f2, f3, f4)

        with temp_file('.sqlite') as fp:

            st1 = StoreSQLite(fp)
            st1.write((f.name, f) for f in frames)

            # labels() reads from the file, not from memory
            sheet_names = tuple(st1.labels())
            self.assertEqual(tuple(f.name for f in frames), sheet_names)

            for f_src, name in zip(frames, sheet_names):
                f_loaded = st1.read(name, config=StoreConfig.from_frame(f_src))
                self.assertEqualFrames(f_src, f_loaded)
コード例 #5
0
    def test_store_xlsx_read_many_d(self) -> None:
        """trim_nadir drops trailing all-None rows/columns on read."""
        records = (
                (2, 2, 'a', False, None),
                (30, 73, 'd', True, None),
                (None,) * 5,
                (None,) * 5,
                )
        columns = IndexHierarchy.from_labels(
                (('a', 1), ('a', 2), ('b', 1), ('b', 2), (None, None)))
        f1 = Frame.from_records(records, columns=columns)

        with temp_file('.xlsx') as fp:
            f1.to_xlsx(fp, label='f1', include_index=False, include_columns=True)

            reader = StoreXLSX(fp)
            config = StoreConfig(
                    index_depth=0,
                    columns_depth=2,
                    trim_nadir=True,
                    )
            # only the populated 2x4 region should survive the trim
            f2, = tuple(reader.read_many(('f1',), config=config))
            self.assertEqual(f2.shape, (2, 4))
            self.assertEqual(f2.to_pairs(),
                    ((('a', 1), ((0, 2), (1, 30))),
                     (('a', 2), ((0, 2), (1, 73))),
                     (('b', 1), ((0, 'a'), (1, 'd'))),
                     (('b', 2), ((0, False), (1, True)))))
コード例 #6
0
    def test_store_xlsx_write_b(self) -> None:
        """Write under the default store label and read back with matched depths."""
        f1 = Frame.from_records(
                ((None, np.nan, 50, 'a'),
                 (None, -np.inf, -50, 'b'),
                 (None, 60.4, -50, 'c')),
                index=('p', 'q', 'r'),
                columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                )

        config_map = StoreConfigMap.from_config(
                StoreConfig(include_index=True, include_columns=True))

        with temp_file('.xlsx') as fp:

            store = StoreXLSX(fp)
            store.write(((STORE_LABEL_DEFAULT, f1),), config=config_map)

            read_config = StoreConfig(
                    index_depth=f1.index.depth,
                    columns_depth=f1.columns.depth,
                    )
            f2 = store.read(STORE_LABEL_DEFAULT, config=read_config)

            # spot-check a single hierarchical column selection
            self.assertEqual(
                    f1[HLoc[('II', 'a')]].values.tolist(),
                    f2[HLoc[('II', 'a')]].values.tolist())

            self.assertEqualFrames(f1, f2)
コード例 #7
0
def buses_to_hierarchy(
    buses: tp.Iterable[Bus],
    labels: tp.Iterable[tp.Hashable],
    deepcopy_from_bus: bool,
    init_exception_cls: tp.Type[Exception],
) -> IndexHierarchy:
    '''
    Given an iterable of named :obj:`Bus`, derive an :obj:`IndexHierarchy`
    whose outer labels are ``labels`` and whose inner labels come from each
    Bus's own index.

    Args:
        buses: Bus instances, paired positionally with ``labels``.
        labels: Unique hashable outer labels, one per Bus.
        deepcopy_from_bus: If True, deep-copy each Bus index on extraction.
        init_exception_cls: Exception class raised on invalid input.

    Raises:
        init_exception_cls: If an item is not a Bus, or a label repeats.
    '''
    # NOTE: for now, the returned Series will have Bus names as values; this requires the Yarn to store a dict, not a list

    extractor = get_extractor(deepcopy_from_bus,
                              is_array=False,
                              memo_active=False)

    tree: tp.Dict[tp.Hashable, tp.Any] = {}
    for label, bus in zip(labels, buses):
        if not isinstance(bus, Bus):
            # fixed typo ("interable") and dropped the placeholder-free f-string
            raise init_exception_cls('Must provide an iterable of Bus.')
        if label in tree:
            raise init_exception_cls(
                f'Bus names must be unique: {label} duplicated')
        tree[label] = extractor(bus._index)

    return IndexHierarchy.from_tree(tree)
コード例 #8
0
    def test_store_sqlite_write_b(self) -> None:
        """Fraction values round-trip through SQLite as strings."""
        f1 = Frame.from_dict(
                dict(
                        x=tuple(Fraction(n, d) for n, d in
                                ((3, 2), (1, 2), (2, 3), (3, 7))),
                        y=(3, 4, -5, -3000)),
                index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f1-dash')

        with temp_file('.sqlite') as fp:

            store = StoreSQLite(fp)
            store.write(((f1.name, f1),))

            f_loaded = store.read(f1.name, config=StoreConfig.from_frame(f1))

            # for now, Fractions come back as strings
            self.assertEqual(
                    f_loaded['x'].to_pairs(),
                    ((('I', 'a'), '3/2'), (('I', 'b'), '1/2'),
                     (('II', 'a'), '2/3'), (('II', 'b'), '3/7')))
コード例 #9
0
    def test_store_xlsx_read_c(self) -> None:
        """Write with index only; read back with columns_depth=0."""
        index = IndexHierarchy.from_product(('left', 'right'), ('up', 'down'))
        columns = IndexHierarchy.from_labels(((100, -5, 20),))
        f1 = Frame([1, 2, 3, 4], index=index, columns=columns)

        with temp_file('.xlsx') as fp:

            store = StoreXLSX(fp)
            store.write(((None, f1),), include_index=True, include_columns=False)
            f2 = store.read(index_depth=f1.index.depth, columns_depth=0)

        self.assertTrue((f1.values == f2.values).all())
        # with columns omitted, the loaded Frame gets an auto column label
        self.assertEqual(
                f2.to_pairs(0),
                ((0, ((('left', 'up'), 1),
                      (('left', 'down'), 2),
                      (('right', 'up'), 3),
                      (('right', 'down'), 4))),))
コード例 #10
0
    def test_store_xlsx_read_b(self) -> None:
        """Write without index; read back with hierarchical columns only."""
        index = IndexHierarchy.from_product(('left', 'right'), ('up', 'down'))
        columns = IndexHierarchy.from_labels(((100, -5, 20),))
        f1 = Frame.from_elements([1, 2, 3, 4], index=index, columns=columns)

        config_map = StoreConfigMap.from_config(
                StoreConfig(include_index=False, include_columns=True))

        with temp_file('.xlsx') as fp:

            store = StoreXLSX(fp)
            store.write(((None, f1),), config=config_map)

            read_config = StoreConfig(
                    index_depth=0,
                    columns_depth=f1.columns.depth)
            f2 = store.read(None, config=read_config)

        self.assertTrue((f1.values == f2.values).all())
        # with the index omitted, the loaded Frame gets an auto index
        self.assertEqual(
                f2.to_pairs(0),
                (((100, -5, 20), ((0, 1), (1, 2), (2, 3), (3, 4))),))
コード例 #11
0
    def test_store_get_field_names_and_dtypes_d(self) -> None:
        """Field naming for hierarchical columns, with and without forced brackets."""
        from static_frame.core.index_hierarchy import IndexHierarchy
        columns = IndexHierarchy.from_labels(
                ((1, 'a'), (1, 'b'), (2, 'c')),
                name=('foo', 'bar'))
        f1 = Frame.from_records(
                (('a', True, None),),
                index=('a',),
                columns=columns)

        base_kwargs = dict(
                frame=f1,
                include_index=True,
                include_index_name=False,
                include_columns=True,
                include_columns_name=True,
                )

        field_names, dtypes = Store.get_field_names_and_dtypes(**base_kwargs)
        self.assertEqual(field_names,
                         [('foo', 'bar'), "[1 'a']", "[1 'b']", "[2 'c']"])
        self.assertTrue(len(field_names) == len(dtypes))

        # force_brackets renders the columns name tuple in bracket form too
        field_names, dtypes = Store.get_field_names_and_dtypes(
                force_brackets=True, **base_kwargs)
        self.assertEqual(field_names,
                         ["['foo' 'bar']", "[1 'a']", "[1 'b']", "[2 'c']"])

        # these name/inclusion flag combinations conflict and must raise
        with self.assertRaises(StoreParameterConflict):
            Store.get_field_names_and_dtypes(
                    frame=f1,
                    include_index=True,
                    include_index_name=False,
                    include_columns=True,
                    include_columns_name=False,
                    )

        with self.assertRaises(StoreParameterConflict):
            Store.get_field_names_and_dtypes(
                    frame=f1,
                    include_index=False,
                    include_index_name=False,
                    include_columns=True,
                    include_columns_name=True,
                    )
コード例 #12
0
def bus_to_hierarchy(
    bus: tp.Union[Bus, 'Yarn'],
    axis: int,
    deepcopy_from_bus: bool,
    init_exception_cls: tp.Type[Exception],
) -> tp.Tuple[IndexHierarchy, IndexBase]:
    '''
    Given a :obj:`Bus` and an axis, derive an :obj:`IndexHierarchy` from the
    contained Frames' labels on that axis; also return and validate the
    :obj:`Index` of the opposite axis.

    Args:
        bus: Bus or Yarn of Frames.
        axis: 0 to collect each Frame's index; 1 to collect its columns.
        deepcopy_from_bus: If True, deep-copy extracted indices.
        init_exception_cls: Exception class raised when the opposite-axis
            indices of the contained Frames are not all equivalent.

    Raises:
        AxisInvalid: If ``axis`` is not 0 or 1.
    '''
    # NOTE: need to extract just axis labels, not the full Frame; need new Store/Bus loaders just for label data
    extractor = get_extractor(deepcopy_from_bus,
                              is_array=False,
                              memo_active=False)

    def tree_extractor(index: IndexBase) -> tp.Union[IndexBase, TreeNodeT]:
        # store an IndexHierarchy in tree form so from_tree can nest it
        index = extractor(index)
        if isinstance(index, IndexHierarchy):
            return index.to_tree()
        return index

    tree: TreeNodeT = {}
    opposite: tp.Optional[IndexBase] = None

    for label, f in bus.items():
        # select the collected axis and the opposite axis once per Frame,
        # then share the accumulation/validation logic (was duplicated)
        if axis == 0:
            primary, secondary = f.index, f.columns
        elif axis == 1:
            primary, secondary = f.columns, f.index
        else:
            raise AxisInvalid(f'invalid axis {axis}')

        tree[label] = tree_extractor(primary)
        if opposite is None:
            opposite = extractor(secondary)
        elif not opposite.equals(secondary):
            raise init_exception_cls(
                'opposite axis must have equivalent indices')

    # NOTE: we could try to collect index constructors by using the index of the Bus and observing the indices of the contained Frames, but it is not clear that will be better than using IndexAutoConstructorFactory

    return IndexHierarchy.from_tree(
        tree, index_constructors=IndexAutoConstructorFactory
    ), opposite  # type: ignore
コード例 #13
0
    def from_pandas(
        cls,
        value: 'pandas.Index',
    ) -> 'IndexBase':
        '''
        Given a Pandas index, return the appropriate IndexBase derived class.
        '''
        import pandas
        if not isinstance(value, pandas.Index):
            raise ErrorInitIndex(
                f'from_pandas must be called with a Pandas Index object, not: {type(value)}'
            )

        from static_frame import Index
        from static_frame import IndexGO
        from static_frame import IndexHierarchy
        from static_frame import IndexHierarchyGO
        from static_frame import IndexNanosecond
        from static_frame import IndexNanosecondGO
        from static_frame.core.index_datetime import IndexDatetime

        if isinstance(value, pandas.MultiIndex):
            # iterating a MultiIndex yields full label tuples
            names: tp.Optional[tp.Tuple[tp.Hashable, ...]] = tuple(value.names)
            # Pandas reports None for every unset component; collapse to None
            # so an unnamed index can later be re-set without conflict
            if all(n is None for n in names):  #type: ignore
                names = None
            hierarchy_cls = IndexHierarchy if cls.STATIC else IndexHierarchyGO
            return hierarchy_cls.from_labels(value,
                                             name=names,
                                             depth_reference=value.nlevels)
        if isinstance(value, pandas.DatetimeIndex):
            # honor a datetime-typed cls; otherwise default to ns resolution
            if issubclass(cls, IndexDatetime):
                return cls(value, name=value.name)
            nanosecond_cls = IndexNanosecond if cls.STATIC else IndexNanosecondGO
            return nanosecond_cls(value, name=value.name)

        flat_cls = Index if cls.STATIC else IndexGO
        return flat_cls(value, name=value.name)
コード例 #14
0
File: test_bus.py  Project: MadisonAster/static-frame
    def test_bus_extract_loc_a(self) -> None:
        """Bus construction rejects a Series indexed by an IndexHierarchy."""
        specs = (
                (dict(a=(1, 2), b=(3, 4)), ('x', 'y'), 'foo'),
                (dict(a=(1, 2, 3), b=(4, 5, 6)), ('x', 'y', 'z'), 'bar'),
                (dict(d=(10, 20), b=(50, 60)), ('p', 'q'), 'f3'),
                )
        frames = tuple(
                Frame.from_dict(mapping, index=index, name=name)
                for mapping, index, name in specs)

        ih = IndexHierarchy.from_labels((('a', 1), ('b', 2), ('b', 1)))
        s1 = Series(frames, index=ih, dtype=object)

        # IndexHierarchy is not supported: labels are tuples, not strings
        with self.assertRaises(ErrorInitBus):
            Bus(s1)
コード例 #15
0
    def test_yarn_rehierarch_a(self) -> None:
        """rehierarch on a Yarn selection reorders the hierarchy depths."""
        # build f1..f6 with the shapes the assertions below depend on
        frames = tuple(
                ff.parse(f's({r},{c})').rename(f'f{i}')
                for i, (r, c) in enumerate(
                        ((4, 2), (4, 5), (2, 2), (2, 8), (4, 4), (6, 4)),
                        start=1))

        b1 = Bus.from_frames(frames[:3])
        b2 = Bus.from_frames(frames[3:4])
        b3 = Bus.from_frames(frames[4:])

        y1 = Yarn((b1, b2, b3),
                  index=IndexHierarchy.from_product(('a', 'b'), (1, 2, 3)))
        self.assertEqual(
            y1.iloc[[0, 2, 4]].rehierarch((1, 0)).status['shape'].to_pairs(),
            (((1, 'a'), (4, 2)), ((3, 'a'), (2, 2)), ((2, 'b'), (4, 4))))
コード例 #16
0
    def test_yarn_relabel_flat_a(self) -> None:
        """relabel_flat collapses the hierarchical index to tuple labels."""
        # build f1..f6 with the shapes the assertions below depend on
        frames = tuple(
                ff.parse(f's({r},{c})').rename(f'f{i}')
                for i, (r, c) in enumerate(
                        ((4, 2), (4, 5), (2, 2), (2, 8), (4, 4), (6, 4)),
                        start=1))

        b1 = Bus.from_frames(frames[:3])
        b2 = Bus.from_frames(frames[3:4])
        b3 = Bus.from_frames(frames[4:])

        y1 = Yarn((b1, b2, b3),
                  index=IndexHierarchy.from_product(('a', 'b'), (1, 2, 3)))

        self.assertEqual(
                y1.relabel_flat()[('a', 3):].status['shape'].to_pairs(),
                ((('a', 3), (2, 2)), (('b', 1), (2, 8)),
                 (('b', 2), (4, 4)), (('b', 3), (6, 4))))
コード例 #17
0
File: series.py  Project: CrepeGoat/FEHnt
    def from_concat(cls,
                    containers: tp.Iterable['Series'],
                    *,
                    name: tp.Hashable = None):
        '''
        Concatenate multiple Series into a new Series, assuming the combination of all Indices result in a unique Index.
        '''
        # collect values and index labels in a single pass over containers
        parts = [(c.values, c.index.values) for c in containers]

        # concat_resolved returns immutable arrays
        values = concat_resolved([v for v, _ in parts])
        index = concat_resolved([labels for _, labels in parts])

        # a 2D label array means tuple labels: build an IndexHierarchy
        if index.ndim == 2:
            index = IndexHierarchy.from_labels(index)

        return cls(values, index=index, name=name)
コード例 #18
0
    def test_store_xlsx_write_b(self) -> None:
        """Default write/read round-trips a Frame with hierarchical columns."""
        f1 = Frame.from_records(
                ((None, np.nan, 50, 'a'),
                 (None, -np.inf, -50, 'b'),
                 (None, 60.4, -50, 'c')),
                index=('p', 'q', 'r'),
                columns=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                )

        with temp_file('.xlsx') as fp:

            store = StoreXLSX(fp)
            store.write(((None, f1),))

            f2 = store.read(
                    index_depth=f1.index.depth,
                    columns_depth=f1.columns.depth)

            # spot-check a single hierarchical column selection
            self.assertEqual(
                    f1[HLoc[('II', 'a')]].values.tolist(),
                    f2[HLoc[('II', 'a')]].values.tolist())

            self.assertEqualFrames(f1, f2)
コード例 #19
0
    def test_store_sqlite_write_c(self) -> None:
        """float16 values round-trip with float16 rounding applied."""
        f1 = Frame.from_dict(
                dict(
                        x=np.array([1.2, 4.5, 3.2, 6.5], dtype=np.float16),
                        y=(3, 4, -5, -3000)),
                index=IndexHierarchy.from_product(('I', 'II'), ('a', 'b')),
                name='f1')

        with temp_file('.sqlite') as fp:
            store = StoreSQLite(fp)
            store.write(((f1.name, f1),))

            f_loaded = store.read(f1.name, config=StoreConfig.from_frame(f1))

            # expected values reflect float16 representation of the inputs
            self.assertAlmostEqualItems(
                    f_loaded['x'].to_pairs(),
                    ((('I', 'a'), 1.2001953125), (('I', 'b'), 4.5),
                     (('II', 'a'), 3.19921875), (('II', 'b'), 6.5)))
コード例 #20
0
 def test_assertions(hierarchy: IndexHierarchy,
                     opposite: Index) -> None:
     # Verify that the derived hierarchy matches the expected per-frame
     # trees and that the opposite axis equals the shared index.
     # NOTE(review): relies on enclosing-scope names (tree1..tree3, index1,
     # self) -- this is a closure extracted from a test method.
     expected_tree = dict(f1=tree1, f2=tree2, f3=tree3)
     self.compare_trees(hierarchy.to_tree(), expected_tree)
     self.assertTrue(index1.equals(opposite))
コード例 #21
0
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        index_depth: int = 1,
        columns_depth: int = 1,
        dtypes: DtypesSpecifier = None,
        store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT
    ) -> Frame:
        '''
        Read one sheet of the XLSX workbook into a :obj:`Frame`.

        Args:
            label: Sheet name to read; if None, the first sheet is read and
                the resulting Frame is left unnamed.
            index_depth: Number of leading columns used to build the index.
            columns_depth: Number of leading rows used to build the columns.
            {dtypes}
            store_filter: Optional filter applied to each cell value on load.
        '''
        wb = self._load_workbook(self._fp)

        if label is None:
            ws = wb[wb.sheetnames[0]]
            name = None  # do not set to default sheet name
        else:
            ws = wb[label]
            name = ws.title

        if ws.max_column <= 1 or ws.max_row <= 1:
            # https://openpyxl.readthedocs.io/en/stable/optimized.html
            # says that some clients might not report correct dimensions; not sure what conditions are best to show this
            ws.calculate_dimension()

        index_values: tp.List[tp.Any] = []
        columns_values: tp.List[tp.Any] = []
        data = []

        for row_count, row in enumerate(
                ws.iter_rows()):  # cannot use values_only on 2.5.4
            if store_filter is None:
                row = tuple(c.value for c in row)
            else:  # only need to filter string values, but probably too expensive to pre-check
                row = tuple(
                    store_filter.to_type_filter_element(c.value) for c in row)

            # rows within the columns region accumulate column labels only
            if row_count <= columns_depth - 1:
                if columns_depth == 1:
                    columns_values.extend(row[index_depth:])
                elif columns_depth > 1:
                    # NOTE: this orientation will need to be rotated
                    columns_values.append(row[index_depth:])
                continue

            # remaining rows split into index labels and data values
            if index_depth == 0:
                data.append(row)
            elif index_depth == 1:
                index_values.append(row[0])
                data.append(row[1:])
            else:
                index_values.append(row[:index_depth])
                data.append(row[index_depth:])

        wb.close()

        index: tp.Optional[IndexBase] = None
        own_index = False
        if index_depth == 1:
            index = Index(index_values)
            own_index = True
        elif index_depth > 1:
            index = IndexHierarchy.from_labels(index_values,
                                               continuation_token=None)
            own_index = True

        columns: tp.Optional[IndexBase] = None
        own_columns = False
        if columns_depth == 1:
            columns = Index(columns_values)
            own_columns = True
        elif columns_depth > 1:
            # rotate row-wise accumulation into per-depth label iterables
            columns = IndexHierarchy.from_labels(zip(*columns_values),
                                                 continuation_token=None)
            own_columns = True

        return tp.cast(
            Frame,
            Frame.from_records(data,
                               index=index,
                               columns=columns,
                               dtypes=dtypes,
                               own_index=own_index,
                               own_columns=own_columns,
                               name=name))
コード例 #22
0
    def from_frame(cls,
            frame: Frame,
            *,
            chunksize: int,
            retain_labels: bool,
            axis: int = 0,
            name: NameType = None,
            label_extractor: tp.Optional[tp.Callable[[IndexBase], tp.Hashable]] = None,
            config: StoreConfigMapInitializer = None,
            deepcopy_from_bus: bool = False,
            ) -> 'Quilt':
        '''
        Given a :obj:`Frame`, create a :obj:`Quilt` by partitioning it along the specified ``axis`` in units of ``chunksize``, where ``axis`` 0 partitions vertically (retaining aligned columns) and 1 partitions horizontally (retaining aligned index).

        Args:
            label_extractor: Function that, given the partitioned index component along the specified axis, returns a string label for that chunk; defaults to taking the first label of the chunk.
        '''
        # the axis being partitioned: the index (axis 0) or columns (axis 1)
        vector = frame._index if axis == 0 else frame._columns
        vector_len = len(vector)

        # chunk start offsets; ends are the starts shifted by one chunk
        starts = range(0, vector_len, chunksize)
        if len(starts) == 1:
            ends: tp.Iterable[int] = (vector_len,)
        else:
            ends = range(starts[1], vector_len, chunksize)

        if label_extractor is None:
            # default: label each chunk by its first axis label
            label_extractor = lambda x: x.iloc[0] #type: ignore

        axis_map_components: tp.Dict[tp.Hashable, IndexBase] = {}
        opposite = None

        def values() -> tp.Iterator[Frame]:
            # lazily yield renamed chunk Frames, recording as a side effect
            # each chunk's axis labels and the (shared) opposite axis
            nonlocal opposite

            # zip_longest pads the final end with vector_len
            for start, end in zip_longest(starts, ends, fillvalue=vector_len):
                if axis == 0: # along rows
                    f = frame.iloc[start:end]
                    label = label_extractor(f.index) #type: ignore
                    axis_map_components[label] = f.index
                    if opposite is None:
                        opposite = f.columns
                elif axis == 1: # along columns
                    f = frame.iloc[:, start:end]
                    label = label_extractor(f.columns) #type: ignore
                    axis_map_components[label] = f.columns
                    if opposite is None:
                        opposite = f.index
                else:
                    raise AxisInvalid(f'invalid axis {axis}')
                yield f.rename(label)

        name = name if name else frame.name
        # consuming values() here populates axis_map_components and opposite
        bus = Bus.from_frames(values(), config=config, name=name)

        axis_hierarchy = IndexHierarchy.from_tree(axis_map_components)

        return cls(bus,
                axis=axis,
                axis_hierarchy=axis_hierarchy,
                axis_opposite=opposite,
                retain_labels=retain_labels,
                deepcopy_from_bus=deepcopy_from_bus,
                )
コード例 #23
0
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT,
        container_type: tp.Type[Frame] = Frame,
    ) -> Frame:
        '''
        Read one worksheet from the XLSX workbook at this store's file path and return it as a ``container_type`` instance.

        Args:
            label: Name of sheet to read from XLSX; when None, the first sheet is read and the returned Frame is not named after it.
            config: StoreConfig supplying index/columns depths, name depth levels, header/footer skip counts, dtypes, and ``trim_nadir``; a default StoreConfig is used when None.
            store_filter: StoreFilter used to translate raw cell values on read; when None, cell values are taken as-is.
            container_type: Type of container to be returned, either Frame or a Frame subclass
        '''
        if config is None:
            config = StoreConfig()  # get default

        index_depth = config.index_depth
        index_name_depth_level = config.index_name_depth_level
        columns_depth = config.columns_depth
        columns_name_depth_level = config.columns_name_depth_level
        trim_nadir = config.trim_nadir  # whether to drop trailing all-empty rows/columns

        skip_header = config.skip_header
        skip_footer = config.skip_footer

        wb = self._load_workbook(self._fp)

        if label is None:
            ws = wb[wb.sheetnames[0]]
            name = None  # do not set to default sheet name
        else:
            ws = wb[label]
            name = ws.title

        if ws.max_column <= 1 or ws.max_row <= 1:
            # https://openpyxl.readthedocs.io/en/stable/optimized.html
            # says that some clients might not report correct dimensions
            ws.calculate_dimension()

        max_column = ws.max_column
        max_row = ws.max_row

        # adjust for downward shift for skipping header, then reduce for footer; at this value and beyond we stop
        last_row_count = max_row - skip_header - skip_footer

        index_values: tp.List[tp.Any] = []
        columns_values: tp.List[tp.Any] = []

        data = []  # pre-size with None?
        apex_rows = []  # upper-left corner cells shared by index and columns labels

        if trim_nadir:
            # per-cell mask of None values over the full post-header/footer extent
            mask = np.full((last_row_count, max_column), False)

        # start at -skip_header so that row_count 0 is the first non-skipped row
        for row_count, row in enumerate(ws.iter_rows(max_row=max_row),
                                        start=-skip_header):
            if row_count < 0:
                continue  # due to skip header; preserves comparison to columns_depth
            if row_count >= last_row_count:
                break

            if trim_nadir:
                # build row_data cell by cell so the mask can record None positions
                row_data: tp.Sequence[tp.Any] = []
                for col_count, c in enumerate(row):
                    if store_filter is None:
                        value = c.value
                    else:
                        value = store_filter.to_type_filter_element(c.value)
                    if value is None:  # NOTE: only checking None, not np.nan
                        mask[row_count, col_count] = True
                    row_data.append(value)  # type: ignore
                if not row_data:
                    # a row yielded with no cells is treated as entirely empty
                    mask[row_count] = True
            else:
                if store_filter is None:
                    row_data = tuple(c.value for c in row)
                else:  # only need to filter string values, but probably too expensive to pre-check
                    row_data = tuple(
                        store_filter.to_type_filter_element(c.value)
                        for c in row)

            # rows within columns_depth supply columns labels (and apex cells)
            if row_count <= columns_depth - 1:
                apex_rows.append(row_data[:index_depth])
                if columns_depth == 1:
                    columns_values.extend(row_data[index_depth:])
                elif columns_depth > 1:
                    columns_values.append(row_data[index_depth:])
                continue

            # remaining rows are split into index labels and body data
            if index_depth == 0:
                data.append(row_data)
            elif index_depth == 1:
                index_values.append(row_data[0])
                data.append(row_data[1:])
            else:
                index_values.append(row_data[:index_depth])
                data.append(row_data[index_depth:])

        wb.close()

        #-----------------------------------------------------------------------
        # Trim all-empty trailing rows created from style formatting GH#146. As the wb is opened in read-only mode, reverse iterating on the wb is not an option, nor is direct row access by integer

        if trim_nadir:
            # NOTE: `mask` is all data, while `data` is post index/columns extraction; this means that if a non-None label is found, the row/column will not be trimmed.
            row_mask = mask.all(axis=1)
            # subtract columns_depth to translate a full-sheet row position into a data row position
            row_trim_start = array1d_to_last_contiguous_to_edge(
                row_mask) - columns_depth
            if row_trim_start < len(row_mask) - columns_depth:
                data = data[:row_trim_start]
                if index_depth > 0:  # this handles depth 1 and greater
                    index_values = index_values[:row_trim_start]

            col_mask = mask.all(axis=0)
            # subtract index_depth to translate a full-sheet column position into a data column position
            col_trim_start = array1d_to_last_contiguous_to_edge(
                col_mask) - index_depth
            if col_trim_start < len(col_mask) - index_depth:
                data = (r[:col_trim_start] for r in data)  #type: ignore
                if columns_depth == 1:
                    columns_values = columns_values[:col_trim_start]
                if columns_depth > 1:
                    columns_values = (r[:col_trim_start]
                                      for r in columns_values)  #type: ignore

        #-----------------------------------------------------------------------
        # continue with Index and Frame creation
        # derive the index name from the apex (upper-left) cells, if any were collected
        index_name = None if columns_depth == 0 else apex_to_name(
            rows=apex_rows,
            depth_level=index_name_depth_level,
            axis=0,
            axis_depth=index_depth)

        index: tp.Optional[IndexBase] = None
        own_index = False
        if index_depth == 1:
            index = Index(index_values, name=index_name)
            own_index = True
        elif index_depth > 1:
            index = IndexHierarchy.from_labels(
                index_values,
                continuation_token=None,
                name=index_name,
            )
            own_index = True

        columns_name = None if index_depth == 0 else apex_to_name(
            rows=apex_rows,
            depth_level=columns_name_depth_level,
            axis=1,
            axis_depth=columns_depth)

        columns: tp.Optional[IndexBase] = None
        own_columns = False
        if columns_depth == 1:
            columns = container_type._COLUMNS_CONSTRUCTOR(columns_values,
                                                          name=columns_name)
            own_columns = True
        elif columns_depth > 1:
            # collected rows of columns labels must be rotated into per-column tuples
            columns = container_type._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels(
                zip(*columns_values),
                continuation_token=None,
                name=columns_name,
            )
            own_columns = True

        return container_type.from_records(
            data,  #type: ignore
            index=index,
            columns=columns,
            dtypes=config.dtypes,
            own_index=own_index,
            own_columns=own_columns,
            name=name,
            consolidate_blocks=config.consolidate_blocks)
コード例 #24
0
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT,
        container_type: tp.Type[Frame] = Frame,
    ) -> Frame:
        '''
        Read one worksheet from the XLSX workbook at this store's file path and return it as a ``container_type`` instance.

        Args:
            label: Name of sheet to read from XLSX; when None, the first sheet is read and the returned Frame is not named after it.
            config: StoreConfig supplying index/columns depths, dtypes, and block consolidation; a default StoreConfig is used when None.
            store_filter: StoreFilter used to translate raw cell values on read; when None, cell values are taken as-is.
            container_type: Type of container to be returned, either Frame or a Frame subclass
        '''
        if config is None:
            config = StoreConfig()  # get default

        index_depth = config.index_depth
        columns_depth = config.columns_depth

        wb = self._load_workbook(self._fp)

        if label is None:
            ws = wb[wb.sheetnames[0]]
            name = None  # do not set to default sheet name
        else:
            ws = wb[label]
            name = ws.title

        if ws.max_column <= 1 or ws.max_row <= 1:
            # https://openpyxl.readthedocs.io/en/stable/optimized.html
            # says that some clients might not report correct dimensions; not sure what conditions are best to show this
            ws.calculate_dimension()

        max_column = ws.max_column
        max_row = ws.max_row

        index_values: tp.List[tp.Any] = []
        columns_values: tp.List[tp.Any] = []

        data = []  # pre-size with None?

        for row_count, row in enumerate(ws.iter_rows(max_row=max_row)):
            # rebind row from openpyxl cells to plain (possibly filtered) values
            if store_filter is None:
                row = tuple(c.value for c in row)
            else:  # only need to filter string values, but probably too expensive to pre-check
                row = tuple(
                    store_filter.to_type_filter_element(c.value) for c in row)

            # rows within columns_depth supply columns labels
            if row_count <= columns_depth - 1:
                if columns_depth == 1:
                    columns_values.extend(row[index_depth:])
                elif columns_depth > 1:
                    # NOTE: this orientation will need to be rotated
                    columns_values.append(row[index_depth:])
                continue

            # remaining rows are split into index labels and body data
            if index_depth == 0:
                data.append(row)
            elif index_depth == 1:
                index_values.append(row[0])
                data.append(row[1:])
            else:
                index_values.append(row[:index_depth])
                data.append(row[index_depth:])

        wb.close()

        # Trim all-empty trailing rows created from style formatting GH#146. As the wb is opened in read-only mode, reverse iterating on the wb is not an option, nor is direct row access by integer; also, evaluating all rows on forward iteration is expensive. Instead, after collecting all the data in a list and closing the wb, reverse iterate and find rows that are all empty.
        # NOTE: need to handle case where there are valid index values

        # a store_filter may map None to a different sentinel; compare against that
        empty_token = (None if store_filter is None else
                       store_filter.to_type_filter_element(None))

        # scan from the last row upward; the stop of -2 lets row_count reach -1
        # (and thus be defined) when data is empty or every row is empty
        for row_count in range(len(data) - 1, -2, -1):
            if row_count < 0:
                break
            if any(c != empty_token
                   for c in data[row_count]):  # try to break early with any
                break
            if index_depth == 1 and index_values[row_count] != empty_token:
                break
            if index_depth > 1 and any(c != empty_token
                                       for c in index_values[row_count]):
                break

        # row_count is set to the first row that has data or index; can be -1
        empty_row_idx = row_count + 1  # index of all-empty row
        if empty_row_idx != len(data):
            # trim data and index_values, if index_depth > 0
            data = data[:empty_row_idx]
            if index_depth > 0:
                index_values = index_values[:empty_row_idx]

        # continue with Index and Frame creation
        index: tp.Optional[IndexBase] = None
        own_index = False
        if index_depth == 1:
            index = Index(index_values)
            own_index = True
        elif index_depth > 1:
            index = IndexHierarchy.from_labels(index_values,
                                               continuation_token=None)
            own_index = True

        columns: tp.Optional[IndexBase] = None
        own_columns = False
        if columns_depth == 1:
            columns = container_type._COLUMNS_CONSTRUCTOR(columns_values)
            own_columns = True
        elif columns_depth > 1:
            # rotate collected label rows into per-column tuples
            columns = container_type._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels(
                zip(*columns_values), continuation_token=None)
            own_columns = True

        # NOTE: this might be a Frame or a FrameGO
        return tp.cast(
            Frame,
            container_type.from_records(
                data,
                index=index,
                columns=columns,
                dtypes=config.dtypes,
                own_index=own_index,
                own_columns=own_columns,
                name=name,
                consolidate_blocks=config.consolidate_blocks))