コード例 #1
0
    def test_store_xlsx_read_many_d(self) -> None:
        # trim_nadir should drop the trailing all-None rows and the all-None column
        data_rows = (
                (2, 2, 'a', False, None),
                (30, 73, 'd', True, None),
                (None, None, None, None, None),
                (None, None, None, None, None),
                )
        col_labels = (('a', 1), ('a', 2), ('b', 1), ('b', 2), (None, None))
        f1 = Frame.from_records(data_rows,
                columns=IndexHierarchy.from_labels(col_labels))

        with temp_file('.xlsx') as fp:
            f1.to_xlsx(fp, label='f1', include_index=False, include_columns=True)

            store = StoreXLSX(fp)
            config = StoreConfig(
                    index_depth=0,
                    columns_depth=2,
                    trim_nadir=True,
                    )
            f2 = next(store.read_many(('f1',), config=config))
            # two nadir rows and one nadir column were trimmed from the 4x5 input
            self.assertEqual(f2.shape, (2, 4))
            self.assertEqual(f2.to_pairs(),
                    ((('a', 1), ((0, 2), (1, 30))), (('a', 2), ((0, 2), (1, 73))), (('b', 1), ((0, 'a'), (1, 'd'))), (('b', 2), ((0, False), (1, True)))))
コード例 #2
0
    def from_pandas(
        cls,
        value: 'pandas.Index',
    ) -> 'IndexBase':
        '''
        Given a Pandas index, return the appropriate IndexBase derived class.
        '''
        import pandas
        if not isinstance(value, pandas.Index):
            raise ErrorInitIndex(
                f'from_pandas must be called with a Pandas Index object, not: {type(value)}'
            )

        from static_frame import Index
        from static_frame import IndexGO
        from static_frame import IndexHierarchy
        from static_frame import IndexHierarchyGO
        from static_frame import IndexNanosecond
        from static_frame import IndexNanosecondGO
        from static_frame.core.index_datetime import IndexDatetime

        if isinstance(value, pandas.MultiIndex):
            # iterating a MultiIndex yields its label tuples
            name: tp.Optional[tp.Tuple[tp.Hashable, ...]] = tuple(value.names)
            # Pandas reports None for every unassigned component; collapse
            # that to a single None so later unsetting the name does not raise.
            if not any(n is not None for n in name):  #type: ignore
                name = None
            depth = value.nlevels

            if cls.STATIC:
                return IndexHierarchy.from_labels(value,
                                                  name=name,
                                                  depth_reference=depth)
            return IndexHierarchyGO.from_labels(value,
                                                name=name,
                                                depth_reference=depth)

        if isinstance(value, pandas.DatetimeIndex):
            # preserve the datetime subclass when called from one;
            # otherwise fall back to nanosecond resolution
            if issubclass(cls, IndexDatetime):
                return cls(value, name=value.name)
            if cls.STATIC:
                return IndexNanosecond(value, name=value.name)
            return IndexNanosecondGO(value, name=value.name)

        if cls.STATIC:
            return Index(value, name=value.name)
        return IndexGO(value, name=value.name)
コード例 #3
0
    def test_store_get_field_names_and_dtypes_d(self) -> None:
        # field naming for hierarchical columns, plus parameter-conflict errors
        from static_frame.core.index_hierarchy import IndexHierarchy
        ih_columns = IndexHierarchy.from_labels(
                ((1, 'a'), (1, 'b'), (2, 'c')),
                name=('foo', 'bar'))
        f1 = Frame.from_records(
                (('a', True, None),),
                index=(('a',)),
                columns=ih_columns)

        field_names, dtypes = Store.get_field_names_and_dtypes(
            frame=f1,
            include_index=True,
            include_index_name=False,
            include_columns=True,
            include_columns_name=True,
        )
        # the columns name tuple leads, followed by bracketed column labels
        self.assertEqual(field_names,
                         [('foo', 'bar'), "[1 'a']", "[1 'b']", "[2 'c']"])
        self.assertTrue(len(field_names) == len(dtypes))

        field_names, dtypes = Store.get_field_names_and_dtypes(
            frame=f1,
            include_index=True,
            include_index_name=False,
            include_columns=True,
            include_columns_name=True,
            force_brackets=True,
        )
        # force_brackets also renders the leading name tuple in bracket form
        self.assertEqual(field_names,
                         ["['foo' 'bar']", "[1 'a']", "[1 'b']", "[2 'c']"])

        # this parameter combination raises
        with self.assertRaises(StoreParameterConflict):
            Store.get_field_names_and_dtypes(
                frame=f1,
                include_index=True,
                include_index_name=False,
                include_columns=True,
                include_columns_name=False,
            )

        # this parameter combination raises as well
        with self.assertRaises(StoreParameterConflict):
            Store.get_field_names_and_dtypes(
                frame=f1,
                include_index=False,
                include_index_name=False,
                include_columns=True,
                include_columns_name=True,
            )
コード例 #4
0
    def test_store_xlsx_read_c(self) -> None:
        # round-trip: hierarchical index written, columns omitted
        idx = IndexHierarchy.from_product(('left', 'right'), ('up', 'down'))
        cols = IndexHierarchy.from_labels(((100, -5, 20),))
        f1 = Frame([1, 2, 3, 4], index=idx, columns=cols)

        with temp_file('.xlsx') as fp:
            store = StoreXLSX(fp)
            store.write(((None, f1),), include_index=True, include_columns=False)
            f2 = store.read(index_depth=f1.index.depth, columns_depth=0)

        self.assertTrue((f1.values == f2.values).all())
        self.assertEqual(f2.to_pairs(0),
                ((0, ((('left', 'up'), 1), (('left', 'down'), 2), (('right', 'up'), 3), (('right', 'down'), 4))),)
                )
コード例 #5
0
ファイル: test_bus.py プロジェクト: MadisonAster/static-frame
    def test_bus_extract_loc_a(self) -> None:
        # a Series keyed by an IndexHierarchy cannot back a Bus
        frame_a = Frame.from_dict(dict(a=(1, 2), b=(3, 4)),
                                  index=('x', 'y'),
                                  name='foo')
        frame_b = Frame.from_dict(dict(a=(1, 2, 3), b=(4, 5, 6)),
                                  index=('x', 'y', 'z'),
                                  name='bar')
        frame_c = Frame.from_dict(dict(d=(10, 20), b=(50, 60)),
                                  index=('p', 'q'),
                                  name='f3')

        ih = IndexHierarchy.from_labels((('a', 1), ('b', 2), ('b', 1)))
        s1 = Series((frame_a, frame_b, frame_c), index=ih, dtype=object)

        # IndexHierarchy is not supported, as its labels are tuples, not strings
        with self.assertRaises(ErrorInitBus):
            Bus(s1)
コード例 #6
0
    def test_store_xlsx_read_b(self) -> None:
        # round-trip: hierarchical columns written, index omitted
        idx = IndexHierarchy.from_product(('left', 'right'), ('up', 'down'))
        cols = IndexHierarchy.from_labels(((100, -5, 20), ))
        f1 = Frame.from_elements([1, 2, 3, 4], index=idx, columns=cols)

        config_map = StoreConfigMap.from_config(
            StoreConfig(include_index=False, include_columns=True))

        with temp_file('.xlsx') as fp:
            store = StoreXLSX(fp)
            store.write(((None, f1), ), config=config_map)

            read_config = StoreConfig(index_depth=0,
                                      columns_depth=f1.columns.depth)
            f2 = store.read(None, config=read_config)

        self.assertTrue((f1.values == f2.values).all())
        self.assertEqual(f2.to_pairs(0),
                         (((100, -5, 20), ((0, 1), (1, 2), (2, 3), (3, 4))), ))
コード例 #7
0
ファイル: series.py プロジェクト: CrepeGoat/FEHnt
    def from_concat(cls,
                    containers: tp.Iterable['Series'],
                    *,
                    name: tp.Hashable = None):
        '''
        Concatenate multiple Series into a new Series, assuming the combination of all Indices result in a unique Index.
        '''
        values_parts = []
        index_parts = []
        for container in containers:
            values_parts.append(container.values)
            index_parts.append(container.index.values)

        # concat_resolved returns immutable arrays
        values = concat_resolved(values_parts)
        index = concat_resolved(index_parts)

        # a 2D index array means the inputs carried tuple labels: build a hierarchy
        if index.ndim == 2:
            index = IndexHierarchy.from_labels(index)

        return cls(values, index=index, name=name)
コード例 #8
0
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT,
        container_type: tp.Type[Frame] = Frame,
    ) -> Frame:
        '''
        Read one worksheet from the XLSX store into a ``Frame`` (or subclass).

        Args:
            label: Name of sheet to read from XLSX.
            container_type: Type of container to be returned, either Frame or a Frame subclass

        '''
        if config is None:
            config = StoreConfig()  # get default

        # unpack configuration once, ahead of the per-row loop
        index_depth = config.index_depth
        index_name_depth_level = config.index_name_depth_level
        columns_depth = config.columns_depth
        columns_name_depth_level = config.columns_name_depth_level
        trim_nadir = config.trim_nadir

        skip_header = config.skip_header
        skip_footer = config.skip_footer

        wb = self._load_workbook(self._fp)

        if label is None:
            ws = wb[wb.sheetnames[0]]
            name = None  # do not set to default sheet name
        else:
            ws = wb[label]
            name = ws.title

        if ws.max_column <= 1 or ws.max_row <= 1:
            # https://openpyxl.readthedocs.io/en/stable/optimized.html
            # says that some clients might not report correct dimensions
            ws.calculate_dimension()

        max_column = ws.max_column
        max_row = ws.max_row

        # adjust for downward shift for skipping header, then reduce for footer; at this value and beyond we stop
        last_row_count = max_row - skip_header - skip_footer

        index_values: tp.List[tp.Any] = []
        columns_values: tp.List[tp.Any] = []

        data = []  # pre-size with None?
        # apex: upper-left corner cells shared by index and columns labels
        apex_rows = []

        if trim_nadir:
            # True where a cell is None; used after the loop to trim trailing
            # all-empty rows and columns
            mask = np.full((last_row_count, max_column), False)

        # start=-skip_header makes skipped header rows enumerate as negative
        for row_count, row in enumerate(ws.iter_rows(max_row=max_row),
                                        start=-skip_header):
            if row_count < 0:
                continue  # due to skip header; preserves comparison to columns_depth
            if row_count >= last_row_count:
                break

            if trim_nadir:
                # materialize the row cell by cell so the mask can be updated
                row_data: tp.Sequence[tp.Any] = []
                for col_count, c in enumerate(row):
                    if store_filter is None:
                        value = c.value
                    else:
                        value = store_filter.to_type_filter_element(c.value)
                    if value is None:  # NOTE: only checking None, not np.nan
                        mask[row_count, col_count] = True
                    row_data.append(value)  # type: ignore
                if not row_data:
                    # an entirely empty row: mark the whole mask row
                    mask[row_count] = True
            else:
                if store_filter is None:
                    row_data = tuple(c.value for c in row)
                else:  # only need to filter string values, but probably too expensive to pre-check
                    row_data = tuple(
                        store_filter.to_type_filter_element(c.value)
                        for c in row)

            # rows before columns_depth hold columns labels (and the apex)
            if row_count <= columns_depth - 1:
                apex_rows.append(row_data[:index_depth])
                if columns_depth == 1:
                    columns_values.extend(row_data[index_depth:])
                elif columns_depth > 1:
                    columns_values.append(row_data[index_depth:])
                continue

            # body rows: split off index labels, keep the remainder as data
            if index_depth == 0:
                data.append(row_data)
            elif index_depth == 1:
                index_values.append(row_data[0])
                data.append(row_data[1:])
            else:
                index_values.append(row_data[:index_depth])
                data.append(row_data[index_depth:])

        wb.close()

        #-----------------------------------------------------------------------
        # Trim all-empty trailing rows created from style formatting GH#146. As the wb is opened in read-only mode, reverse iterating on the wb is not an option, nor is direct row access by integer

        if trim_nadir:
            # NOTE: `mask` is all data, while `data` is post index/columns extraction; this means that if a non-None label is found, the row/column will not be trimmed.
            row_mask = mask.all(axis=1)
            # offset by columns_depth to convert mask coordinates to data coordinates
            row_trim_start = array1d_to_last_contiguous_to_edge(
                row_mask) - columns_depth
            if row_trim_start < len(row_mask) - columns_depth:
                data = data[:row_trim_start]
                if index_depth > 0:  # this handles depth 1 and greater
                    index_values = index_values[:row_trim_start]

            col_mask = mask.all(axis=0)
            col_trim_start = array1d_to_last_contiguous_to_edge(
                col_mask) - index_depth
            if col_trim_start < len(col_mask) - index_depth:
                # lazily slice each row; from_records consumes the generator
                data = (r[:col_trim_start] for r in data)  #type: ignore
                if columns_depth == 1:
                    columns_values = columns_values[:col_trim_start]
                if columns_depth > 1:
                    columns_values = (r[:col_trim_start]
                                      for r in columns_values)  #type: ignore

        #-----------------------------------------------------------------------
        # continue with Index and Frame creation
        index_name = None if columns_depth == 0 else apex_to_name(
            rows=apex_rows,
            depth_level=index_name_depth_level,
            axis=0,
            axis_depth=index_depth)

        index: tp.Optional[IndexBase] = None
        own_index = False
        if index_depth == 1:
            index = Index(index_values, name=index_name)
            own_index = True
        elif index_depth > 1:
            index = IndexHierarchy.from_labels(
                index_values,
                continuation_token=None,
                name=index_name,
            )
            own_index = True

        columns_name = None if index_depth == 0 else apex_to_name(
            rows=apex_rows,
            depth_level=columns_name_depth_level,
            axis=1,
            axis_depth=columns_depth)

        columns: tp.Optional[IndexBase] = None
        own_columns = False
        if columns_depth == 1:
            columns = container_type._COLUMNS_CONSTRUCTOR(columns_values,
                                                          name=columns_name)
            own_columns = True
        elif columns_depth > 1:
            # columns were collected per depth level; rotate into per-column tuples
            columns = container_type._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels(
                zip(*columns_values),
                continuation_token=None,
                name=columns_name,
            )
            own_columns = True

        return container_type.from_records(
            data,  #type: ignore
            index=index,
            columns=columns,
            dtypes=config.dtypes,
            own_index=own_index,
            own_columns=own_columns,
            name=name,
            consolidate_blocks=config.consolidate_blocks)
コード例 #9
0
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        index_depth: int = 1,
        columns_depth: int = 1,
        dtypes: DtypesSpecifier = None,
        store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT
    ) -> Frame:
        '''
        Read one worksheet from the XLSX store into a ``Frame``.

        Args:
            {dtypes}
        '''
        wb = self._load_workbook(self._fp)

        if label is None:
            ws = wb[wb.sheetnames[0]]
            name = None  # do not set to default sheet name
        else:
            ws = wb[label]
            name = ws.title

        if ws.max_column <= 1 or ws.max_row <= 1:
            # https://openpyxl.readthedocs.io/en/stable/optimized.html
            # says that some clients might not report correct dimensions; not sure what conditions are best to show this
            ws.calculate_dimension()

        max_column = ws.max_column
        max_row = ws.max_row

        index_values: tp.List[tp.Any] = []
        columns_values: tp.List[tp.Any] = []

        # print()
        # for row in ws.iter_rows():
        #     print(tuple(str(c.value).ljust(10) for c in row))

        data = []

        for row_count, row in enumerate(
                ws.iter_rows()):  # cannot use values_only on 2.5.4
            if store_filter is None:
                row = tuple(c.value for c in row)
            else:  # only need to filter string values, but probably too expensive to pre-check
                row = tuple(
                    store_filter.to_type_filter_element(c.value) for c in row)

            # rows before columns_depth hold columns labels
            if row_count <= columns_depth - 1:
                if columns_depth == 1:
                    columns_values.extend(row[index_depth:])
                elif columns_depth > 1:
                    # NOTE: this orientation will need to be rotated
                    columns_values.append(row[index_depth:])
                continue

            # body rows: split off index labels, keep the remainder as data
            if index_depth == 0:
                data.append(row)
            elif index_depth == 1:
                index_values.append(row[0])
                data.append(row[1:])
            else:
                index_values.append(row[:index_depth])
                data.append(row[index_depth:])

        wb.close()

        index: tp.Optional[IndexBase] = None
        own_index = False
        if index_depth == 1:
            index = Index(index_values)
            own_index = True
        elif index_depth > 1:
            index = IndexHierarchy.from_labels(index_values,
                                               continuation_token=None)
            own_index = True

        columns: tp.Optional[IndexBase] = None
        own_columns = False
        if columns_depth == 1:
            columns = Index(columns_values)
            own_columns = True
        elif columns_depth > 1:
            # columns were collected per depth level; rotate into per-column tuples
            columns = IndexHierarchy.from_labels(zip(*columns_values),
                                                 continuation_token=None)
            own_columns = True

        return tp.cast(
            Frame,
            Frame.from_records(data,
                               index=index,
                               columns=columns,
                               dtypes=dtypes,
                               own_index=own_index,
                               own_columns=own_columns,
                               name=name))
コード例 #10
0
    def read(
        self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT,
        container_type: tp.Type[Frame] = Frame,
    ) -> Frame:
        '''
        Read one worksheet from the XLSX store into a ``Frame`` (or subclass).

        Args:
            label: Name of sheet to read from XLSX.
            container_type: Type of container to be returned, either Frame or a Frame subclass

        '''
        if config is None:
            config = StoreConfig()  # get default

        index_depth = config.index_depth
        columns_depth = config.columns_depth

        wb = self._load_workbook(self._fp)

        if label is None:
            ws = wb[wb.sheetnames[0]]
            name = None  # do not set to default sheet name
        else:
            ws = wb[label]
            name = ws.title

        if ws.max_column <= 1 or ws.max_row <= 1:
            # https://openpyxl.readthedocs.io/en/stable/optimized.html
            # says that some clients might not report correct dimensions; not sure what conditions are best to show this
            ws.calculate_dimension()

        max_column = ws.max_column
        max_row = ws.max_row

        index_values: tp.List[tp.Any] = []
        columns_values: tp.List[tp.Any] = []

        data = []  # pre-size with None?

        for row_count, row in enumerate(ws.iter_rows(max_row=max_row)):
            if store_filter is None:
                row = tuple(c.value for c in row)
            else:  # only need to filter string values, but probably too expensive to pre-check
                row = tuple(
                    store_filter.to_type_filter_element(c.value) for c in row)

            # rows before columns_depth hold columns labels
            if row_count <= columns_depth - 1:
                if columns_depth == 1:
                    columns_values.extend(row[index_depth:])
                elif columns_depth > 1:
                    # NOTE: this orientation will need to be rotated
                    columns_values.append(row[index_depth:])
                continue

            # body rows: split off index labels, keep the remainder as data
            if index_depth == 0:
                data.append(row)
            elif index_depth == 1:
                index_values.append(row[0])
                data.append(row[1:])
            else:
                index_values.append(row[:index_depth])
                data.append(row[index_depth:])

        wb.close()

        # Trim all-empty trailing rows created from style formatting GH#146. As the wb is opened in read-only mode, reverse iterating on the wb is not an option, nor is direct row access by integer; also, evaluating all rows on forward iteration is expensive. Instead, after collecting all the data in a list and closing the wb, reverse iterate and find rows that are all empty.
        # NOTE: need to handle case where there are valid index values

        # the value an empty cell takes after filtering; compare against this
        empty_token = (None if store_filter is None else
                       store_filter.to_type_filter_element(None))

        # scan backwards; the range lower bound of -2 guarantees row_count is
        # assigned (-1) even when data is empty
        for row_count in range(len(data) - 1, -2, -1):
            if row_count < 0:
                break
            if any(c != empty_token
                   for c in data[row_count]):  # try to break early with any
                break
            if index_depth == 1 and index_values[row_count] != empty_token:
                break
            if index_depth > 1 and any(c != empty_token
                                       for c in index_values[row_count]):
                break

        # row_count is set to the first row that has data or index; can be -1
        empty_row_idx = row_count + 1  # index of all-empty row
        if empty_row_idx != len(data):
            # trim data and index_values, if index_depth > 0
            data = data[:empty_row_idx]
            if index_depth > 0:
                index_values = index_values[:empty_row_idx]

        # continue with Index and Frame creation
        index: tp.Optional[IndexBase] = None
        own_index = False
        if index_depth == 1:
            index = Index(index_values)
            own_index = True
        elif index_depth > 1:
            index = IndexHierarchy.from_labels(index_values,
                                               continuation_token=None)
            own_index = True

        columns: tp.Optional[IndexBase] = None
        own_columns = False
        if columns_depth == 1:
            columns = container_type._COLUMNS_CONSTRUCTOR(columns_values)
            own_columns = True
        elif columns_depth > 1:
            # columns were collected per depth level; rotate into per-column tuples
            columns = container_type._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels(
                zip(*columns_values), continuation_token=None)
            own_columns = True

        # NOTE: this might be a Frame or a FrameGO
        return tp.cast(
            Frame,
            container_type.from_records(
                data,
                index=index,
                columns=columns,
                dtypes=config.dtypes,
                own_index=own_index,
                own_columns=own_columns,
                name=name,
                consolidate_blocks=config.consolidate_blocks))