Example #1
0
    def from_concat(cls,
                    containers: tp.Iterable['Series'],
                    *,
                    name: tp.Hashable = None):
        '''
        Build a new Series by concatenating multiple Series; the union of all
        component Indices is assumed to form a unique Index.
        '''
        # single pass over containers (which may be a generator), collecting
        # value and index-label arrays together
        pairs = [(container.values, container.index.values)
                for container in containers]

        # concat_resolved returns immutable arrays with a resolved dtype
        values = concat_resolved([v for v, _ in pairs])
        index = concat_resolved([labels for _, labels in pairs])

        # a 2D label array indicates hierarchical index components
        if index.ndim == 2:
            index = IndexHierarchy.from_labels(index)

        return cls(values, index=index, name=name)
Example #2
0
    def test_concat_resolved_a(self):
        # fixtures of mixed dtypes to exercise dtype resolution
        ints_2d = np.array([[3, 4, 5], [0, 0, 0]])
        ints_row = np.array([1, 2, 3]).reshape((1, 3))
        strs_2d = np.array([('3', '4', '5'), ('1', '1', '1')])
        strs_1d = np.array(['3', '5'])
        ints_1d = np.array([1, 1, 1])

        # vertical concatenation of int and str 2D arrays
        self.assertEqual(
                concat_resolved((ints_2d, strs_2d)).tolist(),
                [[3, 4, 5], [0, 0, 0], ['3', '4', '5'], ['1', '1', '1']])

        # three 2D components, mixed dtypes
        self.assertEqual(
                concat_resolved((strs_2d, ints_2d, ints_row)).tolist(),
                [['3', '4', '5'], ['1', '1', '1'],
                 [3, 4, 5], [0, 0, 0], [1, 2, 3]])

        # horizontal concatenation
        self.assertEqual(
                concat_resolved((ints_2d, strs_2d), axis=1).tolist(),
                [[3, 4, 5, '3', '4', '5'], [0, 0, 0, '1', '1', '1']])

        # 1D concatenation of str and int arrays
        self.assertEqual(
                concat_resolved((strs_1d, ints_1d)).tolist(),
                ['3', '5', 1, 1, 1])
Example #3
0
    def test_concat_resolved_b(self):
        # axis=None is not a supported mode and must raise
        upper = np.array([[3, 4, 5], [0, 0, 0]])
        lower = np.array([1, 2, 3]).reshape((1, 3))

        with self.assertRaises(Exception):
            concat_resolved((upper, lower), axis=None)
Example #4
0
    def _extract_array(self,
            row_key: GetItemKeyType = None,
            column_key: GetItemKeyType = None,
            ) -> np.ndarray:
        '''
        Extract a consolidated array based on iloc selection.

        Args:
            row_key: iloc-style row selection; ``None`` selects all rows.
            column_key: iloc-style column selection; ``None`` selects all columns.

        Returns:
            A single consolidated ``np.ndarray`` covering the selection.
        '''
        assert self._axis_hierarchy is not None #mypy

        # extractor optionally deep-copies arrays read from the Bus
        extractor = get_extractor(
                self._deepcopy_from_bus,
                is_array=True,
                memo_active=False,
                )

        # normalize None to a full selection
        row_key = NULL_SLICE if row_key is None else row_key
        column_key = NULL_SLICE if column_key is None else column_key

        # fast path: full selection concatenates whole component arrays
        if row_key == NULL_SLICE and column_key == NULL_SLICE:
            if len(self._bus) == 1:
                return extractor(self._bus.iloc[0].values)

            # NOTE: do not need to call extractor when concatenate is called, as a new array is always allocated.
            arrays = [f.values for _, f in self._bus.items()]
            return concat_resolved(
                    arrays,
                    axis=self._axis,
                    )

        parts: tp.List[np.ndarray] = []
        bus_keys: tp.Iterable[tp.Hashable]

        # sel_key selects along the concatenation axis; opposite_key along the other
        if self._axis == 0:
            sel_key = row_key
            opposite_key = column_key
        else:
            sel_key = column_key
            opposite_key = row_key

        # an integer key reduces dimensionality of the result
        sel_reduces = isinstance(sel_key, INT_TYPES)
        opposite_reduces = isinstance(opposite_key, INT_TYPES)

        # Boolean mask over the combined axis labels marking the selection
        sel = np.full(len(self._axis_hierarchy), False)
        sel[sel_key] = True

        # get ordered unique Bus labels
        axis_map_sub = self._axis_hierarchy.iloc[sel_key]
        if isinstance(axis_map_sub, tuple): # type: ignore
            # a tuple result means a single label was selected
            bus_keys = (axis_map_sub[0],) #type: ignore
        else:
            bus_keys = axis_map_sub._levels.index

        for key_count, key in enumerate(bus_keys):
            # slice of the Boolean mask that applies to this Bus component
            sel_component = sel[self._axis_hierarchy._loc_to_iloc(HLoc[key])]

            if self._axis == 0:
                component = self._bus.loc[key]._extract_array(sel_component, opposite_key) #type: ignore
                if sel_reduces:
                    # integer selection: reduce to the single selected row
                    component = component[0]
            else:
                component = self._bus.loc[key]._extract_array(opposite_key, sel_component) #type: ignore
                if sel_reduces:
                    # integer selection: reduce to the single selected column,
                    # handling both 1D and 2D components
                    if component.ndim == 1:
                        component = component[0]
                    elif component.ndim == 2:
                        component = component[NULL_SLICE, 0]

            parts.append(component)

        if len(parts) == 1:
            # single component: extractor applies any deep-copy policy
            return extractor(parts.pop())

        # NOTE: concatenate always allocates a new array, thus no need for extractor above
        if sel_reduces or opposite_reduces:
            # NOTE: not sure if concat_resolved is needed here
            return concat_resolved(parts)
        return concat_resolved(parts, axis=self._axis)
Example #5
0
 def test_concat_resolved_axis_1(self, arrays: tp.List[np.ndarray]) -> None:
     # horizontal concatenation must yield a 2D array whose dtype is the
     # resolution of all component dtypes
     result = util.concat_resolved(arrays, axis=1)
     self.assertEqual(result.ndim, 2)
     self.assertEqual(
         result.dtype,
         util.resolve_dtype_iter(x.dtype for x in arrays),
     )
Example #6
0
    def from_arrays(
        self,
        blocks: tp.Iterable[np.ndarray],
        *,
        index: tp.Optional[IndexInitializer] = None,
        columns: tp.Optional[IndexInitializer] = None,
        name: NameType = None,
        axis: int = 0,
    ) -> None:
        '''
        Given an iterable of arrays, write out an NPZ or NPY directly, without building up intermediary :obj:`Frame`. If axis 0, the arrays are vertically stacked; if axis 1, they are horizontally stacked. For both axis, if included, indices must be of appropriate length.

        Args:
            blocks: An iterable of arrays to write as blocks.
            *,
            index: An array, :obj:`Index`, or :obj:`IndexHierarchy`.
            columns: An array, :obj:`Index`, or :obj:`IndexHierarchy`.
            name: A hashable to be stored as the Frame name.
            axis: 0 to stack blocks vertically, 1 to store them side by side.

        Raises:
            UnsupportedOperation: if the archive was not opened for writing.
            RuntimeError: if ``index``/``columns`` are not a supported type,
                or blocks have incompatible shapes.
            AxisInvalid: if ``axis`` is not 0 or 1.
        '''
        if not self._writeable:
            raise UnsupportedOperation('Open with mode "w" to write.')

        metadata: tp.Dict[str, tp.Any] = {}

        # encode the index: either a full Index/IndexHierarchy, a 1D array,
        # or default to an auto-generated Index
        if isinstance(index, IndexBase):
            depth_index = index.depth
            name_index = index.name
            cls_index = index.__class__
            ArchiveIndexConverter.index_encode(
                metadata=metadata,
                archive=self._archive,
                index=index,
                key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
                key_types=Label.KEY_TYPES_INDEX,
                depth=depth_index,
                include=True,
            )
        elif index is not None:
            if index.__class__ is not np.ndarray:
                raise RuntimeError(
                    'index argument must be an Index, IndexHierarchy, or 1D np.ndarray'
                )

            depth_index = 1
            name_index = None
            cls_index = dtype_to_index_cls(True, index.dtype)  #type: ignore
            ArchiveIndexConverter.array_encode(
                metadata=metadata,
                archive=self._archive,
                array=index,
                key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
            )
        else:
            depth_index = 1
            name_index = None
            cls_index = Index

        # encode columns with the same three-way handling as the index
        if isinstance(columns, IndexBase):
            depth_columns = columns.depth
            name_columns = columns.name
            cls_columns = columns.__class__
            ArchiveIndexConverter.index_encode(
                metadata=metadata,
                archive=self._archive,
                index=columns,
                key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
                key_types=Label.KEY_TYPES_COLUMNS,
                depth=depth_columns,
                include=True,
            )
        elif columns is not None:
            if columns.__class__ is not np.ndarray:
                # fixed: message previously said 'index argument'
                raise RuntimeError(
                    'columns argument must be an Index, IndexHierarchy, or 1D np.ndarray'
                )

            depth_columns = 1  # only support 1D
            name_columns = None
            cls_columns = dtype_to_index_cls(True,
                                             columns.dtype)  #type: ignore
            ArchiveIndexConverter.array_encode(
                metadata=metadata,
                archive=self._archive,
                array=columns,
                key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
            )
        else:
            depth_columns = 1  # only support 1D
            name_columns = None
            cls_columns = Index

        metadata[Label.KEY_NAMES] = [
            name,
            name_index,
            name_columns,
        ]
        # do not store Frame class as caller will determine
        metadata[Label.KEY_TYPES] = [
            cls_index.__name__,
            cls_columns.__name__,
        ]

        if axis == 1:
            # initialize i so an empty blocks iterable records a count of 0
            # (previously i was unbound, raising NameError below)
            i = -1
            rows: tp.Optional[int] = None  # None sentinel: 0-row arrays are valid
            for i, array in enumerate(blocks):
                if rows is None:
                    rows = array.shape[0]
                elif array.shape[0] != rows:
                    raise RuntimeError('incompatible block shapes')
                self._archive.write_array(Label.FILE_TEMPLATE_BLOCKS.format(i),
                                          array)
        elif axis == 0:
            # for now, just vertically concat and write, though this has a 2X memory requirement
            resolved = concat_resolved(blocks, axis=0)
            # if this results in an object array, an exception will be raised
            self._archive.write_array(Label.FILE_TEMPLATE_BLOCKS.format(0),
                                      resolved)
            i = 0
        else:
            raise AxisInvalid(f'invalid axis {axis}')

        metadata[Label.KEY_DEPTHS] = [
            i + 1,  # block count
            depth_index,
            depth_columns
        ]
        self._archive.write_metadata(metadata)
Example #7
0
 def test_concat_resolved_axis_0(self, arrays):
     # vertical stacking of 2D inputs stays 2D, with a dtype resolved
     # across all components
     stacked = util.concat_resolved(arrays, axis=0)
     self.assertEqual(stacked.ndim, 2)
     self.assertEqual(
         stacked.dtype,
         util.resolve_dtype_iter(x.dtype for x in arrays),
     )