def from_concat(cls, containers: tp.Iterable['Series'], *, name: tp.Hashable = None):
    '''
    Concatenate multiple Series into a new Series, assuming the combination of all Indices result in a unique Index.
    '''
    # collect values and index labels in a single pass, as containers may be a one-shot iterator
    values_parts = []
    label_parts = []
    for container in containers:
        values_parts.append(container.values) # immutable arrays
        label_parts.append(container.index.values)

    combined_values = concat_resolved(values_parts)
    combined_labels = concat_resolved(label_parts)

    # a 2D label array indicates the sources carried hierarchical indices
    if combined_labels.ndim == 2:
        combined_labels = IndexHierarchy.from_labels(combined_labels)

    return cls(combined_values, index=combined_labels, name=name)
def test_concat_resolved_a(self):
    '''Concatenating mixed int/str arrays resolves to a common (object) dtype on both axes.'''
    ints_2d = np.array([[3, 4, 5], [0, 0, 0]])
    ints_row = np.array([1, 2, 3]).reshape((1, 3))
    strs_2d = np.array([('3', '4', '5'), ('1', '1', '1')])
    strs_1d = np.array(['3', '5'])
    ints_1d = np.array([1, 1, 1])

    # vertical stack of int and str 2D arrays
    self.assertEqual(
            concat_resolved((ints_2d, strs_2d)).tolist(),
            [[3, 4, 5], [0, 0, 0], ['3', '4', '5'], ['1', '1', '1']])

    # three blocks, str block first
    self.assertEqual(
            concat_resolved((strs_2d, ints_2d, ints_row)).tolist(),
            [['3', '4', '5'], ['1', '1', '1'], [3, 4, 5], [0, 0, 0], [1, 2, 3]])

    # horizontal stack
    self.assertEqual(
            concat_resolved((ints_2d, strs_2d), axis=1).tolist(),
            [[3, 4, 5, '3', '4', '5'], [0, 0, 0, '1', '1', '1']])

    # 1D inputs stay 1D
    self.assertEqual(
            concat_resolved((strs_1d, ints_1d)).tolist(),
            ['3', '5', 1, 1, 1])
def test_concat_resolved_b(self):
    '''An explicit axis of None is rejected by concat_resolved.'''
    block_a = np.array([[3, 4, 5], [0, 0, 0]])
    block_b = np.array([1, 2, 3]).reshape((1, 3))
    with self.assertRaises(Exception):
        concat_resolved((block_a, block_b), axis=None)
def _extract_array(self,
        row_key: GetItemKeyType = None,
        column_key: GetItemKeyType = None,
        ) -> np.ndarray:
    '''
    Extract a consolidated array based on iloc selection.

    Args:
        row_key: iloc-style row selector; ``None`` is treated as a full slice.
        column_key: iloc-style column selector; ``None`` is treated as a full slice.
    Returns:
        A single consolidated :obj:`np.ndarray` assembled from the contained Bus components; an integer selector on either axis reduces dimensionality.
    '''
    assert self._axis_hierarchy is not None #mypy
    # extractor conditionally deep-copies arrays pulled from the Bus; concat results below skip it because they are newly allocated
    extractor = get_extractor(
            self._deepcopy_from_bus,
            is_array=True,
            memo_active=False,
            )

    row_key = NULL_SLICE if row_key is None else row_key
    column_key = NULL_SLICE if column_key is None else column_key

    # fast path: full selection on both axes
    if row_key == NULL_SLICE and column_key == NULL_SLICE:
        if len(self._bus) == 1:
            return extractor(self._bus.iloc[0].values)
        # NOTE: do not need to call extractor when concatenate is called, as a new array is always allocated.
        arrays = [f.values for _, f in self._bus.items()]
        return concat_resolved(arrays,
                axis=self._axis,
                )

    parts: tp.List[np.ndarray] = []
    bus_keys: tp.Iterable[tp.Hashable]

    # sel_key selects along the axis of concatenation; opposite_key selects along the other axis
    if self._axis == 0:
        sel_key = row_key
        opposite_key = column_key
    else:
        sel_key = column_key
        opposite_key = row_key

    # an integer selector reduces dimensionality on that axis
    sel_reduces = isinstance(sel_key, INT_TYPES)
    opposite_reduces = isinstance(opposite_key, INT_TYPES)

    # Boolean mask over the full concatenated axis marking selected positions
    sel = np.full(len(self._axis_hierarchy), False)
    sel[sel_key] = True

    # get ordered unique Bus labels
    axis_map_sub = self._axis_hierarchy.iloc[sel_key]
    if isinstance(axis_map_sub, tuple): # type: ignore
        # a single (bus label, frame label) pair was selected; take the bus label only
        bus_keys = (axis_map_sub[0],) #type: ignore
    else:
        bus_keys = axis_map_sub._levels.index

    # NOTE(review): key_count appears unused in this loop — confirm before removing
    for key_count, key in enumerate(bus_keys):
        # portion of the selection mask that falls within this Bus component
        sel_component = sel[self._axis_hierarchy._loc_to_iloc(HLoc[key])]

        if self._axis == 0:
            component = self._bus.loc[key]._extract_array(sel_component, opposite_key) #type: ignore
            if sel_reduces:
                # integer row selection: keep the single selected row
                component = component[0]
        else:
            component = self._bus.loc[key]._extract_array(opposite_key, sel_component) #type: ignore
            if sel_reduces:
                # integer column selection: reduce the column dimension
                if component.ndim == 1:
                    component = component[0]
                elif component.ndim == 2:
                    component = component[NULL_SLICE, 0]

        parts.append(component)

    if len(parts) == 1:
        return extractor(parts.pop())

    # NOTE: concatenate always allocates a new array, thus no need for extractor above
    if sel_reduces or opposite_reduces:
        # NOTE(review): not sure if concat_resolved is needed here — parts may already share a resolved dtype; confirm
        return concat_resolved(parts)
    return concat_resolved(parts, axis=self._axis)
def test_concat_resolved_axis_1(self, arrays: tp.List[np.ndarray]) -> None:
    '''Horizontal concatenation yields a 2D array whose dtype is the resolved dtype of all inputs.'''
    result = util.concat_resolved(arrays, axis=1)
    self.assertEqual(result.ndim, 2)
    expected_dtype = util.resolve_dtype_iter(x.dtype for x in arrays)
    self.assertEqual(result.dtype, expected_dtype)
def from_arrays(self,
        blocks: tp.Iterable[np.ndarray],
        *,
        index: tp.Optional[IndexInitializer] = None,
        columns: tp.Optional[IndexInitializer] = None,
        name: NameType = None,
        axis: int = 0,
        ) -> None:
    '''
    Given an iterable of arrays, write out an NPZ or NPY directly, without building up intermediary :obj:`Frame`. If axis 0, the arrays are vertically stacked; if axis 1, they are horizontally stacked. For both axis, if included, indices must be of appropriate length.

    Args:
        blocks: Iterable of arrays to be written as the Frame's blocks.
        index: An array, :obj:`Index`, or :obj:`IndexHierarchy`.
        columns: An array, :obj:`Index`, or :obj:`IndexHierarchy`.
        name: Hashable name recorded in the archive metadata.
        axis: 0 to vertically concatenate blocks into one array; 1 to store blocks side by side.

    Raises:
        UnsupportedOperation: if the archive was not opened writeable.
        RuntimeError: if index/columns are of invalid type, or blocks have incompatible row counts.
        AxisInvalid: if axis is not 0 or 1.
    '''
    if not self._writeable:
        raise UnsupportedOperation('Open with mode "w" to write.')

    metadata: tp.Dict[str, tp.Any] = {}

    if isinstance(index, IndexBase):
        depth_index = index.depth
        name_index = index.name
        cls_index = index.__class__
        ArchiveIndexConverter.index_encode(
                metadata=metadata,
                archive=self._archive,
                index=index,
                key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
                key_types=Label.KEY_TYPES_INDEX,
                depth=depth_index,
                include=True,
                )
    elif index is not None:
        if index.__class__ is not np.ndarray:
            raise RuntimeError('index argument must be an Index, IndexHierarchy, or 1D np.ndarray')
        depth_index = 1 # only support 1D
        name_index = None
        cls_index = dtype_to_index_cls(True, index.dtype) #type: ignore
        ArchiveIndexConverter.array_encode(
                metadata=metadata,
                archive=self._archive,
                array=index,
                key_template_values=Label.FILE_TEMPLATE_VALUES_INDEX,
                )
    else:
        depth_index = 1
        name_index = None
        cls_index = Index

    if isinstance(columns, IndexBase):
        depth_columns = columns.depth
        name_columns = columns.name
        cls_columns = columns.__class__
        ArchiveIndexConverter.index_encode(
                metadata=metadata,
                archive=self._archive,
                index=columns,
                key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
                key_types=Label.KEY_TYPES_COLUMNS,
                depth=depth_columns,
                include=True,
                )
    elif columns is not None:
        if columns.__class__ is not np.ndarray:
            # NOTE: message corrected — previously a copy-paste of the index branch reported 'index argument' here
            raise RuntimeError('columns argument must be an Index, IndexHierarchy, or 1D np.ndarray')
        depth_columns = 1 # only support 1D
        name_columns = None
        cls_columns = dtype_to_index_cls(True, columns.dtype) #type: ignore
        ArchiveIndexConverter.array_encode(
                metadata=metadata,
                archive=self._archive,
                array=columns,
                key_template_values=Label.FILE_TEMPLATE_VALUES_COLUMNS,
                )
    else:
        depth_columns = 1 # only support 1D
        name_columns = None
        cls_columns = Index

    metadata[Label.KEY_NAMES] = [name,
            name_index,
            name_columns,
            ]
    # do not store Frame class as caller will determine
    metadata[Label.KEY_TYPES] = [cls_index.__name__,
            cls_columns.__name__,
            ]

    if axis == 1:
        # NOTE: use a None sentinel (not 0) so a zero-row first block still pins the expected row count
        rows: tp.Optional[int] = None
        # NOTE: pre-set so an empty blocks iterable records a block count of 0 instead of raising NameError below
        i = -1
        for i, array in enumerate(blocks):
            if rows is None:
                rows = array.shape[0]
            elif array.shape[0] != rows:
                raise RuntimeError('incompatible block shapes')
            self._archive.write_array(Label.FILE_TEMPLATE_BLOCKS.format(i), array)
    elif axis == 0:
        # for now, just vertically concat and write, though this has a 2X memory requirement
        resolved = concat_resolved(blocks, axis=0)
        # if this results in an object array, an exception will be raised
        self._archive.write_array(Label.FILE_TEMPLATE_BLOCKS.format(0), resolved)
        i = 0
    else:
        raise AxisInvalid(f'invalid axis {axis}')

    metadata[Label.KEY_DEPTHS] = [
            i + 1, # block count
            depth_index,
            depth_columns,
            ]
    self._archive.write_metadata(metadata)
def test_concat_resolved_axis_0(self, arrays):
    '''Vertical concatenation yields a 2D array whose dtype is the resolved dtype of all inputs.'''
    result = util.concat_resolved(arrays, axis=0)
    self.assertEqual(result.ndim, 2)
    expected_dtype = util.resolve_dtype_iter(x.dtype for x in arrays)
    self.assertEqual(result.dtype, expected_dtype)