def test_index_many_set_c(self) -> None:
    # A union over a single IndexDate gives back an equal IndexDate.
    dates = IndexDate(('2020-02-01', '2020-02-02'))

    single = index_many_set((dates,), Index, union=True)
    self.assertEqual(single.__class__, IndexDate)
    self.assertTrue(dates.equals(single))

    # with nothing to combine, the result has no labels
    empty = index_many_set((), Index, union=True)
    self.assertEqual(len(empty), 0) #type: ignore
def test_index_many_set_b(self) -> None:
    # IndexGO is the requested constructor for both calls below.
    labels = Index(('1997-01-01', '1997-01-02'), name='foo')
    dates_a = IndexDate(('2020-01-01', '2020-01-02'), name='foo')
    dates_b = IndexDate(('2020-02-01', '2020-02-02'))

    # an Index mixed with an IndexDate is expected to produce IndexGO
    mixed_union = index_many_set((labels, dates_a), IndexGO, union=True)
    self.assertEqual(mixed_union.__class__, IndexGO)

    # two IndexDate inputs are expected to produce IndexDateGO
    dates_intersection = index_many_set((dates_a, dates_b), IndexGO, union=False)
    self.assertEqual(dates_intersection.__class__, IndexDateGO)
def test_index_many_set_a(self) -> None:
    # Checks the name, class and values of the result for unions and an intersection.
    labels = Index(('1997-01-01', '1997-01-02'), name='foo')
    dates_a = IndexDate(('2020-01-01', '2020-01-02'), name='foo')
    dates_b = IndexDate(('2020-01-02', '2020-01-03'))

    # Index and IndexDate sharing the name 'foo': the name is kept, the class is Index
    mixed = index_many_set((labels, dates_a), Index, union=True)
    assert isinstance(mixed, Index)
    self.assertEqual(mixed.name, 'foo')
    self.assertEqual(mixed.__class__, Index)

    # the result of this operation is an unstable ordering, so membership is
    # checked against a set instead of comparing sequences
    found = set(mixed.values)
    expected_members = (
            '1997-01-01',
            '1997-01-02',
            datetime.date(2020, 1, 1),
            datetime.date(2020, 1, 2),
            )
    for member in expected_members:
        self.assertIn(member, found)

    # two IndexDate with differing names: no name, class stays IndexDate
    date_union = index_many_set((dates_a, dates_b), Index, union=True)
    assert isinstance(date_union, Index)
    self.assertEqual(date_union.name, None)
    self.assertEqual(date_union.__class__, IndexDate)
    self.assertEqual(
            date_union.values.tolist(),
            [datetime.date(2020, 1, day) for day in (1, 2, 3)],
            )

    # the intersection retains only the shared date
    date_intersection = index_many_set((dates_a, dates_b), Index, union=False)
    assert isinstance(date_intersection, Index)
    self.assertEqual(date_intersection.name, None)
    self.assertEqual(date_intersection.__class__, IndexDate)
    self.assertEqual(
            date_intersection.values.tolist(),
            [datetime.date(2020, 1, 2)],
            )
def from_concat(
        cls,
        containers: tp.Iterable['Yarn'],
        *,
        index: tp.Optional[tp.Union[IndexInitializer, IndexAutoFactoryType]] = None,
        name: NameType = NAME_DEFAULT,
        deepcopy_from_bus: bool = False,
        ) -> 'Yarn':
    '''
    Concatenate multiple :obj:`Yarn` into a new :obj:`Yarn`. Loaded status of :obj:`Frame` within each :obj:`Bus` will not be altered.

    Args:
        containers: Iterable of :obj:`Yarn` to combine, in order.
        index: Optionally provide new labels for the result of the concatenation. When ``None``, the union of the indices of the given containers is used.
        name: Given to the :obj:`Series` that holds the collected values.
        deepcopy_from_bus: Forwarded unchanged to the constructor.

    Raises:
        NotImplementedError: if an element of ``containers`` is not a :obj:`Yarn`.
    '''
    # indices only need to be gathered when the caller did not supply labels
    derive_index = index is None
    collected = []
    indices: tp.List[IndexBase] = []

    for yarn in containers:
        if not isinstance(yarn, Yarn):
            raise NotImplementedError(
                    f'cannot instantiate from {type(yarn)}')
        collected.extend(yarn._series.values)
        if derive_index:
            indices.append(yarn.index)

    # Fill the object array one slot at a time.
    # NOTE(review): presumably this avoids NumPy interpreting the elements as
    # nested sequences; confirm before replacing with a bulk assignment.
    array = np.empty(len(collected), dtype=DTYPE_OBJECT)
    for position, component in enumerate(collected):
        array[position] = component
    array.flags.writeable = False

    if derive_index:
        index = index_many_set(indices, Index, union=True)

    return cls(
            Series(array, name=name),
            deepcopy_from_bus=deepcopy_from_bus,
            index=index,
            )
def from_frames(
        self,
        frames: tp.Iterable['Frame'],
        *,
        include_index: bool = True,
        include_columns: bool = True,
        axis: int = 0,
        union: bool = True,
        name: NameType = None,
        fill_value: object = np.nan,
        ) -> None:
    '''Given an iterable of Frames, write out an NPZ or NPY directly, without building up an intermediary Frame.

    Labels along the concatenated axis are joined and must be unique. Labels along the other axis are combined (union or intersection), and any Frame whose labels differ from that combination is reindexed to it with ``fill_value``.

    Args:
        frames: Iterable of Frames; elements that are not a ``Frame`` are converted with ``to_frame(axis)``.
        include_index: With axis 0, if False no index is written; with axis 1 this must be True.
        include_columns: With axis 1, if False no columns are written; with axis 0 this must be True.
        axis: 0 stacks rows (extends columns vertically); 1 stacks columns (extends rows horizontally).
        union: If True, align on the union of labels, otherwise the intersection.
        name: Forwarded to ``from_arrays``.
        fill_value: Value used when a Frame is reindexed for alignment.

    Raises:
        UnsupportedOperation: if this archive was not opened for writing.
        RuntimeError: if concatenated labels are not unique, or if the labels needed for alignment are excluded.
        AxisInvalid: if ``axis`` is neither 0 nor 1.
    '''
    if not self._writeable:
        raise UnsupportedOperation('Open with mode "w" to write.')

    from static_frame.core.type_blocks import TypeBlocks
    from static_frame.core.frame import Frame

    frames = [
        item if isinstance(item, Frame) else item.to_frame(axis)
        for item in frames
    ] # type: ignore

    # NOTE: based on Frame.from_concat
    if axis == 1:
        # horizontal: columns are joined end to end, rows are aligned on index
        if include_columns:
            try:
                columns = index_many_concat(
                        (frame._columns for frame in frames),
                        Index,
                        )
            except ErrorInitIndexNonUnique:
                raise RuntimeError(
                        'Column names after horizontal concatenation are not unique; set include_columns to None to ignore.'
                        )
        else:
            columns = None

        if not include_index:
            raise RuntimeError('Must include index for horizontal alignment.')
        index = index_many_set(
                (frame._index for frame in frames),
                Index,
                union=union,
                )

        def blocks() -> tp.Iterator[np.ndarray]:
            for frame in frames:
                misaligned = (len(frame.index) != len(index)
                        or (frame.index != index).any())
                if misaligned:
                    frame = frame.reindex(index=index, fill_value=fill_value)
                yield from frame._blocks._blocks

    elif axis == 0:
        # vertical: rows are joined end to end, columns are aligned
        if include_index:
            try:
                index = index_many_concat(
                        (frame._index for frame in frames),
                        Index,
                        )
            except ErrorInitIndexNonUnique:
                raise RuntimeError(
                        'Index names after vertical concatenation are not unique; set include_index to None to ignore'
                        )
        else:
            index = None

        if not include_columns:
            raise RuntimeError('Must include columns for vertical alignment.')
        columns = index_many_set(
                (frame._columns for frame in frames),
                Index,
                union=union,
                )

        def blocks() -> tp.Iterator[np.ndarray]:
            aligned_blocks = []
            prior: tp.Optional[Frame] = None
            block_compatible = True
            reblock_compatible = True

            for frame in frames:
                misaligned = (len(frame.columns) != len(columns)
                        or (frame.columns != columns).any())
                if misaligned:
                    frame = frame.reindex(columns=columns, fill_value=fill_value)

                aligned_blocks.append(frame._blocks)
                # column size is all the same by this point
                if prior is not None:
                    # each flag is only re-evaluated while it is still True
                    if block_compatible:
                        # only compare columns
                        block_compatible &= frame._blocks.block_compatible(
                                prior._blocks,
                                axis=1,
                                )
                    if reblock_compatible:
                        reblock_compatible &= frame._blocks.reblock_compatible(
                                prior._blocks)
                prior = frame

            yield from TypeBlocks.vstack_blocks_to_blocks(
                    type_blocks=aligned_blocks,
                    block_compatible=block_compatible,
                    reblock_compatible=reblock_compatible,
                    )
    else:
        raise AxisInvalid(f'no support for {axis}')

    self.from_arrays(
            blocks=blocks(),
            index=index,
            columns=columns,
            name=name,
            axis=1, # blocks are normalized for horizontal concat
            )
def test_index_many_set_h(self) -> None:
    # With no indices supplied, the result should be of the explicit constructor's class.
    empty = index_many_set(
            (),
            Index,
            union=True,
            explicit_constructor=IndexDate,
            )
    self.assertIs(empty.__class__, IndexDate)
def test_index_many_set_g(self) -> None:
    # Intersection where only one of the two inputs was built with loc_is_iloc=True.
    auto_labels = Index(range(2), loc_is_iloc=True)
    reversed_labels = Index([3, 2, 1, 0])

    common = index_many_set((auto_labels, reversed_labels), Index, union=False)

    # the result is expected to carry a label map
    self.assertIsNotNone(common._map) #type: ignore
    self.assertEqual(common.values.tolist(), [0, 1]) #type: ignore
def test_index_many_set_d(self) -> None:
    # Union where both inputs were built with loc_is_iloc=True over the same range.
    left = Index(range(3), loc_is_iloc=True)
    right = Index(range(3), loc_is_iloc=True)

    combined = index_many_set((left, right), Index, union=True)

    # the result is expected to have no label map
    self.assertIsNone(combined._map) #type: ignore
    self.assertEqual(combined.values.tolist(), [0, 1, 2]) #type: ignore