def test_indexers_to_iloc_c(self) -> None:
    '''Out-of-range indexer values raise KeyError; the remaining valid rows still resolve.'''
    idx = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    arr = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    locmap = HierarchicalLocMap(indices=idx, indexers=arr)

    # corrupt one row per depth with an out-of-range value
    bad = arr.copy()
    bad[0][0] = 14
    bad[1][7] = 14
    bad = bad.T.astype(DTYPE_UINT_DEFAULT)

    with self.assertRaises(KeyError):
        _ = locmap.indexers_to_iloc(bad.copy())
    with self.assertRaises(KeyError):
        _ = locmap.indexers_to_iloc(bad[[0]].copy())
    with self.assertRaises(KeyError):
        _ = locmap.indexers_to_iloc(bad[[7]].copy())

    # excluding the two corrupted rows (0 and 7), lookup succeeds
    keep = [1, 2, 3, 4, 5, 6, 8, 9]
    self.assertListEqual(locmap.indexers_to_iloc(bad[keep].copy()), keep)
def test_deepcopy_a(self) -> None:
    '''deepcopy produces an equal map whose mutable components are distinct objects.'''
    idx = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    arr = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    original = HierarchicalLocMap(indices=idx, indexers=arr)
    clone = deepcopy(original)

    # equal state...
    self.assertEqual(original.encoding_can_overflow, clone.encoding_can_overflow)
    self.assertListEqual(
            original.bit_offset_encoders.tolist(),
            clone.bit_offset_encoders.tolist())
    self.assertEqual(original.encoded_indexer_map, clone.encoded_indexer_map)
    # ...but not shared objects
    self.assertIsNot(original.bit_offset_encoders, clone.bit_offset_encoders)
    self.assertIsNot(original.encoded_indexer_map, clone.encoded_indexer_map)
def __init__(self,
        labels: IndexInitializer,
        *,
        name: tp.Optional[tp.Hashable] = None
        ):
    '''Initializer.

    Args:
        labels: iterable of labels used to construct the index.
        name: optional hashable name for the index.
    '''
    # reduce to arguments relevant for these derived classes
    Index.__init__(self, labels=labels, name=name)
def test_init_c(self) -> None:
    '''Duplicated label pairs must raise ErrorInitIndexNonUnique.'''
    idx = [Index((0, 1)), Index((0, 1))]
    # columns 3 and 4 encode the same (1, 1) pair
    arr = np.array([
            [0, 0, 1, 1, 1],
            [0, 1, 0, 1, 1],
            ])
    with self.assertRaises(ErrorInitIndexNonUnique):
        HierarchicalLocMap(indices=idx, indexers=arr)
def __init__(self, labels: IndexInitializer, *, name: tp.Optional[tp.Hashable] = None): '''Initializer. {args} ''' # __init__ here leaves out the dtype argument, reducing the signature to arguments relevant for these derived classes Index.__init__(self, labels=labels, name=name)
def test_index_correspondence_a(self) -> None:
    '''Partial overlap between a loc_is_iloc index and an object index.'''
    src = Index([0, 1, 2, 3, 4], loc_is_iloc=True)
    dst = Index(
            [0, 1, 2, 3, 4, '100185', '100828', '101376', '100312', '101092'],
            dtype=object)
    ic = IndexCorrespondence.from_correspondence(src, dst)

    self.assertFalse(ic.is_subset)
    self.assertTrue(ic.has_common)
    # iloc_src is an array, due to loc_is_iloc being True
    assert isinstance(ic.iloc_src, np.ndarray)
    self.assertEqual(ic.iloc_src.tolist(), [0, 1, 2, 3, 4])
    self.assertEqual(ic.iloc_dst, [0, 1, 2, 3, 4])
def test_indexers_to_iloc_a(self) -> None:
    '''Round-trip: the map's own indexers resolve back to sequential ilocs.'''
    idx = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    arr = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    locmap = HierarchicalLocMap(indices=idx, indexers=arr)
    result = locmap.indexers_to_iloc(arr.T.astype(DTYPE_UINT_DEFAULT))
    self.assertListEqual(result, list(range(10)))
def test_nbytes_a(self) -> None:
    '''nbytes reflects the map's components; accepted totals vary by platform.'''
    idx = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    arr = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    locmap = HierarchicalLocMap(indices=idx, indexers=arr)
    # automap + 2 uint64 bit offsets + PyBool
    self.assertIn(locmap.nbytes, (720 + 8 + 8 + 25, 721, 705))
def reindex(self, index: tp.Union[Index, tp.Sequence[tp.Any]], fill_value=np.nan) -> 'Series': ''' Return a new Series based on the passed index. Args: fill_value: attempted to be used, but may be coerced by the dtype of this Series. ` ''' # TODO: implement `method` argument with bfill, ffill options if isinstance(index, (Index, IndexHierarchy)): # always use the Index constructor for safe reuse when possible index = index.__class__(index) else: # create the Index if not already an index, assume 1D index = Index(index) ic = IndexCorrespondence.from_correspondence(self.index, index) if ic.is_subset: # must have some common return self.__class__(self.values[ic.iloc_src], index=index, own_index=True) values = _full_for_fill(self.values.dtype, len(index), fill_value) # if some intersection of values if ic.has_common: values[ic.iloc_dst] = self.values[ic.iloc_src] # make immutable so a copy is not made values.flags.writeable = False return self.__class__(values, index=index, own_index=True)
def test_init_a(self) -> None:
    '''Construction computes encodings, overflow flag, and bit offsets.'''
    idx = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    arr = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    locmap = HierarchicalLocMap(indices=idx, indexers=arr)

    self.assertListEqual(
            list(locmap.encoded_indexer_map),
            [35, 19, 8, 1, 28, 0, 27, 18, 2, 32])
    self.assertFalse(locmap.encoding_can_overflow)
    self.assertListEqual(locmap.bit_offset_encoders.tolist(), [0, 3])
def __init__(self,
        labels: IndexInitializer,
        *,
        name: NameType = NAME_DEFAULT,
        loc_is_iloc: bool = False,
        ):
    '''Initializer.

    {args}
    '''
    # NOTE(review): this guard is stripped when run with -O; presumably loc_is_iloc is never True for this subclass — confirm, and consider an explicit raise
    assert not loc_is_iloc
    # __init__ here leaves out the dtype argument, reducing the signature to arguments relevant for these derived classes
    Index.__init__(self,
            labels=labels,
            name=name,
            loc_is_iloc=loc_is_iloc,
            )
def test_init_b(self) -> None:
    '''Empty indices and indexers produce an empty map with one zero offset per depth.'''
    empty_indices = [Index(()) for _ in range(4)]
    empty_indexers = [np.array(()) for _ in range(4)]
    locmap = HierarchicalLocMap(indices=empty_indices, indexers=empty_indexers)

    self.assertListEqual(list(locmap.encoded_indexer_map), [])
    self.assertFalse(locmap.encoding_can_overflow)
    self.assertListEqual(locmap.bit_offset_encoders.tolist(), [0, 0, 0, 0])
def test_indexers_to_iloc_b(self) -> None:
    '''Arbitrary row subsets resolve to the same subset of ilocs.'''
    idx = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    arr = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    locmap = HierarchicalLocMap(indices=idx, indexers=arr)

    encoded = arr.T.astype(DTYPE_UINT_DEFAULT) # hoisted: invariant across subsets
    for subset in ([5, 2, 4, 1, 3], [1], [9, 8, 7, 6, 5], [1, 7, 4, 6]):
        self.assertListEqual(locmap.indexers_to_iloc(encoded[subset]), subset)
def test_loc_map_b(self) -> None:
    '''A list key maps to a list of positions.'''
    idx = Index(['a', 'b', 'c', 'd', 'e'])
    result = LocMap.loc_to_iloc(
            label_to_pos=idx._map,
            labels=idx._labels,
            positions=idx._positions,
            key=['b', 'd'],
            partial_selection=False,
            )
    self.assertEqual(result, [1, 3])
def test_index_correspondence_b(self) -> None:
    '''Self-correspondence of a single-element Boolean index; issue found with a hypothesis test.'''
    idx = Index([False], loc_is_iloc=False)
    ic = IndexCorrespondence.from_correspondence(idx, idx)

    self.assertTrue(ic.is_subset)
    self.assertTrue(ic.has_common)
    self.assertEqual(ic.size, 1)
    # iloc_src is a list in this use case
    self.assertEqual(ic.iloc_src, [0])
    self.assertEqual(ic.iloc_dst.tolist(), [0]) # type: ignore
def test_loc_to_iloc_b(self) -> None:
    '''Keys containing labels absent from the indices raise KeyError.'''
    idx = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    arr = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    locmap = HierarchicalLocMap(indices=idx, indexers=arr)

    # each key names at least one pair not present in the map
    for key in ((5, 'A'), (2, ['E']), ([0, 1, 2], ['A', 'B', 'C'])):
        with self.assertRaises(KeyError):
            locmap.loc_to_iloc(key, idx)
def loc_to_iloc(self, # type: ignore
        key: GetItemKeyType,
        offset: tp.Optional[int] = None,
        ) -> GetItemKeyType:
    '''
    Specialized for IndexData indices to convert string data representations into np.datetime64 objects as appropriate.

    Args:
        key: loc-style selection; strings are transformed via key_to_datetime_key before delegation.
        offset: optional integer offset forwarded to the base implementation.
    '''
    # not passing self.dtype to key_to_datetime_key so as to allow translation to a foreign datetime; slice comparison will be handled by map_slice_args
    return Index.loc_to_iloc(self,
            key=key,
            offset=offset,
            key_transform=key_to_datetime_key)
def test_bus_to_hierarchy_a(self) -> None:
    '''bus_to_hierarchy over axes 0 and 1, with and without deepcopy, on frames sharing index and columns.'''
    f1 = ff.parse('s(4,4)|v(int,float)|c(I, str)').rename('f1')
    f2 = ff.parse('s(4,4)|v(str)|c(I, str)').rename('f2')
    f3 = ff.parse('s(4,4)|v(bool)|c(I, str)').rename('f3')
    b1 = Bus.from_frames((f1, f2, f3), name='a')

    indices = Index((0, 1, 2, 3))
    columns = Index(('zZbu', 'ztsv', 'zUvW', 'zkuW'))

    # sanity check: every frame in the bus shares the same index and columns
    for _, frame in b1.items():
        self.assertTrue(indices.equals(frame.index))
        self.assertTrue(columns.equals(frame.columns))

    def test_assertions(axis: int, flag: bool) -> None:
        # axis selects which axis is concatenated into the hierarchy; flag toggles deepcopy_from_bus
        hierarchy, opposite = bus_to_hierarchy(
                b1,
                axis=axis,
                deepcopy_from_bus=flag,
                init_exception_cls=ErrorInitBus)
        if axis == 0:
            expected_tree: tp.Dict[str, Index] = {
                    'f1': indices, 'f2': indices, 'f3': indices}
            expected_index = columns
        else:
            expected_index = indices
            expected_tree = {'f1': columns, 'f2': columns, 'f3': columns}
        self.compare_trees(hierarchy.to_tree(), expected_tree)
        self.assertTrue(expected_index.equals(opposite))

    # exercise all four axis/deepcopy combinations
    for axis in (0, 1):
        for flag in (True, False):
            test_assertions(axis, flag)
def _loc_to_iloc(self, # type: ignore
        key: GetItemKeyType,
        *,
        partial_selection: bool = False,
        ) -> GetItemKeyType:
    '''
    Specialized for IndexData indices to convert string data representations into np.datetime64 objects as appropriate.

    Args:
        key: loc-style selection; strings are transformed via key_to_datetime_key before delegation.
        partial_selection: forwarded unchanged to the base implementation.
    '''
    # not passing self.dtype to key_to_datetime_key so as to allow translation to a foreign datetime; slice comparison will be handled by map_slice_args
    return Index._loc_to_iloc(self,
            key=key,
            key_transform=key_to_datetime_key,
            partial_selection=partial_selection,
            )
def test_indexers_to_iloc_invalid_input(self) -> None:
    '''Malformed indexer arrays are rejected by internal assertions.'''
    indices = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    indexers = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    hlmap = HierarchicalLocMap(indices=indices, indexers=indexers)

    # 1D
    with self.assertRaises(AssertionError):
        hlmap.indexers_to_iloc(np.array([0, 1, 2]))

    # Shape mismatch
    with self.assertRaises(AssertionError):
        hlmap.indexers_to_iloc(np.array([[0, 1, 2]]))

    # Invalid dtype
    with self.assertRaises(AssertionError):
        hlmap.indexers_to_iloc(np.array([[0, 1]]).astype(object))
def test_loc_to_iloc_a(self) -> None:
    '''Scalar and list key variants resolve to the expected ilocs.'''
    idx = [
            Index(np.arange(5)),
            Index(tuple('ABCDE')),
            ]
    arr = np.array([
            [3, 3, 0, 1, 4, 0, 3, 2, 2, 0],
            [4, 2, 1, 0, 3, 0, 3, 2, 0, 4],
            ])
    locmap = HierarchicalLocMap(indices=idx, indexers=arr)

    # (key, expected) pairs; list components produce list results and preserve order
    cases = (
            ((2, 'A'), 8),
            ((2, ['A']), [8]),
            (([2], 'A'), [8]),
            (([2], ['A']), [8]),
            (([0, 3], 'E'), [9, 0]),
            (([0, 3], ['E']), [9, 0]),
            (([3, 0], 'E'), [0, 9]),
            (([3, 0], ['E']), [0, 9]),
            )
    for key, expected in cases:
        self.assertEqual(locmap.loc_to_iloc(key, idx), expected)

    # an object array key behaves like a scalar pair
    self.assertEqual(
            locmap.loc_to_iloc(np.array([0, 'E'], dtype=object), idx), 9)
def test_archive_components_npz_write_arrays_g(self) -> None:
    '''NPZ round-trip of mixed-dtype blocks with a named index.'''
    block_int = np.arange(12).reshape(3, 4)
    block_str = np.array(['a', 'b', 'c'])
    block_bool = np.array([True, False, True])

    with temp_file('.zip') as fp:
        idx = Index((10, 20, 30), name='foo')
        NPZ(fp, 'w').from_arrays(
                blocks=(block_int, block_str, block_bool),
                index=idx,
                name='bar',
                axis=1)
        result = Frame.from_npz(fp)
        self.assertEqual(result.to_pairs(),
                ((0, ((10, 0), (20, 4), (30, 8))), (1, ((10, 1), (20, 5), (30, 9))), (2, ((10, 2), (20, 6), (30, 10))), (3, ((10, 3), (20, 7), (30, 11))), (4, ((10, 'a'), (20, 'b'), (30, 'c'))), (5, ((10, True), (20, False), (30, True))))
                )
        self.assertEqual(result.name, 'bar')
        self.assertEqual(result.index.name, 'foo')
def test_archive_components_npy_write_arrays_h(self) -> None:
    '''NPY round-trip of mixed-dtype blocks with named columns.'''
    block_int = np.arange(12).reshape(3, 4)
    block_str = np.array(['a', 'b', 'c'])
    block_bool = np.array([True, False, True])

    with TemporaryDirectory() as fp:
        cols = Index(('a', 'b', 'c', 'd', 'e', 'f'), name='foo')
        NPY(fp, 'w').from_arrays(
                blocks=(block_int, block_str, block_bool),
                columns=cols,
                name='bar',
                axis=1)
        result = Frame.from_npy(fp)
        self.assertEqual(result.to_pairs(),
                (('a', ((0, 0), (1, 4), (2, 8))), ('b', ((0, 1), (1, 5), (2, 9))), ('c', ((0, 2), (1, 6), (2, 10))), ('d', ((0, 3), (1, 7), (2, 11))), ('e', ((0, 'a'), (1, 'b'), (2, 'c'))), ('f', ((0, True), (1, False), (2, True))))
                )
        self.assertEqual(result.name, 'bar')
        self.assertEqual(result.columns.name, 'foo')
def iloc_searchsorted(self, values: tp.Any, *, side_left: bool = True, ) -> tp.Union[tp.Hashable, tp.Iterable[tp.Hashable]]: ''' {doc} Args: {values} {side_left} ''' # permit variable forms of date specification return Index.iloc_searchsorted(self, #type: ignore [no-any-return] key_to_datetime_key(values), side_left=side_left, )
def test_loc_map_a(self) -> None:
    '''Scalar keys map to positions; NULL_SLICE passes through unchanged.'''
    idx = Index(['a', 'b', 'c'])
    # arguments shared by both invocations
    common = dict(
            label_to_pos=idx._map,
            labels=idx._labels,
            positions=idx._positions,
            partial_selection=False,
            )
    self.assertEqual(LocMap.loc_to_iloc(key='b', **common), 1)
    self.assertEqual(LocMap.loc_to_iloc(key=NULL_SLICE, **common), NULL_SLICE)
def test_bus_to_hierarchy_b(self) -> None:
    '''bus_to_hierarchy with hierarchical axes: succeeds along the shared axis, raises the supplied exception class along the unshared one.'''
    # custom exception to prove init_exception_cls is honored
    class CustomError(Exception): pass

    tree1 = dict(a_I=Index((1,2,3)), a_II=Index((1,2,3)))
    tree2 = dict(b_I=Index((1,2,3)), b_II=Index((1,2,3)))
    tree3 = dict(c_I=Index((1,2,3)), c_II=Index((1,2,3)))
    index1 = IndexHierarchy.from_tree(tree1)
    index2 = IndexHierarchy.from_tree(tree2)
    index3 = IndexHierarchy.from_tree(tree3)
    values = np.arange(36).reshape(6,6)

    # Align all the frames on columns!
    f1 = Frame(values, index=index1, columns=index1, name='f1')
    f2 = Frame(values, index=index2, columns=index1, name='f2')
    f3 = Frame(values, index=index3, columns=index1, name='f3')
    b1 = Bus.from_frames((f1, f2, f3))

    def test_assertions(hierarchy: IndexHierarchy, opposite: Index) -> None:
        # the hierarchy nests each frame's per-frame tree; the opposite axis must equal the shared index1
        expected_tree = dict(f1=tree1, f2=tree2, f3=tree3)
        self.compare_trees(hierarchy.to_tree(), expected_tree)
        self.assertTrue(index1.equals(opposite))

    test_assertions(*bus_to_hierarchy(b1, axis=0, deepcopy_from_bus=False, init_exception_cls=CustomError))
    test_assertions(*bus_to_hierarchy(b1, axis=0, deepcopy_from_bus=True, init_exception_cls=CustomError))

    # Cannot do this since the frames do not share the same index
    with self.assertRaises(CustomError):
        bus_to_hierarchy(b1, axis=1, deepcopy_from_bus=False, init_exception_cls=CustomError)
    with self.assertRaises(CustomError):
        bus_to_hierarchy(b1, axis=1, deepcopy_from_bus=True, init_exception_cls=CustomError)

    # Align all the frames on index!
    f1 = Frame(values, index=index1, columns=index1, name='f1')
    f2 = Frame(values, index=index1, columns=index2, name='f2')
    f3 = Frame(values, index=index1, columns=index3, name='f3')
    b1 = Bus.from_frames((f1, f2, f3))

    test_assertions(*bus_to_hierarchy(b1, axis=1, deepcopy_from_bus=False, init_exception_cls=CustomError))
    test_assertions(*bus_to_hierarchy(b1, axis=1, deepcopy_from_bus=True, init_exception_cls=CustomError))

    # Cannot do this since the frames do not share the same columns
    with self.assertRaises(CustomError):
        bus_to_hierarchy(b1, axis=0, deepcopy_from_bus=False, init_exception_cls=CustomError)
    with self.assertRaises(CustomError):
        bus_to_hierarchy(b1, axis=0, deepcopy_from_bus=True, init_exception_cls=CustomError)
def test_pivot_items_to_block_a(self) -> None:
    '''Single-depth grouping with no aggregation function returns column values in group order.'''
    frame = ff.parse('s(6,4)|v(int)').assign[0](range(6))
    outer = Index(frame[0].values.tolist())
    result = pivot_items_to_block(
            blocks=frame._blocks,
            group_fields_iloc=[0],
            group_depth=1,
            data_field_iloc=3,
            func_single=None,
            dtype=np.dtype(int),
            fill_value=0,
            fill_value_dtype=np.dtype(int),
            index_outer=outer,
            kind='mergesort',
            )
    self.assertEqual(result.tolist(),
            [129017, 35021, 166924, 122246, 197228, 105269])
def read(self,
        label: tp.Optional[str] = None,
        *,
        config: tp.Optional[StoreConfig] = None,
        store_filter: tp.Optional[StoreFilter] = STORE_FILTER_DEFAULT,
        container_type: tp.Type[Frame] = Frame,
        ) -> Frame:
    '''
    Read a sheet from an XLSX workbook into a container.

    Args:
        label: Name of sheet to read from XLSX; when None, the first sheet is read and no name is assigned.
        config: StoreConfig controlling index/columns depth, header/footer skipping, and trimming; defaults are used when None.
        store_filter: optional filter applied to each cell value on read.
        container_type: Type of container to be returned, either Frame or a Frame subclass.
    '''
    if config is None:
        config = StoreConfig() # get default
    index_depth = config.index_depth
    index_name_depth_level = config.index_name_depth_level
    columns_depth = config.columns_depth
    columns_name_depth_level = config.columns_name_depth_level
    trim_nadir = config.trim_nadir
    skip_header = config.skip_header
    skip_footer = config.skip_footer

    wb = self._load_workbook(self._fp)
    if label is None:
        ws = wb[wb.sheetnames[0]]
        name = None # do not set to default sheet name
    else:
        ws = wb[label]
        name = ws.title

    if ws.max_column <= 1 or ws.max_row <= 1:
        # https://openpyxl.readthedocs.io/en/stable/optimized.html
        # says that some clients might not report correct dimensions
        ws.calculate_dimension()

    max_column = ws.max_column
    max_row = ws.max_row

    # adjust for downward shift for skipping header, then reduce for footer; at this value and beyond we stop
    last_row_count = max_row - skip_header - skip_footer

    index_values: tp.List[tp.Any] = []
    columns_values: tp.List[tp.Any] = []

    data = [] # pre-size with None?
    apex_rows = []

    if trim_nadir:
        # mask marks all-None cells so trailing empty rows/columns can be trimmed later
        mask = np.full((last_row_count, max_column), False)

    for row_count, row in enumerate(
            ws.iter_rows(max_row=max_row), start=-skip_header):
        if row_count < 0:
            continue # due to skip header; preserves comparison to columns_depth
        if row_count >= last_row_count:
            break

        if trim_nadir:
            row_data: tp.Sequence[tp.Any] = []
            for col_count, c in enumerate(row):
                if store_filter is None:
                    value = c.value
                else:
                    value = store_filter.to_type_filter_element(c.value)
                if value is None: # NOTE: only checking None, not np.nan
                    mask[row_count, col_count] = True
                row_data.append(value) # type: ignore
            if not row_data:
                mask[row_count] = True
        else:
            if store_filter is None:
                row_data = tuple(c.value for c in row)
            else: # only need to filter string values, but probably too expensive to pre-check
                row_data = tuple(
                        store_filter.to_type_filter_element(c.value)
                        for c in row)

        if row_count <= columns_depth - 1:
            # still within the columns-label region; collect apex and column labels
            apex_rows.append(row_data[:index_depth])
            if columns_depth == 1:
                columns_values.extend(row_data[index_depth:])
            elif columns_depth > 1:
                columns_values.append(row_data[index_depth:])
            continue

        if index_depth == 0:
            data.append(row_data)
        elif index_depth == 1:
            index_values.append(row_data[0])
            data.append(row_data[1:])
        else:
            index_values.append(row_data[:index_depth])
            data.append(row_data[index_depth:])

    wb.close()

    #-----------------------------------------------------------------------
    # Trim all-empty trailing rows created from style formatting GH#146. As the wb is opened in read-only mode, reverse iterating on the wb is not an option, nor is direct row access by integer

    if trim_nadir:
        # NOTE: `mask` is all data, while `data` is post index/columns extraction; this means that if a non-None label is found, the row/column will not be trimmed.
        row_mask = mask.all(axis=1)
        row_trim_start = array1d_to_last_contiguous_to_edge(row_mask) - columns_depth
        if row_trim_start < len(row_mask) - columns_depth:
            data = data[:row_trim_start]
            if index_depth > 0: # this handles depth 1 and greater
                index_values = index_values[:row_trim_start]

        col_mask = mask.all(axis=0)
        col_trim_start = array1d_to_last_contiguous_to_edge(col_mask) - index_depth
        if col_trim_start < len(col_mask) - index_depth:
            data = (r[:col_trim_start] for r in data) #type: ignore
            if columns_depth == 1:
                columns_values = columns_values[:col_trim_start]
            if columns_depth > 1:
                columns_values = (r[:col_trim_start] for r in columns_values) #type: ignore

    #-----------------------------------------------------------------------
    # continue with Index and Frame creation
    index_name = None if columns_depth == 0 else apex_to_name(
            rows=apex_rows,
            depth_level=index_name_depth_level,
            axis=0,
            axis_depth=index_depth)

    index: tp.Optional[IndexBase] = None
    own_index = False
    if index_depth == 1:
        index = Index(index_values, name=index_name)
        own_index = True
    elif index_depth > 1:
        index = IndexHierarchy.from_labels(
                index_values,
                continuation_token=None,
                name=index_name,
                )
        own_index = True

    columns_name = None if index_depth == 0 else apex_to_name(
            rows=apex_rows,
            depth_level=columns_name_depth_level,
            axis=1,
            axis_depth=columns_depth)

    columns: tp.Optional[IndexBase] = None
    own_columns = False
    if columns_depth == 1:
        columns = container_type._COLUMNS_CONSTRUCTOR(
                columns_values,
                name=columns_name)
        own_columns = True
    elif columns_depth > 1:
        columns = container_type._COLUMNS_HIERARCHY_CONSTRUCTOR.from_labels(
                zip(*columns_values),
                continuation_token=None,
                name=columns_name,
                )
        own_columns = True

    return container_type.from_records(data, #type: ignore
            index=index,
            columns=columns,
            dtypes=config.dtypes,
            own_index=own_index,
            own_columns=own_columns,
            name=name,
            consolidate_blocks=config.consolidate_blocks)
def test_archive_components_npz_write_arrays_c(self) -> None:
    '''Writing a single block with an index and no columns succeeds.'''
    with temp_file('.zip') as fp:
        block = np.arange(12).reshape(3, 4)
        NPZ(fp, 'w').from_arrays(blocks=(block, ), index=Index((10, 20, 30)))
class Series(metaclass=MetaOperatorDelegate): ''' A one-dimensional ordered, labelled collection, immutable and of fixed size. Args: values: An iterable of values, or a single object, to be aligned with the supplied (or automatically generated) index. Alternatively, a dictionary of index / value pairs can be provided. index: Option index initializer. If provided, lenght must be equal to length of values. own_index: Flag index as ownable by Series; primarily for internal clients. ''' __slots__ = ( 'values', '_index', ) @classmethod def from_items(cls, pairs: tp.Iterable[tp.Tuple[tp.Hashable, tp.Any]], dtype: DtypeSpecifier = None) -> 'Series': '''Series construction from an iterator or generator of pairs, where the first pair value is the index and the second is the value. Args: pairs: Iterable of pairs of index, value. dtype: dtype or valid dtype specifier. Returns: :py:class:`static_frame.Series` ''' index = [] def values(): for pair in pairs: # populate index as side effect of iterating values index.append(pair[0]) yield pair[1] return cls(values(), index=index, dtype=dtype) # # @classmethod # def from_record() @classmethod def from_pandas(cls, value, *, own_data: bool = False, own_index: bool = False) -> 'Series': '''Given a Pandas Series, return a Series. Args: own_data: If True, the underlying NumPy data array will be made immutable and used without a copy. own_index: If True, the underlying NumPy index label array will be made immutable and used without a copy. 
Returns: :py:class:`static_frame.Series` ''' if own_data: data = value.values data.flags.writeable = False else: data = immutable_filter(value.values) if own_index: index = value.index.values index.flags.writeable = False else: index = immutable_filter(value.index.values) # index is already managed, can own return cls(data, index=index) def __init__(self, values: SeriesInitializer, *, index: IndexInitializer = None, dtype: DtypeSpecifier = None, own_index: bool = False) -> None: #----------------------------------------------------------------------- # values assignment values_constructor = None # if deferred # expose .values directly as it is immutable if not isinstance(values, np.ndarray): if isinstance(values, dict): # not sure if we should sort; not sure what to do if index is provided if index is not None: raise Exception( 'cannot create a Series from a dictionary when an index is defined' ) index = [] def values_gen(): for k, v in _dict_to_sorted_items(values): # populate index as side effect of iterating values index.append(k) yield v if dtype and dtype != object: # fromiter does not work with object types self.values = np.fromiter(values_gen(), dtype=dtype, count=len(values)) else: self.values = np.array(tuple(values_gen()), dtype=dtype) self.values.flags.writeable = False # NOTE: not sure if we need to check __iter__ here elif (dtype and dtype != object and dtype != str and hasattr(values, '__iter__') and hasattr(values, '__len__')): self.values = np.fromiter(values, dtype=dtype, count=len(values)) self.values.flags.writeable = False elif hasattr(values, '__len__') and not isinstance(values, str): self.values = np.array(values, dtype=dtype) self.values.flags.writeable = False elif hasattr(values, '__next__'): # a generator-like self.values = np.array(tuple(values), dtype=dtype) self.values.flags.writeable = False else: # it must be a single item # we cannot create the values until we realize the index, which might be hierarchical and not have final size 
equal to length def values_constructor(shape): self.values = np.full(shape, values, dtype=dtype) self.values.flags.writeable = False else: # is numpy if dtype is not None and dtype != values.dtype: # what to do here? raise Exception( 'when supplying values via array, the dtype argument is not necessary; if provided, it must agree with the dtype of the array' ) if values.shape == (): # handle special case of NP element def values_constructor(shape): self.values = np.repeat(values, shape) self.values.flags.writeable = False else: self.values = immutable_filter(values) #----------------------------------------------------------------------- # index assignment # NOTE: this generally must be done after values assignment, as from_items needs a values generator to be exhausted before looking to values if index is None or (hasattr(index, '__len__') and len(index) == 0): # create an integer index self._index = Index(range(len(self.values)), loc_is_iloc=True) elif own_index: self._index = index elif hasattr(index, 'STATIC'): if index.STATIC: self._index = index else: raise Exception( 'non-static index cannot be assigned to Series') else: # let index handle instantiation if isinstance(index, (Index, IndexHierarchy)): # call with the class of the passed-in index, in case it is hierarchical self._index = index.__class__(index) else: self._index = Index(index) shape = self._index.__len__() if values_constructor: values_constructor(shape) # updates self.values if len(self.values) != shape: raise Exception('values and index do not match length') #--------------------------------------------------------------------------- def __setstate__(self, state): ''' Ensure that reanimated NP arrays are set not writeable. 
''' for key, value in state[1].items(): setattr(self, key, value) self.values.flags.writeable = False #--------------------------------------------------------------------------- # interfaces @property def loc(self): return GetItem(self._extract_loc) @property def iloc(self): return GetItem(self._extract_iloc) # NOTE: this could be ExtractInterfacd1D, but are consistent with what is done on the base name space: loc and getitem duplicate each other. @property def drop(self): return InterfaceSelection2D(func_iloc=self._drop_iloc, func_loc=self._drop_loc, func_getitem=self._drop_loc) @property def mask(self): return InterfaceSelection2D(func_iloc=self._extract_iloc_mask, func_loc=self._extract_loc_mask, func_getitem=self._extract_loc_mask) @property def masked_array(self): return InterfaceSelection2D( func_iloc=self._extract_iloc_masked_array, func_loc=self._extract_loc_masked_array, func_getitem=self._extract_loc_masked_array) @property def assign(self) -> InterfaceSelection2D: return InterfaceSelection2D(func_iloc=self._extract_iloc_assign, func_loc=self._extract_loc_assign, func_getitem=self._extract_loc_assign) @property def iter_group(self): return IterNode(container=self, function_items=self._axis_group_items, function_values=self._axis_group, yield_type=IterNodeType.VALUES) @property def iter_group_items(self): return IterNode(container=self, function_items=self._axis_group_items, function_values=self._axis_group, yield_type=IterNodeType.ITEMS) @property def iter_element(self): return IterNode(container=self, function_items=self._axis_element_items, function_values=self._axis_element, yield_type=IterNodeType.VALUES) @property def iter_element_items(self): return IterNode(container=self, function_items=self._axis_element_items, function_values=self._axis_element, yield_type=IterNodeType.ITEMS) #--------------------------------------------------------------------------- # index manipulation def _reindex_other_like_iloc(self, value: 'Series', iloc_key: 
GetItemKeyType, fill_value=np.nan) -> 'Series': '''Given a value that is a Series, reindex it to the index components, drawn from this Series, that are specified by the iloc_key. ''' return value.reindex(self._index._extract_iloc(iloc_key), fill_value=fill_value) def reindex(self, index: tp.Union[Index, tp.Sequence[tp.Any]], fill_value=np.nan) -> 'Series': ''' Return a new Series based on the passed index. Args: fill_value: attempted to be used, but may be coerced by the dtype of this Series. ` ''' # TODO: implement `method` argument with bfill, ffill options if isinstance(index, (Index, IndexHierarchy)): # always use the Index constructor for safe reuse when possible index = index.__class__(index) else: # create the Index if not already an index, assume 1D index = Index(index) ic = IndexCorrespondence.from_correspondence(self.index, index) if ic.is_subset: # must have some common return self.__class__(self.values[ic.iloc_src], index=index, own_index=True) values = _full_for_fill(self.values.dtype, len(index), fill_value) # if some intersection of values if ic.has_common: values[ic.iloc_dst] = self.values[ic.iloc_src] # make immutable so a copy is not made values.flags.writeable = False return self.__class__(values, index=index, own_index=True) def relabel(self, mapper: CallableOrMapping) -> 'Series': ''' Return a new Series based on a mapping (or callable) from old to new index values. ''' return self.__class__(self.values, index=self._index.relabel(mapper), own_index=True) def reindex_flat(self): ''' Return a new Series, where a ``IndexHierarchy`` (if deifined) is replaced with a flat, one-dimension index of tuples. ''' return self.__class__(self.values, index=self._index.flat()) def reindex_add_level(self, level: tp.Hashable): ''' Return a new Series, adding a new root level to an ``IndexHierarchy``. 
''' return self.__class__(self.values, index=self._index.add_level(level)) def reindex_drop_level(self, count: int = 1): ''' Return a new Series, dropping one or more leaf levels from an ``IndexHierarchy``. ''' return self.__class__(self.values, index=self._index.drop_level(count)) #--------------------------------------------------------------------------- # na handling def isna(self) -> 'Series': ''' Return a same-indexed, Boolean Series indicating which values are NaN or None. ''' # consider returning self if not values.any()? values = _isna(self.values) values.flags.writeable = False return self.__class__(values, index=self._index) def notna(self) -> 'Series': ''' Return a same-indexed, Boolean Series indicating which values are NaN or None. ''' values = np.logical_not(_isna(self.values)) values.flags.writeable = False return self.__class__(values, index=self._index) def dropna(self) -> 'Series': ''' Return a new Series after removing values of NaN or None. ''' sel = np.logical_not(_isna(self.values)) if not np.any(sel): return self values = self.values[sel] values.flags.writeable = False return self.__class__(values, index=self._index.loc[sel]) def fillna(self, value) -> 'Series': '''Return a new Series after replacing NaN or None values with the supplied value. 
''' sel = _isna(self.values) if not np.any(sel): return self if isinstance(value, np.ndarray): raise Exception('cannot assign an array to fillna') else: value_dtype = np.array(value).dtype assigned_dtype = _resolve_dtype(value_dtype, self.values.dtype) if self.values.dtype == assigned_dtype: assigned = self.values.copy() else: assigned = self.values.astype(assigned_dtype) assigned[sel] = value assigned.flags.writeable = False return self.__class__(assigned, index=self._index) #--------------------------------------------------------------------------- # operators def _ufunc_unary_operator(self, operator: tp.Callable) -> 'Series': return self.__class__(operator(self.values), index=self._index, dtype=self.dtype) def _ufunc_binary_operator(self, *, operator: tp.Callable, other) -> 'Series': values = self.values index = self._index if isinstance(other, Series): # if indices are the same, we can simply set other to values and fallback on NP if len(self.index) != len(other.index) or (self.index != other.index).any(): index = self.index.union(other.index) # now need to reindex the Series values = self.reindex(index).values other = other.reindex(index).values else: other = other.values # if its an np array, we simply fall back on np behavior elif isinstance(other, np.ndarray): if other.ndim > 1: raise NotImplementedError( 'Operator application to greater dimensionalities will result in an array with more than 1 dimension; it is not clear how such an array should be indexed.' 
) # permit single value constants; not sure about filtering other types # we want the dtype to be the result of applying the operator; this happends by default result = operator(values, other) if not isinstance(result, np.ndarray): # in comparison to Booleans, if values is of length 1 and a character type, we will get a Boolean back, not an array; this issues the following warning: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison if isinstance(result, _BOOL_TYPES): # return a Boolean at the same size as the original Series; this works, but means that we will mask that, if the arguement is a tuple of length equalt to an erray, NP will perform element wise comparison; bit if the arguemtn is a tuple of length greater or eqial, each value in value will be compared to that tuple result = np.full(len(values), result) else: raise Exception( 'unexpected branch from non-array result of operator application to array' ) result.flags.writeable = False return self.__class__(result, index=index) def _ufunc_axis_skipna(self, *, axis, skipna, ufunc, ufunc_skipna, dtype=None): '''For a Series, all functions of this type reduce the single axis of the Series to 1d, so Index has no use here. Args: dtype: not used, part of signature for a commin interface ''' return _ufunc_skipna_1d(array=self.values, skipna=skipna, ufunc=ufunc, ufunc_skipna=ufunc_skipna) #--------------------------------------------------------------------------- def __len__(self) -> int: '''Length of values. ''' return self.values.__len__() def display(self, config: DisplayConfig = None) -> Display: '''Return a Display of the Series. 
''' config = config or DisplayActive.get() d = self._index.display(config=config) d.append_display( Display.from_values(self.values, header='<' + self.__class__.__name__ + '>', config=config)) return d def __repr__(self): return repr(self.display()) #--------------------------------------------------------------------------- # common attributes from the numpy array @property def mloc(self): return mloc(self.values) @property def dtype(self) -> np.dtype: ''' Return the dtype of the underlying NumPy array. Returns: :py:class:`numpy.dtype` ''' return self.values.dtype @property def shape(self) -> tp.Tuple[int]: ''' Return a tuple describing the shape of the underlying NumPy array. Returns: :py:class:`tp.Tuple[int]` ''' return self.values.shape @property def ndim(self) -> int: ''' Return the number of dimensions, which for a `Series` is always 1. Returns: :py:class:`int` ''' return self.values.ndim @property def size(self) -> int: ''' Return the size of the underlying NumPy array. Returns: :py:class:`int` ''' return self.values.size @property def nbytes(self) -> int: ''' Return the total bytes of the underlying NumPy array. Returns: :py:class:`int` ''' return self.values.nbytes #--------------------------------------------------------------------------- # extraction def _extract_iloc(self, key: GetItemKeyType) -> 'Series': # iterable selection should be handled by NP (but maybe not if a tuple) return self.__class__(self.values[key], index=self._index.iloc[key]) def _extract_loc(self, key: GetItemKeyType) -> 'Series': ''' Compatibility: Pandas supports taking in iterables of keys, where some keys are not found in the index; a Series is returned as if a reindex operation was performed. This is undesirable. 
Better instead is to use reindex() ''' iloc_key = self._index.loc_to_iloc(key) values = self.values[iloc_key] if not isinstance(values, np.ndarray): # if we have a single element return values return self.__class__(values, index=self._index.iloc[iloc_key], own_index=True) def __getitem__(self, key: GetItemKeyType) -> 'Series': '''A Loc selection (by index labels). Compatibility: Pandas supports using both loc and iloc style selections with the __getitem__ interface on Series. This is undesirable, so here we only expose the loc interface (making the Series dictionary like, but unlike the Index, where __getitem__ is an iloc). ''' return self._extract_loc(key) #--------------------------------------------------------------------------- # utilites for alternate extraction: drop, mask and assignment def _drop_iloc(self, key: GetItemKeyType) -> 'Series': if isinstance(key, np.ndarray) and key.dtype == bool: # use Boolean area to select indices from Index positions, as np.delete does not work with arrays values = np.delete(self.values, self._index.positions[key]) else: values = np.delete(self.values, key) values.flags.writeable = False index = self._index._drop_iloc(key) return self.__class__(values, index=index, own_index=True) def _drop_loc(self, key: GetItemKeyType) -> 'Series': return self._drop_iloc(self._index.loc_to_iloc(key)) #--------------------------------------------------------------------------- def _extract_iloc_mask(self, key: GetItemKeyType) -> 'Series': '''Produce a new boolean Series of the same shape, where the values selected via iloc selection are True. ''' mask = np.full(self.values.shape, False, dtype=bool) mask[key] = True mask.flags.writeable = False # can pass self here as it is immutable (assuming index cannot change) return self.__class__(mask, index=self._index) def _extract_loc_mask(self, key: GetItemKeyType) -> 'Series': '''Produce a new boolean Series of the same shape, where the values selected via loc selection are True. 
''' iloc_key = self._index.loc_to_iloc(key) return self._extract_iloc_mask(key=iloc_key) #--------------------------------------------------------------------------- def _extract_iloc_masked_array(self, key: GetItemKeyType) -> MaskedArray: '''Produce a new boolean Series of the same shape, where the values selected via iloc selection are True. ''' mask = self._extract_iloc_mask(key=key) return MaskedArray(data=self.values, mask=mask.values) def _extract_loc_masked_array(self, key: GetItemKeyType) -> MaskedArray: '''Produce a new boolean Series of the same shape, where the values selected via loc selection are True. ''' iloc_key = self._index.loc_to_iloc(key) return self._extract_iloc_masked_array(key=iloc_key) #--------------------------------------------------------------------------- def _extract_iloc_assign(self, key: GetItemKeyType) -> 'SeriesAssign': return SeriesAssign(data=self, iloc_key=key) def _extract_loc_assign(self, key: GetItemKeyType) -> 'SeriesAssign': iloc_key = self._index.loc_to_iloc(key) return SeriesAssign(data=self, iloc_key=iloc_key) #--------------------------------------------------------------------------- # axis functions def _axis_group_items(self, *, axis=0): groups, locations = _array_to_groups_and_locations(self.values) for idx, g in enumerate(groups): selection = locations == idx yield g, self._extract_iloc(selection) def _axis_group(self, *, axis=0): yield from (x for _, x in self._axis_group_items(axis=axis)) def _axis_element_items(self, *, axis=0): '''Generator of index, value pairs, equivalent to Series.items(). Rpeated to have a common signature as other axis functions. 
''' return zip(self._index.values, self.values) def _axis_element(self, *, axis=0): yield from (x for _, x in self._axis_element_items(axis=axis)) #--------------------------------------------------------------------------- @property def index(self): return self._index #--------------------------------------------------------------------------- # dictionary-like interface def keys(self) -> Index: ''' Iterator of index labels. ''' return self._index def __iter__(self): ''' Iterator of index labels, same as :py:meth:`Series.keys`. ''' return self._index.__iter__() def __contains__(self, value) -> bool: ''' Inclusion of value in index labels. ''' return self._index.__contains__(value) def items(self) -> tp.Generator[tp.Tuple[tp.Any, tp.Any], None, None]: '''Iterator of pairs of index label and value. ''' return zip(self._index.values, self.values) def get(self, key, default=None): ''' Return the value found at the index key, else the default if the key is not found. ''' if key not in self._index: return default return self.__getitem__(key) #--------------------------------------------------------------------------- # transformations resulting in the same dimensionality def sort_index(self, ascending: bool = True, kind: str = _DEFAULT_SORT_KIND) -> 'Series': ''' Return a new Series ordered by the sorted Index. ''' # argsort lets us do the sort once and reuse the results order = np.argsort(self._index.values, kind=kind) if not ascending: order = order[::-1] index_values = self._index.values[order] index_values.flags.writeable = False values = self.values[order] values.flags.writeable = False return self.__class__(values, index=index_values) def sort_values(self, ascending: bool = True, kind: str = _DEFAULT_SORT_KIND) -> 'Series': ''' Return a new Series ordered by the sorted values. 
''' # argsort lets us do the sort once and reuse the results order = np.argsort(self.values, kind=kind) if not ascending: order = order[::-1] index_values = self._index.values[order] index_values.flags.writeable = False values = self.values[order] values.flags.writeable = False return self.__class__(values, index=index_values) def isin(self, other) -> 'Series': ''' Return a same-sized Boolean Series that shows if the same-positoined element is in the iterable passed to the function. ''' # cannot use assume_unique because do not know if values is unique v, _ = _iterable_to_array(other) # NOTE: could identify empty iterable and create False array array = np.in1d(self.values, v) array.flags.writeable = False return self.__class__(array, index=self._index) def clip(self, lower=None, upper=None): '''Apply a clip operation to the Series. Args: lower: value or Series to define the inclusive lower bound. upper: value or Series to define the inclusive upper bound. ''' args = [lower, upper] for idx in range(len(args)): arg = args[idx] if isinstance(arg, Series): # after reindexing, strip away the index # NOTE: using the bound forces going to a float type; this may not be the best approach bound = -np.inf if idx == 0 else np.inf args[idx] = arg.reindex(self.index).fillna(bound).values elif hasattr(arg, '__iter__'): raise Exception( 'only Series are supported as iterable lower/upper arguments' ) # assume single value otherwise, no change necessary array = np.clip(self.values, *args) array.flags.writeable = False return self.__class__(array, index=self._index) def transpose(self) -> 'Series': '''The transpositon of a Series is itself. ''' return self @property def T(self): return self.transpose() def duplicated(self, exclude_first=False, exclude_last=False) -> np.ndarray: ''' Return a same-sized Boolean Series that shows True for all b values that are duplicated. 
''' # TODO: might be able to do this witnout calling .values and passing in TypeBlocks, but TB needs to support roll duplicates = _array_to_duplicated(self.values, exclude_first=exclude_first, exclude_last=exclude_last) duplicates.flags.writeable = False return self.__class__(duplicates, index=self._index) def drop_duplicated(self, exclude_first=False, exclude_last=False): ''' Return a Series with duplicated values removed. ''' duplicates = _array_to_duplicated(self.values, exclude_first=exclude_first, exclude_last=exclude_last) keep = ~duplicates return self.__class__(self.values[keep], index=self._index[keep]) def astype(self, dtype: DtypeSpecifier) -> 'Series': ''' Return a Series with type determined by `dtype` argument. Note that for Series, this is a simple function, whereas for Frame, this is an interface exposing both a callable and a getitem interface. ''' return self.__class__(self.values.astype(dtype), index=self._index) def roll(self, shift: int, include_index: bool = False) -> 'Series': '''Return a Series with values rotated forward and wrapped around the index (with a postive shift) or backward and wrapped around the index (with a negative shift). Args: shift: Postive or negative integer shift. include_index: Determine if the Index is shifted with the underlying data. ''' if shift % len(self.values): values = array_shift(self.values, shift, axis=0, wrap=True) values.flags.writeable = False else: values = self.values if include_index: index = self._index.roll(shift=shift) own_index = True else: index = self._index own_index = False return self.__class__(values, index=index, own_index=own_index) def shift(self, shift: int, fill_value=np.nan) -> 'Series': '''Return a Series with values shifted forward on the index (with a postive shift) or backward on the index (with a negative shift). Args: shift: Postive or negative integer shift. fill_value: Value to be used to fill data missing after the shift. 
''' if shift: values = array_shift(self.values, shift, axis=0, wrap=False, fill_value=fill_value) values.flags.writeable = False else: values = self.values return self.__class__(values, index=self._index) #--------------------------------------------------------------------------- # transformations resulting in reduced dimensionality def head(self, count: int = 5) -> 'Series': '''Return a Series consisting only of the top elements as specified by ``count``. Args: count: Number of elements to be returned from the top of the Series. ''' return self.iloc[:count] def tail(self, count: int = 5) -> 'Series': '''Return a Series consisting only of the bottom elements as specified by ``count``. Args: count: Number of elements to be returned from the bottom of the Series. ''' return self.iloc[-count:] #--------------------------------------------------------------------------- # utility function to numpy array def unique(self) -> np.ndarray: ''' Return a NumPy array of unqiue values. ''' return np.unique(self.values) #--------------------------------------------------------------------------- # export # NOTE: can add to_frame and to_fram_go after Series has name attribute def to_pairs(self) -> tp.Iterable[tp.Tuple[tp.Hashable, tp.Any]]: ''' Return a tuple of tuples, where each inner tuple is a pair of index label, value. ''' if isinstance(self._index, IndexHierarchy): index_values = list(_array2d_to_tuples(self._index.values)) else: index_values = self._index.values return tuple(zip(index_values, self.values)) def to_pandas(self): ''' Return a Pandas Series. ''' import pandas return pandas.Series(self.values.copy(), index=self._index.values.copy())