def pivot_items(
        blocks: TypeBlocks,
        group_fields_iloc: tp.Iterable[tp.Hashable],
        group_depth: int,
        data_field_iloc: tp.Hashable,
        func_single: UFunc,
        ) -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Any]]:
    '''
    Specialized generator of (label, value) pairs for the case of exactly one
    data field and exactly one aggregation function.
    '''
    # With a single grouping field, key on the scalar iloc so group labels are scalars.
    if group_depth > 1:
        group_key = group_fields_iloc
    else:
        group_key = group_fields_iloc[0] #type: ignore

    for label, _, sub_blocks in blocks.group(axis=0, key=group_key):
        column_values = sub_blocks._extract_array_column(data_field_iloc)
        yield label, func_single(column_values)
def pivot_records_items(
        blocks: TypeBlocks,
        group_fields_iloc: tp.Iterable[tp.Hashable],
        group_depth: int,
        data_fields_iloc: tp.Iterable[tp.Hashable],
        func_single: tp.Optional[UFunc],
        func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]]
        ) -> tp.Iterator[tp.Tuple[tp.Hashable, tp.Sequence[tp.Any]]]:
    '''
    Group the blocks on the group fields and, for each group, yield a
    (label, record) pair, where the record is a list of aggregated values
    (one per data field when ``func_single`` is given, otherwise one per
    data field per function in ``func_map``).
    '''
    # NOTE: results are delivered by label row, for use in a Frame.from_records_items constructor
    if group_depth > 1:
        group_key = group_fields_iloc
    else:
        group_key = group_fields_iloc[0] #type: ignore

    funcs_per_field = 1 if func_single else len(func_map)
    record_size = len(data_fields_iloc) * funcs_per_field

    for label, _, sub_blocks in blocks.group(axis=0, key=group_key):
        # pre-allocate the full record and fill by position
        record: tp.List[tp.Any] = [None] * record_size
        pos = 0
        for column_key in data_fields_iloc:
            column_values = sub_blocks._extract_array_column(column_key)
            if func_single:
                record[pos] = func_single(column_values)
                pos += 1
            else:
                for _, func in func_map:
                    record[pos] = func(column_values)
                    pos += 1
        yield label, record
def pivot_records_items_to_frame( *,
        blocks: TypeBlocks,
        group_fields_iloc: tp.Iterable[tp.Hashable],
        group_depth: int,
        data_fields_iloc: tp.Iterable[tp.Hashable],
        func_single: tp.Optional[UFunc],
        func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
        func_no: bool,
        kind: str,
        columns_constructor: IndexConstructor,
        columns: tp.List[tp.Hashable],
        index_constructor: IndexConstructor,
        dtypes: tp.Tuple[tp.Optional[np.dtype]],
        frame_cls: tp.Type['Frame'],
        ) -> 'Frame':
    '''
    Group the blocks on the group fields and, within each group, apply the
    aggregation function(s) per data field, accumulating results directly
    into per-column lists that become the TypeBlocks of a new Frame.
    '''
    if group_depth > 1:
        group_key = group_fields_iloc
    else:
        group_key = group_fields_iloc[0] #type: ignore

    # number of output columns: one per data field, times one per func when mapping
    funcs_per_field = 1 if (func_single or func_no) else len(func_map)
    record_size = len(data_fields_iloc) * funcs_per_field

    index_labels = []
    # one accumulating list per output column
    arrays: tp.List[tp.List[tp.Any]] = [[] for _ in range(record_size)]

    for label, _, sub_blocks in blocks.group(axis=0, key=group_key, kind=kind):
        index_labels.append(label)
        if func_no:
            # no aggregation requested: each group must already be a single row
            if len(sub_blocks) != 1:
                raise RuntimeError(
                        'pivot requires aggregation of values; provide a `func` argument.'
                        )
            for pos, column_key in enumerate(data_fields_iloc):
                arrays[pos].append(sub_blocks._extract(0, column_key))
        elif func_single:
            for pos, column_key in enumerate(data_fields_iloc):
                column_values = sub_blocks._extract_array_column(column_key)
                arrays[pos].append(func_single(column_values))
        else:
            pos = 0
            for column_key in data_fields_iloc:
                column_values = sub_blocks._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[pos].append(func(column_values))
                    pos += 1

    def blocks_gen() -> tp.Iterator[np.ndarray]:
        # realize each accumulated column as an immutable 1D array
        for accumulated, dtype in zip(arrays, dtypes):
            if dtype is None:
                array, _ = iterable_to_array_1d(accumulated)
            else:
                array = np.array(accumulated, dtype=dtype)
            array.flags.writeable = False
            yield array

    return frame_cls(
            TypeBlocks.from_blocks(blocks_gen()),
            index=index_constructor(index_labels),
            columns=columns_constructor(columns),
            own_data=True,
            own_index=True,
            own_columns=True,
            )
def pivot_records_items_to_blocks( *,
        blocks: TypeBlocks,
        group_fields_iloc: tp.Iterable[tp.Hashable],
        group_depth: int,
        data_fields_iloc: tp.Iterable[tp.Hashable],
        func_single: tp.Optional[UFunc],
        func_map: tp.Sequence[tp.Tuple[tp.Hashable, UFunc]],
        func_no: bool,
        fill_value: tp.Any,
        fill_value_dtype: np.dtype,
        index_outer: 'IndexBase',
        dtypes: tp.Tuple[tp.Optional[np.dtype]],
        kind: str,
        ) -> tp.List[np.ndarray]:
    '''
    Group the blocks on the group fields and, within each group, apply the
    aggregation function(s) per data field, writing results directly into
    pre-allocated per-column arrays positioned by ``index_outer``. Positions
    never observed in a group are filled with ``fill_value``. Returns the
    list of immutable per-column arrays.
    '''
    # NOTE: this delivers results by label, row for use in a Frame.from_records_items constructor
    group_key = group_fields_iloc if group_depth > 1 else group_fields_iloc[
            0] #type: ignore
    # one container per output column: a list when dtype is unknown, an empty ndarray otherwise
    arrays: tp.List[tp.Union[tp.List[tp.Any], np.ndarray]] = []
    for dtype in dtypes:
        if dtype is None:
            # we can use fill_value here, as either it will be completely replaced (and not affect dtype evaluation) or be needed (and already there)
            arrays.append([fill_value] * len(index_outer))
        else:
            # try to use the dtype specified; fill values at end if necessary
            arrays.append(np.empty(len(index_outer), dtype=dtype))

    # collect all possible ilocs, and remove as observed; if any remain, we have fill targets
    iloc_not_found: tp.Set[int] = set(range(len(index_outer)))
    # each group forms a row, each label a value in the index
    for label, _, part in blocks.group(axis=0, key=group_key, kind=kind):
        iloc: int = index_outer._loc_to_iloc(label) #type: ignore
        iloc_not_found.remove(iloc)
        if func_no:
            # no aggregation requested: each group must already be a single row
            if len(part) != 1:
                raise RuntimeError(
                        'pivot requires aggregation of values; provide a `func` argument.'
                        )
            for arrays_key, column_key in enumerate(data_fields_iloc):
                # this is equivalent to extracting a row, but doing so would force a type consolidation
                arrays[arrays_key][iloc] = part._extract(0, column_key)
        elif func_single:
            # one output column per data field
            for arrays_key, column_key in enumerate(data_fields_iloc):
                arrays[arrays_key][iloc] = func_single(
                        part._extract_array_column(column_key))
        else:
            # one output column per (data field, func) pair, in func_map order
            arrays_key = 0
            for column_key in data_fields_iloc:
                values = part._extract_array_column(column_key)
                for _, func in func_map:
                    arrays[arrays_key][iloc] = func(values)
                    arrays_key += 1

    if iloc_not_found:
        # we did not fill all arrays and have values that need to be filled
        # order does not matter
        fill_targets = list(iloc_not_found)
        # mutate in place then make immutable
        for arrays_key in range(len(arrays)): #pylint: disable=C0200
            array = arrays[arrays_key]
            if not array.__class__ is np.ndarray: # a list
                # list was pre-filled with fill_value, so no explicit fill is needed here
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array # restore new array
            else:
                # widen the dtype if fill_value cannot be represented in the current dtype
                dtype_resolved = resolve_dtype(
                        array.dtype, fill_value_dtype) # type: ignore
                if array.dtype != dtype_resolved: # type: ignore
                    array = array.astype(dtype_resolved) #type: ignore
                    array[fill_targets] = fill_value
                    arrays[arrays_key] = array # re-assign new array
                # NOTE(review): when array.dtype already equals dtype_resolved,
                # fill_targets are not written on this path, leaving np.empty
                # slots unset — presumably callers guarantee that case cannot
                # occur (or fill_value always forces a dtype change); confirm.
            array.flags.writeable = False # type: ignore
    else:
        # every position was observed; only list columns need realization
        for arrays_key in range(len(arrays)): #pylint: disable=C0200
            array = arrays[arrays_key]
            if not array.__class__ is np.ndarray: # a list
                array, _ = iterable_to_array_1d(array, count=len(index_outer))
                arrays[arrays_key] = array # re-assign new array
            array.flags.writeable = False
    return arrays