Example no. 1
0
    def test_resolve_dtype_iter_a(self) -> None:
        '''resolve_dtype_iter resolves homogeneous, mixed, numeric, and string dtype groups.'''
        ints = np.array([1, 2, 3])
        bools = np.array([False, True, False])
        unicode_strs = np.array(['b', 'c', 'd'])
        floats = np.array([2.3, 3.2])
        byte_strs = np.array(['test', 'test again'], dtype='S')
        floats32 = np.array([2.3, 5.4], dtype='float32')

        # identical dtypes resolve to themselves
        self.assertEqual(resolve_dtype_iter((ints.dtype, ints.dtype)), ints.dtype)
        self.assertEqual(resolve_dtype_iter((bools.dtype, bools.dtype)), bools.dtype)

        # boolean mixed with string or float dtypes resolves to object
        for other in (unicode_strs, byte_strs, floats32):
            self.assertEqual(
                    resolve_dtype_iter((bools.dtype, bools.dtype, other.dtype)),
                    np.object_)

        # purely numerical dtypes resolve to float64
        self.assertEqual(
                resolve_dtype_iter((ints.dtype, floats.dtype, floats32.dtype)),
                np.float64)

        # adding bool or byte-string to numerics resolves to object
        for other in (bools, byte_strs):
            self.assertEqual(
                    resolve_dtype_iter((ints.dtype, floats.dtype, floats32.dtype, other.dtype)),
                    np.object_)

        # mixed string dtypes resolve to the widest unicode dtype
        self.assertEqual(
                resolve_dtype_iter((unicode_strs.dtype, byte_strs.dtype)),
                np.dtype('<U10'))
Example no. 2
0
    def values(self) -> np.ndarray:
        '''
        Return a single immutable NumPy 2D array of all labels found in this IndexLevels instance. This may coerce types.
        '''
        # output shape: one row per leaf label, one column per depth level
        depth_count = self.depth
        shape = self.__len__(), depth_count

        # need to get a compatible dtype for all dtypes
        dtype = resolve_dtype_iter(self.dtypes_iter())
        labels = np.empty(shape, dtype=dtype)

        # breadth-first walk of the level tree; each queue entry carries the
        # level, its depth, and the partially-filled row inherited from parents
        row_count = 0
        levels = deque(((self, 0, None), ))  # order matters

        while levels:
            level, depth, row_previous = levels.popleft()

            if level.targets is None:
                # leaf level: write the inherited prefix across a block of rows,
                # then fill this depth's column with the leaf labels
                rows = len(level.index.values)
                row_slice = slice(row_count, row_count + rows)
                labels[row_slice, :] = row_previous
                labels[row_slice, depth] = level.index.values
                row_count += rows

            else:  # targets is an iterable of sub-levels (np.ndarray)
                depth_next = depth + 1
                for label, level_target in zip(level.index.values,
                                               level.targets):
                    if row_previous is None:
                        # shown to be faster to allocate entire row width
                        row = np.empty(depth_count, dtype=dtype)
                    else:
                        # copy so sibling branches do not share mutable state
                        row = row_previous.copy()
                    row[depth] = label
                    levels.append((level_target, depth_next, row))

        # freeze the result to guarantee immutability to callers
        labels.flags.writeable = False
        return labels
Example no. 3
0
 def test_concat_resolved_axis_1(self, arrays: tp.List[np.ndarray]) -> None:
     '''Concatenation along axis 1 yields a 2D array whose dtype is the resolution of the inputs' dtypes.'''
     result = util.concat_resolved(arrays, axis=1)
     self.assertEqual(result.ndim, 2)
     expected_dtype = util.resolve_dtype_iter(a.dtype for a in arrays)
     self.assertEqual(result.dtype, expected_dtype)
Example no. 4
0
    def test_resolve_dtype_iter(self, dtypes: tp.Iterable[np.dtype]) -> None:
        '''Any iterable of dtypes resolves to a single np.dtype instance.'''
        resolved = util.resolve_dtype_iter(dtypes)
        self.assertIsInstance(resolved, np.dtype)
Example no. 5
0
 def dtype_per_depth(self) -> tp.Iterator[np.dtype]:
     '''Yield, for each depth level in order, the single dtype that resolves all dtypes found at that depth.'''
     for depth in range(self.depth):
         yield resolve_dtype_iter(self.dtypes_at_depth(depth))
Example no. 6
0
def pivot_index_map(
    *,
    index_src: IndexBase,
    depth_level: DepthLevelSpecifier,
    dtypes_src: tp.Optional[tp.Sequence[np.dtype]],
) -> PivotIndexMap:
    '''
    Build a PivotIndexMap that partitions the depths of ``index_src`` into targets (depths selected by ``depth_level``, to be moved to the expanded axis) and groups (the remaining depths, which form the new contracted axis).

    Args:
        index_src: the source index whose depths are being partitioned.
        depth_level: selector of the depth(s) to treat as targets; used to index a boolean mask over all depths.
        dtypes_src: must be of length equal to axis
    '''
    # We are always moving levels from one axis to another; after application, the expanded axis will always be hierarchical, while the contracted axis may or may not be. From the contract axis, we need to divide the depths into two categories: targets (the depths to be moved and added to expand axis) and groups (unique combinations that remain on the contract axis after removing targets).

    # Unique target labels are added to labels on the expand axis; unique group labels become the new contract axis.

    # boolean mask over all depths: True where the depth is a target
    target_select = np.full(index_src.depth, False)
    target_select[depth_level] = True
    group_select = ~target_select

    # split the per-depth label arrays into target and group collections
    group_arrays = []
    target_arrays = []
    for i, v in enumerate(target_select):
        if v:
            target_arrays.append(index_src.values_at_depth(i))
        else:
            group_arrays.append(index_src.values_at_depth(i))

    group_depth = len(group_arrays)
    target_depth = len(target_arrays)
    group_to_dtype: tp.Dict[tp.Optional[tp.Hashable], np.dtype] = {}
    targets_unique: tp.Iterable[tp.Hashable]

    if group_depth == 0:
        # no group depths remain: all targets map from a single None group key
        # targets must be a tuple
        group_to_target_map = {
            None: {v: idx
                   for idx, v in enumerate(zip(*target_arrays))}
        }
        targets_unique = [k for k in group_to_target_map[None]]
        if dtypes_src is not None:
            group_to_dtype[None] = resolve_dtype_iter(dtypes_src)
    else:
        group_to_target_map = defaultdict(dict)
        targets_unique = dict()  # Store targets in order observed

        # iterate per-position tuples of group labels, target labels, and
        # (optionally) dtypes in lockstep
        for axis_idx, (group, target, dtype) in enumerate(
                zip(
                    zip(*group_arrays),  # get tuples of len 1 to depth
                    zip(*target_arrays),
                    (dtypes_src if dtypes_src is not None else repeat(None)),
                )):
            if group_depth == 1:
                # unwrap single-depth groups from their 1-tuple
                group = group[0]
            # targets are transferred labels; groups are the new columns
            group_to_target_map[group][target] = axis_idx
            # dict used as an ordered set: keys preserve first-seen order
            targets_unique[target] = None  #type: ignore

            if dtypes_src is not None:
                # resolve a single dtype per group across all its positions
                if group in group_to_dtype:
                    group_to_dtype[group] = resolve_dtype(
                        group_to_dtype[group], dtype)
                else:
                    group_to_dtype[group] = dtype

    return PivotIndexMap(  #pylint: disable=E1120
        targets_unique=targets_unique,
        target_depth=target_depth,
        target_select=target_select,
        group_to_target_map=group_to_target_map,  #type: ignore
        group_depth=group_depth,
        group_select=group_select,
        group_to_dtype=group_to_dtype)
Example no. 7
0
 def test_concat_resolved_axis_0(self, arrays):
     '''Concatenation along axis 0 yields a 2D array whose dtype is the resolution of the inputs' dtypes.'''
     result = util.concat_resolved(arrays, axis=0)
     self.assertEqual(result.ndim, 2)
     expected_dtype = util.resolve_dtype_iter(a.dtype for a in arrays)
     self.assertEqual(result.dtype, expected_dtype)
Example no. 8
0
    def test_resolve_dtype_iter(self, dtypes):
        '''Any iterable of dtypes resolves to a single np.dtype instance.'''
        resolved = util.resolve_dtype_iter(dtypes)
        self.assertIsInstance(resolved, np.dtype)