Esempio n. 1
0
    def test_intersect2d_a(self):
        '''
        Check intersect2d and union2d on two 2D string arrays, using both the native string dtype and object-dtype copies of the same arrays.
        '''
        left = np.array([('a', 'b'), ('c', 'd'), ('e', 'f')])
        right = np.array([('a', 'g'), ('c', 'd'), ('e', 'f')])

        # rows expected from each set operation, as nested lists
        rows_shared = [['c', 'd'], ['e', 'f']]
        rows_all = [['a', 'b'], ['a', 'g'], ['c', 'd'], ['e', 'f']]

        # intersection: native dtype, then object dtype
        found = intersect2d(left, right)
        self.assertEqual(found.tolist(), rows_shared)

        found = intersect2d(left.astype(object), right.astype(object))
        self.assertEqual(found.tolist(), rows_shared)

        # union: native dtype, then object dtype
        found = union2d(left, right)
        self.assertEqual(found.tolist(), rows_all)

        found = union2d(left.astype(object), right.astype(object))
        self.assertEqual(found.tolist(), rows_all)
Esempio n. 2
0
 def test_intersect2d(self, arrays: tp.Sequence[np.ndarray]) -> None:
     '''
     The intersection of the first two arrays must be 2D, and must have as many rows as there are row tuples common to both inputs.
     '''
     first = arrays[0]
     second = arrays[1]

     post = util.intersect2d(first, second, assume_unique=False)
     self.assertTrue(post.ndim == 2)

     # reference result: set intersection over the rows expressed as tuples
     rows_first = set(util.array2d_to_tuples(first))
     rows_second = set(util.array2d_to_tuples(second))
     count_expected = len(rows_first & rows_second)
     self.assertTrue(len(post) == count_expected)
Esempio n. 3
0
    def from_correspondence(cls, src_index: 'Index',
                            dst_index: 'Index') -> 'IndexCorrespondence':
        '''
        Build an IndexCorrespondence from the labels shared by two Index or IndexHierarchy objects.

        Args:
            src_index: the index whose labels are looked up to produce ``iloc_src``.
            dst_index: the index that determines ``size`` and, when all of its labels are found, the ordering of the result.

        Returns:
            An instance created with ``has_common``, ``is_subset``, ``iloc_src``, ``iloc_dst`` and ``size``; both iloc values are None when no labels are shared.
        '''
        src_depth = src_index.depth
        dst_depth = dst_index.depth

        # Settle on the depth at which labels are compared. With differing depths, a match is only possible when the depth-one index has object dtype (so it can hold tuples); otherwise depth zero marks that nothing can match.
        if src_depth == dst_depth:
            depth, mixed_depth = src_depth, False
        elif src_depth == 1 and src_index.values.dtype.kind == 'O':
            depth, mixed_depth = dst_depth, True
        elif dst_depth == 1 and dst_index.values.dtype.kind == 'O':
            depth, mixed_depth = src_depth, True
        else:
            depth, mixed_depth = 0, False

        # Array-level routines are used to get the intersection, rather than Index methods, because arrays (not Index objects) are needed below.
        if depth == 1:
            # NOTE: this can fail in some cases: comparing two object arrays with NaNs and strings.
            shared = intersect1d(src_index.values,
                                 dst_index.values,
                                 assume_unique=True)
            assert not mixed_depth
        elif depth > 1:
            shared = intersect2d(src_index.values,
                                 dst_index.values,
                                 assume_unique=True)
            if mixed_depth:
                # the depth-one index must be queried through loc_to_iloc with tuples
                shared = list(array2d_to_tuples(shared))
        else:
            # nothing to compare; an empty container keeps the length check below uniform
            shared = ()

        has_common = len(shared) > 0
        size = len(dst_index.values)

        if not has_common:
            return cls(has_common=has_common,
                       is_subset=False,
                       iloc_src=None,
                       iloc_dst=None,
                       size=size)

        if len(shared) != len(dst_index):
            # only some destination labels are shared; both selections will be equal sized
            return cls(has_common=has_common,
                       is_subset=False,
                       iloc_src=src_index.loc_to_iloc(shared),
                       iloc_dst=dst_index.loc_to_iloc(shared),
                       size=size)

        # Every destination label is shared: select with the destination values themselves so that the destination order is retained.
        dst_values = dst_index.values
        # If the index values are a Boolean array, loc_to_iloc will try to do a Boolean selection, which is incorrect; passing a list avoids this.
        key = dst_values.tolist() if dst_values.dtype == DTYPE_BOOL else dst_values
        return cls(has_common=has_common,
                   is_subset=True,
                   iloc_src=src_index.loc_to_iloc(key),
                   iloc_dst=np.arange(size),
                   size=size)