Example #1
    def test_construction_from_string(self):
        result = DatetimeTZDtype('datetime64[ns, US/Eastern]')
        self.assertTrue(is_dtype_equal(self.dtype, result))
        result = DatetimeTZDtype.construct_from_string(
            'datetime64[ns, US/Eastern]')
        self.assertTrue(is_dtype_equal(self.dtype, result))
        self.assertRaises(TypeError,
                          lambda: DatetimeTZDtype.construct_from_string('foo'))
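A minimal sketch of what this test exercises, assuming a pandas version that exposes DatetimeTZDtype at the top level and is_dtype_equal under pandas.api.types (the one-argument string constructor used above is from an older API; the keyword form is used here):

from pandas import DatetimeTZDtype
from pandas.api.types import is_dtype_equal

expected = DatetimeTZDtype(unit='ns', tz='US/Eastern')
parsed = DatetimeTZDtype.construct_from_string('datetime64[ns, US/Eastern]')
assert is_dtype_equal(expected, parsed)   # same unit and timezone -> equal

try:
    DatetimeTZDtype.construct_from_string('foo')   # not a tz-aware dtype string
except TypeError:
    pass   # rejected, matching the assertRaises check above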
Example #3
def union_categoricals(to_union):
    """
    Combine list-like of Categoricals, unioning categories. All
    must have the same dtype, and none can be ordered.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categoricals

    Returns
    -------
    Categorical
       A single array; categories will be ordered as they
       appear in the list

    Raises
    ------
    TypeError
        If any of the categoricals are ordered, or if they do not
        all have the same dtype
    ValueError
        If an empty list of categoricals is passed
    """
    from pandas import Index, Categorical

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    first = to_union[0]
    if any(c.ordered for c in to_union):
        raise TypeError("Can only combine unordered Categoricals")

    if not all(
            com.is_dtype_equal(c.categories.dtype, first.categories.dtype)
            for c in to_union):
        raise TypeError("dtype of categories must be the same")

    cats = first.categories
    unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
    categories = Index(unique_cats)

    new_codes = []
    for c in to_union:
        indexer = categories.get_indexer(c.categories)
        new_codes.append(indexer.take(c.codes))
    codes = np.concatenate(new_codes)
    return Categorical(codes,
                       categories=categories,
                       ordered=False,
                       fastpath=True)
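A short usage sketch of the function above, assuming the public pandas.api.types.union_categoricals entry point available in later pandas versions:

import pandas as pd
from pandas.api.types import union_categoricals

a = pd.Categorical(['b', 'c'])
b = pd.Categorical(['a', 'b'])
combined = union_categoricals([a, b])
# values: ['b', 'c', 'a', 'b']
# categories: ['b', 'c', 'a']  (ordered by first appearance across the inputs)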
Example #4
    def test_equality(self):
        self.assertTrue(is_dtype_equal(self.dtype, "datetime64[ns, US/Eastern]"))
        self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype("ns", "US/Eastern")))
        self.assertFalse(is_dtype_equal(self.dtype, "foo"))
        self.assertFalse(is_dtype_equal(self.dtype, DatetimeTZDtype("ns", "CET")))
        self.assertFalse(is_dtype_equal(DatetimeTZDtype("ns", "US/Eastern"), DatetimeTZDtype("ns", "US/Pacific")))

        # numpy compat
        self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]"))
Example #5
    def test_equality(self):
        self.assertTrue(is_dtype_equal(self.dtype, 'datetime64[ns, US/Eastern]'))
        self.assertTrue(is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'US/Eastern')))
        self.assertFalse(is_dtype_equal(self.dtype, 'foo'))
        self.assertFalse(is_dtype_equal(self.dtype, DatetimeTZDtype('ns', 'CET')))
        self.assertFalse(is_dtype_equal(DatetimeTZDtype('ns', 'US/Eastern'),
                                        DatetimeTZDtype('ns', 'US/Pacific')))

        # numpy compat
        self.assertTrue(is_dtype_equal(np.dtype("M8[ns]"), "datetime64[ns]"))
Example #6
def union_categoricals(to_union):
    """
    Combine list-like of Categoricals, unioning categories. All
    must have the same dtype, and none can be ordered.

    .. versionadded:: 0.19.0

    Parameters
    ----------
    to_union : list-like of Categoricals

    Returns
    -------
    Categorical
       A single array; categories will be ordered as they
       appear in the list

    Raises
    ------
    TypeError
        If any of the categoricals are ordered, or if they do not
        all have the same dtype
    ValueError
        If an empty list of categoricals is passed
    """
    from pandas import Index, Categorical

    if len(to_union) == 0:
        raise ValueError('No Categoricals to union')

    first = to_union[0]
    if any(c.ordered for c in to_union):
        raise TypeError("Can only combine unordered Categoricals")

    if not all(com.is_dtype_equal(c.categories.dtype, first.categories.dtype)
               for c in to_union):
        raise TypeError("dtype of categories must be the same")

    cats = first.categories
    unique_cats = cats.append([c.categories for c in to_union[1:]]).unique()
    categories = Index(unique_cats)

    new_codes = []
    for c in to_union:
        indexer = categories.get_indexer(c.categories)
        new_codes.append(indexer.take(c.codes))
    codes = np.concatenate(new_codes)
    return Categorical(codes, categories=categories, ordered=False,
                       fastpath=True)
Example #7
    def equals(self, other):
        """
        Determines if two Index objects contain the same elements.
        """
        if self is other:
            return True

        # need to compare NaN locations and make sure they are the same;
        # since NaNs don't compare equal, this is a bit tricky
        try:
            if not isinstance(other, Float64Index):
                other = self._constructor(other)
            if (not is_dtype_equal(self.dtype, other.dtype) or
                    self.shape != other.shape):
                return False
            left, right = self._values, other._values
            return ((left == right) | (self._isnan & other._isnan)).all()
        except (TypeError, ValueError):
            return False
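A hedged illustration of the NaN handling described in the comment: two float indexes with NaN in the same positions compare equal under .equals(), even though elementwise comparison says otherwise (plain pd.Index is used here instead of Float64Index, which newer pandas no longer exposes):

import numpy as np
import pandas as pd

left = pd.Index([1.0, np.nan, 3.0])
right = pd.Index([1.0, np.nan, 3.0])
assert left.equals(right)            # NaN locations match -> indexes are equal
assert not (left == right).all()     # elementwise, NaN != NaN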
Example #8
    def __new__(cls, data=None, dtype=None, copy=False, name=None,
                fastpath=False):

        if fastpath:
            return cls._simple_new(data, name=name)

        # scalars and generators are handled inside _coerce_to_ndarray
        data = cls._coerce_to_ndarray(data)

        if issubclass(data.dtype.type, compat.string_types):
            cls._string_data_error(data)

        if copy or not is_dtype_equal(data.dtype, cls._default_dtype):
            subarr = np.array(data, dtype=cls._default_dtype, copy=copy)
            cls._assert_safe_casting(data, subarr)
        else:
            subarr = data

        if name is None and hasattr(data, 'name'):
            name = data.name
        return cls._simple_new(subarr, name=name)
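The copy/cast branch above only keeps the cast when it is value-preserving (cls._assert_safe_casting). A standalone sketch of that safe-casting idea, with illustrative names (cast_safely is not a pandas function):

import numpy as np

def cast_safely(values, dtype=np.int64):
    values = np.asarray(values)
    out = np.array(values, dtype=dtype)
    # round-trip back to the original dtype; any change means the cast is unsafe
    if not np.array_equal(out.astype(values.dtype), values):
        raise TypeError("unsafe cast: values would be changed by the conversion")
    return out

cast_safely([1.0, 2.0, 3.0])   # fine: array([1, 2, 3])
# cast_safely([1.5])           # would raise TypeError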
Example #9
    def test_construction_from_string(self):
        result = CategoricalDtype.construct_from_string("category")
        self.assertTrue(is_dtype_equal(self.dtype, result))
        self.assertRaises(TypeError,
                          lambda: CategoricalDtype.construct_from_string("foo"))
Example #10
    def test_equality(self):
        self.assertTrue(is_dtype_equal(self.dtype, "category"))
        self.assertTrue(is_dtype_equal(self.dtype, CategoricalDtype()))
        self.assertFalse(is_dtype_equal(self.dtype, "foo"))
Example #11
    def test_construction_from_string(self):
        result = CategoricalDtype.construct_from_string('category')
        self.assertTrue(is_dtype_equal(self.dtype, result))
        self.assertRaises(TypeError,
                          lambda: CategoricalDtype.construct_from_string('foo'))
Example #12
    def test_equality(self):
        self.assertTrue(is_dtype_equal(self.dtype, 'category'))
        self.assertTrue(is_dtype_equal(self.dtype, CategoricalDtype()))
        self.assertFalse(is_dtype_equal(self.dtype, 'foo'))
Example #13
    def _maybe_add_join_keys(self, result, left_indexer, right_indexer):

        left_has_missing = None
        right_has_missing = None

        keys = zip(self.join_names, self.left_on, self.right_on)
        for i, (name, lname, rname) in enumerate(keys):
            if not _should_fill(lname, rname):
                continue

            take_left, take_right = None, None

            if name in result:

                if left_indexer is not None and right_indexer is not None:
                    if name in self.left:

                        if left_has_missing is None:
                            left_has_missing = any(left_indexer == -1)

                        if left_has_missing:
                            take_right = self.right_join_keys[i]

                            if not com.is_dtype_equal(result[name].dtype,
                                                      self.left[name].dtype):
                                take_left = self.left[name]._values

                    elif name in self.right:

                        if right_has_missing is None:
                            right_has_missing = any(right_indexer == -1)

                        if right_has_missing:
                            take_left = self.left_join_keys[i]

                            if not com.is_dtype_equal(result[name].dtype,
                                                      self.right[name].dtype):
                                take_right = self.right[name]._values

            elif (left_indexer is not None and
                  isinstance(self.left_join_keys[i], np.ndarray)):

                take_left = self.left_join_keys[i]
                take_right = self.right_join_keys[i]

            if take_left is not None or take_right is not None:

                if take_left is None:
                    lvals = result[name]._values
                else:
                    lfill = na_value_for_dtype(take_left.dtype)
                    lvals = algos.take_1d(take_left, left_indexer,
                                          fill_value=lfill)

                if take_right is None:
                    rvals = result[name]._values
                else:
                    rfill = na_value_for_dtype(take_right.dtype)
                    rvals = algos.take_1d(take_right, right_indexer,
                                          fill_value=rfill)

                # if we have an all missing left_indexer
                # make sure to just use the right values
                mask = left_indexer == -1
                if mask.all():
                    key_col = rvals
                else:
                    key_col = Index(lvals).where(~mask, rvals)

                if name in result:
                    result[name] = key_col
                else:
                    result.insert(i, name or 'key_%d' % i, key_col)
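A small, hedged illustration of the behavior this helper supports: in an outer merge, key rows that exist on only one side still get a fully populated key column, because the missing positions are filled from the other side's join keys:

import pandas as pd

left = pd.DataFrame({'key': ['a', 'b'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['b', 'c'], 'rval': [3, 4]})
merged = pd.merge(left, right, on='key', how='outer')
# merged['key'] is ['a', 'b', 'c'] with no missing values, even though
# 'a' only appears on the left and 'c' only on the right.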