Example #1
0
def _compute_ndim(row_loc, col_loc=None):
    row_scalar = is_scalar(row_loc) or util.is_tuple(row_loc)
    col_scalar = is_scalar(col_loc) or util.is_tuple(col_loc)

    if row_scalar and col_scalar:
        ndim = 0
    elif row_scalar ^ col_scalar:
        ndim = 1
    else:
        ndim = 2

    return ndim
Example #2
0
    def _validate_locator(self, row_loc):
        """Validate a single (row) locator and classify it.

        A tuple locator is unwrapped to its first element; tuples with more
        than one element are rejected. Slices carrying a step are rejected
        as unsupported. When ``self.is_at`` is set, the locator must be
        scalar-like (``self.is_loc`` set) or an integer (otherwise).

        Returns:
            A ``(row_loc, row_scalar, ndim)`` triple, where ``ndim`` comes
            from ``_compute_ndim(row_loc)``.

        Raises:
            ValueError: on too many indexers, or on a locator that violates
                the "at"-style restrictions.
        """
        # Unwrap a tuple indexer down to its only element
        if util.is_tuple(row_loc):
            if len(row_loc) > 1:
                raise ValueError("Too many indexers")
            row_loc = row_loc[0]

        stepped_slice = isinstance(row_loc, slice) and row_loc.step is not None
        if stepped_slice:
            raise err._unsupported_error(
                "row slicer cannot have a step for now")

        row_scalar = is_scalar(row_loc) or util.is_tuple(row_loc)

        if self.is_at:
            # Label-based access needs a scalar, positional needs an integer
            if self.is_loc:
                acceptable = row_scalar
                message = "At based indexing can only have scalar indexers"
            else:
                acceptable = is_integer(row_loc)
                message = "iAt based indexing can only have integer indexers"
            if not acceptable:
                raise ValueError(message)

        ndim = _compute_ndim(row_loc)
        return (row_loc, row_scalar, ndim)
Example #3
0
    def _validate_locators(self, tup):
        """Split an indexer into row and column locators and validate them.

        A non-empty tuple supplies the row locator and, optionally, the
        column locator; anything else is taken as the row locator alone.
        A missing column locator defaults to ``slice(None)``.

        Returns:
            A 5-tuple ``(row_loc, col_loc, row_scalar, col_scalar, ndim)``.
            A scalar-like column locator is returned wrapped in a
            one-element list.

        Raises:
            ValueError: on more than two indexers, or when "at"-style
                access is not given exactly two suitable indexers.
        """
        if not (util.is_tuple(tup) and len(tup) >= 1):
            row_loc, col_loc = tup, slice(None)
        elif len(tup) > 2:
            raise ValueError("Too many indexers")
        elif len(tup) == 2:
            row_loc, col_loc = tup[0], tup[1]
        else:
            row_loc, col_loc = tup[0], slice(None)

        stepped_slice = isinstance(row_loc, slice) and row_loc.step is not None
        if stepped_slice:
            raise err._unsupported_error(
                "row slicer cannot have a step for now")

        row_scalar = is_scalar(row_loc) or util.is_tuple(row_loc)
        col_scalar = is_scalar(col_loc) or util.is_tuple(col_loc)

        if self.is_at:
            if not (util.is_tuple(tup) and len(tup) == 2):
                raise ValueError("Need two indexers")

            # Label-based access needs scalars, positional needs integers
            if self.is_loc:
                acceptable = row_scalar and col_scalar
                message = "At based indexing can only have scalar indexers"
            else:
                acceptable = is_integer(row_loc) and is_integer(col_loc)
                message = "iAt based indexing can only have integer indexers"
            if not acceptable:
                raise ValueError(message)

        col_out = [col_loc] if col_scalar else col_loc
        ndim = _compute_ndim(row_loc, col_loc)
        return (row_loc, col_out, row_scalar, col_scalar, ndim)
Example #4
0
    def _binary_op(self, op, other):
        """Apply ``op`` level by level and merge the per-level results.

        ``other`` is wrapped into a one-element tuple unless
        ``util.is_tuple`` accepts it. Its i-th element is combined with
        ``self._levels[i]``. When more than one level is involved, the
        partial results are folded together using ``self._MERGE_OPS[op]``.
        """
        assert op in self._SUPPORTED_BINOPS

        keys = other if util.is_tuple(other) else (other, )

        per_level = []
        for depth, key in enumerate(keys):
            per_level.append(self._levels[depth]._binary_op(op, key))

        merged = per_level[0]
        remaining = per_level[1:]
        if remaining:
            merge_op = self._MERGE_OPS[op]
            for partial in remaining:
                merged = merged.binary_op(merge_op, partial, merged.dtype)
        return merged
Example #5
0
        def _validate_labels(index, labels, level, membership=True):
            """Check that ``labels`` are usable against ``index``.

            Tuple labels must not have more elements than the index has
            levels. Unless ``membership`` is false, every label must also
            be contained in the index (or in the values of ``level`` when
            a level is given).

            Raises:
                KeyError: for an over-long tuple label or a missing label.
            """
            for key in labels:
                if util.is_tuple(key) and len(key) > index.nlevels:
                    raise KeyError(f"Key length ({len(key)}) exceeds "
                                   f"index depth ({index.nlevels})")

            if membership:
                lookup = index
                if level is not None:
                    # Restrict the membership test to the requested level
                    level_number = index._get_level_number(level)
                    lookup = index.get_level_values(level_number)

                for key in labels:
                    if key not in lookup:
                        raise KeyError(key)
Example #6
0
    def _ensure_valid_frame(self, data, copy=False):
        if is_scalar(data) or util.is_tuple(data):
            return data
        elif isinstance(data, Frame):
            return data.copy(deep=copy)
        elif isinstance(data, pandas.DataFrame):
            from .dataframe import DataFrame

            return DataFrame(data)
        elif isinstance(data, pandas.Series):
            from .series import Series

            return Series(data)
        elif isinstance(data, np.ndarray):
            # TODO: Here we assume that the axis to which we align the ndarray
            #       is the index, but we really should be choosing between
            #       the index and the columns, depending on the axis argument.
            if data.ndim == 1:
                from .series import Series

                if len(self) != len(data):
                    raise ValueError(
                        f"Length of passed values is {len(self)}, "
                        f"index implies {len(data)}.")

                name = self.name if self._is_series else None
                return Series(data, name=name, index=self._raw_index)
            elif data.ndim == 2:
                if self._is_series:
                    raise Exception("Data must be 1-dimensional")

                from .dataframe import DataFrame

                return DataFrame(data,
                                 columns=self.columns,
                                 index=self._raw_index)
            else:
                raise ValueError("array must be either 1-d or 2-d")

        elif is_list_like(data):
            return self._ensure_valid_frame(np.array(data))

        else:
            raise ValueError(f"unsupported value type '{type(data)}'")
Example #7
0
    def __init__(
            self,
            left,
            right,
            how="inner",
            on=None,
            left_on=None,
            right_on=None,
            left_index=False,
            right_index=False,
            sort=False,
            suffixes=("_x", "_y"),
            copy=True,
            **kwargs,
    ):
        """Validate merge arguments and store them on the instance.

        Args:
            left: The left operand; must provide ``_ensure_valid_frame`` and
                ``_get_columns``.
            right: A ``Frame``, ``pandas.Series`` or ``pandas.DataFrame``.
                It is converted with ``left._ensure_valid_frame``.
            how: One of ``"inner"``, ``"left"`` or ``"outer"``.
            on, left_on, right_on, left_index, right_index: Stored as given;
                not validated here.
            sort: Only ``False`` is supported.
            suffixes: A 2-tuple of strings. At least one must be non-empty
                when the operands share column names.
            copy: Only ``True`` is supported.
            **kwargs: ``method`` (default ``"hash"``) is read from here.

        Raises:
            TypeError: if ``right`` is not a supported type.
            ValueError: if ``right`` is an unnamed series, if ``suffixes``
                is malformed, if columns overlap with no usable suffix, or
                if a broadcast join is requested for an outer join.
        """
        import pandas

        from .frame import Frame

        if not isinstance(right, (Frame, pandas.Series, pandas.DataFrame)):
            raise TypeError("Can only merge Series or DataFrame objects, "
                            f"a {type(right)} was passed")

        right = left._ensure_valid_frame(right)

        if right._is_series and right.name is None:
            raise ValueError("Cannot merge a Series without a name")

        # Checks overlap between column names
        if not util.is_tuple(suffixes) or len(suffixes) != 2:
            raise ValueError(f"Invalid suffixes: {suffixes}")
        if any(not isinstance(suffix, str) for suffix in suffixes):
            # f-string so the offending value actually appears in the message
            raise ValueError(f"Suffixes must be strings, but got {suffixes}")

        l_suffix, r_suffix = suffixes
        left_columns = left._get_columns()
        right_columns = right._get_columns()
        intersection = left_columns.intersection(right_columns)
        if len(intersection) != 0 and not (bool(l_suffix) or bool(r_suffix)):
            raise ValueError(
                f"columns overlap but no suffix specified: {intersection}")

        # Perform Legate specific checks
        method = kwargs.get("method", "hash")

        if how not in (
                "inner",
                "left",
                "outer",
        ):
            raise err._unsupported_error("how", how)

        if how == "outer" and method == "broadcast":
            raise ValueError("Broadcast join cannot be used for outer join")

        if copy not in (True, ):
            raise err._unsupported_error("copy", copy)

        if sort not in (False, ):
            raise err._unsupported_error("sort", sort)

        self._left = left
        self._right = right

        self._how = how
        self._on = on

        self._left_on = left_on
        self._right_on = right_on

        self._left_index = left_index
        self._right_index = right_index

        self._left_columns = left_columns
        self._right_columns = right_columns

        self._sort = sort
        self._suffixes = suffixes
        self._copy = copy
        self._method = method
Example #8
0
    def drop(
        self,
        labels=None,
        axis=0,
        index=None,
        columns=None,
        level=None,
        inplace=False,
        errors="raise",
    ):
        """Drop the given labels from rows and/or columns.

        Args:
            labels: Labels to drop along ``axis``. Mutually exclusive with
                ``index`` and ``columns``.
            axis: Axis that ``labels`` refers to.
            index: Row labels to drop.
            columns: Column labels to drop; ignored when this object is a
                series.
            level: Index level the labels refer to.
            inplace: If true, update this object and return ``None``;
                otherwise return a new object.
            errors: Accepted in the signature; not consulted by this
                implementation.

        Returns:
            A new object with the labels removed, or ``None`` when
            ``inplace`` is true.

        Raises:
            ValueError: if ``labels`` is combined with ``index``/``columns``,
                if none of the three is given, or if a row label is invalid.
            KeyError: if a label is too deep for the index or a column
                label is missing.
        """
        # Start from "drop nothing" so both axes are always defined, no
        # matter which combination of arguments the caller supplied.
        row_labels, row_level = [], None
        col_labels, col_level = [], None

        # If 'labels' is set, we use 'axis' to determine the lookup axis
        if labels is not None:
            if index is not None or columns is not None:
                raise ValueError(
                    "Cannot specify both 'labels' and 'index'/'columns'")
            axis = self._get_axis_number(axis)

            if axis == 0:
                row_labels = util.to_list_if_scalar(labels)
                row_level = level
            else:
                col_labels = util.to_list_if_scalar(labels)
                col_level = level

        # Otherwise, we use 'columns' and 'index' as lookup labels
        else:
            if index is None and columns is None:
                raise ValueError("Need to specify at least one of "
                                 "'labels', 'index' or 'columns'")
            if not self._is_series and columns is not None:
                col_labels = util.to_list_if_scalar(columns)
                col_level = level
            if index is not None:
                row_labels = util.to_list_if_scalar(index)
                row_level = level

        def _validate_labels(index, labels, level, membership=True):
            # Tuple labels must not be deeper than the index
            for label in labels:
                if not util.is_tuple(label):
                    continue
                if len(label) > index.nlevels:
                    raise KeyError(f"Key length ({len(label)}) exceeds "
                                   f"index depth ({index.nlevels})")

            if not membership:
                return

            if level is not None:
                level = index._get_level_number(level)
                index = index.get_level_values(level)

            for label in labels:
                if label not in index:
                    raise KeyError(label)

        new_self = self.copy(deep=False)

        # Drop columns first as that's easier
        if len(col_labels) > 0:
            assert not new_self._is_series
            _validate_labels(new_self.columns, col_labels, col_level)
            columns = new_self.columns.drop(col_labels, col_level)
            idxr = new_self.columns.get_indexer_for(columns)
            new_self = new_self._slice_columns(idxr)

        # Then drop rows using selection
        if len(row_labels) > 0:
            _validate_labels(new_self._raw_index, row_labels, row_level, False)

            if len(row_labels) > 1:
                raise err._unsupported_error("Label must be a scalar for now")
            row_label = row_labels[0]

            if row_level is not None and not is_scalar(row_label):
                raise ValueError("label must be a scalar when 'level' is set")

            if util.is_tuple(row_label) and len(row_label) == 0:
                raise ValueError("label must not be empty")

            mask = new_self._raw_index._get_drop_mask_for(row_label, row_level)
            new_frame = new_self._frame.select(mask)
            new_self._frame = new_frame

        if inplace:
            if self._is_series:
                self._update_frame(new_self._frame)
            else:
                self._update_frame(new_self._frame, columns=new_self.columns)

        else:
            return new_self