def _compute_ndim(row_loc, col_loc=None):
    """Return the dimensionality (0, 1, or 2) of an indexing result.

    Each scalar (or tuple) locator collapses one axis of the result,
    so the ndim is simply 2 minus the number of scalar locators.
    """
    row_is_scalar = is_scalar(row_loc) or util.is_tuple(row_loc)
    col_is_scalar = is_scalar(col_loc) or util.is_tuple(col_loc)
    # Each scalar axis removes exactly one dimension from the result.
    return 2 - int(row_is_scalar) - int(col_is_scalar)
def _validate_locator(self, row_loc):
    """Normalize and validate a single row locator for this indexer.

    A one-element tuple locator is unwrapped to its sole element, and
    stepped slices are rejected.  For ``.at``/``.iat`` indexers the
    locator must additionally be a scalar (resp. an integer).

    Returns a ``(locator, is_scalar, ndim)`` triple.
    """
    # Unwrap a single-element tuple; anything longer is over-indexing.
    if util.is_tuple(row_loc):
        if len(row_loc) > 1:
            raise ValueError("Too many indexers")
        row_loc = row_loc[0]

    if isinstance(row_loc, slice) and row_loc.step is not None:
        raise err._unsupported_error(
            "row slicer cannot have a step for now")

    scalar = is_scalar(row_loc) or util.is_tuple(row_loc)

    # .at/.iat accept only scalar (resp. integer) locators.
    if self.is_at:
        if self.is_loc and not scalar:
            raise ValueError(
                "At based indexing can only have scalar indexers")
        if not self.is_loc and not is_integer(row_loc):
            raise ValueError(
                "iAt based indexing can only have integer indexers")

    return (row_loc, scalar, _compute_ndim(row_loc))
def _validate_locators(self, tup):
    """Split a locator argument into row and column parts and validate.

    ``tup`` may be a (row, col) tuple or a bare row locator (the column
    locator then defaults to ``slice(None)``).  Stepped row slices are
    rejected, and ``.at``/``.iat`` indexers require exactly two scalar
    (resp. integer) locators.

    Returns ``(row_loc, col_loc, row_scalar, col_scalar, ndim)``; a
    scalar column locator is wrapped in a one-element list.
    """
    # Split the argument into row and column locators.
    if util.is_tuple(tup) and len(tup) >= 1:
        if len(tup) > 2:
            raise ValueError("Too many indexers")
        row_loc = tup[0]
        col_loc = tup[1] if len(tup) == 2 else slice(None)
    else:
        row_loc = tup
        col_loc = slice(None)

    if isinstance(row_loc, slice) and row_loc.step is not None:
        raise err._unsupported_error(
            "row slicer cannot have a step for now")

    row_scalar = is_scalar(row_loc) or util.is_tuple(row_loc)
    col_scalar = is_scalar(col_loc) or util.is_tuple(col_loc)

    # .at/.iat demand exactly two scalar (resp. integer) indexers.
    if self.is_at:
        if not (util.is_tuple(tup) and len(tup) == 2):
            raise ValueError("Need two indexers")
        if self.is_loc:
            if not (row_scalar and col_scalar):
                raise ValueError(
                    "At based indexing can only have scalar indexers")
        elif not (is_integer(row_loc) and is_integer(col_loc)):
            raise ValueError(
                "iAt based indexing can only have integer indexers")

    col_result = [col_loc] if col_scalar else col_loc
    return (
        row_loc,
        col_result,
        row_scalar,
        col_scalar,
        _compute_ndim(row_loc, col_loc),
    )
def _binary_op(self, op, other):
    """Apply ``op`` level-wise against ``other``, then fold the
    per-level results together with the op's merge counterpart.

    ``other`` may be a single operand (applied to level 0) or a tuple
    of operands, one per level.
    """
    assert op in self._SUPPORTED_BINOPS

    operands = other if util.is_tuple(other) else (other,)

    per_level = [
        self._levels[lvl]._binary_op(op, operand)
        for lvl, operand in enumerate(operands)
    ]

    merged = per_level[0]
    if len(per_level) > 1:
        # Combine level results pairwise using the merge op for `op`.
        merge_op = self._MERGE_OPS[op]
        for partial in per_level[1:]:
            merged = merged.binary_op(merge_op, partial, merged.dtype)
    return merged
def _validate_labels(index, labels, level, membership=True):
    """Validate lookup ``labels`` against ``index``.

    Tuple labels may not be deeper than the index's level count.  When
    ``membership`` is true, every label must also be present in the
    index (restricted to ``level`` if one is given); the first missing
    label is reported via ``KeyError``.
    """
    for label in labels:
        if util.is_tuple(label) and len(label) > index.nlevels:
            raise KeyError(f"Key length ({len(label)}) exceeds "
                           f"index depth ({index.nlevels})")

    if not membership:
        return

    # Restrict the membership test to a single level when requested.
    if level is not None:
        index = index.get_level_values(index._get_level_number(level))

    missing = [label for label in labels if label not in index]
    if missing:
        raise KeyError(missing[0])
def _ensure_valid_frame(self, data, copy=False):
    """Coerce ``data`` into a Frame aligned with this object.

    Scalars and tuples pass through untouched.  Frames are (shallow or
    deep) copied, pandas objects are wrapped, and ndarrays are wrapped
    re-using this object's index (and columns, for 2-d data).  Other
    list-likes are routed through ``np.array`` and retried.
    """
    if is_scalar(data) or util.is_tuple(data):
        return data

    if isinstance(data, Frame):
        return data.copy(deep=copy)

    if isinstance(data, pandas.DataFrame):
        from .dataframe import DataFrame
        return DataFrame(data)

    if isinstance(data, pandas.Series):
        from .series import Series
        return Series(data)

    if isinstance(data, np.ndarray):
        # TODO: Here we assume that the axis to which we align the
        #       ndarray is the index, but we really should be choosing
        #       between the index and the columns, depending on the
        #       axis argument.
        if data.ndim == 1:
            from .series import Series
            if len(self) != len(data):
                raise ValueError(
                    f"Length of passed values is {len(self)}, "
                    f"index implies {len(data)}.")
            name = self.name if self._is_series else None
            return Series(data, name=name, index=self._raw_index)
        if data.ndim == 2:
            if self._is_series:
                # NOTE(review): a ValueError would be more conventional
                # here, but callers may catch Exception specifically.
                raise Exception("Data must be 1-dimensional")
            from .dataframe import DataFrame
            return DataFrame(data, columns=self.columns,
                             index=self._raw_index)
        raise ValueError("array must be either 1-d or 2-d")

    if is_list_like(data):
        return self._ensure_valid_frame(np.array(data))

    raise ValueError(f"unsupported value type '{type(data)}'")
def __init__(
    self,
    left,
    right,
    how="inner",
    on=None,
    left_on=None,
    right_on=None,
    left_index=False,
    right_index=False,
    sort=False,
    suffixes=("_x", "_y"),
    copy=True,
    **kwargs,
):
    """Validate and record the configuration of a merge.

    ``right`` must be a Frame or a pandas Series/DataFrame; a Series
    must be named.  Overlapping column names require at least one
    non-empty suffix.  Backend-specific restrictions are enforced:
    ``how`` must be inner/left/outer, broadcast joins cannot be outer,
    and ``copy=True`` / ``sort=False`` are the only supported values.

    Raises TypeError for a bad ``right`` type and ValueError (or an
    unsupported-feature error) for invalid options.
    """
    import pandas

    from .frame import Frame

    if not isinstance(right, (Frame, pandas.Series, pandas.DataFrame)):
        raise TypeError(f"Can only merge Series or DataFrame objects, "
                        f"a {type(right)} was passed")

    right = left._ensure_valid_frame(right)
    if right._is_series and right.name is None:
        raise ValueError("Cannot merge a Series without a name")

    # Checks overlap between column names
    if not util.is_tuple(suffixes) or len(suffixes) != 2:
        raise ValueError(f"Invalid suffixes: {suffixes}")
    if any(not isinstance(suffix, str) for suffix in suffixes):
        # BUG FIX: this message was missing the f-prefix, so users saw
        # the literal text "{suffixes}" instead of the actual value.
        raise ValueError(f"Suffixes must be strings, but got {suffixes}")
    l_suffix, r_suffix = suffixes

    left_columns = left._get_columns()
    right_columns = right._get_columns()
    intersection = left_columns.intersection(right_columns)
    if len(intersection) != 0 and not (bool(l_suffix) or bool(r_suffix)):
        raise ValueError(
            f"columns overlap but no suffix specified: {intersection}")

    # Perform Legate specific checks
    method = kwargs.get("method", "hash")
    if how not in (
        "inner",
        "left",
        "outer",
    ):
        raise err._unsupported_error("how", how)
    if how == "outer" and method == "broadcast":
        raise ValueError("Broadcast join cannot be used for outer join")
    if copy not in (True, ):
        raise err._unsupported_error("copy", copy)
    if sort not in (False, ):
        raise err._unsupported_error("sort", sort)

    self._left = left
    self._right = right
    self._how = how
    self._on = on
    self._left_on = left_on
    self._right_on = right_on
    self._left_index = left_index
    self._right_index = right_index
    self._left_columns = left_columns
    self._right_columns = right_columns
    self._sort = sort
    self._suffixes = suffixes
    self._copy = copy
    self._method = method
def drop(
    self,
    labels=None,
    axis=0,
    index=None,
    columns=None,
    level=None,
    inplace=False,
    errors="raise",
):
    """Drop rows or columns by label.

    Either pass ``labels`` together with ``axis``, or pass ``index``
    and/or ``columns`` directly.  Column drops are applied first via a
    column re-slice; row drops are applied via a boolean selection and
    currently support only a single (scalar) label.

    Returns a new object unless ``inplace`` is true, in which case
    this object's frame is updated and ``None`` is returned.

    Raises ValueError for conflicting/missing arguments and KeyError
    for labels that fail validation.  (``errors`` is currently not
    consulted for row drops — TODO confirm intended semantics.)
    """
    # Default to "nothing to drop" on both axes; the branches below
    # fill in only the axis that was actually requested.  BUG FIX:
    # previously some of these names were left unbound on certain
    # paths (e.g. a Series given only 'columns', or no arguments at
    # all), causing a NameError further down.
    row_labels, col_labels = [], []
    row_level = col_level = None

    # If 'labels' is set, we use 'axis' to determine the lookup axis
    if labels is not None:
        if index is not None or columns is not None:
            raise ValueError(
                "Cannot specify both 'labels' and 'index'/'columns'")
        axis = self._get_axis_number(axis)
        if axis == 0:
            row_labels = util.to_list_if_scalar(labels)
            row_level = level
        else:
            col_labels = util.to_list_if_scalar(labels)
            col_level = level

    # Otherwise, we use 'columns' and 'index' as lookup labels
    else:
        if index is None and columns is None:
            # Previously this fell through to a NameError; raise the
            # pandas-compatible error instead.
            raise ValueError(
                "Need to specify at least one of 'labels', "
                "'index' or 'columns'")
        if not self._is_series and columns is not None:
            col_labels = util.to_list_if_scalar(columns)
            col_level = level
        if index is not None:
            row_labels = util.to_list_if_scalar(index)
            row_level = level

    def _validate_labels(index, labels, level, membership=True):
        # Tuple keys may not be deeper than the index; optionally also
        # verify membership (restricted to 'level' when given).
        for label in labels:
            if not util.is_tuple(label):
                continue
            if len(label) > index.nlevels:
                raise KeyError(f"Key length ({len(label)}) exceeds "
                               f"index depth ({index.nlevels})")
        if not membership:
            return
        if level is not None:
            level = index._get_level_number(level)
            index = index.get_level_values(level)
        for label in labels:
            if label not in index:
                raise KeyError(label)

    new_self = self.copy(deep=False)

    # Drop columns first as that's easier
    if len(col_labels) > 0:
        assert not new_self._is_series
        _validate_labels(new_self.columns, col_labels, col_level)
        columns = new_self.columns.drop(col_labels, level)
        idxr = new_self.columns.get_indexer_for(columns)
        new_self = new_self._slice_columns(idxr)

    # Then drop rows using selection
    if len(row_labels) > 0:
        _validate_labels(new_self._raw_index, row_labels, row_level, False)

        if len(row_labels) > 1:
            raise err._unsupported_error("Label must be a scalar for now")
        row_label = row_labels[0]
        if level is not None and not is_scalar(row_label):
            raise ValueError("label must be a scalar when 'level' is set")
        if util.is_tuple(row_label) and len(row_label) == 0:
            # BUG FIX: this raise statement was broken across two lines
            # mid-expression in the original source.
            raise ValueError("label must not be empty")

        mask = new_self._raw_index._get_drop_mask_for(row_label, level)
        new_frame = new_self._frame.select(mask)
        new_self._frame = new_frame

    if inplace:
        if self._is_series:
            self._update_frame(new_self._frame)
        else:
            self._update_frame(new_self._frame, columns=new_self.columns)
    else:
        return new_self