Ejemplo n.º 1
0
 def __getitem__(self, arg):
     """
     Select entries of the wrapped Series (``self._sr``) by index label.

     ``arg`` may be:

     * a list, NumPy array, pandas Series, range, cudf Index or
       DeviceNDArray -- converted to a cudf Series and handled as below;
     * a cudf Series -- a boolean Series is passed straight to
       ``self._sr.iloc``; any other Series is treated as a sequence of
       labels that are looked up one at a time and appended together;
     * a single value -- resolved through ``index.find_label_range`` and
       fetched with ``iloc``;
     * a slice -- its start/stop labels are translated by
       ``index.find_label_range`` and applied with ``iloc`` (the step is
       forwarded unchanged).

     Raises
     ------
     NotImplementedError
         If ``arg`` is none of the supported label types.
     """
     from cudf.dataframe.series import Series
     from cudf.dataframe.index import Index
     if isinstance(
             arg,
             (list, np.ndarray, pd.Series, range, Index, DeviceNDArray)):
         # len() is used (rather than truthiness) because array-likes do
         # not define an unambiguous boolean value.
         if len(arg) == 0:
             # An empty selection gets an explicit int32 dtype.
             arg = Series(np.array([], dtype='int32'))
         else:
             arg = Series(arg)
     if isinstance(arg, Series):
         # ``np.bool`` was a deprecated alias of the builtin ``bool`` and
         # is absent from NumPy 1.24-1.26; comparing against ``np.bool_``
         # matches the same boolean dtype without touching that alias.
         if arg.dtype == np.bool_:
             return self._sr.iloc[arg]
         # To do this efficiently we need a solution to
         # https://github.com/rapidsai/cudf/issues/1087
         out = Series([],
                      dtype=self._sr.dtype,
                      index=self._sr.index.__class__([]))
         # Look up each label with a one-label slice and accumulate the
         # pieces, keeping their original index values.
         for s in arg:
             out = out.append(self._sr.loc[s:s], ignore_index=False)
         return out
     elif is_single_value(arg):
         # Only the first element of the returned range is needed.
         found_index = self._sr.index.find_label_range(arg, None)[0]
         return self._sr.iloc[found_index]
     elif isinstance(arg, slice):
         start_index, stop_index = self._sr.index.find_label_range(
             arg.start, arg.stop)
         return self._sr.iloc[start_index:stop_index:arg.step]
     else:
         raise NotImplementedError(
             ".loc not implemented for label type {}".format(
                 type(arg).__name__))
Ejemplo n.º 2
0
    def _downcast_to_series(self, df, arg):
        """
        "Downcast" from a DataFrame to a Series
        based on Pandas indexing rules
        """
        nrows, ncols = df.shape
        # determine the axis along which the Series is taken:
        if nrows == 1 and ncols == 1:
            if not is_single_value(arg[0]):
                axis = 1
            else:
                axis = 0
        elif nrows == 1:
            axis = 0
        elif ncols == 1:
            axis = 1
        else:
            raise ValueError("Cannot downcast DataFrame selection to Series")

        # take series along the axis:
        if axis == 1:
            return df[df.columns[0]]
        else:
            df = _normalize_dtypes(df)
            sr = df.T
            return sr[sr.columns[0]]
Ejemplo n.º 3
0
    def fillna(self, fill_value, inplace=False):
        """
        Replace null entries of this column with ``fill_value``.

        A single value is converted to ``np.datetime64`` with millisecond
        unit; anything else is converted to a column via
        ``columnops.as_column`` (with ``nan_as_null=False``).  The filled
        column has its mask replaced with ``None`` and is then handed to
        ``_mimic_inplace`` together with ``inplace``.
        """
        if not is_single_value(fill_value):
            replacement = columnops.as_column(fill_value, nan_as_null=False)
        else:
            replacement = np.datetime64(fill_value, 'ms')

        filled = cpp_replace.apply_replace_nulls(self, replacement)
        unmasked = filled.replace(mask=None)

        return self._mimic_inplace(unmasked, inplace)
Ejemplo n.º 4
0
 def _is_scalar_access(self, arg):
     """
     Determine if we are accessing a single value (scalar)
     """
     if isinstance(arg, str):
         return False
     if not hasattr(arg, '__len__'):
         return False
     for obj in arg:
         if not is_single_value(obj):
             return False
     return True
Ejemplo n.º 5
0
 def _can_downcast_to_series(self, df, arg):
     """
     This method encapsulates the logic used
     to determine whether or not the result of a loc/iloc
     operation should be "downcasted" from a DataFrame to a
     Series
     """
     nrows, ncols = df.shape
     if nrows == 1:
         if type(arg[0]) is slice:
             if not is_single_value(arg[1]):
                 return False
         dtypes = df.dtypes.values.tolist()
         all_numeric = all(
             [pd.api.types.is_numeric_dtype(t) for t in dtypes])
         all_identical = dtypes.count(dtypes[0]) == len(dtypes)
         if all_numeric or all_identical:
             return True
     if ncols == 1:
         if type(arg[1]) is slice:
             if not is_single_value(arg[0]):
                 return False
         return True
     return False
Ejemplo n.º 6
0
    def _get_column_selection(self, arg):
        """
        Translate a label-based column indexer into the columns to select.

        A single value becomes a one-element list.  A slice is expanded to
        the column labels of ``self._df`` from the slice start through the
        slice stop, inclusive; a missing start/stop defaults to the first/
        last column.  Any other indexer is returned unchanged.
        """
        if is_single_value(arg):
            return [arg]
        if not isinstance(arg, slice):
            return arg

        first = arg.start if arg.start is not None else self._df.columns[0]
        last = arg.stop if arg.stop is not None else self._df.columns[-1]

        selected = []
        collecting = False
        for label in self._df.columns:
            # Start collecting once the start label is seen ...
            if label == first:
                collecting = True
            if collecting:
                selected.append(label)
            # ... and stop scanning right after the stop label.
            if label == last:
                break
        return selected
Ejemplo n.º 7
0
 def _get_column_selection(self, arg):
     """
     Translate a column indexer into column labels by indexing
     ``self._df.columns`` with it.

     A single-value indexer yields a one-element list holding the
     indexed entry; any other indexer yields whatever indexing
     ``columns`` with it returns.
     """
     columns = self._df.columns
     if not is_single_value(arg):
         return columns[arg]
     return [columns[arg]]