Example #1
    def dropna(self, axis=0, inplace=False, **kwargs):
        # Delegate to the DataFrame implementation, then pull the single column back out.
        col = _col(self.to_dataframe().dropna(axis=axis, inplace=False))
        if inplace:
            # Re-anchor this Series to the new column and drop cached pandas schema/metadata.
            anchor_wrap(col, self)
            self._jc = col._jc
            self._pandas_schema = None
            self._pandas_metadata = None
        else:
            return col
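
A minimal usage sketch (hypothetical data and column name; assumes a Koalas frame built from pandas via ks.from_pandas and a working Spark session):

    import pandas as pd
    import databricks.koalas as ks

    s = ks.from_pandas(pd.DataFrame({'x': [1.0, None, 3.0]}))['x']
    clean = s.dropna()        # new Series with the missing row filtered out
    s.dropna(inplace=True)    # or mutate s itself; nothing is returned in that case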
Example #2
    def _pd_getitem(self, key):
        # Dispatch pandas-style __getitem__ semantics onto the underlying Spark calls.
        if key is None:
            raise KeyError("none key")
        if isinstance(key, string_types):
            # Single column name: fall back to Spark's item access.
            try:
                return self._spark_getitem(key)
            except AnalysisException:
                raise KeyError(key)
        if np.isscalar(key) or isinstance(key, (tuple, string_types)):
            raise NotImplementedError(key)
        elif isinstance(key, slice):
            return self.loc[key]

        if isinstance(key, (pd.Series, np.ndarray, pd.Index)):
            raise NotImplementedError(key)
        if isinstance(key, list):
            # List of column names: select the subset via .loc.
            return self.loc[:, key]
        if isinstance(key, DataFrame):
            # TODO Should not implement alignment, too dangerous?
            return self._spark_getitem(key)
        if isinstance(key, Column):
            # TODO Should not implement alignment, too dangerous?
            # It is assumed to be only a filter, otherwise .loc should be used.
            bcol = key.cast("boolean")
            df = self._spark_filter(bcol)
            df._metadata = self._metadata
            return anchor_wrap(self, df)
        raise NotImplementedError(key)
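
_pd_getitem is the dispatcher behind the DataFrame's pandas-style [] operator. A hedged sketch of the key types it handles (made-up data; same ks.from_pandas assumption as above):

    import pandas as pd
    import databricks.koalas as ks

    kdf = ks.from_pandas(pd.DataFrame({'x': [1, 2, 3], 'y': ['a', 'b', 'c']}))

    kdf['x']             # single column name -> Spark item access, wrapped as a Series
    kdf[['x', 'y']]      # list of names -> column subset via .loc
    kdf[kdf['x'] > 1]    # boolean Column -> treated as a row filter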
Example #3
    def getField(self, name):
        if not isinstance(self.schema, StructType):
            raise AttributeError("Not a struct: {}".format(self.schema))
        else:
            fnames = self.schema.fieldNames()
            if name not in fnames:
                raise AttributeError(
                    "Field {} not found, possible values are {}".format(name, ", ".join(fnames)))
            # Anchor the extracted struct field to this column's originating DataFrame.
            return anchor_wrap(self, self._spark_getField(name))
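
getField only applies to a Series backed by a struct-typed Spark column. A sketch with a nested Row (hypothetical field names; plain PySpark setup, assuming these early Koalas builds patch Spark's classes on import, as the _spark_-prefixed delegates above suggest):

    from pyspark.sql import Row, SparkSession
    import databricks.koalas  # noqa: F401  (imported for its side effects)

    spark = SparkSession.builder.getOrCreate()
    sdf = spark.createDataFrame([Row(address=Row(city='SF', zip='94105'))])

    sdf['address'].getField('city')   # extracts the 'city' field
    # sdf['address'].getField('country') would raise AttributeError listing the valid fields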
Example #4
    def rename(self, index=None, **kwargs):
        if index is None:
            return self
        col = self._spark_alias(index)
        if kwargs.get('inplace', False):
            # Swap in the aliased column and invalidate cached pandas schema/metadata.
            self._jc = col._jc
            self._pandas_schema = None
            self._pandas_metadata = None
            return self
        else:
            return anchor_wrap(self, col)
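
A usage sketch (hypothetical names; the new label is passed as the index argument, mirroring pandas' scalar form of Series.rename):

    import pandas as pd
    import databricks.koalas as ks

    s = ks.from_pandas(pd.DataFrame({'x': [1, 2, 3]}))['x']
    s2 = s.rename('x_renamed')           # new aliased Series
    s.rename('x_renamed', inplace=True)  # or rename in place; returns s itself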
Example #5
    def reset_index(self, level=None, drop=False, name=None, inplace=False):
        if inplace and not drop:
            raise TypeError('Cannot reset_index inplace on a Series to create a DataFrame')

        if name is not None:
            df = self.rename(name).to_dataframe()
        else:
            df = self.to_dataframe()
        df = df.reset_index(level=level, drop=drop)
        if drop:
            # Dropping the index keeps this a Series; pull the single column back out.
            col = _col(df)
            if inplace:
                anchor_wrap(col, self)
                self._jc = col._jc
                self._pandas_schema = None
                self._pandas_metadata = None
            else:
                return col
        else:
            # With drop=False the former index becomes a regular column, so a DataFrame is returned.
            return df
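
A sketch of the different return shapes (made-up data; same assumptions as the earlier sketches):

    import pandas as pd
    import databricks.koalas as ks

    s = ks.from_pandas(pd.DataFrame({'x': [10, 20, 30]}))['x']
    s.reset_index()                          # index becomes a column -> a DataFrame comes back
    s.reset_index(drop=True)                 # index is discarded -> still a Series
    s.reset_index(drop=True, inplace=True)   # allowed; inplace without drop raises TypeError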
Example #6
    def astype(self, dtype):
        from databricks.koalas.typing import as_spark_type
        # Map the numpy/pandas dtype to the corresponding Spark SQL type, then cast.
        spark_type = as_spark_type(dtype)
        if not spark_type:
            raise ValueError("Type {} not understood".format(dtype))
        return anchor_wrap(self, self._spark_cast(spark_type))
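
A sketch (hypothetical data; assumes as_spark_type maps the usual Python/numpy dtypes to their Spark SQL counterparts):

    import pandas as pd
    import databricks.koalas as ks

    s = ks.from_pandas(pd.DataFrame({'x': [1, 2, 3]}))['x']
    s.astype(float)    # cast to a floating-point Spark column
    s.astype(str)      # cast to a string column
    # a dtype with no Spark equivalent would raise ValueError per the check above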
Example #7
    def __invert__(self):
        # Logical NOT: cast to boolean and compare against a literal False.
        return anchor_wrap(self, self.astype(bool) == F._spark_lit(False))
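
A sketch: ~ on a boolean Series is implemented as the comparison against a literal False shown above (made-up data):

    import pandas as pd
    import databricks.koalas as ks

    s = ks.from_pandas(pd.DataFrame({'flag': [True, False, True]}))['flag']
    ~s    # elementwise logical NOT -> False, True, False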
Example #8
    def __getattr__(self, item):
        if item.startswith("__") or item.startswith("_pandas_") or item.startswith("_spark_"):
            raise AttributeError(item)
        # Treat attribute access as struct field access on this column.
        return anchor_wrap(self, self.getField(item))
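
Attribute access on a Series is routed to getField, so struct fields read like attributes. A sketch (same hypothetical nested data as in the getField sketch):

    from pyspark.sql import Row, SparkSession
    import databricks.koalas  # noqa: F401

    spark = SparkSession.builder.getOrCreate()
    sdf = spark.createDataFrame([Row(address=Row(city='SF', zip='94105'))])

    sdf['address'].city    # same as sdf['address'].getField('city')
    # dunder, _pandas_* and _spark_* names are excluded and raise AttributeError instead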
Example #9
    def __getitem__(self, key):
        # Delegate item access to Spark's native Column lookup and keep the result anchored.
        return anchor_wrap(self, self._spark_getitem(key))
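
Item access on a Series simply delegates to Spark's native Column lookup and re-anchors the result. A sketch with a struct column (hypothetical data, as in the getField sketch):

    from pyspark.sql import Row, SparkSession
    import databricks.koalas  # noqa: F401

    spark = SparkSession.builder.getOrCreate()
    sdf = spark.createDataFrame([Row(address=Row(city='SF', zip='94105'))])

    sdf['address']['city']    # bracket lookup on the struct field, anchored to sdf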
Example #10
    def isnull(self):
        # For floating-point columns NaN also counts as missing, not just SQL NULL.
        if isinstance(self.schema[self.name].dataType, (FloatType, DoubleType)):
            return anchor_wrap(self, self._spark_isNull() | F._spark_isnan(self))
        else:
            return anchor_wrap(self, self._spark_isNull())
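
A sketch showing the float-vs-other split (made-up data):

    import pandas as pd
    import databricks.koalas as ks

    kdf = ks.from_pandas(pd.DataFrame({'x': [1.0, None, 3.0], 'y': ['a', None, 'c']}))
    kdf['x'].isnull()    # float column: both SQL NULL and NaN count as missing
    kdf['y'].isnull()    # string column: only SQL NULL counts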
Example #11
    def _index_columns(self):
        # Wrap each index field of the underlying Spark DataFrame as a column anchored to self.
        return [
            anchor_wrap(self, self._spark_getitem(field))
            for field in self._metadata.index_fields
        ]
Example #12
    def __getattr__(self, key):
        if key.startswith("__") or key.startswith("_pandas_") or key.startswith("_spark_"):
            raise AttributeError(key)
        # Treat attribute access as column selection on the DataFrame.
        return anchor_wrap(self, self._spark_getattr(key))
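
A sketch of attribute-style column access on the DataFrame (hypothetical column name):

    import pandas as pd
    import databricks.koalas as ks

    kdf = ks.from_pandas(pd.DataFrame({'x': [1, 2, 3]}))
    kdf.x    # resolves to the 'x' column, anchored to kdf
    # names starting with '__', '_pandas_' or '_spark_' are never treated as columns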
Example #13
    def get(self, key, default=None):
        try:
            return anchor_wrap(self, self._pd_getitem(key))
        except (KeyError, ValueError, IndexError):
            # Mirror pandas .get(): fall back to the default when the lookup fails.
            return default
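
A sketch of the pandas-style .get fallback (hypothetical column names):

    import pandas as pd
    import databricks.koalas as ks

    kdf = ks.from_pandas(pd.DataFrame({'x': [1, 2, 3]}))
    kdf.get('x')                      # the 'x' column as a Series
    kdf.get('missing', default=-1)    # lookup fails -> the default is returned instead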