Esempio n. 1
0
    def __getitem__(self, columns: List[Any]) -> "DataFrame":
        """Get certain columns of the dataframe as a new dataframe

        :param columns: the columns to select
        :raises FugueDataFrameOperationError: if ``columns`` are not strictly contained
          by this dataframe or it is empty
        :return: a new dataframe with these columns
        """
        try:
            # Validate the selection against the schema before selecting data.
            schema = self.schema.extract(columns)
        except Exception as e:
            # Chain explicitly so the schema failure is reported as the direct
            # cause of the operation error.
            raise FugueDataFrameOperationError(e) from e
        if len(schema) == 0:
            raise FugueDataFrameOperationError(
                "must select at least one column")
        return self._select_cols(columns)
Esempio n. 2
0
 def _get_altered_schema(self, subschema: Any) -> Schema:
     """Build the full schema that results from altering some column types

     :param subschema: schema-like object giving the new types of a subset of
       this dataframe's columns
     :raises FugueDataFrameOperationError: if any column of ``subschema`` is
       not in this dataframe's schema
     :raises NotImplementedError: if a type change involves a struct, list or
       binary type on either side
     :return: this dataframe's schema with the types from ``subschema`` applied
     """

     def _is_unsupported(tp: Any) -> bool:
         # Struct, list and binary types are rejected on both sides of a change.
         return (
             pa.types.is_struct(tp)
             or pa.types.is_list(tp)
             or pa.types.is_binary(tp)
         )

     altered = Schema(subschema)
     assert_or_throw(
         altered.names in self.schema,
         lambda: FugueDataFrameOperationError(
             f"{altered.names} are not all in {self.schema}"
         ),
     )
     for name, field in altered.items():
         source_type = self.schema[name].type
         target_type = field.type
         if source_type.equals(target_type):
             # Unchanged columns need no convertibility check.
             continue
         assert_or_throw(
             not _is_unsupported(source_type),
             lambda: NotImplementedError(f"can't convert from {source_type}"),
         )
         assert_or_throw(
             not _is_unsupported(target_type),
             lambda: NotImplementedError(f"can't convert to {target_type}"),
         )
     # Keep the original column order, swapping in the altered fields.
     return Schema(
         [(name, altered.get(name, field)) for name, field in self.schema.items()]
     )
Esempio n. 3
0
 def rename(self, columns: Dict[str, str]) -> "DataFrame":
     """Rename columns and return a new dataframe

     :param columns: mapping from existing column names to new column names
     :raises FugueDataFrameOperationError: if the schema rejects the renaming
     :return: a new :class:`ArrowDataFrame` holding the same column arrays
       under the renamed schema
     """
     try:
         schema = self.schema.rename(columns)
     except Exception as e:
         # Chain explicitly so the schema failure is reported as the direct
         # cause of the operation error.
         raise FugueDataFrameOperationError(e) from e
     # The existing column arrays are re-wrapped with the renamed schema.
     df = pa.Table.from_arrays(self.native.columns, schema=schema.pa_schema)
     return ArrowDataFrame(df)
Esempio n. 4
0
 def rename(self, columns: Dict[str, str]) -> "DataFrame":
     """Rename columns and return a new dataframe

     :param columns: mapping from existing column names to new column names
     :raises FugueDataFrameOperationError: if the schema rejects the renaming
     :return: a new :class:`DaskDataFrame` with the renamed columns
     """
     try:
         schema = self.schema.rename(columns)
     except Exception as e:
         # Chain explicitly so the schema failure is reported as the direct
         # cause of the operation error.
         raise FugueDataFrameOperationError(e) from e
     df = self.native.rename(columns=columns)
     # The renamed schema is passed along with ``type_safe=False``.
     return DaskDataFrame(df, schema, type_safe=False)
Esempio n. 5
0
 def rename(self, columns: Dict[str, str]) -> "DataFrame":
     """Rename columns and return a new dataframe

     :param columns: mapping from existing column names to new column names
     :raises FugueDataFrameOperationError: if the schema rejects the renaming
     :return: a new :class:`PandasDataFrame` with the renamed columns
     """
     try:
         schema = self.schema.rename(columns)
     except Exception as e:
         # Chain explicitly so the schema failure is reported as the direct
         # cause of the operation error.
         raise FugueDataFrameOperationError(e) from e
     df = self.native.rename(columns=columns)
     return PandasDataFrame(df, schema, pandas_df_wrapper=True)
Esempio n. 6
0
    def drop(self, columns: List[str]) -> "DataFrame":
        """Drop certain columns and return a new dataframe

        :param columns: columns to drop
        :raises FugueDataFrameOperationError: if
          ``columns`` are not strictly contained by this dataframe, or it is the
          entire dataframe columns
        :return: a new dataframe removing the columns
        """
        try:
            # Compute the remaining schema first to validate the request.
            schema = self.schema - columns
        except Exception as e:
            # Chain explicitly so the schema failure is reported as the direct
            # cause of the operation error.
            raise FugueDataFrameOperationError(e) from e
        if len(schema) == 0:
            raise FugueDataFrameOperationError(
                "can't remove all columns of a dataframe")
        return self._drop_cols(columns)
Esempio n. 7
0
 def rename(self, columns: Dict[str, str]) -> "SparkDataFrame":
     """Rename columns and return a new dataframe

     :param columns: mapping from existing column names to new column names
     :raises FugueDataFrameOperationError: if the schema rejects the renaming
     :return: a new :class:`SparkDataFrame` with the renamed columns
     """
     try:
         # Called for validation only; the resulting schema is not used here.
         self.schema.rename(columns)
     except Exception as e:
         # Chain explicitly so the schema failure is reported as the direct
         # cause of the operation error.
         raise FugueDataFrameOperationError(e) from e
     # Rename all columns positionally in one step. Chaining
     # ``withColumnRenamed`` applies the mapping one pair at a time, so a swap
     # like ``{"a": "b", "b": "a"}`` or a chain like ``{"a": "b", "b": "c"}``
     # would rename an already-renamed column again.
     new_names = [columns.get(name, name) for name in self.native.columns]
     return SparkDataFrame(self.native.toDF(*new_names))
Esempio n. 8
0
 def rename(self, columns: Dict[str, str]) -> "DataFrame":
     """Rename columns and return a new dataframe

     :param columns: mapping from existing column names to new column names
     :raises FugueDataFrameOperationError: if the schema rejects the renaming
     :return: a new :class:`ArrayDataFrame` wrapping the same native data with
       the renamed schema
     """
     try:
         schema = self.schema.rename(columns)
     except Exception as e:
         # Chain explicitly so the schema failure is reported as the direct
         # cause of the operation error.
         raise FugueDataFrameOperationError(e) from e
     # The native data is passed through as is, paired with the renamed schema.
     return ArrayDataFrame(self.native, schema)