예제 #1
0
 def _apply_schema(self, pdf: pd.DataFrame,
                   schema: Optional[Schema]) -> Tuple[pd.DataFrame, Schema]:
     """Reconcile a pandas dataframe with an optional target schema.

     Columns whose index dtype is ``object`` are treated as named. In that
     case a schema is derived from the frame with ``_input_schema``; when no
     schema was requested, or the requested one equals the derived one, the
     frame is returned as is. Otherwise the frame is projected onto the
     requested column names.

     When the columns are not named, ``schema`` is mandatory, its length must
     equal the number of columns, and its names are assigned to
     ``pdf.columns`` (this relabels the frame object that was passed in).

     :param pdf: the pandas dataframe to check and normalize
     :param schema: the requested schema, or None to rely on ``pdf`` itself
     :return: a ``(dataframe, schema)`` pair; unless the early return is
         taken, the dataframe is the result of ``PD_UTILS.enforce_type``
     """
     PD_UTILS.ensure_compatible(pdf)
     named_columns = pdf.columns.dtype == "object"
     if not named_columns:
         # Without column names the caller's schema is the only source of
         # names, so it must exist and cover every column.
         schema = _input_schema(schema).assert_not_empty()
         count_matches = pdf.shape[1] == len(schema)
         mismatch = ValueError(
             f"Pandas datafame column count doesn't match {schema}")
         # presumably raises ``mismatch`` when the check fails -- the helper
         # is defined outside this block
         assert_or_throw(count_matches, mismatch)
         pdf.columns = schema.names
     else:
         inferred = _input_schema(pdf)
         if schema is None or inferred == schema:
             # Nothing to convert: hand back the frame untouched.
             return pdf, inferred.assert_not_empty()
         # Keep only the requested columns, in the requested order.
         pdf = pdf[schema.assert_not_empty().names]
     typed = PD_UTILS.enforce_type(pdf, schema.pa_schema, null_safe=True)
     return typed, schema
예제 #2
0
 def __init__(  # noqa: C901
     self,
     df: Any = None,
     schema: Any = None,
     metadata: Any = None,
     pandas_df_wrapper: bool = False,
 ):
     """Initialize the dataframe from one of several kinds of input.

     :param df: None, a ``PandasDataFrame``, a pandas ``DataFrame`` or
         ``Series``, or any other iterable usable as rows for
         ``pd.DataFrame``
     :param schema: schema-like object passed through ``_input_schema``;
         required when ``df`` is None or a generic iterable, and discarded
         when ``df`` is a ``PandasDataFrame``
     :param metadata: forwarded unchanged to the parent initializer
     :param pandas_df_wrapper: when true and a schema is supplied together
         with a pandas object, ``_apply_schema`` is skipped and the object
         is stored as given
     :raises FugueDataFrameInitError: wraps any ``Exception`` raised while
         initializing, including the ``ValueError`` for unsupported input
     """
     try:
         if df is None:
             # No data at all: a non-empty schema is mandatory, and the
             # empty list is then handled by the iterable branch below.
             schema = _input_schema(schema).assert_not_empty()
             df = []
         needs_apply_schema = True
         if isinstance(df, PandasDataFrame):
             # TODO: This is useless if in this way and wrong
             pdf, schema = df.native, None
         elif isinstance(df, (pd.DataFrame, pd.Series)):
             pdf = df.to_frame() if isinstance(df, pd.Series) else df
             if schema is not None:
                 schema = _input_schema(schema)
             if pandas_df_wrapper and schema is not None:
                 needs_apply_schema = False
         elif isinstance(df, Iterable):
             schema = _input_schema(schema).assert_not_empty()
             raw = pd.DataFrame(df, columns=schema.names)
             pdf = PD_UTILS.enforce_type(
                 raw, schema.pa_schema, null_safe=True)
             if PD_UTILS.empty(pdf):
                 # For an empty frame, cast every column explicitly to the
                 # pandas dtype of its schema field.
                 for name, field in schema.items():
                     target_dtype = field.type.to_pandas_dtype()
                     pdf[name] = pdf[name].astype(target_dtype)
             needs_apply_schema = False
         else:
             raise ValueError(f"{df} is incompatible with PandasDataFrame")
         if needs_apply_schema:
             pdf, schema = self._apply_schema(pdf, schema)
         super().__init__(schema, metadata)
         self._native = pdf
     except Exception as err:
         raise FugueDataFrameInitError from err
예제 #3
0
 def as_pandas(self) -> pd.DataFrame:
     """Build a pandas DataFrame from this dataframe's rows.

     The rows come from ``self.as_array()`` and the column labels from
     ``self.schema.names``. The frame is then passed through
     ``PD_UTILS.enforce_type`` with ``self.schema.pa_schema`` and
     ``null_safe=True``, and that result is returned.
     """
     rows = self.as_array()
     frame = pd.DataFrame(rows, columns=self.schema.names)
     typed = PD_UTILS.enforce_type(
         frame, self.schema.pa_schema, null_safe=True)
     return typed
예제 #4
0
 def __init__(self, data, schema, enforce=False):
     """Wrap raw data as a pandas frame described by a schema expression.

     :param data: anything accepted as the data argument of ``pd.DataFrame``
     :param schema: schema expression parsed by ``expression_to_schema``
     :param enforce: passed positionally as the third argument of
         ``PD_UTILS.enforce_type``; its meaning is defined by that helper
     """
     parsed_schema = expression_to_schema(schema)
     # Column labels are taken from the parsed schema, not from the data.
     frame = pd.DataFrame(data, columns=parsed_schema.names)
     self.native = PD_UTILS.enforce_type(frame, parsed_schema, enforce)
     self.schema = parsed_schema