예제 #1
0
 def from_pandas(self, pdf):
     """Build a DataFrame from pandas data through ``self.createDataFrame``, keeping the index.

     The index of ``pdf`` is moved into regular columns with ``reset_index()``, the columns
     are renamed to ``Metadata.from_pandas(pdf).all_fields``, and the metadata object is
     attached to the result as ``_metadata``.

     :param pdf: :class:`pandas.DataFrame` or :class:`pandas.Series`
     :return: the object returned by ``self.createDataFrame`` for a DataFrame input; for a
         Series input, that object passed through ``_col``.
     """
     if not isinstance(pdf, pd.Series):
         meta = Metadata.from_pandas(pdf)
         flattened = pdf.reset_index()
         flattened.columns = meta.all_fields
         result = self.createDataFrame(flattened)
         result._metadata = meta
         return result
     # A Series is wrapped in a one-column DataFrame and converted recursively.
     as_frame = pd.DataFrame(pdf)
     return _col(self.from_pandas(as_frame))
예제 #2
0
파일: frame.py 프로젝트: lzqkean/koalas
 def _init_from_pandas(self, pdf, *args):
     """Initialize this frame from a pandas DataFrame using an explicitly built Spark schema.

     The index of ``pdf`` is moved into regular columns with ``reset_index()`` and the
     columns are renamed to ``Metadata.from_pandas(pdf).all_fields``. One ``StructField`` is
     created per column; a field is marked nullable only when its column contains at least
     one null value. The resulting Spark DataFrame and the metadata are handed to
     ``self._init_from_spark``.

     :param pdf: :class:`pandas.DataFrame` to load.
     :param args: accepted but not used by this method.
     """
     metadata = Metadata.from_pandas(pdf)
     reset_index = pdf.reset_index()
     reset_index.columns = metadata.all_fields
     # ``DataFrame.items()`` is used instead of ``iteritems()``: the latter was deprecated
     # in pandas 1.5 and removed in pandas 2.0, while ``items()`` yields the same pairs.
     schema = StructType([StructField(name, infer_pd_series_spark_type(col),
                                      nullable=bool(col.isnull().any()))
                          for name, col in reset_index.items()])
     for name, col in reset_index.items():
         dt = col.dtype
         # Datetime columns (naive or tz-aware) are left untouched.
         if is_datetime64_dtype(dt) or is_datetime64tz_dtype(dt):
             continue
         # Swap NaN for None; presumably so Spark receives nulls instead of NaN -- confirm.
         reset_index[name] = col.replace({np.nan: None})
     self._init_from_spark(default_session().createDataFrame(reset_index, schema=schema),
                           metadata)
예제 #3
0
def from_pandas(pdf):
    """Convert pandas data to a DataFrame of the default session, keeping the index.

    Unlike calling the session's ``createDataFrame()`` on the pandas object directly, the
    index is first turned into regular columns (``reset_index()``), the columns are renamed
    to ``Metadata.from_pandas(pdf).all_fields``, and the metadata is stored on the result as
    ``_metadata``.

    :param pdf: :class:`pandas.DataFrame` or :class:`pandas.Series`
    :return: the created DataFrame; for a Series input, that DataFrame passed through
        ``_col``.
    """
    if not isinstance(pdf, pd.Series):
        meta = Metadata.from_pandas(pdf)
        flattened = pdf.reset_index()
        flattened.columns = meta.all_fields
        session = default_session()
        result = session.createDataFrame(flattened)
        result._metadata = meta
        return result
    # A Series is wrapped in a one-column DataFrame and converted recursively.
    as_frame = pd.DataFrame(pdf)
    return _col(from_pandas(as_frame))
예제 #4
0
 def _init_from_pandas(self, pdf, *args):
     """Initialize this frame from a pandas DataFrame, keeping its index as columns.

     The index of ``pdf`` is moved into regular columns with ``reset_index()``, the columns
     are renamed to ``Metadata.from_pandas(pdf).all_fields``, and the Spark DataFrame created
     by the default session is passed to ``self._init_from_spark`` along with the metadata.

     :param pdf: :class:`pandas.DataFrame` to load.
     :param args: accepted but not used by this method.
     """
     meta = Metadata.from_pandas(pdf)
     flattened = pdf.reset_index()
     flattened.columns = meta.all_fields
     spark_df = default_session().createDataFrame(flattened)
     self._init_from_spark(spark_df, meta)