def to_pandas(self):
    """
    Convert Frame to a pandas DataFrame, or raise an error if `pandas`
    module is not installed.

    The frame is materialized first. Each column is then turned into a
    numpy array built over the column's buffer, except for columns whose
    numpy dtype is ``object``, which are converted into a plain Python
    list. For boolean and integer columns, entries equal to the per-stype
    sentinel value listed in ``nas`` are masked out.

    Returns
    -------
    pandas.DataFrame
        A DataFrame with one column per Frame column, in the same order
        and under the same names.

    Raises
    ------
    ImportError
        If the loaded ``pandas`` module has no ``DataFrame`` attribute.
    """
    pandas = load_module("pandas")
    numpy = load_module("numpy")
    if not hasattr(pandas, "DataFrame"):  # pragma: no cover
        raise ImportError("Unsupported pandas version: `%s`"
                          % (getattr(pandas, "__version__", "???"), ))
    # Per-stype sentinel values that get masked in the resulting arrays
    # (presumably datatable's NA encodings for these types -- the minimum
    # representable value of each integer width).
    nas = {
        stype.bool8: -128,
        stype.int8: -128,
        stype.int16: -32768,
        stype.int32: -2147483648,
        stype.int64: -9223372036854775808,
    }
    self.materialize()
    srcdt = self._dt
    names = self.names
    stypes = self.stypes
    srccols = collections.OrderedDict()
    for i in range(self.ncols):
        column = srcdt.column(i)
        col_stype = stypes[i]
        dtype = col_stype.dtype
        # `numpy.bool` was deprecated in NumPy 1.20 and removed in 1.24;
        # `numpy.bool_` is available in every NumPy release. Booleans are
        # read as int8 so that the -128 sentinel can be recognised below.
        if dtype == numpy.bool_:
            dtype = numpy.int8
        if dtype == numpy.dtype("object"):
            # Variable-width types can only be represented in Numpy as
            # dtype='object'. However Numpy cannot ingest a buffer of
            # PyObject types -- getting error
            #   ValueError: cannot create an OBJECT array from memory buffer
            # Thus, the only alternative remaining is to convert such column
            # into plain Python list and pass it to Pandas like that.
            x = self[:, i].to_list()[0]
        else:
            x = numpy.frombuffer(column, dtype=dtype)
            na = nas.get(col_stype)
            if na is not None:
                x = numpy.ma.masked_equal(x, na, copy=False)
        srccols[names[i]] = x
    return pandas.DataFrame(srccols)
def tonumpy(self, stype=None):
    """
    Convert Frame into a numpy array, optionally forcing it into a
    specific stype/dtype.

    Parameters
    ----------
    stype: datatable.stype, numpy.dtype or str
        Cast datatable into this dtype before converting it into a numpy
        array.

    Returns
    -------
    The result of calling ``numpy.array`` on the frame's internal object.

    Raises
    ------
    ImportError
        If the loaded ``numpy`` module has no ``array`` attribute.
    """
    numpy = load_module("numpy")
    if not hasattr(numpy, "array"):  # pragma: no cover
        raise ImportError("Unsupported numpy version: `%s`"
                          % (getattr(numpy, "__version__", "???"), ))
    # 0 is the value used when no stype is requested (presumably meaning
    # "no override" -- confirm against use_stype_for_buffers).
    st = 0
    if stype:
        st = datatable.stype(stype).value
    self.internal.use_stype_for_buffers(st)
    try:
        res = numpy.array(self.internal)
    finally:
        # Always reset the buffer stype, even when the conversion raises,
        # so that a failed call does not leave the override in place.
        self.internal.use_stype_for_buffers(0)
    return res
def to_numpy(self, stype=None):
    """
    Convert Frame into a numpy array, optionally forcing it into a
    specific stype/dtype.

    Parameters
    ----------
    stype: datatable.stype, numpy.dtype or str
        Cast datatable into this dtype before converting it into a numpy
        array.

    Returns
    -------
    The result of calling ``numpy.array`` on the frame's internal object.

    Raises
    ------
    ImportError
        If the loaded ``numpy`` module has no ``array`` attribute.
    """
    numpy = load_module("numpy")
    if not hasattr(numpy, "array"):  # pragma: no cover
        raise ImportError("Unsupported numpy version: `%s`"
                          % (getattr(numpy, "__version__", "???"), ))
    # 0 is the value used when no stype is requested (presumably meaning
    # "no override" -- confirm against use_stype_for_buffers).
    st = 0
    if stype:
        st = datatable.stype(stype).value
    self.internal.use_stype_for_buffers(st)
    try:
        res = numpy.array(self.internal)
    finally:
        # Always reset the buffer stype, even when the conversion raises,
        # so that a failed call does not leave the override in place.
        self.internal.use_stype_for_buffers(0)
    return res