def _compute_columns(self):
    """
    Build the column set for this node.

    Columns ``0 .. dt.ncols`` of the source datatable are sliced through the
    engine's row index. When the engine has a join datatable, the columns
    following its first ``len(key)`` columns are sliced through the engine's
    join index and appended to the result.

    Returns
    -------
    The object produced by ``core.columns_from_slice`` (extended in place
    with the joined columns when a join datatable is present).
    """
    engine = self._engine
    columns = core.columns_from_slice(
        self.dt.internal, engine.rowindex, 0, self.dt.ncols, 1
    )

    joined = engine.joindt
    if not joined:
        return columns

    # Skip the leading key columns of the joined datatable.
    n_keys = len(joined.key)
    joined_columns = core.columns_from_slice(
        joined.internal, engine.joinindex, n_keys, joined.ncols - n_keys, 1
    )
    columns.append_columns(joined_columns)
    return columns
def _fill_from_source(self, src, names, stypes):
    """
    Populate this Frame from ``src``, dispatching on the type of ``src``.

    The checks are performed in a fixed order; the first one that matches
    decides how the Frame is filled:

    * ``list`` -- passed to ``_fill_from_list`` (an empty list is first
      wrapped into ``[[]]``);
    * ``tuple`` / ``set`` / ``range`` -- converted to a single list column;
    * ``dict`` -- values become the columns, keys become the names;
    * ``core.DataTable`` -- passed to ``_fill_from_dt``;
    * ``str`` -- read through ``datatable.fread``;
    * ``None`` -- filled from an empty list with no names or stypes;
    * Frame, pandas DataFrame/Series, numpy array -- handled by the
      matching ``_fill_from_*`` helper;
    * ``Ellipsis`` -- filled from ``[42]`` with the name ``"?"``.

    Parameters
    ----------
    src: object
        The data source to build the Frame from.
    names: sequence of str or None
        Column names; for ``str`` and Frame sources the source's own names
        are used when this is None. Ignored for ``dict`` and ``None``
        sources.
    stypes: object
        Forwarded to ``_fill_from_list`` for list-like and dict sources.

    Raises
    ------
    TTypeError
        If ``src`` matches none of the supported kinds.
    """
    if isinstance(src, list):
        columns = [src] if len(src) == 0 else src
        self._fill_from_list(columns, names=names, stypes=stypes)
        return

    if isinstance(src, (tuple, set, range)):
        self._fill_from_list([list(src)], names=names, stypes=stypes)
        return

    if isinstance(src, dict):
        self._fill_from_list(
            list(src.values()), names=tuple(src.keys()), stypes=stypes
        )
        return

    if isinstance(src, core.DataTable):
        self._fill_from_dt(src, names=names)
        return

    if isinstance(src, str):
        loaded = datatable.fread(src)
        if names is None:
            names = loaded.names
        self._fill_from_dt(loaded.internal, names=names)
        return

    if src is None:
        self._fill_from_list([], names=None, stypes=None)
        return

    if is_type(src, Frame_t):
        if names is None:
            names = src.names
        sliced = core.columns_from_slice(src.internal, None, 0, src.ncols, 1)
        self._fill_from_dt(sliced.to_datatable(), names=names)
        return

    if is_type(src, PandasDataFrame_t, PandasSeries_t):
        self._fill_from_pandas(src, names)
        return

    if is_type(src, NumpyArray_t):
        self._fill_from_numpy(src, names=names)
        return

    if src is Ellipsis:
        self._fill_from_list([42], "?", None)
        return

    raise TTypeError("Cannot create Frame from %r" % src)
def execute(self):
    """
    Sort the engine's datatable by the group-by column and record the
    resulting row index and grouping on the engine.

    If the engine already has a row index, the group-by column is first
    sliced through it into a one-column datatable, which is then sorted on
    its column 0.
    """
    engine = self._engine
    table = engine.dt.internal
    sort_col = self._col

    if engine.rowindex:
        # Materialise just the group-by column under the current row index.
        table = core.columns_from_slice(
            table, engine.rowindex, sort_col, 1, 1
        ).to_datatable()
        sort_col = 0

    new_rowindex, grouping = table.sort(sort_col, True)
    self._engine.rowindex = new_rowindex
    self._engine.groupby = grouping
    self._engine.groupby_cols = [self._col]
def execute(self):
    """
    Sort the engine's datatable by the group-by column and publish the
    resulting row index and grouping.

    If the engine already has a row index, the group-by column is first
    sliced through it into a one-column frame, which is then sorted on its
    column 0. The new row index is handed to ``f`` and to the engine; when
    the engine had a previous row index, it is also registered together
    with the new one as the final row index.
    """
    engine = self._engine
    source = engine.dt.internal
    sort_col = self._col

    if engine.rowindex:
        # Materialise just the group-by column under the current row index.
        sliced = core.columns_from_slice(
            source, engine.rowindex, sort_col, 1, 1
        )
        source = sliced.to_frame(None).internal
        sort_col = 0

    sorted_rowindex, grouping = source.sort(sort_col, True)

    # NOTE(review): `f` is a module-level name not visible in this block.
    f.set_rowindex(sorted_rowindex)
    engine.set_source_rowindex(sorted_rowindex)
    engine.clear_final_rowindex()
    if engine.rowindex:
        # Must run before engine.rowindex is overwritten below.
        engine.set_final_rowindex(sorted_rowindex, engine.rowindex)

    engine.rowindex = sorted_rowindex
    engine.groupby = grouping
    engine.groupby_cols = [self._col]
def sort(self, by):
    """
    Return a copy of this datatable ordered by a single column.

    Parameters
    ----------
    by: str or int
        Name or index of the column to sort by.

    Returns
    -------
    New datatable sorted by the provided column. The target datatable
    remains unmodified.
    """
    column = self.colindex(by)
    # The first item of the sort result is the row index to slice through.
    sort_result = self._dt.sort(column)
    return core.columns_from_slice(
        self._dt, sort_result[0], 0, self.ncols, 1
    ).to_frame(self.names)
def sort(self, *cols):
    """
    Return a copy of this datatable ordered by one or more columns.

    Parameters
    ----------
    cols: List[str | int]
        Names or indices of the columns to sort by, given either as
        separate arguments or as a single list. If no columns are given,
        the Frame will be sorted on all columns.

    Returns
    -------
    New datatable sorted by the provided column(s). The target datatable
    remains unmodified.
    """
    if cols:
        # A lone list argument is unpacked as the list of sort columns.
        single_list = len(cols) == 1 and isinstance(cols[0], list)
        targets = cols[0] if single_list else cols
        indexes = [self.colindex(target) for target in targets]
    else:
        indexes = list(range(self.ncols))

    # The first item of the sort result is the row index to slice through.
    sort_result = self._dt.sort(*indexes)
    return core.columns_from_slice(
        self._dt, sort_result[0], 0, self.ncols, 1
    ).to_frame(self.names)
def _compute_columns(self):
    """
    Build the column set for this node.

    Slices the source datatable through the engine's row index, using this
    node's ``_start``, ``_count`` and ``_step`` as the column slice.

    Returns
    -------
    The object produced by ``core.columns_from_slice``.
    """
    return core.columns_from_slice(
        self.dt.internal,
        self._engine.rowindex,
        self._start,
        self._count,
        self._step,
    )