def range(self, start, stop, step):
    """
    Check ``utils_misc.normalize_range`` against plain indexing of ``self._src``.

    A ``range(start, stop, step)`` object is normalized for a sequence of
    ``len(self._src)`` elements. If the normalizer rejects the range
    (returns None), the range's endpoints are asserted to be either out of
    bounds or of mixed sign. Otherwise the returned ``(start, count, step)``
    triple is asserted to select exactly the same elements of ``self._src``
    as the original range does.

    NOTE(review): inside this body the name ``range`` is looked up in the
    global/builtin scope; this presumably relies on the function being
    defined in a class body so that it does not shadow the builtin --
    confirm against the enclosing definition.
    """
    size = len(self._src)
    rng = range(start, stop, step)
    normalized = utils_misc.normalize_range(rng, size)

    if normalized is not None:
        nstart, ncount, nstep = normalized
        assert ncount >= 0 and nstart >= 0 and nstep == step
        if ncount > 0:
            # Both the first and the last generated index must be valid.
            assert nstart < size
            assert 0 <= nstart + (ncount - 1) * nstep < size
        # Selecting through the normalized triple must match selecting
        # through the original range object.
        res1 = "".join(self._src[i] for i in rng)
        res2 = "".join(self._src[nstart + i * nstep]
                       for i in range(ncount))
        assert res1 == res2
        return

    # The range was rejected: either an endpoint falls outside of
    # [-size, size), or the endpoints have different signs.
    array = list(rng)
    first = array[0]
    last = array[-1]
    endpoints_in_bounds = -size <= first < size and -size <= last < size
    assert (not endpoints_in_bounds or
            first >= 0 > last or
            first < 0 <= last)
def _apply_columns_slice(colslice, colsdesc):
    """
    Select columns described by a slice or a range object.

    Parameters
    ----------
    colslice: slice | range
        The column selector. Slices are normalized with `normalize_slice()`,
        anything else with `normalize_range()`.

    colsdesc: sequence
        Column descriptors; each element must expose a `.name` attribute.

    Returns
    -------
    A tuple `(colnames, coltypes)`: `colnames` holds the names of the
    selected columns in selection order, and `coltypes` has one entry per
    input column, equal to `rtype.rauto.value` for selected columns and
    `rtype.rdrop.value` for all others.

    Raises
    ------
    TValueError
        If `normalize_range()` returns None, or if the normalized step is
        not strictly positive (the message says "negative", but a zero step
        is rejected here too).
    """
    ncols = len(colsdesc)
    if isinstance(colslice, slice):
        first, total, stride = normalize_slice(colslice, ncols)
    else:
        normalized = normalize_range(colslice, ncols)
        if normalized is None:
            raise TValueError("Invalid range iterator for a file with "
                              "%d columns: %r" % (ncols, colslice))
        first, total, stride = normalized
    if stride <= 0:
        raise TValueError("Cannot use slice/range with negative step "
                          "for column filter: %r" % colslice)

    # Everything is dropped by default; selected columns are switched back
    # to automatic type detection below.
    coltypes = [rtype.rdrop.value] * ncols
    colnames = []
    for idx in (first + k * stride for k in range(total)):
        colnames.append(colsdesc[idx].name)
        coltypes[idx] = rtype.rauto.value
    return (colnames, coltypes)
def make_rowfilter(rows, ee, _nested=False) -> RFNode:
    """
    Create an :class:`RFNode` from the provided expression.

    This is a factory function that instantiates an appropriate subclass of
    :class:`RFNode`, depending on the provided argument `rows`.

    Parameters
    ----------
    rows:
        An expression that will be converted into one of the RFNodes. This can
        have a variety of different types, see `help(Frame.__call__)` for
        more information.

    ee: EvaluationEngine
        The evaluation context within which the expression should be computed.

    _nested: bool, default False
        Internal attribute, used to avoid deep recursion when
        `make_rowfilter()` calls itself. When this attribute is False
        recursion is allowed, otherwise not: a function selector that itself
        produces another function is rejected with a TTypeError.

    Returns
    -------
    One of `AllRFNode`, `SliceRFNode`, `ArrayRFNode`, `MultiSliceRFNode`,
    `BooleanColumnRFNode`, `IntegerColumnRFNode` or `FilterExprRFNode`,
    depending on the type and content of `rows`.

    Raises
    ------
    TTypeError
        If `rows` is a bare boolean, a Frame column of unsupported type, or
        an object of a type that is not handled here.

    TValueError
        If an index, range, numpy array or Frame in `rows` is invalid for a
        datatable with `ee.dt.nrows` rows.
    """
    nrows = ee.dt.nrows
    if rows is Ellipsis or rows is None:
        return AllRFNode(ee)

    if rows is True or rows is False:
        # Note: True/False are integer objects in Python, so they must be
        # rejected before the `isinstance(rows, int)` check below.
        raise TTypeError("Boolean value cannot be used as a `rows` selector")

    if isinstance(rows, (int, slice, range)):
        rows = [rows]

    from_generator = False
    if isinstance(rows, types.GeneratorType):
        # If an iterator is given, materialize it first. Otherwise there
        # is no way to ensure that the produced indices are valid.
        rows = list(rows)
        from_generator = True

    if isinstance(rows, (list, tuple, set)):
        # `bases` receives one entry per selected index / slice start;
        # `counts` and `steps` are filled lazily, only once the first
        # multi-element slice is encountered.
        bases = []
        counts = []
        steps = []
        for i, elem in enumerate(rows):
            if isinstance(elem, int):
                if -nrows <= elem < nrows:
                    # `elem % nrows` forces the row number to become positive
                    bases.append(elem % nrows)
                else:
                    raise TValueError(
                        "Row `%d` is invalid for datatable with %s"
                        % (elem, plural(nrows, "row")))
            elif isinstance(elem, (range, slice)):
                if elem.step == 0:
                    raise TValueError("In %r step must not be 0" % elem)
                if not all(x is None or isinstance(x, int)
                           for x in (elem.start, elem.stop, elem.step)):
                    raise TValueError("%r is not integer-valued" % elem)
                if isinstance(elem, range):
                    res = normalize_range(elem, nrows)
                    if res is None:
                        raise TValueError(
                            "Invalid %r for a datatable with %s"
                            % (elem, plural(nrows, "row")))
                else:
                    res = normalize_slice(elem, nrows)
                start, count, step = res
                assert count >= 0
                if count == 0:
                    pass  # don't do anything
                elif count == 1:
                    bases.append(start)
                else:
                    if len(counts) < len(bases):
                        # Pad with (count=1, step=1) for the single indices
                        # accumulated so far, so the three lists line up.
                        counts += [1] * (len(bases) - len(counts))
                        steps += [1] * (len(bases) - len(steps))
                    bases.append(start)
                    counts.append(count)
                    steps.append(step)
            else:
                if from_generator:
                    raise TValueError(
                        "Invalid row selector %r generated at position %d"
                        % (elem, i))
                else:
                    raise TValueError(
                        "Invalid row selector %r at element %d of the "
                        "`rows` list" % (elem, i))
        if not counts:
            # Only individual row indices were given.
            if len(bases) == 1:
                if bases[0] == 0 and nrows == 1:
                    return AllRFNode(ee)
                return SliceRFNode(ee, bases[0], 1, 1)
            else:
                return ArrayRFNode(ee, bases)
        elif len(bases) == 1:
            # Exactly one multi-element slice.
            if bases[0] == 0 and counts[0] == nrows and steps[0] == 1:
                return AllRFNode(ee)
            else:
                return SliceRFNode(ee, bases[0], counts[0], steps[0])
        else:
            return MultiSliceRFNode(ee, bases, counts, steps)

    if is_type(rows, NumpyArray_t):
        arr = rows
        if not (len(arr.shape) == 1 or
                len(arr.shape) == 2 and min(arr.shape) == 1):
            raise TValueError("Only a single-dimensional numpy.array is allowed"
                              " as a `rows` argument, got %r" % arr)
        if len(arr.shape) == 2 and arr.shape[1] > 1:
            # Turn a row vector into a column vector.
            arr = arr.T
        if not (str(arr.dtype) == "bool" or str(arr.dtype).startswith("int")):
            raise TValueError("Either a boolean or an integer numpy.array is "
                              "expected for `rows` argument, got %r" % arr)
        # The array is either 1-D or 2-D with one dimension equal to 1, so
        # `arr.size` is its number of elements regardless of orientation.
        # (`arr.shape[-1]` would be 1 for any column-shaped 2-D array.)
        if str(arr.dtype) == "bool" and arr.size != nrows:
            raise TValueError("Cannot apply a boolean numpy array of length "
                              "%d to a datatable with %s"
                              % (arr.size, plural(nrows, "row")))
        rows = datatable.Frame(arr)
        assert rows.ncols == 1
        assert rows.ltypes[0] == ltype.bool or rows.ltypes[0] == ltype.int

    if is_type(rows, Frame_t):
        if rows.ncols != 1:
            raise TValueError("`rows` argument should be a single-column "
                              "datatable, got %r" % rows)
        col0type = rows.ltypes[0]
        if col0type == ltype.bool:
            if rows.nrows != nrows:
                s1rows = plural(rows.nrows, "row")
                s2rows = plural(nrows, "row")
                raise TValueError("`rows` datatable has %s, but applied to a "
                                  "datatable with %s" % (s1rows, s2rows))
            return BooleanColumnRFNode(ee, rows)
        elif col0type == ltype.int:
            return IntegerColumnRFNode(ee, rows)
        else:
            raise TTypeError("`rows` datatable should be either a boolean or "
                             "an integer column, however it has type %s"
                             % col0type)

    if isinstance(rows, types.FunctionType):
        if _nested:
            # A `rows` function returned yet another function: refuse to
            # recurse again, otherwise the recursion depth is unbounded.
            raise TTypeError("Unexpected result produced by the `rows` "
                             "function: %r" % (rows, ))
        return make_rowfilter(rows(f), ee, _nested=True)

    if isinstance(rows, BaseExpr):
        return FilterExprRFNode(ee, rows)

    if _nested:
        raise TTypeError("Unexpected result produced by the `rows` "
                         "function: %r" % (rows, ))
    else:
        raise TTypeError("Unexpected `rows` argument: %r" % (rows, ))
def _override_columns(self, colnames, coltypes):
    """
    Apply the user's `columns` specification to the detected columns.

    The specification is read from `self._columns`. The names of the columns
    that are kept are stored in `self._colnames`, while `coltypes` is
    modified in place: entries of columns that are not kept are set to 0
    (presumably the "drop" type code -- confirm against the reader's type
    table), and entries with an explicit type override are replaced with
    the code looked up in `_coltypes`.

    Parameters
    ----------
    colnames: list of str
        Column names, one per input column.

    coltypes: list of int
        Column type codes, same length as `colnames`. Updated in place.

    Supported kinds of `self._columns`
    ----------------------------------
    None
        Keep every column; `self._colnames` becomes `colnames` itself.
    slice / range
        Keep the columns at the selected positions (positive step only).
    set
        Keep the columns whose names are in the set; names that are never
        matched are reported through `self.logger.warning`.
    list / tuple
        One entry per column: None (skip), str (rename), stype (retype) or
        a `(name, type)` tuple.
    dict
        Maps a column name to None, Ellipsis, str or a `(name, type)` tuple;
        the `...` key, if present, supplies the entry for unlisted names
        (otherwise unlisted columns are kept unchanged).
    function with `__code__`
        Called as `f(name)`, `f(index, name)` or `f(index, name, type)`.

    Raises
    ------
    TValueError
        On an invalid range, non-positive step, wrong list length, unknown
        type override, or an unexpected value returned by the function.
    TTypeError
        If a list entry has an unsupported type.
    RuntimeError
        If `self._columns` is of a kind not handled here.
    """
    assert len(colnames) == len(coltypes)
    n = len(colnames)
    colspec = self._columns
    self._colnames = []

    if colspec is None:
        self._colnames = colnames
        return

    if isinstance(colspec, (slice, range)):
        if isinstance(colspec, slice):
            start, count, step = normalize_slice(colspec, n)
        else:
            t = normalize_range(colspec, n)
            if t is None:
                raise TValueError("Invalid range iterator for a file with "
                                  "%d columns: %r" % (n, colspec))
            start, count, step = t
        if step <= 0:
            raise TValueError("Cannot use slice/range with negative step "
                              "for column filter: %r" % colspec)
        # Membership in a `range` checks the lower bound, the upper bound
        # and the stride at once. Testing only `(i - start) % step == 0`
        # would also match indices below `start`.
        selected = range(start, start + count * step, step)
        for i, name in enumerate(colnames):
            if i in selected:
                self._colnames.append(name)
            else:
                coltypes[i] = 0
        return

    if isinstance(colspec, set):
        # Work on a copy of `colspec` in order to check whether all the
        # columns requested by the user were found, and issue a warning
        # otherwise.
        missing = set(colspec)
        for i, name in enumerate(colnames):
            if name in colspec:
                missing.discard(name)
                self._colnames.append(name)
            else:
                coltypes[i] = 0
        if missing:
            self.logger.warning(
                "Column(s) %r not found in the input file"
                % list(missing))
        return

    if isinstance(colspec, (list, tuple)):
        nn = len(colspec)
        if n != nn:
            raise TValueError("Input file contains %s, whereas `columns` "
                              "parameter specifies only %s"
                              % (plural(n, "column"), plural(nn, "column")))
        for i, entry in enumerate(colspec):
            if entry is None:
                coltypes[i] = 0
            elif isinstance(entry, str):
                self._colnames.append(entry)
            elif isinstance(entry, stype):
                self._colnames.append(colnames[i])
                coltypes[i] = _coltypes.get(entry)
            elif isinstance(entry, tuple):
                newname, newtype = entry
                self._colnames.append(newname)
                coltypes[i] = _coltypes.get(newtype)
                if not coltypes[i]:
                    raise TValueError(
                        "Unknown type %r used as an override "
                        "for column %r" % (newtype, newname))
            else:
                raise TTypeError(
                    "Entry `columns[%d]` has invalid type %r"
                    % (i, entry.__class__.__name__))
        return

    if isinstance(colspec, dict):
        for i, name in enumerate(colnames):
            if name in colspec:
                entry = colspec[name]
            else:
                # The `...` key provides the default entry; without it the
                # column is kept as-is (entry is Ellipsis).
                entry = colspec.get(..., ...)
            if entry is None:
                coltypes[i] = 0
            elif entry is Ellipsis:
                self._colnames.append(name)
            elif isinstance(entry, str):
                self._colnames.append(entry)
            else:
                assert isinstance(entry, tuple)
                newname, newtype = entry
                if newname is Ellipsis:
                    newname = name
                self._colnames.append(newname)
                coltypes[i] = _coltypes.get(newtype)
                if not coltypes[i]:
                    raise TValueError(
                        "Unknown type %r used as an override "
                        "for column %r" % (newtype, newname))
        # Without this return a dict spec would fall through to the
        # "Unknown colspec" error at the end of the function.
        return

    if callable(colspec) and hasattr(colspec, "__code__"):
        nargs = colspec.__code__.co_argcount

        if nargs == 1:
            for i in range(n):
                ret = colspec(colnames[i])
                if ret is None or ret is False:
                    coltypes[i] = 0
                elif ret is True:
                    self._colnames.append(colnames[i])
                elif isinstance(ret, str):
                    self._colnames.append(ret)
                else:
                    raise TValueError("Function passed as the `columns` "
                                      "argument was expected to return a "
                                      "`Union[None, bool, str]` but "
                                      "instead returned value %r"
                                      % (ret, ))
            return

        if nargs == 2:
            for i in range(n):
                ret = colspec(i, colnames[i])
                if ret is None or ret is False:
                    coltypes[i] = 0
                elif ret is True:
                    self._colnames.append(colnames[i])
                elif isinstance(ret, str):
                    self._colnames.append(ret)
                else:
                    raise TValueError("Function passed as the `columns` "
                                      "argument was expected to return a "
                                      "`Union[None, bool, str]` but "
                                      "instead returned value %r"
                                      % (ret, ))
            return

        if nargs == 3:
            for i in range(n):
                typ = _coltypes_strs[coltypes[i]]
                ret = colspec(i, colnames[i], typ)
                if ret is None or ret is False:
                    coltypes[i] = 0
                elif ret is True:
                    self._colnames.append(colnames[i])
                elif isinstance(ret, str):
                    self._colnames.append(ret)
                elif isinstance(ret, tuple) and len(ret) == 2:
                    newname, newtype = ret
                    self._colnames.append(newname)
                    coltypes[i] = _coltypes.get(newtype)
                else:
                    # `ret` may itself be a tuple, so it has to be wrapped
                    # before being handed to the `%` operator.
                    raise TValueError("Function passed as the `columns` "
                                      "argument was expected to return a "
                                      "`Union[None, bool, str, Tuple[str, "
                                      "Union[str, type]]]` but "
                                      "instead returned value %r"
                                      % (ret, ))
            return

    raise RuntimeError("Unknown colspec: %r"  # pragma: no cover
                       % colspec)