def _get_column(self, column: Column, batch: RecordBatch) -> Union[np.ndarray, pa.Array]:
    """Fetch ``column`` from ``batch`` in the representation this object uses.

    When ``self._is_numpy_function`` is truthy the column is read through
    ``batch.get_np_column``; otherwise it is read through
    ``batch.get_pa_column``.

    :param column: the column to look up in ``batch``.
    :param batch: the batch the column is read from.
    :return: whatever the selected accessor of ``batch`` returns.
    """
    # Pick the accessor first, then perform a single call.
    fetch = batch.get_np_column if self._is_numpy_function else batch.get_pa_column
    return fetch(column)
def _kernel(self, batch: RecordBatch, arguments: Tuple) -> RecordBatch:
    """Build the output batch from the evaluated ``arguments``.

    The arguments are first passed through ``self._repeat_scalars`` and the
    result is checked with ``self._ensure_equal_arrays_size``. The column
    names come from ``self._get_column_names()``.

    If ``self._keep_input_table`` is truthy, the new columns are appended
    after the existing columns of ``batch``; otherwise the result contains
    only the new columns.

    :param batch: the input batch whose columns may be carried over.
    :param arguments: the evaluated arguments that become the new columns.
    :return: a ``RecordBatch`` created with ``RecordBatch.from_arrays``.
    """
    new_columns = self._repeat_scalars(arguments)
    self._ensure_equal_arrays_size(new_columns)
    new_names = self._get_column_names()

    if not self._keep_input_table:
        return RecordBatch.from_arrays(new_columns, new_names)

    # Input columns come first, followed by the freshly computed ones.
    all_columns = (*batch.columns, *new_columns)
    all_names = (*batch.column_names, *new_names)
    return RecordBatch.from_arrays(all_columns, all_names)
def next(self) -> 'Iterable[RecordBatch]':
    """Yield every batch produced by ``self._reader`` as a ``RecordBatch``.

    Batches are pulled one at a time with
    ``self._reader.read_next_batch()``; iteration ends as soon as that call
    raises ``StopIteration``.

    This is a generator function, so the return annotation describes an
    iterable of batches rather than a single ``RecordBatch``.

    :return: a generator yielding one ``RecordBatch`` per batch read.
    """
    while True:
        try:
            raw_batch = self._reader.read_next_batch()
        except StopIteration:
            # The reader is exhausted; finish the generator normally.
            return
        # Wrapping happens outside the ``try`` so that only the reader's
        # end-of-stream signal is swallowed.
        yield RecordBatch(raw_batch)
def next(self) -> Iterable[RecordBatch]:
    """Feed every parent batch into ``self._sort_op`` and yield the sorted result.

    For each batch of the parent operator, ``self._process_arguments`` is
    called, after which ``self._expressions`` is read as a sequence of
    ``(name, values)`` pairs (presumably filled by that call; verify
    against ``_process_arguments``). The values are appended to the batch
    as additional columns, the schema is checked with
    ``self._verify_bool_columns``, and the extended batch is handed to
    ``self._sort_op``. ``self._expressions`` is cleared after every batch.

    Once the parent is exhausted, a single ``RecordBatch`` wrapping
    ``self._sort_op.sorted()`` is yielded and ``self._sort_op`` is deleted.

    :return: a generator yielding exactly one sorted ``RecordBatch``.
    """
    for input_batch in self._parent_operator.next():
        self._process_arguments(self._arguments, batch=input_batch)

        extra_names = tuple(entry[0] for entry in self._expressions)
        extra_values = tuple(entry[1] for entry in self._expressions)
        extended = RecordBatch.from_arrays(
            (*input_batch.columns, *extra_values),
            (*input_batch.column_names, *extra_names),
        )

        # Remove, once sorting by boolean columns is supported by Arrow
        self._verify_bool_columns(extended.get_schema())
        self._sort_op.next(extended.get_batch())
        self._expressions.clear()

    yield RecordBatch(self._sort_op.sorted())
    # Runs only when the consumer resumes the generator after the yield.
    del self._sort_op
def next(self) -> Iterable[RecordBatch]:
    """Feed every parent batch into ``self.agg_obj`` and yield its result.

    ``self.agg_obj`` is initialised through ``self._init_agg_obj`` with the
    first batch for which it is still falsy. Every batch is then passed to
    ``self.agg_obj.next``.

    After the parent operator is exhausted, one ``RecordBatch`` wrapping
    ``self.agg_obj.result()`` is yielded if ``self.agg_obj`` is truthy.
    Finally the ``agg_obj`` attribute is deleted.

    :return: a generator yielding at most one ``RecordBatch``.
    """
    for input_batch in self._parent_operator.next():
        if not self.agg_obj:
            # Lazily set up the aggregation from the first batch seen.
            self._init_agg_obj(input_batch)
        self.agg_obj.next(input_batch.get_batch())

    if self.agg_obj:
        aggregated = self.agg_obj.result()
        yield RecordBatch(aggregated)

    del self.agg_obj
def _eval_expression(self, expression: 'VectorizedExpression', batch: RecordBatch) -> Any:
    """Evaluate ``expression`` against ``batch``, reusing a shared column if present.

    If the expression reports itself as shared and ``batch`` already has a
    column under the expression's shared id, that column is returned via
    ``self._get_column``. In every other case evaluation is delegated to
    the parent class implementation.

    :param expression: the expression to evaluate.
    :param batch: the batch the expression is evaluated on.
    :return: the existing shared column or the parent class result.
    """
    # Short-circuit order matters: the shared id is only queried for
    # expressions that are shared.
    if not expression.is_shared() or not batch.has_column(expression.get_shared_id()):
        return super()._eval_expression(expression, batch)

    # TODO is this ever invoked or already done in the planner?
    shared_column = Column(expression.get_shared_id())
    return self._get_column(shared_column, batch)
def _get_column(self, column: Column, batch: RecordBatch) -> pa.Array:
    """Return ``column`` of ``batch`` as obtained from ``batch.get_pa_column``.

    :param column: the column to look up in ``batch``.
    :param batch: the batch the column is read from.
    :return: the value returned by ``batch.get_pa_column(column)``.
    """
    pa_column = batch.get_pa_column(column)
    return pa_column
def next(self) -> 'Iterable[RecordBatch]':
    """Yield a single batch obtained from ``RecordBatch.empty_batch()``.

    This is a generator function, so the return annotation describes an
    iterable of batches rather than a single ``RecordBatch``.

    :return: a generator yielding exactly one ``RecordBatch``.
    """
    yield RecordBatch.empty_batch()
def next(self) -> 'Iterable[RecordBatch]':
    """Yield every batch produced by ``self._reader`` as a ``RecordBatch``.

    Batches are pulled one at a time with ``self._reader.next()``;
    iteration ends as soon as that call returns ``None``.

    This is a generator function, so the return annotation describes an
    iterable of batches rather than a single ``RecordBatch``.

    :return: a generator yielding one ``RecordBatch`` per batch read.
    """
    while True:
        raw_batch = self._reader.next()
        if raw_batch is None:
            # ``None`` is the reader's end-of-stream marker.
            return
        yield RecordBatch(raw_batch)
def _kernel(self, batch: RecordBatch, arguments: Tuple[AnyArrayLike]) -> RecordBatch:
    """Filter ``batch`` with the single evaluated argument.

    Exactly one argument is expected (checked with an ``assert``); it is
    handed unchanged to ``batch.filter``.

    :param batch: the batch to filter.
    :param arguments: a one-element tuple holding the filter argument.
    :return: the value returned by ``batch.filter``.
    """
    assert len(arguments) == 1
    filter_argument = arguments[0]
    return batch.filter(filter_argument)