def compute_projection_scalar_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    **kwargs,
):
    """Evaluate a named scalar expression as one column of a projection.

    Parameters
    ----------
    expr
        Scalar expression to evaluate; its ``_name`` must not be None.
    parent
        Enclosing operation; only ``parent.table.op()`` is consulted.
    data
        Frame the projection is computed against.
    scope : Scope
        Scope to extend with one entry per root table of ``expr``.
    timecontext : Optional[TimeContext]
        Attached to every scope entry created here.
    kwargs : dict
        Forwarded unchanged to ``execute``.

    Returns
    -------
    The column called ``expr._name`` taken from ``data.assign(...)`` after
    the executed scalar has been assigned under that name.
    """
    column_name = expr._name
    assert column_name is not None, 'Scalar selection name is None'

    node = expr.op()
    parent_node = parent.table.op()
    known_columns = frozenset(data.columns)

    # One single-entry Scope per root table: the root is bound to ``data``
    # after it has been passed through the column-name remapping helpers.
    roots = node.root_tables()
    per_root_scopes = (
        Scope(
            {
                root: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_node, root, known_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for root in roots
    )
    merged_scope = scope.merge_scopes(per_root_scopes)

    value = execute(expr, scope=merged_scope, **kwargs)
    augmented = data.assign(**{column_name: value})
    return augmented[column_name]
def compute_projection_scalar_expr(
    expr,
    parent,
    data,
    scope: Optional[Scope] = None,
    timecontext: Optional[TimeContext] = None,
    **kwargs,
):
    """Evaluate a named scalar expression and broadcast it over ``data``.

    Parameters
    ----------
    expr
        Scalar expression to evaluate; its ``_name`` must not be None.
    parent
        Enclosing operation; only ``parent.table.op()`` is consulted.
    data
        Frame the projection is computed against. Its ``columns``,
        ``index`` and ``npartitions`` attributes are read.
    scope : Optional[Scope]
        Scope to extend with one entry per root table of ``expr``. When
        omitted (None) an empty ``Scope()`` is used.
    timecontext : Optional[TimeContext]
        Attached to every scope entry created here.
    kwargs : dict
        Forwarded unchanged to ``execute``.

    Returns
    -------
    The result of ``dd.from_pandas`` applied to a pandas Series named
    ``expr._name`` that repeats the scalar ``len(data.index)`` times and
    carries ``data.index``, split into ``data.npartitions`` partitions.
    """
    name = expr._name
    assert name is not None, 'Scalar selection name is None'

    op = expr.op()
    parent_table_op = parent.table.op()

    data_columns = frozenset(data.columns)

    # ``scope`` defaults to None; without this fallback the default call
    # would fail with AttributeError on ``None.merge_scopes``.
    if scope is None:
        scope = Scope()

    scope = scope.merge_scopes(
        Scope(
            {
                t: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, t, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for t in op.root_tables()
    )
    scalar = execute(expr, scope=scope, **kwargs)

    # Broadcast the scalar to one value per row, aligned on data's index.
    result = pandas.Series([scalar], name=name).repeat(len(data.index))
    result.index = data.index
    return dd.from_pandas(result, npartitions=data.npartitions)
def compute_projection_column_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    """Compute one column expression of a projection.

    Plain ``ops.TableColumn`` selections are served directly from ``data``.
    If the bare column name is absent and the parent is an ``ops.Join``,
    the column is looked up under its name plus the left/right join suffix
    of the root table it belongs to. Any other expression is run through
    ``execute`` with a scope extended per root table.

    Parameters
    ----------
    expr
        Column expression; ``_name`` (if present) names the output.
    parent
        Enclosing operation; only ``parent.table.op()`` is consulted.
    data
        Frame the projection is computed against.
    scope : Scope
        Scope to extend with one entry per root table of ``expr``.
    timecontext : Optional[TimeContext]
        Attached to new scope entries and forwarded to ``execute``.
    kwargs : dict
        Forwarded unchanged to ``execute``.

    Returns
    -------
    The selected or computed column, renamed to the output name.

    Raises
    ------
    KeyError
        For a plain column missing from ``data`` when the parent is not a
        join.
    """
    output_name = getattr(expr, '_name', None)
    node = expr.op()
    parent_node = parent.table.op()

    if isinstance(node, ops.TableColumn):
        # slightly faster path for simple column selection
        column = node.name
        label = output_name or column
        if column in data:
            return data[column].rename(label)
        if isinstance(parent_node, ops.Join):
            (root,) = node.root_tables()
            left_root, right_root = ops.distinct_roots(
                parent_node.left, parent_node.right
            )
            suffix_by_root = {
                left_root: constants.LEFT_JOIN_SUFFIX,
                right_root: constants.RIGHT_JOIN_SUFFIX,
            }
            suffixed = column + suffix_by_root[root]
            return data.loc[:, suffixed].rename(label)
        raise KeyError(column)

    known_columns = frozenset(data.columns)
    roots = node.root_tables()
    per_root_scopes = (
        Scope(
            {
                root: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_node, root, known_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for root in roots
    )
    merged_scope = scope.merge_scopes(per_root_scopes)

    computed = execute(
        expr, scope=merged_scope, timecontext=timecontext, **kwargs
    )
    assert output_name is not None, 'Column selection name is None'

    if not np.isscalar(computed):
        return computed.rename(output_name)

    # Scalar result: repeat it once per row and align it on data's index.
    repeated = np.repeat(computed, len(data.index))
    broadcast = dd.from_array(repeated)
    broadcast.name = output_name
    broadcast.index = data.index
    return broadcast
def _compute_predicates(
    table_op,
    predicates,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    """Lazily compute the predicates for a table operation.

    Parameters
    ----------
    table_op : TableNode
    predicates : List[ir.ColumnExpr]
    data : pd.DataFrame
    scope : Scope
    timecontext: Optional[TimeContext]
    kwargs : dict

    Yields
    ------
    computed_predicate
        The result of ``execute`` for each predicate, in input order
        (presumably a boolean series -- confirm against ``execute``).

    Notes
    -----
    This handles the cases where the predicates are computed columns, in
    addition to the simple case of named columns coming directly from the
    input table.

    This is a generator: nothing is evaluated until it is iterated. The
    scope grows across iterations, because each predicate's additional
    scope is merged into the scope used for the following predicates.
    """
    # The column set of ``data`` does not change while iterating, so build
    # it once instead of once per predicate.
    data_columns = frozenset(data.columns)

    for predicate in predicates:
        # Map each root table of the predicate to the data so that we compute
        # predicates on the result instead of any left or right tables if the
        # Selection is on a Join. Project data to only include columns from
        # the root table.
        root_tables = predicate.op().root_tables()

        # handle suffixes
        additional_scope = Scope()
        for root_table in root_tables:
            mapping = remap_overlapping_column_names(
                table_op, root_table, data_columns
            )
            if mapping is not None:
                # Use a concrete list of labels for ``.loc`` rather than a
                # dict view, then rename the selected columns via mapping.
                selected = list(mapping.keys())
                new_data = data.loc[:, selected].rename(columns=mapping)
            else:
                new_data = data
            additional_scope = additional_scope.merge_scope(
                Scope({root_table: new_data}, timecontext)
            )

        scope = scope.merge_scope(additional_scope)
        yield execute(predicate, scope=scope, **kwargs)
def compute_projection_column_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    """Compute one column expression of a projection.

    Plain ``ops.TableColumn`` selections are served directly from ``data``.
    If the bare column name is absent and the parent is an ``ops.Join``,
    the column is looked up under its name plus the suffix returned by
    ``get_join_suffix_for_op``. Any other expression is run through
    ``execute`` with a scope extended per root table, and the outcome is
    passed through ``coerce_to_output`` together with ``data.index``.

    Parameters
    ----------
    expr
        Column expression; ``_name`` (if present) names the output.
    parent
        Enclosing operation; only ``parent.table.op()`` is consulted.
    data
        Frame the projection is computed against.
    scope : Scope
        Scope to extend with one entry per root table of ``expr``.
    timecontext : Optional[TimeContext]
        Attached to new scope entries and forwarded to ``execute``.
    kwargs : dict
        Forwarded unchanged to ``execute``.

    Returns
    -------
    The selected column (renamed), or the coerced execution result.

    Raises
    ------
    KeyError
        For a plain column missing from ``data`` when the parent is not a
        join.
    """
    output_name = getattr(expr, '_name', None)
    node = expr.op()
    parent_node = parent.table.op()

    if isinstance(node, ops.TableColumn):
        # slightly faster path for simple column selection
        column = node.name
        label = output_name or column
        if column in data:
            return data[column].rename(label)
        if isinstance(parent_node, ops.Join):
            join_suffix = get_join_suffix_for_op(node, parent_node)
            return data.loc[:, column + join_suffix].rename(label)
        raise KeyError(column)

    known_columns = frozenset(data.columns)
    roots = node.root_tables()
    per_root_scopes = (
        Scope(
            {
                root: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_node, root, known_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for root in roots
    )
    merged_scope = scope.merge_scopes(per_root_scopes)

    raw = execute(expr, scope=merged_scope, timecontext=timecontext, **kwargs)
    coerced = coerce_to_output(raw, expr, data.index)
    assert output_name is not None, 'Column selection name is None'
    return coerced