def remap_overlapping_column_names(table_op, root_table, data_columns):
    """Map column names as they appear in joined data to their plain names.

    Parameters
    ----------
    table_op : TableNode
        The ``TableNode`` we're selecting from.
    root_table : TableNode
        The root table of the expression we're selecting from.
    data_columns : set or frozenset
        The available columns to select from

    Returns
    -------
    mapping : OrderedDict[str, str] or None
        ``None`` when ``table_op`` is not an ``ops.Join``. Otherwise a map
        from possibly-suffixed column names to column names without
        suffixes, ordered like ``root_table.schema.names``. Schema columns
        that are found in ``data_columns`` under neither spelling are left
        out.
    """
    if not isinstance(table_op, ops.Join):
        return None

    left_root, right_root = ops.distinct_roots(table_op.left, table_op.right)
    suffix_by_root = {
        left_root: constants.LEFT_JOIN_SUFFIX,
        right_root: constants.RIGHT_JOIN_SUFFIX,
    }

    mapping = OrderedDict()
    for plain_name in root_table.schema.names:
        # A column may be stored under its plain name or, when it overlaps
        # with the other side of the join, under the suffixed name.
        available = {
            plain_name,
            plain_name + suffix_by_root[root_table],
        } & data_columns
        if available:
            # NOTE(review): if both spellings are present, the key is
            # whichever one ``first`` yields from the unordered set.
            mapping[first(available)] = plain_name
    return mapping
def get_join_suffix_for_op(op: ops.TableColumn, join_op: ops.Join):
    """Return the join suffix for the side of ``join_op`` that owns ``op``.

    Parameters
    ----------
    op : ops.TableColumn
        Column operation; it must have exactly one root table, otherwise
        the unpacking below raises ``ValueError``.
    join_op : ops.Join
        Join whose ``left`` and ``right`` inputs determine the two roots.

    Returns
    -------
    suffix
        ``constants.LEFT_JOIN_SUFFIX`` or ``constants.RIGHT_JOIN_SUFFIX``,
        depending on which distinct root the column's root table matches.

    Raises
    ------
    KeyError
        If the column's root table is neither of the join's distinct roots.
    """
    column_roots = op.root_tables()
    (column_root,) = column_roots

    left_root, right_root = ops.distinct_roots(join_op.left, join_op.right)
    suffix_by_root = {
        left_root: constants.LEFT_JOIN_SUFFIX,
        right_root: constants.RIGHT_JOIN_SUFFIX,
    }
    return suffix_by_root[column_root]
def compute_projection_column_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    """Compute a single column of a projection against ``data``.

    Parameters
    ----------
    expr
        Column expression to evaluate; its ``_name`` attribute, when
        present, names the result.
    parent
        Projection being computed; ``parent.table.op()`` is the table
        operation that ``data`` was produced from.
    data
        Frame holding the parent table's columns.
    scope : Scope
        Existing scope, extended with one entry per root table of ``expr``
        before the expression is executed.
    timecontext : Optional[TimeContext]
        Passed to the new ``Scope`` entries and to ``execute``.
    **kwargs
        Forwarded unchanged to ``execute``.

    Returns
    -------
    The selected or computed column, renamed to the expression's name (or
    to the column's own name on the simple-selection path).

    Raises
    ------
    KeyError
        If a plain column is missing from ``data`` and the parent table is
        not a join.
    AssertionError
        If a computed (non-``TableColumn``) expression has no name.
    """
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # Simple column selection: no need to go through ``execute``.
        column = op.name
        output_name = result_name or column
        if column in data:
            return data[column].rename(output_name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(column)

        # The column overlaps between both sides of the join, so it is
        # stored under its suffixed name.
        (root_table,) = op.root_tables()
        left_root, right_root = ops.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffix_by_root = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        suffixed_column = column + suffix_by_root[root_table]
        return data.loc[:, suffixed_column].rename(output_name)

    data_columns = frozenset(data.columns)

    # One scope per root table, each exposing ``data`` with any suffixed
    # column names mapped back to their plain names.
    merged_scope = scope.merge_scopes(
        Scope(
            {
                table: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, table, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for table in op.root_tables()
    )

    result = execute(
        expr, scope=merged_scope, timecontext=timecontext, **kwargs
    )
    assert result_name is not None, 'Column selection name is None'

    if not np.isscalar(result):
        return result.rename(result_name)

    # Broadcast a scalar result to a full column aligned with ``data``.
    broadcast = dd.from_array(np.repeat(result, len(data.index)))
    broadcast.name = result_name
    broadcast.index = data.index
    return broadcast
def compute_projection_column_expr(expr, parent, data, scope=None, **kwargs):
    """Compute a single column of a projection against ``data``.

    Parameters
    ----------
    expr
        Column expression to evaluate; its ``_name`` attribute, when
        present, names the result.
    parent
        Projection being computed; ``parent.table.op()`` is the table
        operation that ``data`` was produced from.
    data
        Frame holding the parent table's columns.
    scope : mapping, optional
        Existing scope. ``None`` (the default) is treated as an empty
        scope. It is merged with one entry per root table of ``expr``
        before the expression is executed.
    **kwargs
        Forwarded unchanged to ``execute``.

    Returns
    -------
    The selected or computed column, renamed to the expression's name (or
    to the column's own name on the simple-selection path). Scalar results
    are repeated into a ``pd.Series`` indexed like ``data``.

    Raises
    ------
    KeyError
        If a plain column is missing from ``data`` and the parent table is
        not a join.
    AssertionError
        If a computed (non-``TableColumn``) expression has no name.
    """
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        name = op.name

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)

        # The column overlaps between both sides of the join, so it is
        # stored under its suffixed name.
        (root_table,) = op.root_tables()
        left_root, right_root = ops.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffixes = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        return data.loc[:, name + suffixes[root_table]].rename(
            result_name or name
        )

    data_columns = frozenset(data.columns)

    # Expose ``data`` for every root table, with any suffixed column names
    # mapped back to their plain names.
    additional_scope = {
        t: map_new_column_names_to_data(
            remap_overlapping_column_names(parent_table_op, t, data_columns),
            data,
        )
        for t in op.root_tables()
    }

    # ``scope`` defaults to None, which ``toolz.merge`` cannot consume;
    # substitute an empty mapping so the default is actually usable.
    base_scope = {} if scope is None else scope
    new_scope = toolz.merge(base_scope, additional_scope)

    result = execute(expr, scope=new_scope, **kwargs)
    assert result_name is not None, 'Column selection name is None'

    if np.isscalar(result):
        # Broadcast a scalar result to a full column aligned with ``data``.
        return pd.Series(
            np.repeat(result, len(data.index)),
            index=data.index,
            name=result_name,
        )
    return result.rename(result_name)
def compute_projection_column_expr(expr, parent, data, scope=None, **kwargs):
    """Compute a single column of a projection against ``data``.

    Parameters
    ----------
    expr
        Column expression to evaluate; its ``_name`` attribute, when
        present, names the result.
    parent
        Projection being computed; ``parent.table.op()`` is the table
        operation that ``data`` was produced from.
    data
        Frame holding the parent table's columns.
    scope : mapping, optional
        Existing scope. ``None`` (the default) is treated as an empty
        scope. It is merged with one entry per root table of ``expr``
        before the expression is executed.
    **kwargs
        Forwarded unchanged to ``execute``.

    Returns
    -------
    The selected or computed column, renamed to the expression's name (or
    to the column's own name on the simple-selection path).

    Raises
    ------
    KeyError
        If a plain column is missing from ``data`` and the parent table is
        not a join.
    AssertionError
        If a computed (non-``TableColumn``) expression has no name.
    """
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        name = op.name

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)

        # The column overlaps between both sides of the join, so it is
        # stored under its suffixed name.
        (root_table,) = op.root_tables()
        left_root, right_root = ops.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffixes = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        return data.loc[:, name + suffixes[root_table]].rename(
            result_name or name
        )

    data_columns = frozenset(data.columns)

    # Expose ``data`` for every root table, with any suffixed column names
    # mapped back to their plain names.
    additional_scope = {
        t: map_new_column_names_to_data(
            remap_overlapping_column_names(parent_table_op, t, data_columns),
            data,
        )
        for t in op.root_tables()
    }

    # ``scope`` defaults to None, which ``toolz.merge`` cannot consume;
    # substitute an empty mapping so the default is actually usable.
    base_scope = {} if scope is None else scope
    new_scope = toolz.merge(base_scope, additional_scope)

    result = execute(expr, scope=new_scope, **kwargs)
    assert result_name is not None, 'Column selection name is None'
    return result.rename(result_name)
def remap_overlapping_column_names(table_op, root_table, data_columns):
    """Build the rename map from stored column names to plain column names.

    Parameters
    ----------
    table_op : TableNode
        The ``TableNode`` we're selecting from.
    root_table : TableNode
        The root table of the expression we're selecting from.
    data_columns : set or frozenset
        The available columns to select from

    Returns
    -------
    mapping : OrderedDict[str, str] or None
        A map from possibly-suffixed column names to column names without
        suffixes, following the order of ``root_table.schema.names``.
        Columns available under neither spelling are skipped. ``None`` is
        returned when ``table_op`` is not an ``ops.Join``.
    """
    if isinstance(table_op, ops.Join):
        left_root, right_root = ops.distinct_roots(
            table_op.left, table_op.right
        )
        suffixes = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }

        renames = OrderedDict()
        for final_name in root_table.schema.names:
            # Either spelling may be the one actually present in the data.
            spellings = {final_name, final_name + suffixes[root_table]}
            present = spellings & data_columns
            if not present:
                continue
            renames[first(present)] = final_name
        return renames

    # Only joins introduce suffixed names; nothing to remap otherwise.
    return None