def accessed_columns(expr):
    """Return the set of column indexes accessed by an expression.

    Assumes that named attribute references have been converted to integer
    positions; a NamedAttributeRef found during the walk trips an assertion.

    :param expr: An expression whose ``walk()`` yields the nodes to inspect.
    :returns: The ``position`` of every UnnamedAttributeRef yielded by
        ``expr.walk()``.
    :rtype: set
    :raises AssertionError: If the walk yields a NamedAttributeRef (only
        when assertions are enabled).
    """
    columns = set()
    # A single traversal both checks the precondition and collects the
    # positions, instead of walking the expression tree twice.
    for ex in expr.walk():
        assert not isinstance(ex, NamedAttributeRef)
        if isinstance(ex, UnnamedAttributeRef):
            columns.add(ex.position)
    return columns
def udf_undefined_vars(expr, vars):
    """Collect the variable names a UDF body uses but does not declare.

    :param expr: An expression corresponding to a UDF.  Variable references
        appear in it as NamedAttributeRef instances.
    :param vars: The variables named in the function's argument list.
    :type vars: list of strings
    :returns: The ``name`` of each NamedAttributeRef yielded by
        ``expr.walk()`` that is not in ``vars``, in walk order.  A name
        referenced several times is reported once per reference.
    :rtype: list
    """
    undefined = []
    for node in expr.walk():
        # Only named references denote variables; skip everything else.
        if not isinstance(node, NamedAttributeRef):
            continue
        if node.name not in vars:
            undefined.append(node.name)
    return undefined
def reindex_expr(expr, index_map):
    """Remap column references in place according to index_map.

    Every positional reference whose position is a key of ``index_map`` is
    redirected to the corresponding value; positions that are not keys are
    left untouched.

    Assumes that named attribute references have been converted to integer
    positions: any AttributeRef met during the walk must be an
    UnnamedAttributeRef.

    :param expr: An expression whose ``walk()`` yields the nodes to update.
    :param index_map: Mapping from current column position to new position.
    :raises AssertionError: If the walk yields an AttributeRef that is not
        an UnnamedAttributeRef (only when assertions are enabled).
    """
    for node in expr.walk():
        is_positional = isinstance(node, UnnamedAttributeRef)
        # The only attribute references allowed here are positional ones.
        assert is_positional or not isinstance(node, AttributeRef)
        if not is_positional:
            continue
        if node.position in index_map:
            node.position = index_map[node.position]
def rebase_expr(expr, offset):
    """Shift every column access in an expression down by ``offset``.

    The expression is modified in place: each UnnamedAttributeRef yielded by
    ``expr.walk()`` has ``offset`` subtracted from its ``position``.

    Assumes that named attribute references have been converted to integer
    positions.

    :param expr: An expression whose ``walk()`` yields the nodes to update.
    :param offset: Amount to subtract from each position; must be positive.
    :raises AssertionError: If ``offset`` is not greater than zero, or if
        the walk yields a NamedAttributeRef (only when assertions are
        enabled).
    """
    assert offset > 0

    for node in expr.walk():
        assert not isinstance(node, NamedAttributeRef)
        # Nodes that are not positional references carry no column index.
        if not isinstance(node, UnnamedAttributeRef):
            continue
        node.position -= offset