def _get_filter_info(self, expr_to_parse, X) -> Tuple[str, Any, Optional[str]]:
    """Split a filter predicate AST node into ``(lhs, op, rhs)``.

    Two predicate shapes are accepted:

    * A call with exactly one argument, e.g. ``fn(it.col_name)``. The
      result is ``(column_name, call.func, None)``.
    * A comparison with a single operator, e.g. ``it.a == 3`` or
      ``it.a < it['b']``. The result is ``(left_column, operator_node,
      right)``, where ``right`` is either a column name or the ``value``
      of a constant node.

    Args:
        expr_to_parse: The ``ast`` node describing the predicate.
        X: Object exposing a ``columns`` attribute; every referenced column
            name must be contained in it.

    Returns:
        A ``(lhs, op, rhs)`` tuple as described above.

    Raises:
        ValueError: If the node is neither a call nor a comparison, the call
            does not have exactly one argument, the comparison is chained,
            an operand uses an unsupported notation, or a referenced column
            is not in ``X.columns``.
    """
    col_list = X.columns

    if isinstance(expr_to_parse, ast.Call):
        op = expr_to_parse.func
        # for now, we only support single argument predicates
        if len(expr_to_parse.args) != 1:
            raise ValueError(
                "Filter predicate functions currently only support a single argument"
            )
        arg = expr_to_parse.args[0]
        if _is_ast_subscript(arg):
            lhs = _get_subscript_value(arg)
        elif _is_ast_attribute(arg):
            lhs = arg.attr  # type: ignore
        else:
            raise ValueError(
                "Filter predicate functions only supports subscript or dot notation for the argument. For example, it.col_name or it['col_name']"
            )
        if lhs not in col_list:
            raise ValueError(
                "Cannot perform filter predicate operation as {} not a column of input dataframe X.".format(
                    lhs
                )
            )
        return lhs, op, None

    # Everything below reads ``left``/``ops``/``comparators``, which only
    # exist on ast.Compare. Reject other node types with the same exception
    # type used for every other unsupported predicate, rather than letting
    # an AttributeError escape.
    if not isinstance(expr_to_parse, ast.Compare):
        raise ValueError(
            "Filter predicate only supports a single-argument function call or a comparison, but got an expression of type {}.".format(
                type(expr_to_parse).__name__
            )
        )
    # Only the first operator/comparator pair is used, so a chained
    # comparison would otherwise be silently truncated.
    if len(expr_to_parse.ops) != 1 or len(expr_to_parse.comparators) != 1:
        raise ValueError(
            "Filter predicate only supports a comparison with a single operator; chained comparisons are not supported."
        )

    left_node = expr_to_parse.left
    if _is_ast_subscript(left_node):
        lhs = _get_subscript_value(left_node)
    elif _is_ast_attribute(left_node):
        lhs = left_node.attr
    else:
        raise ValueError(
            "Filter predicate only supports subscript or dot notation for the left hand side. For example, it.col_name or it['col_name']"
        )
    if lhs not in col_list:
        raise ValueError(
            "Cannot perform filter operation as {} not a column of input dataframe X.".format(
                lhs
            )
        )

    op = expr_to_parse.ops[0]

    right_node = expr_to_parse.comparators[0]
    if _is_ast_subscript(right_node):
        rhs = _get_subscript_value(right_node)
    elif _is_ast_attribute(right_node):
        rhs = right_node.attr
    elif _is_ast_constant(right_node):
        rhs = right_node.value
    else:
        raise ValueError(
            "Filter predicate only supports subscript or dot notation for the right hand side. For example, it.col_name or it['col_name'] or a constant value"
        )
    # A constant is compared by value; only column references are checked
    # against the input's columns.
    if not _is_ast_constant(right_node) and rhs not in col_list:
        raise ValueError(
            "Cannot perform filter operation as {} not a column of input dataframe X.".format(
                rhs
            )
        )
    return lhs, op, rhs
def _get_group_key(self, expr_to_parse):
    """Return the key column named by a group-by expression node.

    Args:
        expr_to_parse: AST node written either as ``it['col_name']``
            (subscript) or ``it.col_name`` (attribute).

    Returns:
        For a subscript node, whatever ``_get_subscript_value`` yields for
        it (presumably the subscript key -- confirm against that helper);
        for an attribute node, its ``attr``.

    Raises:
        ValueError: If the node is neither a subscript nor an attribute.
    """
    # Subscript form is tested first, then dot notation.
    if _is_ast_subscript(expr_to_parse):
        return _get_subscript_value(expr_to_parse)
    if _is_ast_attribute(expr_to_parse):
        return expr_to_parse.attr
    raise ValueError(
        "GroupBy by parameter only supports subscript or dot notation for the key columns. For example, it.col_name or it['col_name']."
    )
def _get_join_info(cls, expr_to_parse):
    """Extract table and key-column names from both sides of a join predicate.

    Each side of the comparison is expected to reference a column of a
    table, e.g. ``it.table_name.column_name``,
    ``it['table_name'].column_name`` or ``it.table_name['column_name']``.

    Args:
        cls: Unused.
        expr_to_parse: Comparison node; its ``left`` and ``comparators[0]``
            are the two operands that get parsed.

    Returns:
        ``(left_name, left_key, right_name, right_key)`` where the names are
        table names and each key is a one-element list holding the column
        name of that side.

    Raises:
        ValueError: If an operand, or the table part of an operand, is
            neither a subscript nor an attribute expression.
    """

    def _split_operand(operand):
        # Return (table_name, column_name) for one side of the comparison.
        # The operand itself is validated *before* ``operand.value`` is
        # read, so an unsupported operand (e.g. a bare name) produces the
        # intended ValueError rather than an AttributeError.
        if _is_ast_subscript(operand):
            column = _get_subscript_value(operand)
        elif _is_ast_attribute(operand):
            column = operand.attr
        else:
            raise ValueError(
                "ERROR: Expression type not supported! Formats supported: it.table_name.column_name or it.table_name['column_name']"
            )

        # Assumes subscript and attribute nodes both expose ``value``
        # (true for ast.Subscript / ast.Attribute).
        table_node = operand.value
        if _is_ast_subscript(table_node):
            table = _get_subscript_value(table_node)
        elif _is_ast_attribute(table_node):
            table = table_node.attr
        else:
            raise ValueError(
                "ERROR: Expression type not supported! Formats supported: it.table_name.column_name or it['table_name'].column_name"
            )
        return table, column

    left_name, left_column = _split_operand(expr_to_parse.left)
    right_name, right_column = _split_operand(expr_to_parse.comparators[0])
    # Keys are returned as single-element lists, as before.
    return left_name, [left_column], right_name, [right_column]
def __init__(self, table=None):
    """Record the name of the table referenced by ``table``.

    Args:
        table: Object whose ``_expr`` attribute is an AST node of the form
            ``it.table_name`` (attribute) or ``it['table_name']``
            (subscript). Must not be ``None``.

    Raises:
        AssertionError: If ``table`` is ``None``.
        ValueError: If ``table._expr`` is neither an attribute nor a
            subscript node.
    """
    # NOTE(review): kept as an assert so the exception type seen by existing
    # callers is unchanged; be aware it is stripped when running with -O.
    assert table is not None, "table must not be None"

    expr = table._expr
    if isinstance(expr, ast.Attribute):
        self.table_name = expr.attr
    elif isinstance(expr, ast.Subscript):
        self.table_name = _get_subscript_value(expr)
    else:
        # Previously this case left ``self.table_name`` unset, which only
        # surfaced later as an AttributeError on first use.
        raise ValueError(
            "table must be an expression of the form it.table_name or it['table_name'], but got an expression of type {}.".format(
                type(expr).__name__
            )
        )