Exemple #1
0
    def __getitem__(self, obj):
        """Expression evaluation against groups.

        `obj` takes one of two forms:

        * A single object or a list: interpreted as the set of SELECT
          expressions to evaluate in the context of the GROUP BY.
        * A tuple of length 2, `(having, select)`: the first element is
          interpreted for group selection (i.e., a HAVING clause) and must be
          an `Expr`; the second element is a single object or a list/tuple of
          SELECT expressions to evaluate against the groups.

        Note that *any* tuple of length 2 is treated as `(having, select)`;
        pass a list to select exactly two expressions.

        Each SELECT element may be a `SelectItem` (used as is), a string
        (wrapped in a `Literal`), or an `Expr`.

        Returns a new `BigDataFrame` built from a copy of the grouped AST, so
        this object is left unmodified.

        Raises `ValueError` if the group filter is not an `Expr`, and
        `TypeError` if a SELECT element has an unsupported type.
        """
        ast = copy(self._grouped_ast)
        if isinstance(obj, tuple) and len(obj) == 2:
            having, obj = obj
            if not isinstance(having, Expr):
                raise ValueError("The group filter (obj[0]) must be Expr type")
            ast._having = having
        # obj is now the SELECT portion
        if not isinstance(obj, (list, tuple)):
            obj = [obj]
        select_list = []
        for elt in obj:
            if isinstance(elt, SelectItem):
                select_list.append(elt)
            elif isinstance(elt, basestring):
                select_list.append(SelectItem(expr=Literal(elt)))
            elif isinstance(elt, Expr):
                select_list.append(SelectItem(expr=elt))
            else:
                # Fail loudly: skipping the element would quietly produce a
                # SELECT list that is missing what the caller asked for.
                raise TypeError(
                    "Unsupported SELECT element of type %s; expected "
                    "SelectItem, string, or Expr" % type(elt).__name__)
        ast._select_list = select_list
        return BigDataFrame(self._ic, ast)
Exemple #2
0
    def join(self, other, on=None, how='inner', hint=None):
        """Join this BDF to `other` and return the result as a new BDF.

        The SQL of each side is wrapped in an inline view: this BDF is
        aliased `left_tbl` and `other` is aliased `right_tbl`.

        `on` is `None`, `string`, `Expr`, or `list[string]`; it is forwarded
        to `JoinTableRef` together with `how` (as the join `op`) and `hint`.
        """
        left_view = InlineView(self._query_ast.to_sql(), 'left_tbl')
        right_view = InlineView(other._query_ast.to_sql(), 'right_tbl')
        # SELECT left.*, right.*
        star_items = [SelectItem(table_name=TableName(view.name))
                      for view in (left_view, right_view)]
        joined_ref = JoinTableRef(
            left_view, right_view, on=on, op=how, hint=hint)
        return BigDataFrame(self._ic, SelectStmt(star_items, joined_ref))
Exemple #3
0
def from_sql_table(ic, table):
    """Create a BDF from a table name usable in Impala

    The `(column, type)` pairs returned by `_get_table_schema_hack` for the
    table determine the select list: one item per column, in the order
    returned. The type half of each pair is not used here.
    """
    source_ref = BaseTableRef(_to_TableName(table))
    columns = _get_table_schema_hack(ic._cursor, source_ref.to_sql())
    items = tuple(SelectItem(expr=Literal(name)) for (name, _) in columns)
    return BigDataFrame(ic, SelectStmt(items, source_ref))
Exemple #4
0
def from_sql_query(ic, query, alias=None):
    """Create a BDF from a SQL query executed by Impala

    The query is wrapped in an inline view named `alias`; when `alias` is
    empty or `None`, a name is generated with `_random_id('inline_', 4)`.
    The select list holds one item per `(column, type)` pair returned by
    `_get_table_schema_hack` for that view; the type is not used here.
    """
    view_name = alias or _random_id('inline_', 4)
    view_ref = InlineView(query, view_name)
    columns = _get_table_schema_hack(ic._cursor, view_ref.to_sql())
    items = tuple(SelectItem(expr=Literal(name)) for (name, _) in columns)
    return BigDataFrame(ic, SelectStmt(items, view_ref))
Exemple #5
0
    def take(self, n):
        """Return `n` rows as a pandas `DataFrame`

        The current query is wrapped in an inline view with a generated
        alias, and everything is selected from it with a LIMIT of `n`.

        Distributed and no notion of order, so not guaranteed to be
        reproducible.
        """
        view_name = _random_id('inline_', 4)
        view_ref = InlineView(self._query_ast.to_sql(), view_name)
        # SELECT alias.* ... LIMIT n
        limited_stmt = SelectStmt(
            [SelectItem(table_name=TableName(view_ref.name))],
            view_ref,
            limit=LimitElement(Literal(n), None))
        limited_bdf = BigDataFrame(self._ic, limited_stmt)
        return as_pandas(limited_bdf.__iter__())
Exemple #6
0
 def groups(self):
     """Return a BDF that selects only the GROUP BY expressions.

     Works on a copy of the grouped AST, so this object's own AST is not
     reassigned.
     """
     grouped_copy = copy(self._grouped_ast)
     grouped_copy._select_list = tuple(
         SelectItem(expr=group_expr)
         for group_expr in self._grouped_ast._group_by)
     return BigDataFrame(self._ic, grouped_copy)