Example #1
0
    def __getitem__(self, obj):
        """Select columns, and optionally rows, from this BigDataFrame.

        Three shapes of `obj` are recognised:

        * a tuple of length 2 -- `obj[0]` is handed to the query AST's
          `_filter` (row selection, i.e. predicate/filter/WHERE clause) and
          `obj[1]` to its `_projection` (column selection);
        * a list -- interpreted as a relational projection only;
        * anything else (a single object, possibly a slice, or a tuple of any
          other length) -- wrapped in a one-element list and looked up again
          as a projection.

        The current query is wrapped as an inline view under an alias obtained
        from `_random_id`, and a new BigDataFrame over that view is returned.
        """
        # other select/filter fns should be implemented with this one
        rows_and_cols = isinstance(obj, tuple) and len(obj) == 2
        if not rows_and_cols and not isinstance(obj, list):
            # not a recognised container: retry as a one-column projection
            return self[[obj]]

        # both remaining cases select from the current query as a subquery
        view_alias = _random_id('inline_', 4)
        view = InlineView(self._query_ast.to_sql(), view_alias)

        if rows_and_cols:
            # _filter yields the LIMIT element and the WHERE expression
            (limit_elt, where) = self._query_ast._filter(obj[0])
            columns = self._query_ast._projection(obj[1])
            stmt = SelectStmt(columns, view, where=where, limit=limit_elt)
        else:
            columns = self._query_ast._projection(obj)
            stmt = SelectStmt(columns, view)
        return BigDataFrame(self._ic, stmt)
Example #2
0
 def schema(self):
     """Return this BDF's schema, computing and caching it on first use.

     When `self._schema` is still `None`, the current query is wrapped as
     an inline view and `_get_table_schema_hack` is run against it with the
     context's cursor; the result is stored in `self._schema` and reused
     on later calls.
     """
     if self._schema is not None:
         return self._schema
     inner_sql = self._query_ast.to_sql()
     view = InlineView(inner_sql, _random_id('inline_', 4))
     self._schema = _get_table_schema_hack(self._ic._cursor, view.to_sql())
     return self._schema
Example #3
0
def from_sql_query(ic, query, alias=None):
    """Create a BDF from a SQL query executed by Impala.

    `query` is wrapped as an inline view named `alias`; a falsy `alias`
    (the default `None`, or an empty string) is replaced by an id from
    `_random_id`.  The view's schema is fetched through `ic._cursor`, and
    the resulting BDF selects each schema column by name.
    """
    view = InlineView(query, alias or _random_id('inline_', 4))
    schema = _get_table_schema_hack(ic._cursor, view.to_sql())
    # schema rows are (column, type) pairs; only the column is needed here
    columns = tuple(SelectItem(expr=Literal(name)) for (name, _) in schema)
    return BigDataFrame(ic, SelectStmt(columns, view))
Example #4
0
File: bdf.py Project: cgc17/impyla
 def schema(self):
     """Return the cached schema, fetching it the first time it is needed.

     The schema is obtained by passing the SQL of an inline view over the
     current query to `_get_table_schema_hack`, together with the context's
     cursor.  The result is kept in `self._schema`.
     """
     cached = self._schema
     if cached is not None:
         return cached
     view = InlineView(
         self._query_ast.to_sql(), _random_id('inline_', 4))
     cursor = self._ic._cursor
     self._schema = _get_table_schema_hack(cursor, view.to_sql())
     return self._schema
Example #5
0
def from_sql_query(ic, query, alias=None):
    """Create a BDF from a SQL query executed by Impala.

    The query becomes an inline view whose name is `alias` when that is
    truthy, otherwise an id produced by `_random_id`.  Its schema is looked
    up via `ic._cursor` and every column in it is selected by name.
    """
    if alias:
        view_name = alias
    else:
        view_name = _random_id('inline_', 4)
    source = InlineView(query, view_name)
    schema = _get_table_schema_hack(ic._cursor, source.to_sql())
    # each schema entry unpacks to (column, type); the type is not used
    select_items = tuple(
        SelectItem(expr=Literal(column)) for (column, _) in schema)
    return BigDataFrame(ic, SelectStmt(select_items, source))
Example #6
0
    def join(self, other, on=None, how='inner', hint=None):
        """Join this BDF to another one.

        `on` is `None`, `string`, `Expr`, or `list[string]`.  `how` is passed
        to `JoinTableRef` as the join operator (`op`) and `hint` as its
        `hint`.

        Both queries are wrapped as inline views named `left_tbl` and
        `right_tbl`; the returned BDF selects every column of each.
        """
        left_view = InlineView(self._query_ast.to_sql(), 'left_tbl')
        right_view = InlineView(other._query_ast.to_sql(), 'right_tbl')
        # SELECT left.*, right.*
        star_items = [SelectItem(table_name=TableName(view.name))
                      for view in (left_view, right_view)]
        joined = JoinTableRef(left_view, right_view,
                              on=on, op=how, hint=hint)
        return BigDataFrame(self._ic, SelectStmt(star_items, joined))
Example #7
0
    def take(self, n):
        """Return `n` rows as a pandas `DataFrame`.

        The current query is wrapped as an inline view and selected from
        with a LIMIT of `n`.  The data is distributed and has no notion of
        order, so which rows come back is not guaranteed to be reproducible.
        """
        view = InlineView(self._query_ast.to_sql(), _random_id('inline_', 4)) \
            if False else None
        alias = _random_id('inline_', 4)
        view = InlineView(self._query_ast.to_sql(), alias)
        # SELECT alias.*
        everything = [SelectItem(table_name=TableName(view.name))]
        limited = SelectStmt(everything, view,
                             limit=LimitElement(Literal(n), None))
        rows = BigDataFrame(self._ic, limited).__iter__()
        return as_pandas(rows)
Example #8
0
    def group_by(self, by):
        """Group the BDF.

        `by` is `string`, `Expr`, or `list/tuple[string/Expr]`.  A single key
        is treated as a one-element tuple, and string keys are wrapped in
        `Literal`.  Raises `ValueError` if any key is neither a string nor
        an `Expr`.

        Returns a `GroupBy` holding a SELECT over this BDF's query (as the
        inline view `inner_tbl`) with the GROUP BY keys set but an empty
        select list.
        """
        keys = by if isinstance(by, (tuple, list)) else (by, )
        if not all(isinstance(key, (basestring, Expr)) for key in keys):
            raise ValueError("must supply only strings or Exprs")
        group_exprs = tuple(
            key if isinstance(key, Expr) else Literal(key) for key in keys)
        source = InlineView(self._query_ast.to_sql(), 'inner_tbl')
        # invalid AST; to be used by GroupBy
        partial_stmt = SelectStmt([], source, group_by=group_exprs)
        return GroupBy(self._ic, partial_stmt)