Example #1
0
def compute_projection_scalar_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext] = None,
    **kwargs,
):
    name = expr._name
    assert name is not None, 'Scalar selection name is None'

    op = expr.op()
    parent_table_op = parent.table.op()

    data_columns = frozenset(data.columns)
    scope = scope.merge_scopes(
        Scope(
            {
                t:
                map_new_column_names_to_data(
                    remap_overlapping_column_names(parent_table_op, t,
                                                   data_columns),
                    data,
                )
            },
            timecontext,
        ) for t in op.root_tables())
    scalar = execute(expr, scope=scope, **kwargs)
    return data.assign(**{name: scalar})[name]
Example #2
0
def compute_projection_scalar_expr(
    expr,
    parent,
    data,
    scope: Scope = None,
    timecontext: Optional[TimeContext] = None,
    **kwargs,
):
    name = expr._name
    assert name is not None, 'Scalar selection name is None'

    op = expr.op()
    parent_table_op = parent.table.op()

    data_columns = frozenset(data.columns)

    scope = scope.merge_scopes(
        Scope(
            {
                t: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, t, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for t in op.root_tables()
    )
    scalar = execute(expr, scope=scope, **kwargs)
    result = pandas.Series([scalar], name=name).repeat(len(data.index))
    result.index = data.index
    return dd.from_pandas(result, npartitions=data.npartitions)
Example #3
0
def compute_projection_column_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        name = op.name

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)
        (root_table,) = op.root_tables()
        left_root, right_root = ops.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffixes = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        return data.loc[:, name + suffixes[root_table]].rename(
            result_name or name
        )

    data_columns = frozenset(data.columns)

    scope = scope.merge_scopes(
        Scope(
            {
                t: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, t, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for t in op.root_tables()
    )

    result = execute(expr, scope=scope, timecontext=timecontext, **kwargs)
    assert result_name is not None, 'Column selection name is None'
    if np.isscalar(result):
        series = dd.from_array(np.repeat(result, len(data.index)))
        series.name = result_name
        series.index = data.index
        return series
    return result.rename(result_name)
Example #4
0
def _compute_predicates(
    table_op,
    predicates,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    """Compute the predicates for a table operation.

    Parameters
    ----------
    table_op : TableNode
    predicates : List[ir.ColumnExpr]
    data : pd.DataFrame
    scope : Scope
    timecontext: Optional[TimeContext]
    kwargs : dict

    Returns
    -------
    computed_predicate : pd.Series[bool]

    Notes
    -----
    This handles the cases where the predicates are computed columns, in
    addition to the simple case of named columns coming directly from the input
    table.
    """
    for predicate in predicates:
        # Map each root table of the predicate to the data so that we compute
        # predicates on the result instead of any left or right tables if the
        # Selection is on a Join. Project data to only inlude columns from
        # the root table.
        root_tables = predicate.op().root_tables()

        # handle suffixes
        data_columns = frozenset(data.columns)

        additional_scope = Scope()
        for root_table in root_tables:
            mapping = remap_overlapping_column_names(
                table_op, root_table, data_columns
            )
            if mapping is not None:
                new_data = data.loc[:, mapping.keys()].rename(columns=mapping)
            else:
                new_data = data
            additional_scope = additional_scope.merge_scope(
                Scope({root_table: new_data}, timecontext)
            )

        scope = scope.merge_scope(additional_scope)
        yield execute(predicate, scope=scope, **kwargs)
Example #5
0
def compute_projection_column_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        name = op.name

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)
        suffix = get_join_suffix_for_op(op, parent_table_op)
        return data.loc[:, name + suffix].rename(result_name or name)

    data_columns = frozenset(data.columns)

    scope = scope.merge_scopes(
        Scope(
            {
                t: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, t, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for t in op.root_tables()
    )

    result = execute(expr, scope=scope, timecontext=timecontext, **kwargs)
    result = coerce_to_output(result, expr, data.index)
    assert result_name is not None, 'Column selection name is None'

    return result