Exemple #1
0
def remap_overlapping_column_names(table_op, root_table, data_columns):
    """Map the column names present in the data back to their plain names.

    When ``table_op`` is a join, a column of ``root_table`` may appear in
    the data either under its own name or under that name plus the suffix
    associated with the join side ``root_table`` belongs to.  This builds
    the lookup from whichever spelling is present to the unsuffixed name.

    Parameters
    ----------
    table_op : TableNode
        The ``TableNode`` we're selecting from.
    root_table : TableNode
        The root table of the expression we're selecting from.
    data_columns : set or frozenset
        The available columns to select from

    Returns
    -------
    mapping : OrderedDict[str, str] or None
        A map from possibly-suffixed column names to column names without
        suffixes, in the order of ``root_table.schema.names``.  ``None`` is
        returned when ``table_op`` is not a join.
    """
    if not isinstance(table_op, ops.Join):
        return None

    left_root, right_root = ops.distinct_roots(table_op.left, table_op.right)
    suffix_by_root = {
        left_root: constants.LEFT_JOIN_SUFFIX,
        right_root: constants.RIGHT_JOIN_SUFFIX,
    }

    mapping = OrderedDict()
    for plain_name in root_table.schema.names:
        # Either spelling (bare or suffixed) may be what the data contains.
        present = {
            plain_name,
            plain_name + suffix_by_root[root_table],
        } & data_columns
        if present:
            mapping[first(present)] = plain_name
    return mapping
Exemple #2
0
def get_join_suffix_for_op(op: ops.TableColumn, join_op: ops.Join):
    """Return the join suffix for the side of ``join_op`` that ``op`` is on.

    Parameters
    ----------
    op : ops.TableColumn
        Column operation; it must have exactly one root table.
    join_op : ops.Join
        The join whose left/right roots decide which suffix applies.

    Returns
    -------
    ``constants.LEFT_JOIN_SUFFIX`` when the column's root table is the left
    root of the join, ``constants.RIGHT_JOIN_SUFFIX`` when it is the right
    root.

    Raises
    ------
    ValueError
        If ``op.root_tables()`` does not yield exactly one table.
    KeyError
        If the column's root table is neither root of the join.
    """
    (column_root,) = op.root_tables()
    left_root, right_root = ops.distinct_roots(join_op.left, join_op.right)
    suffix_by_root = {
        left_root: constants.LEFT_JOIN_SUFFIX,
        right_root: constants.RIGHT_JOIN_SUFFIX,
    }
    return suffix_by_root[column_root]
Exemple #3
0
def compute_projection_column_expr(
    expr,
    parent,
    data,
    scope: Scope,
    timecontext: Optional[TimeContext],
    **kwargs,
):
    """Compute one column expression of a projection against ``data``.

    Plain column references are read straight from ``data`` (falling back
    to the join-suffixed name when the parent table is a join).  Any other
    expression is executed with a scope extended by one entry per root
    table of the expression.

    Parameters
    ----------
    expr
        The column expression to compute.
    parent
        The projection being computed; ``parent.table.op()`` is the table
        operation being selected from.
    data
        The data to select from; indexed by column name and via ``.loc``.
    scope : Scope
        Scope that the per-root-table scopes are merged into.
    timecontext : Optional[TimeContext]
        Passed through to the new scopes and to ``execute``.
    **kwargs
        Forwarded to ``execute``.

    Returns
    -------
    The computed column, renamed to the expression's name (or to the
    column name for an unnamed plain column reference).

    Raises
    ------
    KeyError
        If a plain column is missing from ``data`` and the parent table is
        not a join.
    """
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        column = op.name
        output_name = result_name or column

        if column in data:
            return data[column].rename(output_name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(column)

        # The column is absent under its bare name, so look it up under
        # the suffix of the join side it originates from.
        (root_table,) = op.root_tables()
        left_root, right_root = ops.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffix_by_root = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        suffixed = column + suffix_by_root[root_table]
        return data.loc[:, suffixed].rename(output_name)

    data_columns = frozenset(data.columns)

    # One scope per root table, each binding the table to ``data`` with
    # its overlapping column names remapped.
    per_root_scopes = (
        Scope(
            {
                table: map_new_column_names_to_data(
                    remap_overlapping_column_names(
                        parent_table_op, table, data_columns
                    ),
                    data,
                )
            },
            timecontext,
        )
        for table in op.root_tables()
    )
    scope = scope.merge_scopes(per_root_scopes)

    result = execute(expr, scope=scope, timecontext=timecontext, **kwargs)
    assert result_name is not None, 'Column selection name is None'

    if not np.isscalar(result):
        return result.rename(result_name)

    # Broadcast a scalar result to one value per row of ``data``.
    series = dd.from_array(np.repeat(result, len(data.index)))
    series.name = result_name
    series.index = data.index
    return series
Exemple #4
0
def compute_projection_column_expr(expr, parent, data, scope=None, **kwargs):
    """Compute one column expression of a projection against ``data``.

    Plain column references are read straight from ``data`` (falling back
    to the join-suffixed name when the parent table is a join).  Any other
    expression is executed with ``scope`` extended by one entry per root
    table of the expression; a scalar result is broadcast to a
    ``pd.Series`` aligned with ``data.index``.

    Parameters
    ----------
    expr
        The column expression to compute.
    parent
        The projection being computed; ``parent.table.op()`` is the table
        operation being selected from.
    data
        The data to select from; indexed by column name and via ``.loc``.
    scope : mapping, optional
        Existing scope to extend.  ``None`` (the default) is treated as an
        empty scope.
    **kwargs
        Forwarded to ``execute``.

    Returns
    -------
    The computed column, renamed to the expression's name (or to the
    column name for an unnamed plain column reference).

    Raises
    ------
    KeyError
        If a plain column is missing from ``data`` and the parent table is
        not a join.
    """
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        name = op.name

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)

        # The column is absent under its bare name, so look it up under
        # the suffix of the join side it originates from.
        (root_table,) = op.root_tables()
        left_root, right_root = ops.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffixes = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        return data.loc[:, name + suffixes[root_table]].rename(
            result_name or name
        )

    data_columns = frozenset(data.columns)
    additional_scope = {
        t: map_new_column_names_to_data(
            remap_overlapping_column_names(parent_table_op, t, data_columns),
            data,
        )
        for t in op.root_tables()
    }

    # ``scope`` defaults to None, which ``toolz.merge`` cannot consume
    # (it calls ``dict.update`` on every argument), so substitute an
    # empty mapping in that case.
    base_scope = {} if scope is None else scope
    new_scope = toolz.merge(base_scope, additional_scope)
    result = execute(expr, scope=new_scope, **kwargs)
    assert result_name is not None, 'Column selection name is None'
    if np.isscalar(result):
        # Broadcast a scalar result to one value per row of ``data``.
        return pd.Series(
            np.repeat(result, len(data.index)),
            index=data.index,
            name=result_name,
        )
    return result.rename(result_name)
Exemple #5
0
def compute_projection_column_expr(expr, parent, data, scope=None, **kwargs):
    """Compute one column expression of a projection against ``data``.

    Plain column references are read straight from ``data`` (falling back
    to the join-suffixed name when the parent table is a join).  Any other
    expression is executed with ``scope`` extended by one entry per root
    table of the expression.

    Parameters
    ----------
    expr
        The column expression to compute.
    parent
        The projection being computed; ``parent.table.op()`` is the table
        operation being selected from.
    data
        The data to select from; indexed by column name and via ``.loc``.
    scope : mapping, optional
        Existing scope to extend.  ``None`` (the default) is treated as an
        empty scope.
    **kwargs
        Forwarded to ``execute``.

    Returns
    -------
    The computed column, renamed to the expression's name (or to the
    column name for an unnamed plain column reference).

    Raises
    ------
    KeyError
        If a plain column is missing from ``data`` and the parent table is
        not a join.
    """
    result_name = getattr(expr, '_name', None)
    op = expr.op()
    parent_table_op = parent.table.op()

    if isinstance(op, ops.TableColumn):
        # slightly faster path for simple column selection
        name = op.name

        if name in data:
            return data[name].rename(result_name or name)

        if not isinstance(parent_table_op, ops.Join):
            raise KeyError(name)

        # The column is absent under its bare name, so look it up under
        # the suffix of the join side it originates from.
        (root_table,) = op.root_tables()
        left_root, right_root = ops.distinct_roots(
            parent_table_op.left, parent_table_op.right
        )
        suffixes = {
            left_root: constants.LEFT_JOIN_SUFFIX,
            right_root: constants.RIGHT_JOIN_SUFFIX,
        }
        return data.loc[:, name + suffixes[root_table]].rename(
            result_name or name
        )

    data_columns = frozenset(data.columns)
    additional_scope = {
        t: map_new_column_names_to_data(
            remap_overlapping_column_names(parent_table_op, t, data_columns),
            data,
        )
        for t in op.root_tables()
    }

    # ``scope`` defaults to None, which ``toolz.merge`` cannot consume
    # (it calls ``dict.update`` on every argument), so substitute an
    # empty mapping in that case.
    base_scope = {} if scope is None else scope
    new_scope = toolz.merge(base_scope, additional_scope)
    result = execute(expr, scope=new_scope, **kwargs)
    assert result_name is not None, 'Column selection name is None'
    return result.rename(result_name)
Exemple #6
0
def remap_overlapping_column_names(table_op, root_table, data_columns):
    """Return an ``OrderedDict`` translating data column names, which may
    carry a join suffix, to the corresponding unsuffixed column names.

    Parameters
    ----------
    table_op : TableNode
        The ``TableNode`` we're selecting from.
    root_table : TableNode
        The root table of the expression we're selecting from.
    data_columns : set or frozenset
        The available columns to select from

    Returns
    -------
    mapping : OrderedDict[str, str] or None
        A map from possibly-suffixed column names to column names without
        suffixes.  Only columns of ``root_table`` that are found in
        ``data_columns`` (bare or suffixed) get an entry.  ``None`` is
        returned when ``table_op`` is not a join.
    """
    if not isinstance(table_op, ops.Join):
        return None

    left_root, right_root = ops.distinct_roots(table_op.left, table_op.right)
    join_suffixes = {
        left_root: constants.LEFT_JOIN_SUFFIX,
        right_root: constants.RIGHT_JOIN_SUFFIX,
    }

    # For every schema column, the subset of {bare, suffixed} spellings
    # that actually occurs in the data, paired with the bare name.
    matches = (
        ({column, column + join_suffixes[root_table]} & data_columns, column)
        for column in root_table.schema.names
    )
    return OrderedDict(
        (first(found), column) for found, column in matches if found
    )