Example #1
0
def parse_grpexpr(grp_ctx, tree, row_ctx, maker):
    """Parse an AST node that may be a call to a grouping function.

    A call whose callee is a plain name present in ``FUNC_MAP`` is built by
    the matching ``FUNC_MAP`` entry, which receives ``maker`` and the list of
    positional arguments, each parsed with ``parse_expr`` against ``row_ctx``.
    Every other node is passed to ``parse_expr`` with ``grp_ctx``; this
    function, bound to the same ``maker`` and ``row_ctx``, is supplied as the
    third argument (presumably so nested nodes are parsed the same way --
    confirm against ``parse_expr``).

    Raises:
        Exception: if a ``FUNC_MAP`` call is written with keyword arguments.
    """
    is_grp_call = (
        isinstance(tree, _ast.Call)
        # Only a bare-name callee has ``.id``; attribute or computed callees
        # (``a.b(x)``, ``f()(x)``) must fall through instead of raising
        # AttributeError here.
        and isinstance(tree.func, _ast.Name)
        and tree.func.id in FUNC_MAP
    )
    if is_grp_call:
        if tree.keywords:
            raise Exception('Keyword arguments are not supported in %r' % (tree.func.id,))
        return FUNC_MAP[tree.func.id](
            maker, [parse_expr(row_ctx, arg) for arg in tree.args]
        )
    else:
        return parse_expr(grp_ctx, tree, partial(parse_grpexpr, maker=maker, row_ctx=row_ctx))
Example #2
0
def awk_grp(data_desc, key_str, grp_expr_tuples, output_only_assigned=True, expose_groups=False):
    """Build an awk script that groups rows by key expressions.

    Args:
        data_desc: description of the input data; used to build the key and
            row expression contexts, and its ``order`` is forwarded to
            ``awk_filter_map_from_context``.
        key_str: source text of the key expression(s). A falsy value is
            replaced by the constant key ``'1'``. When it is exactly ``None``
            the generated script sets ``__print_last`` to 1, otherwise 0.
        grp_expr_tuples: iterable of ``(grp_type, expr_str)`` pairs where
            ``grp_type`` is ``'acc'`` or ``'grp'`` and ``expr_str`` holds one
            or more assignment expressions.
        output_only_assigned: when true, a key expression that is not an
            assignment is still used as a key but is not added to the output.
            When false, a non-assignment key must be a bare name (it is
            output under that name), otherwise an exception is raised.
        expose_groups: when true, the print block runs for every input row
            and no END block is emitted; when false, it runs whenever a key
            changes and once more in the END block.

    Returns:
        A ``(awk, output_desc)`` tuple: the assembled ``AwkScript`` and the
        output description returned by ``awk_filter_map_from_context``.

    Raises:
        Exception: on a malformed key assignment, an unassigned non-name key
            when ``output_only_assigned`` is false, an unknown grouping type,
            or when no output fields result.
        AssertionError: if the print script carries a non-empty ``end`` block.
    """
    # All contexts and both function makers share one namer.
    namer = Namer()
    acc_maker = GrpExprFuncMaker('__acc_', namer)
    grp_maker = GrpExprFuncMaker('__grp_', namer)
    key_ctx = ExprContext(data_desc, namer)
    row_ctx = ExprContext(data_desc, namer)
    acc_ctx = ExprContext(DataDesc([],[]), namer)
    grp_ctx = ExprContext(DataDesc([],[]), namer)
    out_ctx = ExprContext(DataDesc([],[]), namer)

    # parse key expr
    keys = []
    key_ins_pos = 0  # next output position for an exposed key
    for node in parse(key_str or '1').body:
        assigned_name = None
        if isinstance(node, _ast.Assign):
            # Only the simple form ``name = expr`` is accepted.
            if len(node.targets) != 1 or not isinstance(node.targets[0], _ast.Name):
                raise Exception('Bad assignment in %r' % (key_str,))
            expr = parse_expr(key_ctx, node.value)
            assigned_name = node.targets[0].id
        else:
            expr = parse_expr(key_ctx, node)
            if not output_only_assigned:
                # Every key must be output, so it needs a name: a bare
                # variable reference supplies its own, anything else is an error.
                if isinstance(node, _ast.Expr) and isinstance(node.value, _ast.Name):
                    assigned_name = node.value.id
                else:
                    raise Exception('Please assign expression to a variable in %r' % (key_str,))
        # Two awk variables per key: the current group's key and the key
        # computed for the row being processed.
        key_name = namer.get_name('__key', expr.tostr())
        key_row_name = namer.get_name('__row_key', expr.tostr())
        if assigned_name:
            out_ctx.set_var(
                key_name,
                RowExprAssign(key_name, expr)
            )
            # Exposed keys are inserted at successive positions starting at 0.
            out_ctx.set_var(
                assigned_name,
                RowExprAssign(assigned_name, RowExprVar(out_ctx, key_name)),
                insert_at=key_ins_pos
            )
            key_ins_pos += 1
            # Also registered in grp_ctx, which is the context 'grp'
            # expressions are parsed against below.
            grp_ctx.set_var(
                assigned_name,
                RowExprAssign(assigned_name, RowExprVar(out_ctx, key_name)),
            )
        if isinstance(expr, RowExprField): # force str assuming if node is field
            expr = RowExprOp('', [expr, RowExprConst("")])
        keys.append((expr, key_name, key_row_name))

    # Parse the grouping expressions; each result is registered in its own
    # context and in the output context under ``expr.target``.
    for grp_type, expr_str in grp_expr_tuples:
        for ast_expr in parse(expr_str).body:
            if grp_type == 'acc':
                expr = parse_assign_grpexpr(acc_ctx, ast_expr, row_ctx, acc_maker)
                acc_ctx.set_var(expr.target, expr)
                out_ctx.set_var(expr.target, expr)
            elif grp_type == 'grp':
                expr = parse_assign_grpexpr(grp_ctx, ast_expr, row_ctx, grp_maker)
                grp_ctx.set_var(expr.target, expr)
                out_ctx.set_var(expr.target, expr)
            else:
                raise Exception('Unknown grouping type %r' % (grp_type,))


    # construct awk script
    print_awk, output_desc = awk_filter_map_from_context(
        out_ctx,
        order = data_desc.order,
    )
    if output_desc is None:
        raise Exception('No output fields specified')
    # Only print_awk.begin and print_awk.main are used below, so an END part
    # would be silently dropped.
    assert not print_awk.end

    init_grps = AwkBlock()
    init_accs = AwkBlock()
    calc_row_keys = AwkBlock()
    keys_changed = []
    update_keys = AwkBlock()
    update_grps = AwkBlock()
    update_accs = AwkBlock()
    end_grps = AwkBlock()

    for expr, name, row_name in keys:
        calc_row_keys.append(row_name + ' = ' + expr.tostr())
        update_keys.append(name + ' = ' + row_name)
        keys_changed.append(name + '!=' + row_name)

    for name, val in find_grp_funcs(grp_ctx):
        init_grps.append(val.init_str())
        update_grps.extend(val.update_str(recursive=True))
        end_grps.append(val.end_str())

    # 'acc' functions share end_grps with 'grp' functions, but their init
    # code only goes into BEGIN (init_accs is not re-run on key change).
    for name, val in find_grp_funcs(acc_ctx):
        init_accs.append(val.init_str())
        update_accs.extend(val.update_str(recursive=True))
        end_grps.append(val.end_str())

    keys_changed_str = ' || '.join(keys_changed)

    awk = AwkScript(
        begin = (
            print_awk.begin
            + init_grps
            + init_accs
            + AwkBlock(['__print_last = ' + str(int(key_str is None))])
        ),
        # END: finish and print the last group when any row was read, or
        # unconditionally when __print_last is 1.
        end = AwkBlock() if expose_groups else AwkBlock([AwkHeadBlock(
            'if(NR!=0 || __print_last==1)',
            end_grps + print_awk.main
        )]),
        main = (
            calc_row_keys
            # The first row only records its keys; later rows compare against
            # the stored keys and close the previous group on any change.
            + AwkHeadBlock('if(NR==1)', update_keys)
            + AwkHeadBlock('else', AwkBlock([
                AwkHeadBlock('if(' + keys_changed_str + ')',
                    end_grps
                    + (print_awk.main if not expose_groups else AwkBlock())
                    + update_keys
                    + init_grps
                )])
            )
            + update_grps
            + update_accs
            + (print_awk.main if expose_groups else AwkBlock())
        )
    )

    return awk, output_desc