def parse_grpexpr(grp_ctx, tree, row_ctx, maker):
    """Parse one AST node of a grouping expression.

    A call to a function registered in ``FUNC_MAP`` is turned into the
    corresponding grouping-function object; its arguments are parsed in the
    per-row context ``row_ctx``.  Every other node is delegated to
    ``parse_expr`` in ``grp_ctx``, with this function (bound to the same
    ``maker`` and ``row_ctx``) passed along as the third argument so nested
    nodes can be routed back here.

    Args:
        grp_ctx: expression context used for everything that is not an
            argument of a grouping function.
        tree: the AST node to parse.
        row_ctx: expression context used for grouping-function arguments.
        maker: first argument handed to the ``FUNC_MAP`` factory.

    Returns:
        Whatever the selected ``FUNC_MAP`` factory or ``parse_expr`` returns.

    Raises:
        Exception: if a grouping function is called with keyword arguments.
    """
    # Only a call through a plain name can refer to a grouping function.
    # Calls such as ``a.b(x)`` have an _ast.Attribute callee with no ``.id``,
    # so they must be checked before ``tree.func.id`` is touched.
    is_grp_func_call = (
        isinstance(tree, _ast.Call)
        and isinstance(tree.func, _ast.Name)
        and tree.func.id in FUNC_MAP
    )
    if not is_grp_func_call:
        return parse_expr(
            grp_ctx,
            tree,
            partial(parse_grpexpr, maker=maker, row_ctx=row_ctx),
        )

    func_name = tree.func.id
    if tree.keywords:
        raise Exception('Keyword arguments are not supported in %r' % (func_name,))
    return FUNC_MAP[func_name](
        maker,
        [parse_expr(row_ctx, arg) for arg in tree.args],
    )
def awk_grp(data_desc, key_str, grp_expr_tuples, output_only_assigned=True, expose_groups=False):
    """Build an awk script that aggregates rows over runs of equal keys.

    The generated script computes the key expressions for every row and
    compares them with the keys remembered from the current group.  When any
    key differs, the group functions are finalized and re-initialized.

    Args:
        data_desc: description of the input data; used to build the key and
            row expression contexts, and its ``order`` is passed to
            ``awk_filter_map_from_context``.
        key_str: source text of the key expressions.  A falsy value is parsed
            as the constant ``'1'``.  Only ``None`` additionally sets
            ``__print_last`` to 1, which makes the END block print even when
            no rows were read.
        grp_expr_tuples: iterable of ``(grp_type, expr_str)`` pairs, where
            ``grp_type`` is ``'acc'`` or ``'grp'``.  ``'grp'`` functions are
            re-initialized on every key change; ``'acc'`` functions are
            initialized only once, in the BEGIN block.
        output_only_assigned: if true, a key expression that is not an
            assignment is used for grouping only.  If false, such an
            expression must be a bare name, which is then used as the output
            name.
        expose_groups: if true, the output statements run for every input row
            and the END block is empty.  If false, they run on each key
            change and once more in the END block.

    Returns:
        A tuple ``(awk, output_desc)``: the assembled ``AwkScript`` and the
        output description returned by ``awk_filter_map_from_context``.

    Raises:
        Exception: on a malformed key assignment, on a non-name key
            expression when ``output_only_assigned`` is false, on an unknown
            grouping type, or when no output fields are produced.
    """
    namer = Namer()
    acc_maker = GrpExprFuncMaker('__acc_', namer)
    grp_maker = GrpExprFuncMaker('__grp_', namer)
    key_ctx = ExprContext(data_desc, namer)
    row_ctx = ExprContext(data_desc, namer)
    acc_ctx = ExprContext(DataDesc([], []), namer)
    grp_ctx = ExprContext(DataDesc([], []), namer)
    out_ctx = ExprContext(DataDesc([], []), namer)

    # parse key expr
    keys = []
    key_ins_pos = 0
    for node in parse(key_str or '1').body:
        assigned_name = None
        if isinstance(node, _ast.Assign):
            if len(node.targets) != 1 or not isinstance(node.targets[0], _ast.Name):
                raise Exception('Bad assignment in %r' % (key_str,))
            expr = parse_expr(key_ctx, node.value)
            assigned_name = node.targets[0].id
        else:
            expr = parse_expr(key_ctx, node)
            if not output_only_assigned:
                # Every key must be exported, so a non-assignment is only
                # acceptable when it is a bare name that can double as the
                # output name.
                if isinstance(node, _ast.Expr) and isinstance(node.value, _ast.Name):
                    assigned_name = node.value.id
                else:
                    raise Exception('Please assign expression to a variable in %r' % (key_str,))

        key_name = namer.get_name('__key', expr.tostr())
        key_row_name = namer.get_name('__row_key', expr.tostr())
        if assigned_name:
            out_ctx.set_var(
                key_name,
                RowExprAssign(key_name, expr)
            )
            # Exported keys are inserted at consecutive leading positions
            # (0, 1, 2, ...), in the order they appear in key_str.
            out_ctx.set_var(
                assigned_name,
                RowExprAssign(assigned_name, RowExprVar(out_ctx, key_name)),
                insert_at=key_ins_pos
            )
            key_ins_pos += 1
            grp_ctx.set_var(
                assigned_name,
                RowExprAssign(assigned_name, RowExprVar(out_ctx, key_name)),
            )
        if isinstance(expr, RowExprField):
            # force str assuming if node is field
            # NOTE(review): presumably this renders as a concatenation with
            # an empty string so awk compares keys as strings -- confirm
            # against RowExprOp.
            expr = RowExprOp('', [expr, RowExprConst("")])
        keys.append((expr, key_name, key_row_name))

    for grp_type, expr_str in grp_expr_tuples:
        for ast_expr in parse(expr_str).body:
            if grp_type == 'acc':
                expr = parse_assign_grpexpr(acc_ctx, ast_expr, row_ctx, acc_maker)
                acc_ctx.set_var(expr.target, expr)
                out_ctx.set_var(expr.target, expr)
            elif grp_type == 'grp':
                expr = parse_assign_grpexpr(grp_ctx, ast_expr, row_ctx, grp_maker)
                grp_ctx.set_var(expr.target, expr)
                out_ctx.set_var(expr.target, expr)
            else:
                raise Exception('Unknown grouping type %r' % (grp_type,))

    # construct awk script
    print_awk, output_desc = awk_filter_map_from_context(
        out_ctx,
        order=data_desc.order,
    )
    if output_desc is None:
        raise Exception('No output fields specified')
    assert not print_awk.end

    init_grps = AwkBlock()
    init_accs = AwkBlock()
    calc_row_keys = AwkBlock()
    keys_changed = []
    update_keys = AwkBlock()
    update_grps = AwkBlock()
    update_accs = AwkBlock()
    end_grps = AwkBlock()

    for expr, name, row_name in keys:
        calc_row_keys.append(row_name + ' = ' + expr.tostr())
        update_keys.append(name + ' = ' + row_name)
        keys_changed.append(name + '!=' + row_name)

    for _name, val in find_grp_funcs(grp_ctx):
        init_grps.append(val.init_str())
        update_grps.extend(val.update_str(recursive=True))
        end_grps.append(val.end_str())

    # Accumulators share the finalization block with the group functions but
    # are initialized only in BEGIN, never on a key change.
    for _name, val in find_grp_funcs(acc_ctx):
        init_accs.append(val.init_str())
        update_accs.extend(val.update_str(recursive=True))
        end_grps.append(val.end_str())

    keys_changed_str = ' || '.join(keys_changed)

    awk = AwkScript(
        begin=(
            print_awk.begin
            + init_grps
            + init_accs
            # __print_last == 1 lets END print even when no rows were read.
            + AwkBlock(['__print_last = ' + str(int(key_str is None))])
        ),
        end=AwkBlock() if expose_groups else AwkBlock([AwkHeadBlock(
            'if(NR!=0 || __print_last==1)',
            end_grps + print_awk.main
        )]),
        main=(
            calc_row_keys
            + AwkHeadBlock('if(NR==1)', update_keys)
            + AwkHeadBlock('else', AwkBlock([
                AwkHeadBlock('if(' + keys_changed_str + ')',
                    end_grps
                    + (print_awk.main if not expose_groups else AwkBlock())
                    + update_keys
                    + init_grps
                )])
            )
            + update_grps
            + update_accs
            + (print_awk.main if expose_groups else AwkBlock())
        )
    )
    return awk, output_desc