Exemple #1
0
def PyQuery( clauses, prior_locs, returnType ):
  """Plan, rewrite and execute a query given as a sequence of clauses.

  The clauses are copied and reversed, turned into a plan with
  ``plan_from_list`` and passed through ``rewrite`` together with
  ``prior_locs``.  The resulting plan is executed over a one-element
  input holding ``emptyTuple([])``.

  ``returnType`` selects the shape of the result: ``"gen"`` returns what
  ``plan.execute`` produced unchanged, ``"list"`` and ``"set"`` wrap it
  in the matching container, and any other value converts it with
  ``dict``.
  """
  # Execution starts from a single empty tuple.
  seed = [ emptyTuple([]) ]

  # Work on a reversed copy so the caller's sequence is left untouched.
  ordered_clauses = list(clauses)
  ordered_clauses.reverse()

  optimized = rewrite(plan_from_list(ordered_clauses), prior_locs)

  if Debug().print_optimized:
      print("Rewritten query:",optimized)

  result = optimized.execute(seed, prior_locs)

  if returnType == "gen":
    return result
  if returnType == "list":
    return list(result)
  if returnType == "set":
    return set(result)
  return dict(result)
Exemple #2
0
  def wrap(self,subplan,project_list,visible_vars):
    """Build one SQL query for the clauses of ``subplan`` and wrap it.

    The clauses are visited in ``subplan.as_list_reversed()`` order:

    * a ``For`` whose ``database`` attribute is set contributes a table
      to the FROM list;
    * a ``For`` over ``outer(PyQuery(...))`` contributes a ``LEFT JOIN``
      against a subquery assembled from the nested clauses; the subquery
      additionally selects ``true as "#checkbit"``;
    * a ``Let`` records the expression that computes its variable;
    * a ``Where`` contributes a predicate to the WHERE list.

    Args:
        subplan: plan node exposing ``as_list_reversed()``.
        project_list: container (tested with ``in``) of the variables
            that must be present in the SELECT list.
        visible_vars: mapping of names visible outside the query; the
            sources of nested ``For`` clauses are looked up in it by name.

    Returns:
        ``WrappedSubplan(src, sql_query, tables, output_vars)``, where
        ``src`` is the ``'source'`` entry of the last database ``For``
        clause seen, or ``None`` if there was none.
    """

    def qualified_name(table):
        # Prefix the table with its schema when one is set.
        if table.schema_name:
            return "%s.%s" % (table.schema_name,table.table_name)
        return table.table_name

    tables = []
    output_tuple_vars = []
    output_vars = []
    output_exprs = {}
    where_exprs = []

    symtab = psql_infer_types(subplan,visible_vars)

    src = None

    for node in subplan.as_list_reversed():
        c = node.op

        if type(c) == For:

            # If this is a clause that goes against the database source,
            # record the table name in the output SQL and the output variable
            # as well.
            if c.database:
                src = c.database['source']
                schema = psql_infer_types_expr(get_ast(c.vars[0]),symtab,visible_vars)
                tables.append( { 'table_name': qualified_name(src),
                                 'tuple_var' : c.vars[0],
                                 'output_schema' : schema } )
                output_tuple_vars.append(c.vars[0])

            # If this is an outerjoin clause, we will process the nested query
            # and create a nested query that we'll outerjoin with the main query.
            else:
                e = get_ast(c.expr)
                if ( isinstance(e,call_e) and isinstance(e.func, name_e) and e.func.id == 'outer' and
                        isinstance(e.args[0],call_e) and isinstance(e.args[0].func, name_e) and e.args[0].func.id == 'PyQuery' ):
                    # NOTE(security): eval() executes the printed clause list
                    # as Python code; this is only safe for trusted queries.
                    nested_clauses = eval(print_ast(e.args[0].args[0]))
                    nested_clauses.reverse()
                    nested_plan = plan_from_list(nested_clauses)

                    nested_tables = []
                    n_output_exprs = {}
                    n_where_exprs = []
                    n_output_vars = []
                    is_tuple_expr = False

                    for nnode in nested_plan.as_list():
                        nc = nnode.op

                        if type(nc) == For:
                            # The expression in this clause has to be a variable that binds to an RDBMSTable
                            source = get_ast(nc.expr)
                            n_src = visible_vars[source.id]

                            nested_tables.append( {'table_name': qualified_name(n_src),
                                                   'tuple_var': nc.vars[0] } )

                        if type(nc) == Let:
                            e2 = get_ast(nc.expr)
                            e2 = replace_vars(e2,n_output_exprs)
                            # Key the expression by the variable the nested
                            # let clause itself defines (not by the variable
                            # of the enclosing for clause), so later nested
                            # clauses substitute the right name.
                            n_output_exprs[nc.vars[0]] = e2

                        if type(nc) == Where:
                            e2 = get_ast(nc.expr)
                            e2 = replace_vars(e2,n_output_exprs)
                            n_where_exprs.append( e2 )

                        if type(nc) == Select:
                            e2 = get_ast(nc.expr)
                            e2 = replace_vars(e2,n_output_exprs)
                            if isinstance(e2,call_e) and isinstance(e2.func,name_e) and e2.func.id == 'make_pql_tuple':
                                for field in e2.args[0].values:
                                    te = field.values[0].value.replace(" ","")
                                    te_a = te
                                    # Default alias: drop the leading
                                    # "<var>." qualifier, and quote the
                                    # result unless it is a plain word.
                                    if re.match(r"\w+\.",te_a):
                                        te_a = "".join( te_a.split(".")[1:] )
                                    if not re.match(r"^\w+$",te_a):
                                        te_a = '"' + te_a + '"'
                                    ae = field.values[1]
                                    alias = te_a if isinstance(ae,none_literal) else ae.value
                                    n_output_vars.append( {'var':get_ast(te), 'alias':alias} )

                            else:
                                if isinstance(e2,name_e) and psql_infer_types_expr(e2,symtab,visible_vars)['type'] == 'tuple':
                                    is_tuple_expr = True
                                n_output_vars = [ {'var':e2, 'alias':None}  ]

                    output_tuple_vars.append(c.vars[0])
                    output_schema = psql_infer_types_expr(e,symtab,visible_vars)['unit_type']
                    # A non-tuple result is exposed as a one-column tuple
                    # named '#value'.
                    if output_schema['type'] != 'tuple':
                        output_schema = {"values":[ ('#value',{'type':output_schema['type']}) ]}
                    output_schema['values'].append( ('#checkbit',{'type':'boolean'}) )

                    tables.append( {'nested_tables':nested_tables,
                                    'where':n_where_exprs,
                                    'output':n_output_vars,
                                    'output_schema': output_schema,
                                    'tuple_var':c.vars[0],
                                    'is_tuple_expr':is_tuple_expr,
                                    'clause_vars':nested_plan.defined_vars(),
                                    'outer':True } )

        if type(c) == Let:

            # We support two types of let clauses, the outerjoin let clause and
            # a basic expression let clause
            e = get_ast(c.expr)

            output_vars.append( c.vars[0])

            # We need to scan the expression first and
            # replace all non-tuple variables with the expressions that computed them.
            e = replace_vars(e,output_exprs)

            output_exprs[c.vars[0]] = e

        if type(c) == Where:

            # Add a where clause expression to the query. Again, we need to replace all
            # the non-tuple variables with expressions that computed them.
            e = get_ast(c.expr)
            e = replace_vars(e,output_exprs)
            where_exprs.append( e )

    # Project out variables that are not needed above in the plan
    output_tuple_vars = [v for v in output_tuple_vars if v in project_list]
    output_vars = [v for v in output_vars if v in project_list]

    sql_query = "SELECT "
    sql_query += ", ".join(['%s.*' % v for v in output_tuple_vars])
    if output_vars:
        if output_tuple_vars:
            sql_query += ", "
        sql_query += ", ".join(['%s as %s' %
                                    (psql_translate_expr(output_exprs[v],symtab,visible_vars),v) for v in output_vars] )

    join_expr = ""
    for (i,t) in enumerate(tables):
        # Plain tables are comma-separated; outer joins are chained with
        # LEFT JOIN below and therefore need no comma.
        if i != 0 and 'outer' not in t:
            join_expr += ", "

        if 'table_name' in t:
            join_expr +=  "%s as %s" % (t['table_name'],t['tuple_var'])

        if 'outer' in t:
            subquery = "( SELECT "
            if t['is_tuple_expr']:
                subquery += ' %s.*,true as "#checkbit"' % t['output'][0]['var'].id
            else:
                printed_exprs = [(psql_translate_expr(v['var'],symtab,visible_vars), v['alias']) for v in t['output']]
                printed_exprs.append( ('true','"#checkbit"') )
                subquery += ", ".join(['%s as %s' % (v,a) if a else '%s' % v for (v,a) in printed_exprs])
            subquery += "\nFROM " + ", ".join([ "%s as %s" % (x['table_name'],x['tuple_var']) for x in t['nested_tables']])
            subquery += " ) as %s" % t['tuple_var']
            if t['where']:
                # Aliases used in the ON predicates that are defined by the
                # nested clauses are mapped to the subquery's alias.
                aliases = { a for b in [get_aliases(x) for x in t['where']] for a in b}
                filtered_aliases = aliases.intersection( t['clause_vars'] )
                alias_map = { a:t['tuple_var'] for a in filtered_aliases }
                subquery += "\nON " + " and ".join([psql_translate_expr(w,symtab,visible_vars,alias_map) for w in t['where']])
            else:
                subquery += "\nON true"

            join_expr += " LEFT JOIN " + subquery

    sql_query += "\n"
    sql_query += "FROM "
    sql_query += join_expr

    sql_query += "\n"
    if where_exprs:
        sql_query += "WHERE " + " and ".join([psql_translate_expr(w,symtab,visible_vars) for w in where_exprs])

    return WrappedSubplan(src, sql_query, tables, output_vars)
Exemple #3
0
def rewrite(plan,visible_vars):
    """Rewrite a clause plan into a plan over sources, joins and wrappers.

    The clauses of ``plan`` are visited in ``plan.as_list_reversed()``
    order and distributed as follows:

    * a single-variable ``For`` over an ``RDBMSTable`` found in
      ``visible_vars`` starts, or extends, the source plan keyed by the
      table's engine URL;
    * a ``For`` accepted by ``good_outerjoin`` is pushed into the matching
      database source when that source ``supports`` it, otherwise it is
      kept as an ordinary clause;
    * any other single-variable ``For`` whose used variables do not
      intersect the variables defined so far becomes a new source;
    * a ``Let`` over a ``PyQuery`` call accepted by ``good_outerjoin``
      becomes a ``LeftOuterJoin`` source; other ``Let`` clauses are pushed
      into the first source that defines all their variables;
    * the conjuncts of a ``Where`` are split into hints, per-source
      filters, join conditions and leftover predicates;
    * once a ``GroupBy`` has been seen, ``For`` and ``Let`` clauses are no
      longer pushed into sources.

    The sources are then combined into a tree of joins, join conditions
    and hints are attached to the joins, the remaining clauses are stacked
    on top, and every database source plan is replaced by the wrapped
    subplan returned by its source's ``wrap``.

    Args:
        plan: plan node exposing ``as_list_reversed()``.
        visible_vars: mapping of names visible outside the query to their
            values.

    Returns:
        The root ``OpTreeNode`` of the rewritten plan.
    """
    source_id = 0
    databases = {}      # engine URL -> source id
    source_meta = {}    # source id -> metadata dict
    source_plans = {}   # source id -> OpTreeNode for that source
    rest_clauses = []   # clauses that stay above the sources/joins

    # Have we seen any group-bys in the plan?
    # In this case we can't push fors or lets any longer
    groupbys_seen = False

    hints = []
    join_conds = []

    for subplan in plan.as_list_reversed():

        # Current operator
        op = subplan.op

        # Compute all defined vars
        defined_vars = subplan.defined_vars()

        # If we see a for clause, we try to find its source, if there's none,
        # we'll create a new source. We'll then push the for clause into the
        # source plans. This can be done for the clauses that don't depend upon
        # any variables.
        if type(op) == For and len(op.vars) == 1 and not groupbys_seen:
            source = get_ast(op.expr)
            if (isinstance(source,name_e)
                    and isinstance(visible_vars.get(source.id), RDBMSTable) ):
                database = visible_vars[source.id]
                meta = {    "type":"database",
                            "database":database.engine,
                            "source":database
                }
                op.database = meta

                url = database.engine.url
                if url not in databases:
                    source_meta[source_id] = meta
                    databases[url] = source_id
                    source_plans[source_id] = OpTreeNode(op,None)
                    source_id += 1
                else:
                    sid = databases[url]
                    source_plans[sid] = OpTreeNode(op,source_plans[sid])

            # Check if this is an iterator over an outerjoin
            elif good_outerjoin(source,defined_vars,'for'):
                srcs = outerjoin_sources(source.args[0],visible_vars)

                # If we can push this thing into an existing wrapper, do so
                pushed = False
                if all(isinstance(s, RDBMSTable) for s in srcs):
                    urls = {s.engine.url for s in srcs}
                    if len(urls) == 1:
                        url = next(iter(urls))
                        if url in databases:
                            sid = databases[url]
                            if source_meta[sid]['source'].supports(source_plans[sid],get_ast(op.expr),visible_vars):
                                source_plans[sid] = OpTreeNode(op,source_plans[sid])
                                pushed = True

                # If we didn't manage to push the outerjoin into the wrapper,
                # keep the clause as-is above the sources.
                if not pushed:
                    rest_clauses.append(op)

            # Check whether the variables used in the operator don't occur in the subplan
            elif not op.used_vars().intersection(defined_vars):
                source_meta[source_id] = {"type":"expr", "expr":op.expr}
                source_plans[source_id] = OpTreeNode(op)
                source_id += 1
            else:
                rest_clauses.append(op)

        # We can push let clause into one of the sources, if that can't happen, we'll
        # keep it above the sources. It can be pushed into an existing source only if
        # it depends only upon the variables of that source or has no dependencies.
        elif type(op) == Let and not groupbys_seen:
            expr = get_ast(op.expr)

            if (isinstance(expr,call_e) and isinstance(expr.func,name_e) and expr.func.id == 'PyQuery'):
                if good_outerjoin(expr,defined_vars,'let'):
                    # NOTE(security): eval() executes the printed clause list
                    # as Python code; this is only safe for trusted queries.
                    clauses = eval(print_ast(expr.args[0]))
                    # Keep the nested query's hints in their own name so the
                    # hints collected from this plan's where clauses are not
                    # overwritten.
                    (clauses,oj_hints,on) = extract_where(clauses,visible_vars)
                    clauses.reverse()

                    # Create a "broken" outerjoin without a left child, we'll
                    # fix this up when we introduce joins into the plan
                    op = LeftOuterJoin(on=on, hints=oj_hints)
                    source_meta[source_id] = {"type":"let_outerjoin",
                                              "clauses":clauses,
                                              "hints":oj_hints,
                                              "on":on}
                    source_plans[source_id] = OpTreeNode(op, None, plan_from_list(clauses))
                    source_id += 1
                else:
                    rest_clauses.append(op)
            else:
                let_vars = get_all_vars(expr)
                srcs = [s for s in range(source_id)
                        if not (let_vars - source_plans[s].defined_vars())]
                if len(op.vars) == 1 and srcs:
                    src = srcs[0]
                    if source_meta[src]['type'] == 'database':
                        if source_meta[src]['source'].supports(source_plans[src],get_ast(op.expr),visible_vars):
                            source_plans[src] = OpTreeNode(op, source_plans[src])
                        else:
                            rest_clauses.append(op)
                    else:
                        source_plans[src] = OpTreeNode(op,source_plans[src])
                else:
                    rest_clauses.append(op)

        # After a group-by, for and let clauses are no longer pushed to the sources.
        elif type(op) == GroupBy:
            rest_clauses.append(op)
            groupbys_seen = True

        # The where clause is especially important for us, since it includes conditions that
        # we can push to the sources, including join conditions, and also various hints.
        elif type(op) == Where:
            expr = get_ast(op.expr)
            exprs = [expr]
            if isinstance(expr,boolOp_e) and expr.op == 'and':
                exprs = expr.args
            remaining_exprs = []
            for e in exprs:
                # If this is a hint, record the hint
                if isinstance(e,call_e) and isinstance(e.func,name_e) and e.func.id == 'hint':
                    hints.append(e)
                    continue

                # If the entire expression can be pushed to a specific source, do so
                e_vars = get_all_vars(e)
                srcs = [s for s in range(source_id)
                        if not (e_vars - source_plans[s].defined_vars())]
                if srcs:
                    src = srcs[0]
                    if source_meta[src]['type'] == 'database':
                        if source_meta[src]['source'].supports(source_plans[src],e,visible_vars):
                            source_plans[src] = OpTreeNode(Where(print_ast(e)),source_plans[src])
                        else:
                            remaining_exprs.append(e)
                    else:
                        source_plans[src] = OpTreeNode(Where(print_ast(e)), source_plans[src])

                # If this looks like a join condition, we'll record it separately.
                # However, we need to check that it's a real join condition, i.e.
                # it doesn't include a reference to the outside (visible) variables.
                elif is_join_cond(e) and not e_vars.intersection(visible_vars):
                    join_conds.append(e)
                else:
                    remaining_exprs.append(e)
            if remaining_exprs:
                e = remaining_exprs[0] if len(remaining_exprs) == 1 else boolOp_e('and',remaining_exprs)
                rest_clauses.append( Where(print_ast(e)) )
        else:
            rest_clauses.append(op)

    join = None
    last_join = None

    # Create a tree of joins if there is more than one source
    if source_id > 1:
        if type(source_plans[1].op) == LeftOuterJoin:
            last_join = _attach_outerjoin(source_plans[1], source_plans[0], source_meta[1])
        else:
            last_join = OpTreeNode( Join(), source_plans[0],source_plans[1] )

        # Every remaining source is combined with the tree built so far.
        for s in range(2,source_id):
            if type(source_plans[s].op) == LeftOuterJoin:
                last_join = _attach_outerjoin(source_plans[s], last_join, source_meta[s])
            else:
                last_join = OpTreeNode( Join(), source_plans[s], last_join )

        # The root may be a MakeList node; conditions and hints are attached
        # starting from the first real join below it.
        join = _find_next_join(last_join)

        # Push join condition to the deepest level
        for cond in join_conds:
            all_cond_vars = get_all_vars(cond)
            deepest_join = join
            while True:
                left_vars = deepest_join.left_child.defined_vars()
                right_vars = deepest_join.right_child.defined_vars()
                if all_cond_vars.issubset(left_vars):
                    candidate = _find_next_join(deepest_join.left_child)
                elif all_cond_vars.issubset(right_vars):
                    candidate = _find_next_join(deepest_join.right_child)
                else:
                    break
                # No join further down on that side: stay at this level.
                if candidate is None:
                    break
                deepest_join = candidate

            left_child_vars = deepest_join.left_child.defined_vars()
            c1 = cond.left
            c2 = cond.comparators[0]

            # Put each side of the comparison with the child that defines
            # its variables.
            if get_all_vars(c1).intersection(left_child_vars):
                deepest_join.op.left_conds.append(print_ast(c1))
                deepest_join.op.right_conds.append(print_ast(c2))
            else:
                deepest_join.op.left_conds.append(print_ast(c2))
                deepest_join.op.right_conds.append(print_ast(c1))

        # Push in join hints to the level with join conditions
        for hint in hints:
            join_type = hint.args[0].value
            left_var = hint.args[1].value
            right_var = hint.args[2].value

            # NOTE(review): this filters the items yielded by visit() by
            # type(x)==Join but then reads child links from them; confirm
            # what visit() yields.
            for j in [x for x in join.visit() if type(x)==Join]:
                l_vars = j.left_child.defined_vars()
                r_vars = j.right_child.defined_vars()
                if left_var in l_vars and right_var in r_vars:
                    j.hint = {'join_type':join_type, 'dir':'right'}
                elif left_var in r_vars and right_var in l_vars:
                    j.hint = {'join_type':join_type, 'dir':'left'}

    # Build the final plan. The root of the join tree is last_join (which
    # may be a MakeList node sitting above the first join), so that node,
    # not the first join, is what the remaining clauses are stacked on.
    if last_join is not None:
        res = last_join
    else:
        res = source_plans[0] if source_plans else None

    for c in rest_clauses:
        res = OpTreeNode(c, res)

    res.compute_parents()

    # Iterate over the database sources and translate the queries into
    # database-specific dialects
    for src_id in databases.values():
        subplan = source_plans[src_id]
        src_meta = source_meta[src_id]

        # Compute the project list: variables defined by the source plan
        # that are used above it.
        used_var_list = subplan.used_vars_above()
        source_vars = subplan.defined_vars()
        project_list = used_var_list.intersection(source_vars)

        wrapped = src_meta['source'].wrap(subplan,project_list,visible_vars)
        subplan.replace( OpTreeNode(wrapped) )

    return res


def _find_next_join(tree):
    """Return the first Join/LeftOuterJoin node following left children from ``tree``, or None."""
    while tree is not None and not isinstance(tree.op, (Join, LeftOuterJoin)):
        tree = tree.left_child
    return tree


def _attach_outerjoin(oj_tree, left_plan, meta):
    """Give the outerjoin node ``oj_tree`` its left input and return the new tree root.

    For a ``'let_outerjoin'`` source the left input is additionally wrapped
    in a ``Count`` node and a ``MakeList`` node is placed above the
    outerjoin; that ``MakeList`` node is then the returned root.
    """
    # Link the left input first; only afterwards may oj_tree be treated as
    # the current root, otherwise the node would become its own child.
    oj_tree.left_child = left_plan
    if meta['type'] != 'let_outerjoin':
        return oj_tree

    all_left_vars = oj_tree.left_child.defined_vars()
    counter_var = make_cvar()
    oj_tree.left_child = OpTreeNode( Count(counter_var), oj_tree.left_child )
    return OpTreeNode( MakeList(counter_var, all_left_vars), oj_tree )
Exemple #4
0
def rewrite(plan, visible_vars):
    source_id = 0
    databases = {}
    source_meta = {}
    source_plans = {}
    rest_clauses = []

    # Have we seen any group-bys in the plan?
    # In this case we can't push fors or lets any longer
    groupbys_seen = False

    # All variables defined by this plan
    defined_vars = set()

    # Live variables - all variables that are needed above in the plan
    live_vars = set()

    # Variables that were turned into a list by group-by
    list_vars = set()

    hints = []
    join_conds = []

    for subplan in plan.as_list_reversed():

        # Current operator
        op = subplan.op

        # Compute all defined vars
        defined_vars = subplan.defined_vars()

        # Current variables are the union of variables visible outside of the
        # scope of this plan and variables defined by this subplan
        current_vars = defined_vars.union(set(visible_vars))

        # If we see a for clause, we try to find its source, if there's none,
        # we'll create a new source. We'll then push the for clause into the
        # source plans. This can be done for the clauses that don't depend upon
        # any variables.
        if type(op) == For and len(op.vars) == 1 and not groupbys_seen:
            source = get_ast(op.expr)
            if (isinstance(source, name_e)
                    and isinstance(visible_vars.get(source.id), RDBMSTable)):
                database = visible_vars[source.id]
                meta = {
                    "type": "database",
                    "database": database.engine,
                    "source": database
                }
                op.database = meta

                url = database.engine.url
                if not url in databases:
                    source_meta[source_id] = meta
                    databases[url] = source_id
                    source_plans[source_id] = OpTreeNode(op, None)
                    source_id += 1
                else:
                    source_plans[databases[url]] = OpTreeNode(
                        op, source_plans[databases[url]])

            # Check if this is an iterator over an outerjoin
            elif good_outerjoin(source, defined_vars, 'for'):
                srcs = outerjoin_sources(source.args[0], visible_vars)

                # If we can push this thing into an existing wrapper, do so
                pushed = False
                if all([isinstance(s, RDBMSTable) for s in srcs]):
                    urls = {s.engine.url for s in srcs}
                    if len(urls) == 1 and list(urls)[0] in databases:
                        url = list(urls)[0]
                        sid = databases[url]
                        if source_meta[sid]['source'].supports(
                                source_plans[sid], get_ast(op.expr),
                                visible_vars):
                            source_plans[sid] = OpTreeNode(
                                op, source_plans[sid])
                            pushed = True

                # If we didn't manage to push the outerjoin into the wrapper,
                # add it as another outerjoin source to the plan

                if not pushed:
                    #clauses = eval(print_ast(source.args[0].args[0]))
                    #(clauses,hints,on) = extract_where(clauses,visible_vars)
                    #clauses.reverse()

                    # Create a "broken" outerjoin without a left child, we'll
                    # fix this up when we introduce joins into the plan

                    #op = LeftOuterJoin(on=on, hints=hints)
                    #source_meta[source_id] = {"type":"for_outerjoin",
                    #                          "clauses":clauses,
                    #                          "hints":hints,
                    #                          "on":on }
                    #source_plans[source_id] = OpTreeNode(op,None,plan_from_list(clauses))
                    #source_id += 1
                    rest_clauses.append(op)

            # Check whether the variables used in the operator don't occur in the subplan
            elif op.used_vars().intersection(defined_vars) == set():
                source_meta[source_id] = {"type": "expr", "expr": op.expr}
                source_plans[source_id] = OpTreeNode(op)
                source_id += 1
            else:
                rest_clauses.append(op)

        # We can push let clause into one of the sources, if that can't happen, we'll create
        # a new source for it. It can be pushed into an existing source only if it only depends
        # upon the variables in only the source or has no dependencies.

        elif type(op) == Let and not groupbys_seen:
            expr = get_ast(op.expr)

            # At some point it will be nice to translate a let clause into an outerjoin
            # if it looks like an outerjoin

            if (isinstance(expr, call_e) and isinstance(expr.func, name_e)
                    and expr.func.id == 'PyQuery'):
                if good_outerjoin(expr, defined_vars, 'let'):
                    clauses = eval(print_ast(expr.args[0]))
                    (clauses, hints, on) = extract_where(clauses, visible_vars)
                    clauses.reverse()

                    # Create a "broken" outerjoin without a left child, we'll
                    # fix this up when we introduce joins into the plan
                    op = LeftOuterJoin(on=on, hints=hints)
                    source_meta[source_id] = {
                        "type": "let_outerjoin",
                        "clauses": clauses,
                        "hints": hints,
                        "on": on
                    }
                    source_plans[source_id] = OpTreeNode(
                        op, None, plan_from_list(clauses))
                    source_id += 1
                else:
                    rest_clauses.append(op)
            else:

                let_vars = get_all_vars(expr)
                srcs = [
                    s for s in range(source_id)
                    if let_vars - source_plans[s].defined_vars() == set()
                ]
                if len(op.vars) == 1 and srcs:
                    src = srcs[0]
                    if source_meta[src]['type'] == 'database':
                        if source_meta[src]['source'].supports(
                                source_plans[src], get_ast(op.expr),
                                visible_vars):
                            source_plans[src] = OpTreeNode(
                                op, source_plans[src])
                        else:
                            rest_clauses.append(op)
                    else:
                        source_plans[src] = OpTreeNode(op, source_plans[src])
                else:
                    rest_clauses.append(op)

        # When we see a group-by, we mark all the variables not in the group-by key as list
        # variables. This knowledge will help to figure out whether we can send further clauses
        # that depend on these variables to the source.
        elif type(op) == GroupBy:
            list_vars = current_vars - {x[1] for x in op.groupby_list}
            rest_clauses.append(op)
            groupbys_seen = True

        # The where clause is especially important for us, since it includes conditions that
        # we can push to the sources, including join conditions, and also various hints.
        elif type(op) == Where:
            expr = get_ast(op.expr)
            exprs = [expr]
            if isinstance(expr, boolOp_e) and expr.op == 'and':
                exprs = expr.args
            remaining_exprs = []
            for e in exprs:
                # If this is a hint, record the hint
                if isinstance(e, call_e) and isinstance(
                        e.func, name_e) and e.func.id == 'hint':
                    hints.append(e)
                else:
                    # If the entire expression can be pushed to a specific source, do so
                    srcs = [
                        s for s in range(source_id) if get_all_vars(e) -
                        source_plans[s].defined_vars() == set()
                    ]
                    if srcs:
                        src = srcs[0]
                        if source_meta[src]['type'] == 'database':
                            if source_meta[src]['source'].supports(
                                    source_plans[src], e, visible_vars):
                                source_plans[src] = OpTreeNode(
                                    Where(print_ast(e)), source_plans[src])
                            else:
                                remaining_exprs.append(e)
                        else:
                            source_plans[src] = OpTreeNode(
                                Where(print_ast(e)), source_plans[src])

                    # If this looks like a join condition, we'll record it separately. However, we need to
# to check that its a real join condition, i.e. doesn't include a refence from the
# local variables
                    elif is_join_cond(e) and not get_all_vars(e).intersection(
                            visible_vars):
                        join_conds.append(e)
                    else:
                        remaining_exprs.append(e)
            if remaining_exprs:
                e = remaining_exprs[0] if len(
                    remaining_exprs) == 1 else boolOp_e(
                        'and', remaining_exprs)
                rest_clauses.append(Where(print_ast(e)))
        else:
            rest_clauses.append(op)

    join = None

    # Create a tree of joins if there is more than one source
    if source_id > 1:
        last_join = OpTreeNode(Join(), source_plans[0], source_plans[1])

        if type(source_plans[1].op) == LeftOuterJoin:
            oj_tree = source_plans[1]
            last_join = oj_tree
            oj_tree.left_child = source_plans[0]

            if source_meta[1]['type'] == 'let_outerjoin':
                all_left_vars = oj_tree.left_child.defined_vars()
                new_counter_var = make_cvar()
                new_child = OpTreeNode(Count(new_counter_var),
                                       oj_tree.left_child)
                oj_tree.left_child = new_child

                new_last_join = OpTreeNode(
                    MakeList(new_counter_var, all_left_vars), last_join)
                last_join = new_last_join

        for s in range(2, source_id - 3):
            if type(source_plans[s].op) == LeftOuterJoin:
                oj_tree = source_plans[s]
                last_join = oj_tree
                oj_tree.left_child = last_join

                if source_meta[s]['type'] == 'let_outerjoin':
                    all_left_vars = oj_tree.left_child.defined_vars()
                    new_counter_var = make_cvar()
                    new_child = OpTreeNode(Count(new_counter_var),
                                           oj_tree.left_child)
                    oj_tree.left_child = new_child

                    new_last_join = OpTreeNode(
                        MakeList(new_counter_var, all_left_vars), last_join)
                    last_join = new_last_join
            else:
                last_join = OpTreeNode(Join(), source_plans[s], last_join)

        def find_next_join(tree):
            """Return the closest join node reachable via left children.

            Starting at ``tree`` itself, follow ``left_child`` links until a
            node whose operator is a ``Join`` or a ``LeftOuterJoin`` is
            reached, and return that node.
            """
            node = tree
            # Walk down the left spine until we hit a join operator
            while not isinstance(node.op, (Join, LeftOuterJoin)):
                node = node.left_child
            return node

        join = find_next_join(last_join)

        # Push join condition to the deepest level
        for cond in join_conds:
            all_cond_vars = get_all_vars(cond)
            deepest_join = join
            while True:
                left_vars = deepest_join.left_child.defined_vars()
                right_vars = deepest_join.right_child.defined_vars()
                if all_cond_vars.intersection(left_vars) == all_cond_vars:
                    deepest_join == find_next_join(deepest_join.left_child)
                elif all_cond_vars.intersection(right_vars) == all_cond_vars:
                    deepest_join == find_next_join(deepest_join.right_child)
                else:
                    break

            left_child_vars = deepest_join.left_child.defined_vars()
            c1 = cond.left
            c2 = cond.comparators[0]

            if get_all_vars(c1).intersection(left_child_vars):
                deepest_join.op.left_conds.append(print_ast(c1))
                deepest_join.op.right_conds.append(print_ast(c2))
            else:
                deepest_join.op.left_conds.append(print_ast(c2))
                deepest_join.op.right_conds.append(print_ast(c1))

        # Push in join hints to the level with join conditions
        for hint in hints:
            join_type = hint.args[0].value
            left_var = hint.args[1].value
            right_var = hint.args[2].value

            for j in [x for x in join.visit() if type(x) == Join]:
                l_vars = j.left_child.defined_vars()
                r_vars = j.right_child.defined_vars()
                if left_var in l_vars and right_var in r_vars:
                    j.hint = {'join_type': join_type, 'dir': 'right'}
                elif left_var in r_vars and right_var in l_vars:
                    j.hint = {'join_type': join_type, 'dir': 'left'}

    # Build the final plan
    res = None

    if join:
        res = join

    else:
        res = source_plans[0] if source_plans else None

    for c in rest_clauses:
        res = OpTreeNode(c, res)

    res.compute_parents()

    # Iterate over the database sources and translate the queries into
    # database-specific dialects
    for db in databases:
        src_id = databases[db]
        subplan = source_plans[src_id]
        src_meta = source_meta[src_id]

        # Compute the project list
        used_var_list = subplan.used_vars_above()
        vars = subplan.defined_vars()
        project_list = used_var_list.intersection(vars)

        wrapped = src_meta['source'].wrap(subplan, project_list, visible_vars)
        subplan.replace(OpTreeNode(wrapped))

    return res