def top_output_as_value(stmt: pgast.Query, *, env: context.Environment) -> pgast.Query: """Finalize output serialization on the top level.""" if env.output_format == context.OutputFormat.JSON: # For JSON we just want to aggregate the whole thing # into a JSON array. subrvar = pgast.RangeSubselect( subquery=stmt, alias=pgast.Alias(aliasname=env.aliases.get('aggw'))) stmt_res = stmt.target_list[0] if stmt_res.name is None: stmt_res = stmt.target_list[0] = pgast.ResTarget( name=env.aliases.get('v'), val=stmt_res.val, ) new_val = pgast.FuncCall(name=('jsonb_agg', ), args=[pgast.ColumnRef(name=[stmt_res.name])]) new_val = pgast.CoalesceExpr( args=[new_val, pgast.StringConstant(val='[]')]) result = pgast.SelectStmt(target_list=[pgast.ResTarget(val=new_val)], from_clause=[subrvar]) result.ctes = stmt.ctes stmt.ctes = [] return result else: return stmt
def fini_dml_stmt(ir_stmt: irast.MutatingStmt, wrapper: pgast.Query, dml_cte: pgast.CommonTableExpr, dml_rvar: pgast.BaseRangeVar, *, parent_ctx: context.CompilerContextLevel, ctx: context.CompilerContextLevel) -> pgast.Query: # Record the effect of this insertion in the relation overlay # context to ensure that the RETURNING clause potentially # referencing this class yields the expected results. if isinstance(ir_stmt, irast.InsertStmt): dbobj.add_rel_overlay(ir_stmt.subject.scls, 'union', dml_cte, env=ctx.env) elif isinstance(ir_stmt, irast.DeleteStmt): dbobj.add_rel_overlay(ir_stmt.subject.scls, 'except', dml_cte, env=ctx.env) if parent_ctx.toplevel_stmt is wrapper: ret_ref = pathctx.get_path_identity_var(wrapper, ir_stmt.subject.path_id, env=parent_ctx.env) count = pgast.FuncCall(name=('count', ), args=[ret_ref]) wrapper.target_list = [pgast.ResTarget(val=count)] clauses.fini_stmt(wrapper, ctx, parent_ctx) return wrapper
def get_path_output_or_null( rel: pgast.Query, path_id: irast.PathId, *, aspect: str, env: context.Environment) -> \ typing.Tuple[pgast.OutputVar, bool]: path_id = map_path_id(path_id, rel.view_path_id_map) ref = maybe_get_path_output(rel, path_id, aspect=aspect, env=env) if ref is not None: return ref, False alt_aspect = get_less_specific_aspect(path_id, aspect) if alt_aspect is not None: ref = maybe_get_path_output(rel, path_id, aspect=alt_aspect, env=env) if ref is not None: rel.path_outputs[path_id, aspect] = ref return ref, False alias = env.aliases.get('null') restarget = pgast.ResTarget(name=alias, val=pgast.Constant(val=None)) if hasattr(rel, 'returning_list'): rel.returning_list.append(restarget) else: rel.target_list.append(restarget) ref = pgast.ColumnRef(name=[alias], nullable=True) rel.path_outputs[path_id, aspect] = ref return ref, True
def get_path_serialized_output(rel: pgast.Query, path_id: irast.PathId, *, env: context.Environment) -> pgast.OutputVar: # Serialized output is a special case, we don't # want this behaviour to be recursive, so it # must be kept outside of get_path_output() generic. aspect = 'serialized' result = rel.path_outputs.get((path_id, aspect)) if result is not None: return result ref = get_path_serialized_or_value_var(rel, path_id, env=env) ref = output.serialize_expr(ref, path_id=path_id, env=env) alias = get_path_output_alias(path_id, aspect, env=env) restarget = pgast.ResTarget(name=alias, val=ref) if hasattr(rel, 'returning_list'): rel.returning_list.append(restarget) else: rel.target_list.append(restarget) result = pgast.ColumnRef(name=[alias], nullable=ref.nullable) rel.path_outputs[path_id, aspect] = result return result
def new_empty_rvar( ir_set: irast.EmptySet, *, ctx: context.CompilerContextLevel) -> pgast.BaseRangeVar: nullref_alias = ctx.env.aliases.get('e') val = typecomp.cast(pgast.Constant(val=None, nullable=True), source_type=ir_set.scls, target_type=ir_set.scls, force=True, env=ctx.env) nullrel = pgast.SelectStmt( target_list=[ pgast.ResTarget( val=val, name=nullref_alias ) ], nullable=True ) rvar = dbobj.rvar_for_rel(nullrel, env=ctx.env) rvar.path_scope.add(ir_set.path_id) rvar.value_scope.add(ir_set.path_id) null_ref = pgast.ColumnRef(name=[nullref_alias], nullable=True) pathctx.put_rvar_path_output(rvar, ir_set.path_id, aspect='value', var=null_ref, env=ctx.env) if ir_set.path_id.is_objtype_path(): pathctx.put_rvar_path_output(rvar, ir_set.path_id, aspect='identity', var=null_ref, env=ctx.env) return rvar
def array_as_json_object(expr, *, stype, env): if stype.element_type.is_tuple(): coldeflist = [] json_args = [] is_named = stype.element_type.named for n, st in stype.element_type.iter_subtypes(): colname = env.aliases.get(str(n)) if is_named: json_args.append(pgast.StringConstant(val=n)) val = pgast.ColumnRef(name=[colname]) if st.is_collection(): val = coll_as_json_object(val, stype=st, env=env) json_args.append(val) coldeflist.append( pgast.ColumnDef( name=colname, typename=pgast.TypeName( name=pgtypes.pg_type_from_object(env.schema, st)))) if is_named: json_func = 'jsonb_build_object' else: json_func = 'jsonb_build_array' return pgast.SelectStmt(target_list=[ pgast.ResTarget( val=pgast.FuncCall(name=('jsonb_agg', ), args=[ pgast.FuncCall( name=(json_func, ), args=json_args, ) ]), ser_safe=True, ) ], from_clause=[ pgast.RangeFunction( alias=pgast.Alias( aliasname=env.aliases.get('q'), ), coldeflist=coldeflist, functions=[ pgast.FuncCall( name=('unnest', ), args=[expr], ) ]) ]) else: return pgast.FuncCall(name=('to_jsonb', ), args=[expr], null_safe=True, ser_safe=True)
def get_path_output_or_null( rel: pgast.Query, path_id: irast.PathId, *, aspect: str, env: context.Environment) -> \ typing.Tuple[pgast.OutputVar, bool]: try: ref = get_path_output(rel, path_id, aspect=aspect, env=env) is_null = False except LookupError: alias = env.aliases.get('null') restarget = pgast.ResTarget(name=alias, val=pgast.Constant(val=None)) if hasattr(rel, 'returning_list'): rel.returning_list.append(restarget) else: rel.target_list.append(restarget) is_null = True ref = pgast.ColumnRef(name=[alias], nullable=True) return ref, is_null
def cast(node: pgast.Base, *, source_type: s_obj.Object, target_type: s_obj.Object, force: bool = False, env: context.Environment) -> pgast.Base: if source_type.name == target_type.name and not force: return node schema = env.schema real_t = schema.get('std::anyreal') int_t = schema.get('std::anyint') json_t = schema.get('std::json') str_t = schema.get('std::str') datetime_t = schema.get('std::datetime') bool_t = schema.get('std::bool') if isinstance(target_type, s_types.Collection): if target_type.schema_name == 'array': if source_type.issubclass(json_t): # If we are casting a jsonb array to array, we do the # following transformation: # EdgeQL: <array<T>>MAP_VALUE # SQL: # SELECT array_agg(j::T) # FROM jsonb_array_elements(MAP_VALUE) AS j inner_cast = cast(pgast.ColumnRef(name=['j']), source_type=source_type, target_type=target_type.element_type, env=env) return pgast.SelectStmt( target_list=[ pgast.ResTarget(val=pgast.FuncCall( name=('array_agg', ), args=[inner_cast])) ], from_clause=[ pgast.RangeFunction(functions=[ pgast.FuncCall(name=('jsonb_array_elements', ), args=[node]) ], alias=pgast.Alias(aliasname='j')) ]) else: # EdgeQL: <array<int64>>['1', '2'] # to SQL: ARRAY['1', '2']::int[] elem_pgtype = pg_types.pg_type_from_object( schema, target_type.element_type, topbase=True) return pgast.TypeCast(arg=node, type_name=pgast.TypeName( name=elem_pgtype, array_bounds=[-1])) else: # `target_type` is not a collection. if (source_type.issubclass(datetime_t) and target_type.issubclass(str_t)): # Normalize datetime to text conversion to have the same # format as one would get by serializing to JSON. # # EdgeQL: <text><datetime>'2010-10-10'; # To SQL: trim(to_json('2010-01-01'::timestamptz)::text, '"') return pgast.FuncCall( name=('trim', ), args=[ pgast.TypeCast(arg=pgast.FuncCall(name=('to_json', ), args=[node]), type_name=pgast.TypeName(name=('text', ))), pgast.Constant(val='"') ]) elif source_type.issubclass(bool_t) and target_type.issubclass(int_t): # PostgreSQL 9.6 doesn't allow to cast 'boolean' to any integer # other than int32: # SELECT 'true'::boolean::bigint; # ERROR: cannot cast type boolean to bigint # So we transform EdgeQL: <int64>BOOL # to SQL: BOOL::int::<targetint> return pgast.TypeCast( arg=pgast.TypeCast(arg=node, type_name=pgast.TypeName(name=('int', ))), type_name=pgast.TypeName( name=pg_types.pg_type_from_scalar(schema, target_type))) elif source_type.issubclass(int_t) and target_type.issubclass(bool_t): # PostgreSQL 9.6 doesn't allow to cast any integer other # than int32 to 'boolean': # SELECT 1::bigint::boolean; # ERROR: cannot cast type bigint to boolea # So we transform EdgeQL: <boolean>INT # to SQL: (INT != 0) return astutils.new_binop(node, pgast.Constant(val=0), op=ast.ops.NE) elif source_type.issubclass(json_t): if (target_type.issubclass(real_t) or target_type.issubclass(bool_t)): # Simply cast to text and the to the target type. return cast(cast(node, source_type=source_type, target_type=str_t, env=env), source_type=str_t, target_type=target_type, env=env) elif target_type.issubclass(str_t): # It's not possible to cast jsonb string to text directly, # so we do a trick: # EdgeQL: <str>JSONB_VAL # SQL: array_to_json(ARRAY[JSONB_VAL])->>0 return astutils.new_binop(pgast.FuncCall( name=('array_to_json', ), args=[pgast.ArrayExpr(elements=[node])]), pgast.Constant(val=0), op='->>') elif target_type.issubclass(json_t): return pgast.TypeCast( arg=node, type_name=pgast.TypeName(name=('jsonb', ))) else: const_type = pg_types.pg_type_from_object(schema, target_type, topbase=True) return pgast.TypeCast(arg=node, type_name=pgast.TypeName(name=const_type)) raise RuntimeError( f'could not cast {source_type.name} to {target_type.name}')
def process_link_values( ir_stmt, ir_expr, target_tab, tab_cols, col_data, dml_rvar, sources, props_only, target_is_scalar, iterator_cte, *, ctx=context.CompilerContext) -> \ typing.Tuple[pgast.CommonTableExpr, typing.List[str]]: """Unpack data from an update expression into a series of selects. :param ir_expr: IR of the INSERT/UPDATE body element. :param target_tab: The link table being updated. :param tab_cols: A sequence of columns in the table being updated. :param col_data: Expressions used to populate well-known columns of the link table such as std::source and std::__type__. :param sources: A list of relations which must be joined into the data query to resolve expressions in *col_data*. :param props_only: Whether this link update only touches link properties. :param target_is_scalar: Whether the link target is an ScalarType. :param iterator_cte: CTE representing the iterator range in the FOR clause of the EdgeQL DML statement. """ with ctx.newscope() as newscope, newscope.newrel() as subrelctx: row_query = subrelctx.rel relctx.include_rvar(row_query, dml_rvar, path_id=ir_stmt.subject.path_id, ctx=subrelctx) subrelctx.path_scope[ir_stmt.subject.path_id] = row_query if iterator_cte is not None: iterator_rvar = dbobj.rvar_for_rel(iterator_cte, lateral=True, env=subrelctx.env) relctx.include_rvar(row_query, iterator_rvar, path_id=iterator_cte.query.path_id, ctx=subrelctx) with subrelctx.newscope() as sctx, sctx.subrel() as input_rel_ctx: input_rel = input_rel_ctx.rel if iterator_cte is not None: input_rel_ctx.path_scope[iterator_cte.query.path_id] = \ row_query input_rel_ctx.expr_exposed = False input_rel_ctx.shape_format = context.ShapeFormat.FLAT input_rel_ctx.volatility_ref = pathctx.get_path_identity_var( row_query, ir_stmt.subject.path_id, env=input_rel_ctx.env) dispatch.compile(ir_expr, ctx=input_rel_ctx) input_stmt = input_rel input_rvar = pgast.RangeSubselect( subquery=input_rel, lateral=True, alias=pgast.Alias(aliasname=ctx.env.aliases.get('val'))) source_data = {} if input_stmt.op is not None: # UNION input_stmt = input_stmt.rarg path_id = ir_expr.path_id output = pathctx.get_path_value_output(input_stmt, path_id, env=ctx.env) if isinstance(output, pgast.TupleVar): for element in output.elements: name = element.path_id.rptr_name() if name is None: name = element.path_id[-1].name colname = common.edgedb_name_to_pg_name(name) source_data.setdefault(colname, dbobj.get_column(input_rvar, element.name)) else: if target_is_scalar: target_ref = pathctx.get_rvar_path_value_var(input_rvar, path_id, env=ctx.env) else: target_ref = pathctx.get_rvar_path_identity_var(input_rvar, path_id, env=ctx.env) source_data['std::target'] = target_ref if not target_is_scalar and 'std::target' not in source_data: target_ref = pathctx.get_rvar_path_identity_var(input_rvar, path_id, env=ctx.env) source_data['std::target'] = target_ref specified_cols = [] for col in tab_cols: expr = col_data.get(col) if expr is None: expr = source_data.get(col) if expr is not None: row_query.target_list.append(pgast.ResTarget(val=expr, name=col)) specified_cols.append(col) row_query.from_clause += list(sources) + [input_rvar] link_rows = pgast.CommonTableExpr(query=row_query, name=ctx.env.aliases.get(hint='r')) return link_rows, specified_cols
def process_link_update( ir_stmt: irast.MutatingStmt, ir_expr: irast.Base, props_only: bool, wrapper: pgast.Query, dml_cte: pgast.CommonTableExpr, iterator_cte: pgast.CommonTableExpr, *, ctx: context.CompilerContextLevel) -> typing.Optional[pgast.Query]: """Perform updates to a link relation as part of a DML statement. :param ir_stmt: IR of the statement. :param ir_expr: IR of the INSERT/UPDATE body element. :param props_only: Whether this link update only touches link properties. :param wrapper: Top-level SQL query. :param dml_cte: CTE representing the SQL INSERT or UPDATE to the main relation of the Object. :param iterator_cte: CTE representing the iterator range in the FOR clause of the EdgeQL DML statement. """ toplevel = ctx.toplevel_stmt edgedb_ptr_tab = pgast.RangeVar( relation=pgast.Relation(schemaname='edgedb', name='pointer'), alias=pgast.Alias(aliasname=ctx.env.aliases.get(hint='ptr'))) ltab_alias = edgedb_ptr_tab.alias.aliasname rptr = ir_expr.rptr ptrcls = rptr.ptrcls target_is_scalar = isinstance(ptrcls.target, s_scalars.ScalarType) path_id = rptr.source.path_id.extend(ptrcls, rptr.direction, rptr.target.scls) # The links in the dml class shape have been derived, # but we must use the correct specialized link class for the # base material type. mptrcls = ptrcls.material_type() # Lookup link class id by link name. lname_to_id = pgast.CommonTableExpr(query=pgast.SelectStmt( from_clause=[edgedb_ptr_tab], target_list=[ pgast.ResTarget(val=pgast.ColumnRef(name=[ltab_alias, 'id'])) ], where_clause=astutils.new_binop( lexpr=pgast.ColumnRef(name=[ltab_alias, 'name']), rexpr=pgast.Constant(val=mptrcls.name), op=ast.ops.EQ)), name=ctx.env.aliases.get(hint='lid')) lname_to_id_rvar = pgast.RangeVar(relation=lname_to_id) toplevel.ctes.append(lname_to_id) target_rvar = dbobj.range_for_ptrcls(mptrcls, '>', include_overlays=False, env=ctx.env) target_alias = target_rvar.alias.aliasname target_tab_name = (target_rvar.relation.schemaname, target_rvar.relation.name) tab_cols = dbobj.cols_for_pointer(mptrcls, env=ctx.env) dml_cte_rvar = pgast.RangeVar( relation=dml_cte, alias=pgast.Alias(aliasname=ctx.env.aliases.get('m'))) col_data = { 'ptr_item_id': pgast.ColumnRef(name=[lname_to_id.name, 'id']), 'std::source': pathctx.get_rvar_path_identity_var(dml_cte_rvar, ir_stmt.subject.path_id, env=ctx.env) } # Drop all previous link records for this source. delcte = pgast.CommonTableExpr(query=pgast.DeleteStmt( relation=target_rvar, where_clause=astutils.new_binop( lexpr=col_data['std::source'], op=ast.ops.EQ, rexpr=pgast.ColumnRef(name=[target_alias, 'std::source'])), using_clause=[dml_cte_rvar], returning_list=[ pgast.ResTarget(val=pgast.ColumnRef( name=[target_alias, pgast.Star()])) ]), name=ctx.env.aliases.get(hint='d')) pathctx.put_path_value_rvar(delcte.query, path_id.ptr_path(), target_rvar, env=ctx.env) # Record the effect of this removal in the relation overlay # context to ensure that the RETURNING clause potentially # referencing this link yields the expected results. overlays = ctx.env.rel_overlays[ptrcls.shortname] overlays.append(('except', delcte)) toplevel.ctes.append(delcte) # Turn the IR of the expression on the right side of := # into a subquery returning records for the link table. data_cte, specified_cols = process_link_values(ir_stmt, ir_expr, target_tab_name, tab_cols, col_data, dml_cte_rvar, [lname_to_id_rvar], props_only, target_is_scalar, iterator_cte, ctx=ctx) toplevel.ctes.append(data_cte) data_select = pgast.SelectStmt( target_list=[ pgast.ResTarget(val=pgast.ColumnRef( name=[data_cte.name, pgast.Star()])) ], from_clause=[pgast.RangeVar(relation=data_cte)]) # Inserting rows into the link table may produce cardinality # constraint violations, since the INSERT into the link table # is executed in the snapshot where the above DELETE from # the link table is not visible. Hence, we need to use # the ON CONFLICT clause to resolve this. conflict_cols = ['std::source', 'std::target', 'ptr_item_id'] conflict_inference = [] conflict_exc_row = [] for col in conflict_cols: conflict_inference.append(pgast.ColumnRef(name=[col])) conflict_exc_row.append(pgast.ColumnRef(name=['excluded', col])) conflict_data = pgast.SelectStmt( target_list=[ pgast.ResTarget(val=pgast.ColumnRef( name=[data_cte.name, pgast.Star()])) ], from_clause=[pgast.RangeVar(relation=data_cte)], where_clause=astutils.new_binop( lexpr=pgast.ImplicitRowExpr(args=conflict_inference), rexpr=pgast.ImplicitRowExpr(args=conflict_exc_row), op='=')) cols = [pgast.ColumnRef(name=[col]) for col in specified_cols] updcte = pgast.CommonTableExpr( name=ctx.env.aliases.get(hint='i'), query=pgast.InsertStmt( relation=target_rvar, select_stmt=data_select, cols=cols, on_conflict=pgast.OnConflictClause( action='update', infer=pgast.InferClause(index_elems=conflict_inference), target_list=[ pgast.MultiAssignRef(columns=cols, source=conflict_data) ]), returning_list=[ pgast.ResTarget(val=pgast.ColumnRef(name=[pgast.Star()])) ])) pathctx.put_path_value_rvar(updcte.query, path_id.ptr_path(), target_rvar, env=ctx.env) # Record the effect of this insertion in the relation overlay # context to ensure that the RETURNING clause potentially # referencing this link yields the expected results. overlays = ctx.env.rel_overlays[ptrcls.shortname] overlays.append(('union', updcte)) toplevel.ctes.append(updcte) return data_cte
def process_insert_body(ir_stmt: irast.MutatingStmt, wrapper: pgast.Query, insert_cte: pgast.CommonTableExpr, insert_rvar: pgast.BaseRangeVar, *, ctx: context.CompilerContextLevel) -> None: """Generate SQL DML CTEs from an InsertStmt IR. :param ir_stmt: IR of the statement. :param wrapper: Top-level SQL query. :param insert_cte: CTE representing the SQL INSERT to the main relation of the Object. """ cols = [pgast.ColumnRef(name=['std::__type__'])] select = pgast.SelectStmt(target_list=[]) values = select.target_list # The main INSERT query of this statement will always be # present to insert at least the std::id and std::__type__ # links. insert_stmt = insert_cte.query insert_stmt.cols = cols insert_stmt.select_stmt = select if ir_stmt.parent_stmt is not None: iterator_set = ir_stmt.parent_stmt.iterator_stmt else: iterator_set = None if iterator_set is not None: with ctx.substmt() as ictx: ictx.path_scope = ictx.path_scope.new_child() ictx.path_scope[iterator_set.path_id] = ictx.rel clauses.compile_iterator_expr(ictx.rel, iterator_set, ctx=ictx) ictx.rel.path_id = iterator_set.path_id pathctx.put_path_bond(ictx.rel, iterator_set.path_id) iterator_cte = pgast.CommonTableExpr( query=ictx.rel, name=ctx.env.aliases.get('iter')) ictx.toplevel_stmt.ctes.append(iterator_cte) iterator_rvar = dbobj.rvar_for_rel(iterator_cte, env=ctx.env) relctx.include_rvar(select, iterator_rvar, path_id=ictx.rel.path_id, ctx=ctx) iterator_id = pathctx.get_path_identity_var(select, iterator_set.path_id, env=ctx.env) else: iterator_cte = None iterator_id = None values.append( pgast.ResTarget(val=pgast.SelectStmt( target_list=[pgast.ResTarget(val=pgast.ColumnRef(name=['id']))], from_clause=[ pgast.RangeVar(relation=pgast.Relation(name='objecttype', schemaname='edgedb')) ], where_clause=astutils.new_binop( op=ast.ops.EQ, lexpr=pgast.ColumnRef(name=['name']), rexpr=pgast.Constant(val=ir_stmt.subject.scls.shortname))))) external_inserts = [] tuple_elements = [] parent_link_props = [] with ctx.newrel() as subctx: subctx.rel = select subctx.rel_hierarchy[select] = insert_stmt subctx.expr_exposed = False subctx.shape_format = context.ShapeFormat.FLAT if iterator_cte is not None: subctx.path_scope = ctx.path_scope.new_child() subctx.path_scope[iterator_cte.query.path_id] = select # Process the Insert IR and separate links that go # into the main table from links that are inserted into # a separate link table. for shape_el in ir_stmt.subject.shape: rptr = shape_el.rptr ptrcls = rptr.ptrcls.material_type() if (ptrcls.is_link_property() and rptr.source.path_id != ir_stmt.subject.path_id): parent_link_props.append(shape_el) continue ptr_info = pg_types.get_pointer_storage_info( ptrcls, schema=subctx.env.schema, resolve_type=True, link_bias=False) props_only = False # First, process all local link inserts. if ptr_info.table_type == 'ObjectType': props_only = True field = pgast.ColumnRef(name=[ptr_info.column_name]) cols.append(field) insvalue = insert_value_for_shape_element(insert_stmt, wrapper, ir_stmt, shape_el, iterator_id, ptr_info=ptr_info, ctx=subctx) tuple_el = astutils.tuple_element_for_shape_el(shape_el, field) tuple_elements.append(tuple_el) values.append(pgast.ResTarget(val=insvalue)) ptr_info = pg_types.get_pointer_storage_info(ptrcls, resolve_type=False, link_bias=True) if ptr_info and ptr_info.table_type == 'link': external_inserts.append((shape_el, props_only)) if iterator_cte is not None: cols.append(pgast.ColumnRef(name=['__edb_token'])) values.append(pgast.ResTarget(val=iterator_id)) pathctx.put_path_identity_var(insert_stmt, iterator_set.path_id, cols[-1], force=True, env=subctx.env) pathctx.put_path_bond(insert_stmt, iterator_set.path_id) toplevel = ctx.toplevel_stmt toplevel.ctes.append(insert_cte) # Process necessary updates to the link tables. for shape_el, props_only in external_inserts: process_link_update(ir_stmt, shape_el, props_only, wrapper, insert_cte, iterator_cte, ctx=ctx) if parent_link_props: prop_elements = [] with ctx.newscope() as scopectx: scopectx.rel = wrapper for shape_el in parent_link_props: rptr = shape_el.rptr scopectx.path_scope[rptr.source.path_id] = wrapper pathctx.put_path_rvar_if_not_exists(wrapper, rptr.source.path_id, insert_rvar, aspect='value', env=scopectx.env) dispatch.compile(shape_el, ctx=scopectx) tuple_el = astutils.tuple_element_for_shape_el(shape_el, None) prop_elements.append(tuple_el) valtuple = pgast.TupleVar(elements=prop_elements, named=True) pathctx.put_path_value_var(wrapper, ir_stmt.subject.path_id, valtuple, force=True, env=ctx.env)
def _get_path_output(rel: pgast.BaseRelation, path_id: irast.PathId, *, aspect: str, allow_nullable: bool = True, ptr_info: typing.Optional[ pg_types.PointerStorageInfo] = None, env: context.Environment) -> pgast.OutputVar: result = rel.path_outputs.get((path_id, aspect)) if result is not None: return result if is_terminal_relation(rel): return _get_rel_path_output(rel, path_id, aspect=aspect, ptr_info=ptr_info, env=env) else: ref = get_path_var(rel, path_id, aspect=aspect, env=env) other_output = find_path_output(rel, path_id, ref, env=env) if other_output is not None: rel.path_outputs[path_id, aspect] = other_output return other_output if isinstance(ref, pgast.TupleVar): elements = [] for el in ref.elements: el_path_id = reverse_map_path_id(el.path_id, rel.view_path_id_map) try: # Similarly to get_path_var(), check for outer path_id # first for tuple serialized var disambiguation. element = _get_path_output(rel, el_path_id, aspect=aspect, allow_nullable=False, env=env) except LookupError: element = get_path_output(rel, el_path_id, aspect=aspect, allow_nullable=False, env=env) elements.append( pgast.TupleElement(path_id=el_path_id, name=element)) result = pgast.TupleVar(elements=elements, named=ref.named) else: if astutils.is_set_op_query(rel): assert isinstance(ref, pgast.ColumnRef) result = dbobj.get_column(None, ref) else: alias = get_path_output_alias(path_id, aspect, env=env) restarget = pgast.ResTarget(name=alias, val=ref) if hasattr(rel, 'returning_list'): rel.returning_list.append(restarget) else: rel.target_list.append(restarget) nullable = is_nullable(ref, env=env) if isinstance(ref, pgast.ColumnRef): optional = ref.optional else: optional = None if nullable and not allow_nullable: var = get_path_var(rel, path_id, aspect=aspect, env=env) rel.where_clause = astutils.extend_binop( rel.where_clause, pgast.NullTest(arg=var, negated=True)) nullable = False result = pgast.ColumnRef(name=[alias], nullable=nullable, optional=optional) rel.path_outputs[path_id, aspect] = result return result
def _get_rel_path_output(rel: pgast.BaseRelation, path_id: irast.PathId, *, aspect: str, ptr_info: typing.Optional[ pg_types.PointerStorageInfo] = None, env: context.Environment) -> pgast.OutputVar: if path_id.is_objtype_path(): if aspect == 'identity': aspect = 'value' if aspect != 'value': raise LookupError( f'invalid request for non-scalar path {path_id} {aspect}') if (path_id == rel.path_id or (rel.path_id.is_type_indirection_path() and path_id == rel.path_id.src_path())): path_id = irutils.get_id_path_id(path_id, schema=env.schema) else: if aspect == 'identity': raise LookupError( f'invalid request for scalar path {path_id} {aspect}') elif aspect == 'serialized': aspect = 'value' var = rel.path_outputs.get((path_id, aspect)) if var is not None: return var ptrcls = path_id.rptr() rptr_dir = path_id.rptr_dir() if (rptr_dir is not None and rptr_dir != s_pointers.PointerDirection.Outbound): raise LookupError( f'{path_id} is an inbound pointer and cannot be resolved ' f'on a base relation') if isinstance(rel, pgast.NullRelation): if ptrcls is not None: target = ptrcls.target else: target = path_id.target if ptr_info is not None: name = ptr_info.column_name else: name = env.aliases.get('v') val = typecomp.cast(pgast.Constant(val=None, nullable=True), source_type=target, target_type=target, force=True, env=env) rel.target_list.append(pgast.ResTarget(name=name, val=val)) result = pgast.ColumnRef(name=[name], nullable=True) else: if ptrcls is None: raise ValueError( f'could not resolve trailing pointer class for {path_id}') ptr_info = pg_types.get_pointer_storage_info(ptrcls, resolve_type=False, link_bias=False) result = pgast.ColumnRef(name=[ptr_info.column_name], nullable=not ptrcls.required) rel.path_outputs[path_id, aspect] = result return result
def compile_GroupStmt(stmt: irast.GroupStmt, *, ctx: context.CompilerContextLevel) -> pgast.Query: parent_ctx = ctx with parent_ctx.substmt() as ctx: clauses.init_stmt(stmt, ctx=ctx, parent_ctx=parent_ctx) group_path_id = stmt.group_path_id # Process the GROUP .. BY part into a subquery. with ctx.subrel() as gctx: gctx.expr_exposed = False gquery = gctx.rel pathctx.put_path_bond(gquery, group_path_id) if stmt.path_scope: ctx.path_scope.update( {path_id: gquery for path_id in stmt.path_scope.paths}) relctx.update_scope(stmt.subject, gquery, ctx=gctx) stmt.subject.path_scope = None clauses.compile_output(stmt.subject, ctx=gctx) subj_rvar = pathctx.get_path_rvar(gquery, stmt.subject.path_id, aspect='value', env=gctx.env) relctx.ensure_bond_for_expr(stmt.subject, subj_rvar.query, ctx=gctx) group_paths = set() part_clause = [] for expr in stmt.groupby: with gctx.new() as subctx: partexpr = dispatch.compile(expr, ctx=subctx) part_clause.append(partexpr) group_paths.add(expr.path_id) # Since we will be computing arbitrary expressions # based on the grouped sets, it is more efficient # to compute the "group bond" as a small unique # value than it is to use GROUP BY and aggregate # actual id values into an array. # # To achieve this we use the first_value() window # function while using the GROUP BY clause as # a partition clause. We use the id of the first # object in each partition if GROUP BY input is # a ObjectType, otherwise we generate the id using # row_number(). if isinstance(stmt.subject.scls, s_objtypes.ObjectType): first_val = pathctx.get_path_identity_var(gquery, stmt.subject.path_id, env=ctx.env) else: with ctx.subrel() as subctx: wrapper = subctx.rel gquery_rvar = dbobj.rvar_for_rel(gquery, env=ctx.env) wrapper.from_clause = [gquery_rvar] relctx.pull_path_namespace(target=wrapper, source=gquery_rvar, ctx=subctx) new_part_clause = [] for i, expr in enumerate(part_clause): path_id = stmt.groupby[i].path_id pathctx.put_path_value_var(gquery, path_id, expr, force=True, env=ctx.env) output_ref = pathctx.get_path_value_output(gquery, path_id, env=ctx.env) new_part_clause.append( dbobj.get_column(gquery_rvar, output_ref)) part_clause = new_part_clause first_val = pathctx.get_rvar_path_identity_var( gquery_rvar, stmt.subject.path_id, env=ctx.env) gquery = wrapper pathctx.put_path_bond(gquery, group_path_id) group_id = pgast.FuncCall( name=('first_value', ), args=[first_val], over=pgast.WindowDef(partition_clause=part_clause)) pathctx.put_path_identity_var(gquery, group_path_id, group_id, env=ctx.env) pathctx.put_path_value_var(gquery, group_path_id, group_id, env=ctx.env) group_cte = pgast.CommonTableExpr(query=gquery, name=ctx.env.aliases.get('g')) group_cte_rvar = dbobj.rvar_for_rel(group_cte, env=ctx.env) # Generate another subquery contaning distinct values of # path expressions in BY. with ctx.subrel() as gvctx: gvquery = gvctx.rel relctx.include_rvar(gvquery, group_cte_rvar, path_id=group_path_id, ctx=gvctx) pathctx.put_path_bond(gvquery, group_path_id) for group_set in stmt.groupby: dispatch.visit(group_set, ctx=gvctx) path_id = group_set.path_id if path_id.is_objtype_path(): pathctx.put_path_bond(gvquery, path_id) gvquery.distinct_clause = [ pathctx.get_path_identity_var(gvquery, group_path_id, env=ctx.env) ] for path_id, aspect in list(gvquery.path_rvar_map): if path_id not in group_paths and path_id != group_path_id: gvquery.path_rvar_map.pop((path_id, aspect)) for path_id, aspect in list(gquery.path_rvar_map): if path_id in group_paths: gquery.path_rvar_map.pop((path_id, aspect)) gquery.path_namespace.pop((path_id, aspect), None) gquery.path_outputs.pop((path_id, aspect), None) groupval_cte = pgast.CommonTableExpr(query=gvquery, name=ctx.env.aliases.get('gv')) groupval_cte_rvar = dbobj.rvar_for_rel(groupval_cte, env=ctx.env) o_stmt = stmt.result.expr # process the result expression; with ctx.subrel() as selctx: selquery = selctx.rel outer_id = stmt.result.path_id inner_id = o_stmt.result.path_id relctx.include_specific_rvar(selquery, groupval_cte_rvar, group_path_id, aspect='identity', ctx=ctx) for path_id in group_paths: selctx.path_scope[path_id] = selquery pathctx.put_path_rvar(selquery, path_id, groupval_cte_rvar, aspect='value', env=ctx.env) selctx.group_by_rels = selctx.group_by_rels.copy() selctx.group_by_rels[group_path_id, stmt.subject.path_id] = \ group_cte selquery.view_path_id_map = {outer_id: inner_id} selquery.ctes.append(group_cte) sortoutputs = [] selquery.ctes.append(groupval_cte) clauses.compile_output(o_stmt.result, ctx=selctx) # The WHERE clause selquery.where_clause = astutils.extend_binop( selquery.where_clause, clauses.compile_filter_clause(o_stmt.where, ctx=selctx)) for ir_sortexpr in o_stmt.orderby: alias = ctx.env.aliases.get('s') sexpr = dispatch.compile(ir_sortexpr.expr, ctx=selctx) selquery.target_list.append( pgast.ResTarget(val=sexpr, name=alias)) sortoutputs.append(alias) if not gvquery.target_list: # No values were pulled from the group-values rel, # we must remove the DISTINCT clause to prevent # a syntax error. gvquery.distinct_clause[:] = [] query = ctx.rel result_rvar = dbobj.rvar_for_rel(selquery, lateral=True, env=ctx.env) relctx.include_rvar(query, result_rvar, path_id=outer_id, ctx=ctx) for rt in selquery.target_list: if rt.name is None: rt.name = ctx.env.aliases.get('v') if rt.name not in sortoutputs: query.target_list.append( pgast.ResTarget(val=dbobj.get_column(result_rvar, rt.name), name=rt.name)) for i, expr in enumerate(o_stmt.orderby): sort_ref = dbobj.get_column(result_rvar, sortoutputs[i]) sortexpr = pgast.SortBy(node=sort_ref, dir=expr.direction, nulls=expr.nones_order) query.sort_clause.append(sortexpr) # The OFFSET clause if o_stmt.offset: with ctx.new() as ctx1: ctx1.clause = 'offsetlimit' ctx1.expr_exposed = False query.limit_offset = dispatch.compile(o_stmt.offset, ctx=ctx1) # The LIMIT clause if o_stmt.limit: with ctx.new() as ctx1: ctx1.clause = 'offsetlimit' ctx1.expr_exposed = False query.limit_count = dispatch.compile(o_stmt.limit, ctx=ctx1) clauses.fini_stmt(query, ctx, parent_ctx) return query
def range_for_ptrcls( ptrcls: s_links.Link, direction: s_pointers.PointerDirection, *, include_overlays: bool=True, env: context.Environment) -> pgast.BaseRangeVar: """"Return a Range subclass corresponding to a given ptr step. If `ptrcls` is a generic link, then a simple RangeVar is returned, otherwise the return value may potentially be a UNION of all tables corresponding to a set of specialized links computed from the given `ptrcls` taking source inheritance into account. """ linkname = ptrcls.shortname endpoint = ptrcls.source tgt_col = pgtypes.get_pointer_storage_info( ptrcls, resolve_type=False, link_bias=True).column_name cols = [ 'std::source', tgt_col ] set_ops = [] ptrclses = set() for source in {endpoint} | set(endpoint.descendants(env.schema)): # Sift through the descendants to see who has this link try: src_ptrcls = source.pointers[linkname].material_type() except KeyError: # This source has no such link, skip it continue else: if src_ptrcls in ptrclses: # Seen this link already continue ptrclses.add(src_ptrcls) table = table_from_ptrcls(src_ptrcls, env=env) qry = pgast.SelectStmt() qry.from_clause.append(table) qry.rptr_rvar = table # Make sure all property references are pulled up properly for colname in cols: selexpr = pgast.ColumnRef( name=[table.alias.aliasname, colname]) qry.target_list.append( pgast.ResTarget(val=selexpr, name=colname)) set_ops.append(('union', qry)) overlays = env.rel_overlays.get(src_ptrcls.shortname) if overlays and include_overlays: for op, cte in overlays: rvar = pgast.RangeVar( relation=cte, alias=pgast.Alias( aliasname=env.aliases.get(cte.name) ) ) qry = pgast.SelectStmt( target_list=[ pgast.ResTarget( val=pgast.ColumnRef( name=[col] ) ) for col in cols ], from_clause=[rvar], ) set_ops.append((op, qry)) rvar = range_from_queryset(set_ops, ptrcls, env=env) return rvar
def get_path_output(rel: pgast.BaseRelation, path_id: irast.PathId, *, aspect: str, ptr_info: typing.Optional[ pg_types.PointerStorageInfo] = None, env: context.Environment) -> pgast.OutputVar: view_path_id_map = getattr(rel, 'view_path_id_map', None) if view_path_id_map: path_id = map_path_id(path_id, view_path_id_map) result = rel.path_outputs.get((path_id, aspect)) if result is not None: return result if isinstance(rel, pgast.Relation): return _get_rel_path_output(rel, path_id, aspect=aspect, ptr_info=ptr_info, env=env) else: ref = get_path_var(rel, path_id, aspect=aspect, env=env) other_output = find_path_output(rel, path_id, ref, env=env) if other_output is not None: rel.path_outputs[path_id, aspect] = other_output return other_output if isinstance(ref, pgast.TupleVar): elements = [] for el in ref.elements: el_path_id = reverse_map_path_id(el.path_id, rel.view_path_id_map) element = get_path_output(rel, el_path_id, aspect=aspect, env=env) elements.append( pgast.TupleElement(path_id=el_path_id, name=element)) result = pgast.TupleVar(elements=elements, named=ref.named) else: if astutils.is_set_op_query(rel): assert isinstance(ref, pgast.ColumnRef) result = dbobj.get_column(None, ref) else: alias = get_path_output_alias(path_id, aspect, env=env) restarget = pgast.ResTarget(name=alias, val=ref) if hasattr(rel, 'returning_list'): rel.returning_list.append(restarget) else: rel.target_list.append(restarget) if isinstance(ref, pgast.ColumnRef): nullable = ref.nullable optional = ref.optional else: nullable = rel.nullable optional = None result = pgast.ColumnRef(name=[alias], nullable=nullable, optional=optional) rel.path_outputs[path_id, aspect] = result return result