def _get_load_associations_query(document, doc_types_to_load): query_parents = DBSession. \ query( Association.parent_document_id.label('id'), Document.type.label('t'), literal_column('1').label('p')). \ join( Document, and_( Association.child_document_id == document.document_id, Association.parent_document_id == Document.document_id, Document.type.in_(doc_types_to_load))). \ subquery() query_children = DBSession. \ query( Association.child_document_id.label('id'), Document.type.label('t'), literal_column('0').label('p')). \ join( Document, and_( Association.child_document_id == Document.document_id, Association.parent_document_id == document.document_id, Document.type.in_(doc_types_to_load))). \ subquery() return DBSession \ .query('id', 't', 'p') \ .select_from(union(query_parents.select(), query_children.select()))
def mostFrequentVariableAndValue(self, variableNameList): """ :type variableNameList: list(str) """ subQueryList = [] if len(variableNameList) == 0: raise EmptyVariableNameListError() with closing(self._sessionMaker()) as session: # For each variable, retrieve all possible values and their occurrence count. for variableName in variableNameList: variableNameColumn = literal(variableName).label(self._VARIABLE_NAME_KEY) variableValueColumn = getattr(SQLModsecurityAuditEntryMessage, variableName).label(self._VARIABLE_VALUE_KEY) variableValueCountColumn = count().label(self._VARIABLE_VALUE_COUNT_KEY) # Subquery of each variable. subQuery = self._makeQuery(session, [variableNameColumn, variableValueColumn, variableValueCountColumn]) subQuery = subQuery.group_by(self._VARIABLE_NAME_KEY, self._VARIABLE_VALUE_KEY) subQueryList.append(subQuery) # Merging all subqueries and sorting by reverse count... query = union(*subQueryList).order_by(desc(self._VARIABLE_VALUE_COUNT_KEY)).limit(1) query = query.order_by(desc(self._VARIABLE_VALUE_COUNT_KEY)).limit(1) # ... then picking the first one. item = session.execute(query).fetchone() if item is not None: return {str(item.variableName): item.variableValue} else: return None
def _get_load_associations_query(document, doc_types_to_load): query_parents = DBSession. \ query( Association.parent_document_id.label('id'), Association.parent_document_type.label('t'), literal_column('1').label('p')). \ filter( and_( Association.child_document_id == document.document_id, Association.parent_document_type.in_(doc_types_to_load) ) ). \ subquery() query_children = DBSession. \ query( Association.child_document_id.label('id'), Association.child_document_type.label('t'), literal_column('0').label('p')). \ filter( and_( Association.parent_document_id == document.document_id, Association.child_document_type.in_(doc_types_to_load) ) ). \ subquery() return DBSession \ .query(column('id'), column('t'), column('p')) \ .select_from(union(query_parents.select(), query_children.select()))
def test_in_26(self): self.assert_compile(self.table1.c.myid.in_( union( select([self.table1.c.myid], self.table1.c.myid == 5), select([self.table1.c.myid], self.table1.c.myid == 12), ) ), "mytable.myid IN ("\ "SELECT mytable.myid FROM mytable WHERE mytable.myid = :myid_1 "\ "UNION SELECT mytable.myid FROM mytable WHERE mytable.myid = :myid_2)")
def test_in_26(self): self.assert_compile(self.table1.c.myid.in_( union( select([self.table1.c.myid], self.table1.c.myid == 5), select([self.table1.c.myid], self.table1.c.myid == 12), ) ), "mytable.myid IN ("\ "SELECT mytable.myid FROM mytable WHERE mytable.myid = :myid_1 "\ "UNION SELECT mytable.myid FROM mytable WHERE mytable.myid = :myid_2)")
def parents(self): session = object_session(self) parent_ids = union( session.query( children.c.parent_id).filter(children.c.child_id == self.id), session.query( embedded.c.parent_id).filter(embedded.c.child_id == self.id)) return session.query(Link).filter( Link.id == parent_ids.c.children_parent_id)
def test_date(session): dates = ( date(2016, 1, 1), date(2016, 1, 2), ) selects = tuple(select((MakeADate(d),)) for d in dates) data = alias(union(*selects, use_labels=True), 'dates') stmt = select((data,)) result = session.execute(stmt).fetchall() assert tuple(chain.from_iterable(result)) == dates
def test_date(session): dates = ( date(2016, 1, 1), date(2016, 1, 2), ) selects = tuple(select((MakeADate(d), )) for d in dates) data = alias(union(*selects, use_labels=True), 'dates') stmt = select((data, )) result = session.execute(stmt).fetchall() assert tuple(chain.from_iterable(result)) == dates
def build_included_union(self, params): selects = [ self.build_single_included(params.fields, subpath) for path in params.include for subpath in subpaths(path) ] union_select = union(*selects).alias() return sa.select([union_select.c.included.label('included')], from_obj=union_select).order_by( union_select.c.included[s('type')], union_select.c.included[s('id')])
def union_select(selectComponents, distinct=False, select_type=TRIPLE_SELECT): """ Helper function for building union all select statement. Terms: u - uri refs v - variables b - bnodes l - literal f - formula Takes a list of: - table name - table alias - table type (literal, type, asserted, quoted) - where clause string """ selects = [] for table, whereClause, tableType in selectComponents: if select_type == COUNT_SELECT: selectClause = table.count(whereClause) elif select_type == CONTEXT_SELECT: selectClause = expression.select([table.c.context], whereClause) elif tableType in FULL_TRIPLE_PARTITIONS: selectClause = table.select(whereClause) elif tableType == ASSERTED_TYPE_PARTITION: selectClause = expression.select([ table.c.id.label("id"), table.c.member.label("subject"), expression.literal(text_type(RDF.type)).label("predicate"), table.c.klass.label("object"), table.c.context.label("context"), table.c.termComb.label("termcomb"), expression.literal_column("NULL").label("objlanguage"), expression.literal_column("NULL").label("objdatatype") ], whereClause) elif tableType == ASSERTED_NON_TYPE_PARTITION: selectClause = expression.select([c for c in table.columns] + [ expression.literal_column("NULL").label("objlanguage"), expression.literal_column("NULL").label("objdatatype") ], whereClause, from_obj=[table]) selects.append(selectClause) order_statement = [] if select_type == TRIPLE_SELECT: order_statement = [ expression.literal_column("subject"), expression.literal_column("predicate"), expression.literal_column("object"), ] if distinct: return expression.union(*selects, **{"order_by": order_statement}) else: return expression.union_all(*selects, **{"order_by": order_statement})
def _get_select_waypoints_for_routes(): waypoint_type = text('\'' + WAYPOINT_TYPE + '\'') route_type = text('\'' + ROUTE_TYPE + '\'') select_linked_waypoints = \ select([ Association.child_document_id.label('route_id'), Association.parent_document_id.label('waypoint_id') ]). \ where( and_( Association.parent_document_type == waypoint_type, Association.child_document_type == route_type)). \ cte('linked_waypoints') select_waypoint_parents = \ select([ select_linked_waypoints.c.route_id, Association.parent_document_id.label('waypoint_id') ]). \ select_from(join( select_linked_waypoints, Association, and_( Association.child_document_id == select_linked_waypoints.c.waypoint_id, Association.parent_document_type == waypoint_type ))). \ cte('waypoint_parents') select_waypoint_grandparents = \ select([ select_waypoint_parents.c.route_id, Association.parent_document_id.label('waypoint_id') ]). \ select_from(join( select_waypoint_parents, Association, and_( Association.child_document_id == select_waypoint_parents.c.waypoint_id, Association.parent_document_type == waypoint_type ))). \ cte('waypoint_grandparents') return union( select_linked_waypoints.select(), select_waypoint_parents.select(), select_waypoint_grandparents.select() ). \ cte('all_waypoints')
def access_list_paths(self, member, prefix=None, include_owned=False, include_containers=True): """Return the list of paths granted to member. Keyword arguments: prefix -- return only paths starting with prefix (default None) include_owned -- return also paths owned by member (default False) include_containers -- return also container paths owned by member (default True) """ xfeatures_xfeaturevals = self.xfeatures.join(self.xfeaturevals) selectable = (self.groups.c.owner + ':' + self.groups.c.name) member_groups = select([selectable.label('value')], self.groups.c.member == member) members = select([literal(member).label('value')]) any = select([literal('*').label('value')]) u = union(member_groups, members, any).alias() inner_join = join(xfeatures_xfeaturevals, u, self.xfeaturevals.c.value == u.c.value) s = select([self.xfeatures.c.path], from_obj=[inner_join]).distinct() if prefix: like = lambda p: self.xfeatures.c.path.like( self.escape_like(p) + '%', escape=ESCAPE_CHAR) s = s.where( or_(*map(like, self.access_inherit(prefix) or [prefix]))) r = self.conn.execute(s) l = [row[0] for row in r.fetchall()] r.close() if include_owned: container_nodes = select( [self.nodes.c.node], self.nodes.c.parent == self.node_lookup(member)) condition = self.nodes.c.parent.in_(container_nodes) if include_containers: condition = or_(condition, self.nodes.c.node.in_(container_nodes)) s = select([self.nodes.c.path], condition) r = self.conn.execute(s) l += [row[0] for row in r.fetchall() if row[0] not in l] r.close() return l
def _get_select_waypoints_for_routes(): waypoint_type = text('\'' + WAYPOINT_TYPE + '\'') route_type = text('\'' + ROUTE_TYPE + '\'') select_linked_waypoints = \ select([ Association.child_document_id.label('route_id'), Association.parent_document_id.label('waypoint_id') ]). \ where( and_( Association.parent_document_type == waypoint_type, Association.child_document_type == route_type)). \ cte('linked_waypoints') select_waypoint_parents = \ select([ select_linked_waypoints.c.route_id, Association.parent_document_id.label('waypoint_id') ]). \ select_from(join( select_linked_waypoints, Association, and_( Association.child_document_id == select_linked_waypoints.c.waypoint_id, Association.parent_document_type == waypoint_type ))). \ cte('waypoint_parents') select_waypoint_grandparents = \ select([ select_waypoint_parents.c.route_id, Association.parent_document_id.label('waypoint_id') ]). \ select_from(join( select_waypoint_parents, Association, and_( Association.child_document_id == select_waypoint_parents.c.waypoint_id, Association.parent_document_type == waypoint_type ))). \ cte('waypoint_grandparents') return union( select_linked_waypoints.select(), select_waypoint_parents.select(), select_waypoint_grandparents.select() ). \ cte('all_waypoints')
def build_included_union(self, params): selects = [ self.build_single_included(params.fields, subpath) for path in params.include for subpath in subpaths(path) ] union_select = union(*selects).alias() return sa.select( [union_select.c.included.label('included')], from_obj=union_select ).order_by( union_select.c.included[s('type')], union_select.c.included[s('id')] )
def career_fit(cls, session, student_id: int): """ returns (top 3) occupations and former students that most closely match the given student """ # get current snapshot info student = cls.find_by_id(session, student_id) # used when filtering students by grad year # d = date.today() # dummy grad date for testing d = date(2023, 1, 1) # get all former students with a gpa within +-.05 of student gpa_q = session.query(cls.id.label("sid"), cls.occupation_id.label("oid"), literal(3).label("sim_score")).\ filter(cls.grad_year < d.year).\ filter(and_(cls.gpa >= (student.gpa - .05), cls.gpa <= (student.gpa + .05))) # get all former students whose strongest subject is same as student strongest_sub_q = session.query(cls.id.label("sid"), cls.occupation_id.label("oid"), literal(2).label("sim_score")).\ join(StudentSnapshot).\ filter(cls.grad_year < d.year).\ filter(StudentSnapshot.strongest_sub_id == student.snapshot.strongest_sub_id) # get all former students whose weakest subject is same as student weakest_sub_q = session.query(cls.id.label("sid"), cls.occupation_id.label("oid"), literal(1).label("sim_score")).\ join(StudentSnapshot).\ filter(cls.grad_year < d.year).\ filter(StudentSnapshot.weakest_sub_id == student.snapshot.weakest_sub_id) # union of above 3 queries fit_values = union(gpa_q, strongest_sub_q, weakest_sub_q).alias("fit_values") sum_func = func.sum(fit_values.c.sim_score).label("total_score") # get top 3 students with highest similarity score top_students = session.query(fit_values.c.sid, sum_func).\ group_by(fit_values.c.sid).\ order_by(sum_func.desc()).\ limit(3).\ all() # get top 3 occupations with highest similarity score top_occupations = session.query(fit_values.c.oid, sum_func).\ group_by(fit_values.c.oid).\ order_by(sum_func.desc()).\ limit(3).\ all() return (top_students, top_occupations)
def get_query(self, req: CamcopsRequest) -> SelectBase: sql_icd9cm = get_diagnosis_report_query( req, diagnosis_class=DiagnosisIcd9CM, item_class=DiagnosisIcd9CMItem, item_fk_fieldname='diagnosis_icd9cm_id', system='ICD-9-CM') sql_icd10 = get_diagnosis_report_query( req, diagnosis_class=DiagnosisIcd10, item_class=DiagnosisIcd10Item, item_fk_fieldname='diagnosis_icd10_id', system='ICD-10') query = union(sql_icd9cm, sql_icd10) query = query.order_by(*ORDER_BY) return query
def access_list_paths(self, member, prefix=None, include_owned=False, include_containers=True): """Return the list of paths granted to member. Keyword arguments: prefix -- return only paths starting with prefix (default None) include_owned -- return also paths owned by member (default False) include_containers -- return also container paths owned by member (default True) """ xfeatures_xfeaturevals = self.xfeatures.join(self.xfeaturevals) selectable = (self.groups.c.owner + ':' + self.groups.c.name) member_groups = select([selectable.label('value')], self.groups.c.member == member) members = select([literal(member).label('value')]) any = select([literal('*').label('value')]) u = union(member_groups, members, any).alias() inner_join = join(xfeatures_xfeaturevals, u, self.xfeaturevals.c.value == u.c.value) s = select([self.xfeatures.c.path], from_obj=[inner_join]).distinct() if prefix: like = lambda p: self.xfeatures.c.path.like( self.escape_like(p) + '%', escape=ESCAPE_CHAR) s = s.where(or_(*map(like, self.access_inherit(prefix) or [prefix]))) r = self.conn.execute(s) l = [row[0] for row in r.fetchall()] r.close() if include_owned: container_nodes = select( [self.nodes.c.node], self.nodes.c.parent == self.node_lookup(member)) condition = self.nodes.c.parent.in_(container_nodes) if include_containers: condition = or_(condition, self.nodes.c.node.in_(container_nodes)) s = select([self.nodes.c.path], condition) r = self.conn.execute(s) l += [row[0] for row in r.fetchall() if row[0] not in l] r.close() return l
def sql(self, corpus): # TODO: needs the same treatment that we gave ``And`` replacing # intersect with join (but will it work?!?) # Alternative idea: # clauses = [] # if instanceof(self.left, Or): # clauses.append(self.left.left.sql(corpus)) # clauses.append(self.left.right.sql(corpus)) # else: # clauses.append(self.left.sql(corpus)) # if instanceof(self.right, Or): # clauses.append(self.right.left.sql(corpus)) # clauses.append(self.right.right.sql(corpus)) # else: # clauses.append(self.right.sql(corpus)) # return union(*clauses) return union(self.left.sql(corpus), self.right.sql(corpus))
def mostFrequentVariableAndValue(self, variableNameList): """ :type variableNameList: list(str) """ subQueryList = [] if len(variableNameList) == 0: raise EmptyVariableNameListError() with closing(self._sessionMaker()) as session: # For each variable, retrieve all possible values and their occurrence count. for variableName in variableNameList: variableNameColumn = literal(variableName).label( self._VARIABLE_NAME_KEY) variableValueColumn = getattr(SQLModsecurityAuditEntryMessage, variableName).label( self._VARIABLE_VALUE_KEY) variableValueCountColumn = count().label( self._VARIABLE_VALUE_COUNT_KEY) # Subquery of each variable. subQuery = self._makeQuery(session, [ variableNameColumn, variableValueColumn, variableValueCountColumn ]) subQuery = subQuery.group_by(self._VARIABLE_NAME_KEY, self._VARIABLE_VALUE_KEY) subQueryList.append(subQuery) # Merging all subqueries and sorting by reverse count... query = union(*subQueryList).order_by( desc(self._VARIABLE_VALUE_COUNT_KEY)).limit(1) query = query.order_by(desc( self._VARIABLE_VALUE_COUNT_KEY)).limit(1) # ... then picking the first one. item = session.execute(query).fetchone() if item is not None: return {str(item.variableName): item.variableValue} else: return None
def keys_and_changes(aliases, by_columns, start, end): keys_and_dates = [] for alias in aliases: from_clause = sqlalchemy.alias(alias.sql_table, alias.basename) where_clause = filter_date_range( from_clause.c[alias.modified_date_column], start, end) if alias.where: where_clause = and_(alias.where, where_clause) # one row for state start and one for end for state_date_column in alias.state_date_columns: keys_and_dates.append( select( by_columns + [literal_column(state_date_column) .label(STATE_START_COLUMN)]) .select_from(from_clause) .where(where_clause)) return union(*keys_and_dates)
def _get_select_children(waypoint): """ Return a WITH query that selects the document ids of the given waypoint, the children and the grand-children of the waypoint. See also: http://docs.sqlalchemy.org/en/latest/core/selectable.html#sqlalchemy.sql.expression.GenerativeSelect.cte # noqa """ select_waypoint = DBSession. \ query( literal_column(str(waypoint.document_id)).label('document_id'), literal_column('1').label('priority')). \ cte('waypoint') # query to get the direct child waypoints select_waypoint_children = DBSession. \ query( Waypoint.document_id, literal_column('0').label('priority')). \ join( Association, and_(Association.child_document_id == Waypoint.document_id, Association.parent_document_id == waypoint.document_id)). \ cte('waypoint_children') # query to get the grand-child waypoints select_waypoint_grandchildren = DBSession. \ query( Waypoint.document_id, literal_column('0').label('priority')). \ select_from(select_waypoint_children). \ join( Association, Association.parent_document_id == select_waypoint_children.c.document_id). \ join( Waypoint, Association.child_document_id == Waypoint.document_id). \ cte('waypoint_grandchildren') return union( select_waypoint.select(), select_waypoint_children.select(), select_waypoint_grandchildren.select()). \ cte('select_all_waypoints')
def get_neighbour_version_ids(version_id, document_id, lang): """ Get the previous and next version for a version of a document with a specific language. """ next_version = DBSession \ .query( DocumentVersion.id.label('id'), literal_column('1').label('t')) \ .filter(DocumentVersion.id > version_id) \ .filter(DocumentVersion.document_id == document_id) \ .filter(DocumentVersion.lang == lang) \ .order_by(DocumentVersion.id) \ .limit(1) \ .subquery() previous_version = DBSession \ .query( DocumentVersion.id.label('id'), literal_column('-1').label('t')) \ .filter(DocumentVersion.id < version_id) \ .filter(DocumentVersion.document_id == document_id) \ .filter(DocumentVersion.lang == lang) \ .order_by(DocumentVersion.id.desc()) \ .limit(1) \ .subquery() query = DBSession \ .query('id', 't') \ .select_from(union( next_version.select(), previous_version.select())) previous_version_id = None next_version_id = None for version, typ in query: if typ == -1: previous_version_id = version else: next_version_id = version return previous_version_id, next_version_id
def _get_select_children(waypoint): """ Return a WITH query that selects the document ids of the given waypoint, the children and the grand-children of the waypoint. See also: http://docs.sqlalchemy.org/en/latest/core/selectable.html#sqlalchemy.sql.expression.GenerativeSelect.cte # noqa """ select_waypoint = DBSession. \ query( literal_column(str(waypoint.document_id)).label('document_id'), literal_column('1').label('priority')). \ cte('waypoint') # query to get the direct child waypoints select_waypoint_children = DBSession. \ query( Waypoint.document_id, literal_column('0').label('priority')). \ join( Association, and_(Association.child_document_id == Waypoint.document_id, Association.parent_document_id == waypoint.document_id)). \ cte('waypoint_children') # query to get the grand-child waypoints select_waypoint_grandchildren = DBSession. \ query( Waypoint.document_id, literal_column('0').label('priority')). \ select_from(select_waypoint_children). \ join( Association, Association.parent_document_id == select_waypoint_children.c.document_id). \ join( Waypoint, Association.child_document_id == Waypoint.document_id). \ cte('waypoint_grandchildren') return union( select_waypoint.select(), select_waypoint_children.select(), select_waypoint_grandchildren.select()). \ cte('select_all_waypoints')
def get_neighbour_version_ids(version_id, document_id, lang): """ Get the previous and next version for a version of a document with a specific language. """ next_version = DBSession \ .query( DocumentVersion.id.label('id'), literal_column('1').label('t')) \ .filter(DocumentVersion.id > version_id) \ .filter(DocumentVersion.document_id == document_id) \ .filter(DocumentVersion.culture == lang) \ .order_by(DocumentVersion.id) \ .limit(1) \ .subquery() previous_version = DBSession \ .query( DocumentVersion.id.label('id'), literal_column('-1').label('t')) \ .filter(DocumentVersion.id < version_id) \ .filter(DocumentVersion.document_id == document_id) \ .filter(DocumentVersion.culture == lang) \ .order_by(DocumentVersion.id.desc()) \ .limit(1) \ .subquery() query = DBSession \ .query('id', 't') \ .select_from(union( next_version.select(), previous_version.select())) previous_version_id = None next_version_id = None for version, typ in query: if typ == -1: previous_version_id = version else: next_version_id = version return previous_version_id, next_version_id
def merge_tables(aliases, merge_type, as_alias, *args, **kwargs): sql_tables = [] for alias in aliases: sql_tables.append(select([alias.sql_table]).where(alias.where)) rename_to = as_alias.split('/')[-1] if merge_type == 'union all': sql = sqlalchemy.alias(union_all(*sql_tables), rename_to) state_date_columns = aliases[0].state_date_columns is_deleted_clause = aliases[0].is_deleted_column elif merge_type == 'union': sql = sqlalchemy.alias(union(*sql_tables), rename_to) state_date_columns = aliases[0].state_date_columns is_deleted_clause = aliases[0].is_deleted_column elif merge_type == 'modifications': sql, is_deleted_clause = merge_changes(aliases, by=kwargs.get('by'), rename_to=rename_to, start=kwargs.get('start'), end=kwargs.get('end')) state_date_columns = [STATE_START_COLUMN, STATE_END_COLUMN] return sql, state_date_columns, is_deleted_clause
def _get_select_children(waypoint): """ Return a WITH query that selects the document ids of the given waypoint, the children and the grand-children of the waypoint. See also: http://docs.sqlalchemy.org/en/latest/core/selectable.html#sqlalchemy.sql.expression.GenerativeSelect.cte # noqa """ select_waypoint = DBSession.query( literal_column(str(waypoint.document_id)).label("document_id"), literal_column("1").label("priority") ).cte("waypoint") # query to get the direct child waypoints select_waypoint_children = ( DBSession.query(Association.child_document_id.label("document_id"), literal_column("0").label("priority")) .filter( and_( Association.child_document_type == WAYPOINT_TYPE, Association.parent_document_id == waypoint.document_id ) ) .cte("waypoint_children") ) # query to get the grand-child waypoints select_waypoint_grandchildren = ( DBSession.query(Association.child_document_id.label("document_id"), literal_column("0").label("priority")) .select_from(select_waypoint_children) .join( Association, and_( Association.parent_document_id == select_waypoint_children.c.document_id, Association.child_document_type == WAYPOINT_TYPE, ), ) .cte("waypoint_grandchildren") ) return union( select_waypoint.select(), select_waypoint_children.select(), select_waypoint_grandchildren.select() ).cte("select_all_waypoints")
def union_select(select_components, distinct=False, select_type=TRIPLE_SELECT): """ Helper function for building union all select statement. Args: select_components (iterable of tuples): Indicates the table and table type (table_name, where_clause_string, table_type) distinct (bool): Whether to eliminate duplicate results select_type (int): From `rdflib_sqlalchemy.constants`. Either `COUNT_SELECT`, `CONTEXT_SELECT`, `TRIPLE_SELECT` """ selects = [] for table, whereClause, tableType in select_components: if select_type == COUNT_SELECT: c = table.c if tableType == ASSERTED_TYPE_PARTITION: cols = [c.member, c.klass] elif tableType in (ASSERTED_LITERAL_PARTITION, ASSERTED_NON_TYPE_PARTITION, QUOTED_PARTITION): cols = [c.subject, c.predicate, c.object] else: raise ValueError( 'Unrecognized table type {}'.format(tableType)) select_clause = expression.select([ functions.count().label('aCount') ]).select_from( expression.select(cols, whereClause).distinct().select_from(table)) elif select_type == CONTEXT_SELECT: select_clause = expression.select([table.c.context], whereClause) elif tableType in FULL_TRIPLE_PARTITIONS: select_clause = table.select(whereClause) elif tableType == ASSERTED_TYPE_PARTITION: select_clause = expression.select([ table.c.id.label("id"), table.c.member.label("subject"), expression.literal(text_type(RDF.type)).label("predicate"), table.c.klass.label("object"), table.c.context.label("context"), table.c.termComb.label("termcomb"), expression.literal_column("NULL").label("objlanguage"), expression.literal_column("NULL").label("objdatatype") ], whereClause) elif tableType == ASSERTED_NON_TYPE_PARTITION: select_clause = expression.select([c for c in table.columns] + [ expression.literal_column("NULL").label("objlanguage"), expression.literal_column("NULL").label("objdatatype") ], whereClause, from_obj=[table]) selects.append(select_clause) order_statement = [] if select_type == TRIPLE_SELECT: order_statement = [ expression.literal_column("subject"), expression.literal_column("predicate"), expression.literal_column("object"), ] if distinct and select_type != COUNT_SELECT: return expression.union(*selects, **{"order_by": order_statement}) else: return expression.union_all(*selects, **{"order_by": order_statement})
def as_clause_base(self, db, include_breakpoints=False): assert self.reduced def base_query(labeled=False): post = with_polymorphic( Post, [], Post.__table__, aliased=False, flat=True) if labeled: return post, db.query(post.id.label("post_id")) else: return post, db.query(post.id) if not self.paths: post, q = base_query(True) return q.filter(False).subquery("relposts") includes_by_level = [[]] excludes_by_level = [[]] ancestry = [] for path in self.paths: while ancestry: if not path.post_path.startswith(ancestry[-1].post_path): ancestry.pop() else: break level = len(ancestry) // 2 if path.positive: while len(includes_by_level) <= level: includes_by_level.append([]) includes_by_level[level].append(path) else: while len(excludes_by_level) <= level: excludes_by_level.append([]) excludes_by_level[level].append(path) ancestry.append(path) max_level = max(len(includes_by_level), len(excludes_by_level)) q = None for level in range(max_level): condition = None # with use_labels, name of final column determined by first query post, q2 = base_query(level == 0) includes = (includes_by_level[level] if level < len(includes_by_level) else []) excludes = (excludes_by_level[level] if level < len(excludes_by_level) else []) include_ids = [path.last_id for path in includes] exclude_ids = [path.last_id for path in excludes] if include_breakpoints: include_ids.extend(exclude_ids) exclude_ids = None if len(includes): condition = or_( post.id.in_(include_ids), *[post.ancestry.like(path.post_path + "%") for path in includes]) if level == 0: q = q2.filter(condition) else: assert condition is not None q2 = q2.filter(condition) # works in postgres, more efficient q = union(q, q2, use_labels=True) # rather than # q = q.union(q2) condition = None post, q2 = base_query() if len(excludes): condition = or_( *[post.ancestry.like(path.post_path + "%") for path in excludes]) if exclude_ids: condition = post.id.in_(exclude_ids) | condition q = except_(q, q2.filter(condition), use_labels=True) # q = q.except_(q2.filter(condition)) condition = None if getattr(q, "c", None) is None: # base query c = q._entities[0] q = q.with_entities(c.expr.label("post_id")) q = q.subquery("relposts") else: # compound query, already has columns q = q.alias("relposts") return q
def get_changed_routes_and_outings_ww(session, last_update): """ Returns the routes and outings when associations between waypoint and waypoint have been created/removed. E.g. when an association between waypoint W1 and W2 is created, all routes associated to W2, all routes associated to the direct children of W2 and all outings associated to these routes have to be updated. For example given the following associations: W1 -> W2, W2 -> W3, W3 -> R1 Route R1 has the following `associated_waypoint_ids`: W3, W2, W1 When association W1 -> W2 is deleted, all routes linked to W2 and all routes linked to the direct waypoint children of W2 (in this case W3) have to be updated. After the update, `associated_waypoint_ids` of R1 is: W3, W2 """ select_changed_waypoints = session. \ query(AssociationLog.child_document_id.label('waypoint_id')). \ filter(and_( AssociationLog.parent_document_type == WAYPOINT_TYPE, AssociationLog.child_document_type == WAYPOINT_TYPE )). \ filter(AssociationLog.written_at >= last_update). \ cte('changed_waypoints') select_changed_waypoint_children = session. \ query(Association.child_document_id.label('waypoint_id')). \ select_from(select_changed_waypoints). \ join( Association, and_( Association.parent_document_id == select_changed_waypoints.c.waypoint_id, Association.child_document_type == WAYPOINT_TYPE )). \ cte('changed_waypoint_children') select_all_changed_waypoints = union( select_changed_waypoints.select(), select_changed_waypoint_children.select()). \ cte('all_changed_waypoints') select_changed_routes = session. \ query( Association.child_document_id.label('route_id') ). \ select_from(select_all_changed_waypoints). \ join( Association, and_( Association.parent_document_id == select_all_changed_waypoints.c.waypoint_id, Association.child_document_type == ROUTE_TYPE )). \ group_by(Association.child_document_id). \ cte('changed_routes') select_changed_outings = session. \ query( Association.child_document_id.label('outing_id')). \ select_from(select_changed_routes). \ join( Association, and_( Association.parent_document_id == select_changed_routes.c.route_id, Association.child_document_type == OUTING_TYPE )). \ group_by(Association.child_document_id). \ cte('changed_outings') select_changed_routes_and_outings = union( session.query( select_changed_routes.c.route_id.label('document_id'), literal(ROUTE_TYPE).label('type') ).select_from(select_changed_routes), session.query( select_changed_outings.c.outing_id.label('document_id'), literal(OUTING_TYPE).label('type') ).select_from(select_changed_outings)). \ cte('changed_routes_and_outings') return session. \ query( select_changed_routes_and_outings.c.document_id, select_changed_routes_and_outings.c.type). \ select_from(select_changed_routes_and_outings). \ all()
def parents(self): session = object_session(self) parent_ids = union(session.query(children.c.parent_id).filter(children.c.child_id == self.id), session.query(embedded.c.parent_id).filter(embedded.c.child_id == self.id)) return session.query(Link).filter(Link.id == parent_ids.c.children_parent_id)
def as_clause_base(self, db, discussion_id, include_breakpoints=False, include_deleted=False): """Express collection as a SQLAlchemy query clause. :param bool include_breakpoints: Include posts where a threadbreak happens :param include_deleted: Include posts in deleted_publication_states. True means only deleted posts, None means all posts, False means only live posts or deleted posts with live descendants. """ assert self.reduced def base_query(labeled=False): post = with_polymorphic(Post, [], Post.__table__, aliased=False, flat=True) content = with_polymorphic(Content, [], Content.__table__, aliased=False, flat=True) if labeled: query = db.query(post.id.label("post_id")) else: query = db.query(post.id) query = query.join(content, (content.id == post.id) & (content.discussion_id == discussion_id)) if include_deleted is not None: if include_deleted: query = query.filter( post.publication_state.in_(deleted_publication_states)) else: query = query.filter(content.tombstone_date == None) return post, query if not self.paths: post, q = base_query(True) return q.filter(False).subquery("relposts") includes_by_level = [[]] excludes_by_level = [[]] ancestry = [] for path in self.paths: while ancestry: if not path.post_path.startswith(ancestry[-1].post_path): ancestry.pop() else: break level = len(ancestry) // 2 if path.positive: while len(includes_by_level) <= level: includes_by_level.append([]) includes_by_level[level].append(path) else: while len(excludes_by_level) <= level: excludes_by_level.append([]) excludes_by_level[level].append(path) ancestry.append(path) max_level = max(len(includes_by_level), len(excludes_by_level)) q = None for level in range(max_level): condition = None # with use_labels, name of final column determined by first query post, q2 = base_query(level == 0) includes = (includes_by_level[level] if level < len(includes_by_level) else []) excludes = (excludes_by_level[level] if level < len(excludes_by_level) else []) include_ids = [path.last_id for path in includes] exclude_ids = [path.last_id for path in excludes] if include_breakpoints: include_ids.extend(exclude_ids) exclude_ids = None if len(includes): ancestry_regex = '^(%s)' % ('|'.join(path.post_path for path in includes)) condition = or_(post.id.in_(include_ids), post.ancestry.op('~', 0, True)(ancestry_regex)) if level == 0: q = q2.filter(condition) else: assert condition is not None q2 = q2.filter(condition) # works in postgres, more efficient q = union(q, q2, use_labels=True) # rather than # q = q.union(q2) condition = None post, q2 = base_query() if len(excludes): ancestry_regex = '^(%s)' % ('|'.join(path.post_path for path in excludes)) condition = post.ancestry.op('~', 0, True)(ancestry_regex) if exclude_ids: condition = post.id.in_(exclude_ids) | condition q = except_(q, q2.filter(condition), use_labels=True) # q = q.except_(q2.filter(condition)) condition = None if getattr(q, "c", None) is None: # base query c = q._entities[0] q = q.with_entities(c.expr.label("post_id")) q = q.subquery("relposts") else: # compound query, already has columns q = q.alias("relposts") return q
def as_clause_base(self, db, include_breakpoints=False, include_deleted=False, include_moderating=None, user_id=None): """Express collection as a SQLAlchemy query clause. :param bool include_breakpoints: Include posts where a threadbreak happens :param include_deleted: Include posts in deleted_publication_states. True means only deleted posts, None means all posts, False means only live posts or deleted posts with live descendants. :param include_moderating: Include posts in SUBMITTED_AWAITING_MODERATION. True means include all those posts, Falsish means none of those posts, a "mine" value means only those belonging to this user. There is not currently a way to only get those posts. (todo?) NOTE: that parameter is interpreted differently in Idea.get_related_posts_query """ assert self.reduced def base_query(labeled=False): post = with_polymorphic(Post, [], Post.__table__, aliased=False, flat=True) content = with_polymorphic(Content, [], Content.__table__, aliased=False, flat=True) if labeled: query = db.query(post.id.label("post_id")) else: query = db.query(post.id) query = query.join(content, content.id == post.id) states = set(countable_publication_states) # Or just published? states.update(deleted_publication_states) if include_deleted is not None: if include_deleted is True: states = set(deleted_publication_states) else: query = query.filter( content.tombstone_date == None) # noqa: E711 if include_moderating is True: states.add(PublicationStates.SUBMITTED_AWAITING_MODERATION) state_condition = post.publication_state.in_(states) if user_id: if include_moderating == "mine": state_condition = state_condition | ( post.publication_state.in_([ PublicationStates.SUBMITTED_AWAITING_MODERATION, PublicationStates.DRAFT ]) & (post.creator_id == user_id)) else: state_condition = state_condition | ( (post.publication_state == PublicationStates.DRAFT) & (post.creator_id == user_id)) query = query.filter(state_condition) return post, query if not self.paths: post, q = base_query(True) return q.filter(False).subquery("relposts") includes_by_level = [[]] excludes_by_level = [[]] ancestry = [] for path in self.paths: while ancestry: if not path.post_path.startswith(ancestry[-1].post_path): ancestry.pop() else: break level = len(ancestry) // 2 if path.positive: while len(includes_by_level) <= level: includes_by_level.append([]) includes_by_level[level].append(path) else: while len(excludes_by_level) <= level: excludes_by_level.append([]) excludes_by_level[level].append(path) ancestry.append(path) max_level = max(len(includes_by_level), len(excludes_by_level)) q = None for level in range(max_level): condition = None # with use_labels, name of final column determined by first query post, q2 = base_query(level == 0) includes = (includes_by_level[level] if level < len(includes_by_level) else []) excludes = (excludes_by_level[level] if level < len(excludes_by_level) else []) include_ids = [path.last_id for path in includes] exclude_ids = [path.last_id for path in excludes] if include_breakpoints: include_ids.extend(exclude_ids) exclude_ids = None if len(includes): ancestry_regex = '^(%s)' % ('|'.join(path.post_path for path in includes)) condition = or_(post.id.in_(include_ids), post.ancestry.op('~', 0, True)(ancestry_regex)) if level == 0: q = q2.filter(condition) else: assert condition is not None q2 = q2.filter(condition) # works in postgres, more efficient q = union(q, q2, use_labels=True) # rather than # q = q.union(q2) condition = None post, q2 = base_query() if len(excludes): ancestry_regex = '^(%s)' % ('|'.join(path.post_path for path in excludes)) condition = post.ancestry.op('~', 0, True)(ancestry_regex) if exclude_ids: condition = post.id.in_(exclude_ids) | condition q = except_(q, q2.filter(condition), use_labels=True) # q = q.except_(q2.filter(condition)) condition = None if getattr(q, "c", None) is None: # base query c = q._entities[0] q = q.with_entities(c.expr.label("post_id")) q = q.subquery("relposts") else: # compound query, already has columns q = q.alias("relposts") return q
def get_changed_routes_and_outings_ww(session, last_update): """ Returns the routes and outings when associations between waypoint and waypoint have been created/removed. E.g. when an association between waypoint W1 and W2 is created, all routes associated to W2, all routes associated to the direct children of W2 and all outings associated to these routes have to be updated. For example given the following associations: W1 -> W2, W2 -> W3, W3 -> R1 Route R1 has the following `associated_waypoint_ids`: W3, W2, W1 When association W1 -> W2 is deleted, all routes linked to W2 and all routes linked to the direct waypoint children of W2 (in this case W3) have to be updated. After the update, `associated_waypoint_ids` of R1 is: W3, W2 """ select_changed_waypoints = session. \ query(AssociationLog.child_document_id.label('waypoint_id')). \ filter(and_( AssociationLog.parent_document_type == WAYPOINT_TYPE, AssociationLog.child_document_type == WAYPOINT_TYPE )). \ filter(AssociationLog.written_at >= last_update). \ cte('changed_waypoints') select_changed_waypoint_children = session. \ query(Association.child_document_id.label('waypoint_id')). \ select_from(select_changed_waypoints). \ join( Association, and_( Association.parent_document_id == select_changed_waypoints.c.waypoint_id, Association.child_document_type == WAYPOINT_TYPE )). \ cte('changed_waypoint_children') select_all_changed_waypoints = union( select_changed_waypoints.select(), select_changed_waypoint_children.select()). \ cte('all_changed_waypoints') select_changed_routes = session. \ query( Association.child_document_id.label('route_id') ). \ select_from(select_all_changed_waypoints). \ join( Association, and_( Association.parent_document_id == select_all_changed_waypoints.c.waypoint_id, Association.child_document_type == ROUTE_TYPE )). \ group_by(Association.child_document_id). \ cte('changed_routes') select_changed_outings = session. \ query( Association.child_document_id.label('outing_id')). \ select_from(select_changed_routes). \ join( Association, and_( Association.parent_document_id == select_changed_routes.c.route_id, Association.child_document_type == OUTING_TYPE )). \ group_by(Association.child_document_id). \ cte('changed_outings') select_changed_routes_and_outings = union( session.query( select_changed_routes.c.route_id.label('document_id'), literal(ROUTE_TYPE).label('type') ).select_from(select_changed_routes), session.query( select_changed_outings.c.outing_id.label('document_id'), literal(OUTING_TYPE).label('type') ).select_from(select_changed_outings)). \ cte('changed_routes_and_outings') return session. \ query( select_changed_routes_and_outings.c.document_id, select_changed_routes_and_outings.c.type). \ select_from(select_changed_routes_and_outings). \ all()
def as_clause_base(self, db, include_breakpoints=False, include_deleted=False, include_moderating=None, user_id=None): """Express collection as a SQLAlchemy query clause. :param bool include_breakpoints: Include posts where a threadbreak happens :param include_deleted: Include posts in deleted_publication_states. True means only deleted posts, None means all posts, False means only live posts or deleted posts with live descendants. :param include_moderating: Include posts in SUBMITTED_AWAITING_MODERATION. True means include all those posts, Falsish means none of those posts, a "mine" value means only those belonging to this user. There is not currently a way to only get those posts. (todo?) NOTE: that parameter is interpreted differently in Idea.get_related_posts_query """ assert self.reduced def base_query(labeled=False): post = with_polymorphic( Post, [], Post.__table__, aliased=False, flat=True) content = with_polymorphic( Content, [], Content.__table__, aliased=False, flat=True) if labeled: query = db.query(post.id.label("post_id")) else: query = db.query(post.id) query = query.join(content, content.id == post.id) states = set(countable_publication_states) # Or just published? states.update(deleted_publication_states) if include_deleted is not None: if include_deleted is True: states = set(deleted_publication_states) else: query = query.filter(content.tombstone_date == None) # noqa: E711 if include_moderating is True: states.add(PublicationStates.SUBMITTED_AWAITING_MODERATION) state_condition = post.publication_state.in_(states) if user_id: if include_moderating == "mine": state_condition = state_condition | ( post.publication_state.in_([ PublicationStates.SUBMITTED_AWAITING_MODERATION, PublicationStates.DRAFT]) & (post.creator_id == user_id)) else: state_condition = state_condition | ( (post.publication_state == PublicationStates.DRAFT) & (post.creator_id == user_id)) query = query.filter(state_condition) return post, query if not self.paths: post, q = base_query(True) return q.filter(False).subquery("relposts") includes_by_level = [[]] excludes_by_level = [[]] ancestry = [] for path in self.paths: while ancestry: if not path.post_path.startswith(ancestry[-1].post_path): ancestry.pop() else: break level = len(ancestry) // 2 if path.positive: while len(includes_by_level) <= level: includes_by_level.append([]) includes_by_level[level].append(path) else: while len(excludes_by_level) <= level: excludes_by_level.append([]) excludes_by_level[level].append(path) ancestry.append(path) max_level = max(len(includes_by_level), len(excludes_by_level)) q = None for level in range(max_level): condition = None # with use_labels, name of final column determined by first query post, q2 = base_query(level == 0) includes = (includes_by_level[level] if level < len(includes_by_level) else []) excludes = (excludes_by_level[level] if level < len(excludes_by_level) else []) include_ids = [path.last_id for path in includes] exclude_ids = [path.last_id for path in excludes] if include_breakpoints: include_ids.extend(exclude_ids) exclude_ids = None if len(includes): ancestry_regex = '^(%s)' % ('|'.join( path.post_path for path in includes)) condition = or_( post.id.in_(include_ids), post.ancestry.op('~', 0, True)(ancestry_regex)) if level == 0: q = q2.filter(condition) else: assert condition is not None q2 = q2.filter(condition) # works in postgres, more efficient q = union(q, q2, use_labels=True) # rather than # q = q.union(q2) condition = None post, q2 = base_query() if len(excludes): ancestry_regex = '^(%s)' % ('|'.join( path.post_path for path in excludes)) condition = post.ancestry.op('~', 0, True)(ancestry_regex) if exclude_ids: condition = post.id.in_(exclude_ids) | condition q = except_(q, q2.filter(condition), use_labels=True) # q = q.except_(q2.filter(condition)) condition = None if getattr(q, "c", None) is None: # base query c = q._entities[0] q = q.with_entities(c.expr.label("post_id")) q = q.subquery("relposts") else: # compound query, already has columns q = q.alias("relposts") return q