def fetch_modified_rpm_details(self) -> ResultProxy:
    rd: RpmDetail = aliased(RpmDetail)
    fd: FileDetail = aliased(FileDetail)
    rdl: RpmFileDetailLink = aliased(RpmFileDetailLink)
    s: System = aliased(System)
    query = self._session.query(rd).join(
        s,
        (s.system_id == rd.system_id),
    ).join(
        rdl,
        (rdl.rpm_detail_id == rd.rpm_detail_id),
    ).outerjoin(
        fd,
        (fd.file_type == "F")
        & (rdl.file_detail_id == fd.file_detail_id)
        & (rd.digest == case(
            {
                32: fd.md5_digest,
                64: fd.sha256_digest,
            },
            value=func.length(coalesce(rd.digest, "")),
            else_=None,
        ))
    ).filter(
        (s.system_id == self.system.system_id)
        & (fd.file_detail_id == None)
        & (func.coalesce(rd.file_info, "") != "directory")
        & (~func.coalesce(rd.file_info, "").startswith("symbolic link"))
    ).distinct()
    result: ResultProxy = query.all()
    return result
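# --- Illustration (not from the original codebase): the digest-dispatch CASE
# used in fetch_modified_rpm_details, reduced to a self-contained sketch.
# A CASE keyed on the length of the stored digest picks which file column to
# compare: 32 hex chars is treated as MD5, 64 as SHA-256, anything else
# becomes NULL and so never matches. Table and column names are assumptions.
from sqlalchemy import Column, Integer, MetaData, String, Table, case, func

metadata = MetaData()
rpm_detail = Table(
    "rpm_detail", metadata,
    Column("rpm_detail_id", Integer, primary_key=True),
    Column("digest", String))
file_detail = Table(
    "file_detail", metadata,
    Column("file_detail_id", Integer, primary_key=True),
    Column("md5_digest", String(32)),
    Column("sha256_digest", String(64)))

digest_matches = rpm_detail.c.digest == case(
    {
        32: file_detail.c.md5_digest,     # 32 hex chars -> compare as MD5
        64: file_detail.c.sha256_digest,  # 64 hex chars -> compare as SHA-256
    },
    value=func.length(func.coalesce(rpm_detail.c.digest, "")),
    else_=None)  # unknown length -> NULL, which never compares equal

print(digest_matches)  # renders the CASE comparison as SQL text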
def sort( self, trans, query, ascending, column_name=None ):
    def get_foreign_key( source_class, target_class ):
        """ Returns foreign key in source class that references target class. """
        target_fk = None
        for fk in source_class.table.foreign_keys:
            if fk.references( target_class.table ):
                target_fk = fk
                break
        if not target_fk:
            raise RuntimeError( "No foreign key found between objects: %s, %s"
                                % ( source_class.table, target_class.table ) )
        return target_fk

    # Get the columns that connect item's table and item's rating association table.
    item_rating_assoc_class = getattr( trans.model, '%sRatingAssociation' % self.model_class.__name__ )
    foreign_key = get_foreign_key( item_rating_assoc_class, self.model_class )
    fk_col = foreign_key.parent
    referent_col = foreign_key.get_referent( self.model_class.table )
    # Do sorting using a subquery.
    # Subquery to get average rating for each item.
    ave_rating_subquery = trans.sa_session.query( fk_col,
                                                  func.avg( item_rating_assoc_class.table.c.rating ).label( 'avg_rating' ) ) \
                                          .group_by( fk_col ) \
                                          .subquery()
    # Integrate subquery into main query.
    query = query.outerjoin( ( ave_rating_subquery, referent_col == ave_rating_subquery.columns[ fk_col.name ] ) )
    # Sort using subquery results; use coalesce to avoid null values.
    if not ascending:
        # TODO: for now, reverse sorting b/c first sort is ascending,
        # and that should be the natural sort.
        query = query.order_by( func.coalesce( ave_rating_subquery.c.avg_rating, 0 ).asc() )
    else:
        query = query.order_by( func.coalesce( ave_rating_subquery.c.avg_rating, 0 ).desc() )
    return query
def sort(self, trans, query, ascending, column_name=None):
    # Get the columns that connect item's table and item's rating association table.
    item_rating_assoc_class = getattr(
        trans.model, f'{self.model_class.__name__}RatingAssociation')
    foreign_key = get_foreign_key(item_rating_assoc_class, self.model_class)
    fk_col = foreign_key.parent
    referent_col = foreign_key.get_referent(self.model_class.table)
    # Do sorting using a subquery.
    # Subquery to get average rating for each item.
    ave_rating_subquery = trans.sa_session.query(
        fk_col,
        func.avg(item_rating_assoc_class.table.c.rating).label('avg_rating')) \
        .group_by(fk_col).subquery()
    # Integrate subquery into main query.
    query = query.outerjoin(
        (ave_rating_subquery,
         referent_col == ave_rating_subquery.columns[fk_col.name]))
    # Sort using subquery results; use coalesce to avoid null values.
    if not ascending:
        # TODO: for now, reverse sorting b/c first sort is ascending,
        # and that should be the natural sort.
        query = query.order_by(
            func.coalesce(ave_rating_subquery.c.avg_rating, 0).asc())
    else:
        query = query.order_by(
            func.coalesce(ave_rating_subquery.c.avg_rating, 0).desc())
    return query
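# --- Illustration (assumed minimal schema, not the real models): the
# average-rating sort used by sort() above, as a runnable sketch. Per-item
# averages are computed in a subquery, outer-joined to the main query, and
# COALESCEd to 0 so unrated items get a definite rank instead of NULL.
from sqlalchemy import Column, ForeignKey, Integer, create_engine, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Item(Base):
    __tablename__ = 'item'
    id = Column(Integer, primary_key=True)


class ItemRatingAssociation(Base):
    __tablename__ = 'item_rating'
    id = Column(Integer, primary_key=True)
    item_id = Column(Integer, ForeignKey('item.id'))
    rating = Column(Integer)


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    ave_rating_subquery = (
        session.query(
            ItemRatingAssociation.item_id,
            func.avg(ItemRatingAssociation.rating).label('avg_rating'))
        .group_by(ItemRatingAssociation.item_id)
        .subquery())
    query = (
        session.query(Item)
        .outerjoin(ave_rating_subquery,
                   Item.id == ave_rating_subquery.c.item_id)
        .order_by(func.coalesce(ave_rating_subquery.c.avg_rating, 0).desc()))
    print(query.all())  # [] on the empty database; the generated SQL is valid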
def process_item(self, item, spider):
    session = Session()
    if isinstance(spider, (PostingSpider, ListSpider)):
        posting = session.query(Posting).filter(
            Posting.id == item["id"]).one_or_none()
        # upsert posting
        if posting:
            logger.debug(f"Updating {posting}")
            for column in Posting.__mapper__.columns:
                if column.name != "id":
                    if column.name not in item:
                        continue
                    new_value = item.get(column.name, None)
                    setattr(posting, column.name, new_value)
        else:
            posting = Posting(**item)
            session.add(posting)
            logger.debug(f"Creating {posting}")
        # get new revision_id and create posting revision
        kwargs = dict(**item)
        kwargs["posting_id"] = item["id"]
        max_id = (session.query(
            func.coalesce(func.max(PostingRevision.id), 0)).filter(
                PostingRevision.posting_id == item["id"]).one()[0])
        kwargs["id"] = max_id + 1
        revision = PostingRevision(**kwargs)
        logger.debug(f"Creating {revision}")
        session.add(revision)
        session.commit()
    return item
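# --- Illustration (assumed schema): the revision-numbering step above in
# isolation. COALESCE(MAX(id), 0) collapses the "no revisions yet" case to 0,
# so the first revision gets id 1 without special-casing an empty table.
from sqlalchemy import Column, Integer, create_engine, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class PostingRevision(Base):
    __tablename__ = 'posting_revision'
    posting_id = Column(Integer, primary_key=True)
    id = Column(Integer, primary_key=True)


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    max_id = (session.query(
        func.coalesce(func.max(PostingRevision.id), 0)).filter(
            PostingRevision.posting_id == 42).scalar())
    print(max_id + 1)  # 1 -- next revision id for a posting with no revisions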
def bp_api_search(request):
    query = DBSession.query(Languoid, LanguageIdentifier, Identifier)\
        .join(LanguageIdentifier).join(Identifier)
    term = request.params['q'].strip().lower()
    whole = request.params.get('whole', "False")
    multilingual = request.params.get('multilingual', "True")

    MIN_QUERY_LEN = 3
    if not term:
        query = None
    elif len(term) < MIN_QUERY_LEN:
        return [{'message': 'Query must be at least {} characters.'.format(MIN_QUERY_LEN)}]
    elif len(term) == 8 and GLOTTOCODE_PATTERN.match(term):
        query = query.filter(Languoid.id == term)
        kind = 'Glottocode'
    else:
        # list of criteria to search languoids by
        filters = []
        filters.append(Language.active == True)
        ul_iname = func.unaccent(func.lower(Identifier.name))
        ul_name = func.unaccent(term)
        if whole.lower() == 'true':
            filters.append(ul_iname == ul_name)
        else:
            filters.append(ul_iname.contains(ul_name))
        if multilingual.lower() == 'false':
            # restrict to English identifiers
            filters.append(
                func.coalesce(Identifier.lang, '').in_((u'', u'eng', u'en')))
        query = query.filter(and_(*filters))
        kind = 'name part'

    if query is None:
        return []
    else:
        results = query.order_by(Languoid.name)\
            .options(joinedload(Languoid.family)).all()
    if not results:
        return []

    # group together identifiers that matched for the same languoid
    mapped_results = {k: list(g)
                      for k, g in groupby(results, lambda x: x.Languoid)}
    # order languoid results by greatest identifier similarity,
    # then by name to break ties + consistency
    ordered_results = OrderedDict(sorted(
        mapped_results.items(),
        key=lambda x: (best_identifier_score(x[1], term), x[0].name)))

    return [{
        'name': k.name,
        'glottocode': k.id,
        'iso': k.hid if k.hid else '',
        'level': k.level.name,
        'matched_identifiers': sorted(
            set([i.Identifier.name for i in v]),
            key=lambda x: identifier_score(x, term)
        ) if kind != 'Glottocode' else [],
    } for k, v in ordered_results.items()]
def refund(self, invoice, amount):
    """Refund the invoice"""
    Transaction = tables.Transaction
    tx_model = self.factory.create_transaction_model()
    transactions = []

    self.get(invoice.guid, with_lockmode='update')
    if invoice.status != self.statuses.SETTLED:
        raise InvalidOperationError('You can only refund a settled invoice')

    refunded_amount = (
        self.session.query(
            func.coalesce(func.sum(Transaction.amount), 0)
        )
        .filter(
            Transaction.invoice == invoice,
            Transaction.transaction_type == TransactionModel.types.REFUND,
            Transaction.submit_status.in_([
                TransactionModel.submit_statuses.STAGED,
                TransactionModel.submit_statuses.RETRYING,
                TransactionModel.submit_statuses.DONE,
            ])
        )
    ).scalar()
    # Make sure we do not allow refunding more than the effective amount
    if refunded_amount + amount > invoice.effective_amount:
        raise InvalidOperationError(
            'Refund total amount {} + {} will exceed invoice effective amount {}'
            .format(refunded_amount, amount, invoice.effective_amount)
        )
    # the settled transaction
    settled_transaction = (
        self.session.query(Transaction)
        .filter(
            Transaction.invoice == invoice,
            Transaction.transaction_type == TransactionModel.types.DEBIT,
            Transaction.submit_status == TransactionModel.submit_statuses.DONE,
        )
    ).one()
    # create the refund transaction
    transaction = tx_model.create(
        invoice=invoice,
        transaction_type=TransactionModel.types.REFUND,
        amount=amount,
        reference_to=settled_transaction,
    )
    transactions.append(transaction)
    return transactions
def do_collector(self, input: str):
    try:
        arguments = self._process_input(KisConsoleConsoleCommand.collector, input)
        if len(arguments) == 0:
            with self._engine.session_scope() as session:
                workspace_id = session.query(Workspace.id).filter_by(
                    name=self._workspace).scalar_subquery()
                query = session.query(
                    CollectorName.name.label("collector"),
                    CollectorName.type.label("type"),
                    func.coalesce(Command.status,
                                  CommandStatus.pending.name).label("status"),
                    CollectorName.priority,
                    func.count(Command.status).label("count")) \
                    .outerjoin((Command, CollectorName.commands)) \
                    .filter(Command.workspace_id == workspace_id) \
                    .group_by(CollectorName.name,
                              CollectorName.type,
                              func.coalesce(Command.status,
                                            CommandStatus.pending.name),
                              CollectorName.priority)
                df = pandas.read_sql(query.statement, query.session.bind)
                df["status"] = df["status"].apply(lambda x: CommandStatus(x).name)
                df["type"] = df["type"].apply(lambda x: CollectorType(x).name)
                results = pandas.pivot_table(
                    df,
                    index=["collector", "type", "priority"],
                    columns=["status"],
                    values="count",
                    aggfunc=numpy.sum,
                    fill_value=0).sort_values(by="priority")
                print(results)
        elif arguments[0] == CollectorArgumentEnum.current:
            if self._producer_thread.current_collector:
                print(self._producer_thread.current_collector.name)
            else:
                print("none")
        elif arguments[0] == CollectorArgumentEnum.remaining:
            for item in self._producer_thread.remaining_collectors:
                print(item)
    except Exception:
        traceback.print_exc(file=sys.stderr)
def total_adjustment_amount(self):
    """Sum of total adjustment amount"""
    from sqlalchemy import func
    session = object_session(self)
    return (
        session.query(func.coalesce(func.sum(Adjustment.amount), 0))
        .filter(Adjustment.invoice_guid == self.guid)
        .scalar()
    )
def get_product_by_tag(self, tag):
    products = db.session.query(
        Tag.id,
        TagProductDef.id.label('product_id'),
        Ofert.id.label('ofert_id'),
        Ofert.title,
        Ofert.url,
        Ofert.price,
        Ofert.currency,
        Image.image,
        Image.control_sum,
        func.coalesce(Ofert.manufacturer, '').label('brand_name'),
        Category.name.label('category'),
        Tag.value.label('tags')
    ).join(
        TagProduct, TagProduct.tag_id == Tag.id
    ).join(
        TagProductDef, TagProductDef.id == TagProduct.tag_product_def_id
    ).join(
        TagOfert, TagOfert.tag_product_def_id == TagProduct.tag_product_def_id
    ).join(
        Ofert, Ofert.id == TagOfert.ofert_id
    ).join(
        Image, Image.image == Ofert.image
    ).join(
        Category, Category.id == TagProductDef.category_id, isouter=True
    ).filter(
        and_(
            Tag.value == tag,
            Ofert.creation_date.cast(Date) == func.current_date())
    ).order_by(Ofert.price.asc()).cte('products')

    subcategory = db.bindparam('str_tworzacy', '').label('subcategory')
    colortags = db.bindparam('str_tworzacy', '').label('colortags')

    return db.session.query(
        products.c.product_id,
        products.c.ofert_id,
        products.c.title,
        products.c.url,
        products.c.price,
        products.c.currency,
        products.c.image,
        products.c.control_sum,
        products.c.brand_name,
        products.c.category,
        products.c.tags,
        func.string_agg(Tag.value, ';').label('all_tags'),
        subcategory,
        colortags
    ).join(
        TagProduct, TagProduct.tag_product_def_id == products.c.product_id,
        isouter=True
    ).join(
        Tag, Tag.id == TagProduct.tag_id, isouter=True
    ).group_by(
        products.c.product_id,
        products.c.ofert_id,
        products.c.title,
        products.c.url,
        products.c.price,
        products.c.currency,
        products.c.image,
        products.c.control_sum,
        products.c.brand_name,
        products.c.category,
        products.c.tags,
    ).order_by(products.c.price.asc()).all()
def contents(self, ctx, rev=None, **kwargs):
    if rev and str(rev).lower() != 'head':
        _rev = ctx.db.query(Revision).filter(
            Revision.script_id == self.id,
            Revision.version == rev).first()
    else:
        _rev = ctx.db.query(Revision).filter(
            Revision.script_id == self.id,
            func.coalesce(Revision.draft, False) != True  # noqa
        ).order_by(Revision.created_at.desc()).first()
    if _rev:
        return _rev
    else:
        return None
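# --- Illustration (assumed schema): the draft filter above in isolation.
# COALESCE maps a NULL draft flag to False, so rows that predate the column
# (or were never flagged) count as published rather than being dropped by
# three-valued NULL comparison.
from sqlalchemy import Boolean, Column, Integer, create_engine, func
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Revision(Base):
    __tablename__ = 'revision'
    id = Column(Integer, primary_key=True)
    draft = Column(Boolean)


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([Revision(id=1, draft=None), Revision(id=2, draft=True)])
    session.commit()
    published = (session.query(Revision.id)
                 .filter(func.coalesce(Revision.draft, False) != True)  # noqa
                 .all())
    print(published)  # [(1,)] -- the NULL-draft row counts as published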
def update_score(connection, update_all=True):
    """
    Update the score of all teams. If ``update_all`` is set, the points for
    all challenges are updated beforehand as well. This is your one-shot
    function to create up-to-date points for everything.
    """
    from fluxscoreboard.models import dynamic_challenges
    if update_all:
        update_challenge_points(connection, update_team_count=True)

    bonus_col = func.sum(Challenge._points - Challenge.base_points)
    bonus_score = (select([func.coalesce(bonus_col, 0)]).
                   where(Challenge.id == Submission.challenge_id).
                   where(Team.id == Submission.team_id).
                   where(~Challenge.dynamic).
                   where(Challenge.published).
                   correlate(Team))

    # base score
    challenge_sum = func.coalesce(func.sum(Challenge.base_points), 0)
    # first blood
    fb_sum = func.coalesce(func.sum(Submission.additional_pts), 0)
    points_col = challenge_sum + fb_sum
    for module in dynamic_challenges.registry.values():
        points_col += module.get_points_query(Team)
    base_score = (select([points_col]).
                  where(Challenge.id == Submission.challenge_id).
                  where(Team.id == Submission.team_id).
                  where(~Challenge.dynamic).
                  where(Challenge.published).
                  correlate(Team))

    query = (Team.__table__.update().
             where(Team.active).
             values(base_score=base_score, bonus_score=bonus_score))
    connection.execute(query)
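# --- Illustration (invented two-table schema): the UPDATE-from-correlated-
# subquery shape used by update_score, reduced to SQLAlchemy Core. A scalar
# SELECT correlated on the team row computes each team's total, and COALESCE
# turns "no submissions" into 0 rather than NULL.
from sqlalchemy import (Column, Integer, MetaData, Table, create_engine,
                        func, select)

metadata = MetaData()
team = Table(
    'team', metadata,
    Column('id', Integer, primary_key=True),
    Column('base_score', Integer))
submission = Table(
    'submission', metadata,
    Column('team_id', Integer),
    Column('points', Integer))

engine = create_engine('sqlite://')
metadata.create_all(engine)

base_score = (
    select(func.coalesce(func.sum(submission.c.points), 0))
    .where(submission.c.team_id == team.c.id)
    .correlate(team)
    .scalar_subquery())

with engine.begin() as conn:
    conn.execute(team.insert().values(id=1, base_score=None))
    conn.execute(team.update().values(base_score=base_score))
    print(conn.execute(select(team.c.base_score)).scalar())  # 0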
def fetch_modified_rpms(self) -> ResultProxy:
    ri: RpmInfo = aliased(RpmInfo)
    rd: RpmDetail = aliased(RpmDetail)
    fd: FileDetail = aliased(FileDetail)
    rdl: RpmFileDetailLink = aliased(RpmFileDetailLink)
    s: System = aliased(System)
    sub_query = self._session.query(rd).join(
        s,
        (s.system_id == rd.system_id),
    ).join(
        rdl,
        (rdl.rpm_detail_id == rd.rpm_detail_id),
    ).outerjoin(
        fd,
        (rdl.file_detail_id == fd.file_detail_id)
        & (rd.digest == case(
            {
                32: fd.md5_digest,
                64: fd.sha256_digest,
            },
            value=func.length(coalesce(rd.digest, "")),
            else_=None,
        ))
    ).filter(
        (ri.rpm_info_id == rd.rpm_info_id)
        & (fd.file_detail_id == None)
        & (func.coalesce(rd.file_info, "") != "directory")
        & (~func.coalesce(rd.file_info, "").startswith("symbolic link"))
    )
    # fetch RpmInfo's for each RPM with a changed file
    query = self._session.query(ri).join(System).filter(
        sub_query.exists(),
    ).filter(
        System.system_id == self.system.system_id,
    )
    result: ResultProxy = query.all()
    return result
def main(self):
    running_builds = self.db.query(CoprRebuild)\
        .filter(CoprRebuild.state == Build.RUNNING)\
        .count()
    if running_builds >= get_config('copr.max_builds'):
        self.log.debug("{} running builds, not scheduling".format(running_builds))
        return

    last_index = self.db.query(
        func.coalesce(func.max(CoprRebuildRequest.scheduler_queue_index), 0)
    ).scalar() + 1
    request = self.db.query(CoprRebuildRequest)\
        .order_by(CoprRebuildRequest.scheduler_queue_index.nullsfirst())\
        .filter(CoprRebuildRequest.state == 'in progress')\
        .first()
    if not request:
        self.log.debug("No schedulable requests")
        return

    # move all requests of given user to the queue end
    self.db.query(CoprRebuildRequest)\
        .filter(CoprRebuildRequest.user_id == request.user_id)\
        .update({'scheduler_queue_index': last_index})

    build_count = self.db.query(CoprRebuild)\
        .filter(CoprRebuild.request_id == request.id)\
        .filter(CoprRebuild.copr_build_id != None)\
        .count()
    if build_count >= request.schedule_count:
        request.state = 'scheduled'
        self.db.commit()
        return

    rebuild = self.db.query(CoprRebuild)\
        .filter(CoprRebuild.copr_build_id == None)\
        .order_by(CoprRebuild.order)\
        .first()
    if not rebuild:
        request.state = 'scheduled'
        self.db.commit()
    else:
        self.schedule_rebuild(rebuild)
def revision_before_insert(mapper, connection, target):
    if target.script_id:
        scr_id = target.script_id
    elif target.script:
        scr_id = target.script.id
    else:
        scr_id = None
    if scr_id and not target.version and not target.draft:
        q = select(
            [func.coalesce(
                select([cast(Revision.version, Integer) + 1], limit=1).
                where(Revision.script_id == scr_id).
                group_by(Revision.id).
                having(Revision.id == func.max(Revision.id)).
                order_by(Revision.id.desc()).
                as_scalar(),
                1)])
        target.version = connection.scalar(q)
def get_fetcher_hash_ranges(db, uuid):
    """ordered by start_hash, ascending"""
    fh0 = aliased(db_models.FetcherHash)
    fh1 = aliased(db_models.FetcherHash)
    fh2 = aliased(db_models.FetcherHash)
    return (
        db.query(
            fh1.fetcher_uuid,
            fh1.fetcher_hash,
            func.coalesce(func.min(fh2.fetcher_hash),
                          func.min(fh0.fetcher_hash)),
        )
        .join(fh2, fh2.fetcher_hash > fh1.fetcher_hash, isouter=True)
        .filter(fh1.fetcher_uuid == uuid)
        .group_by(fh1.fetcher_uuid, fh1.fetcher_hash)
        .order_by(fh1.fetcher_hash)
    ).all()
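# --- Illustration (assumed single-table schema): the ring-range idea behind
# get_fetcher_hash_ranges. Each of a fetcher's hashes owns the range up to
# the next hash on the ring; COALESCE falls back from MIN(next larger hash)
# to the global minimum, wrapping the largest hash around. The bare fh0
# alias deliberately cross-joins to supply that global minimum (SQLAlchemy
# may warn about the cartesian product).
from sqlalchemy import Column, Integer, String, create_engine, func
from sqlalchemy.orm import Session, aliased, declarative_base

Base = declarative_base()


class FetcherHash(Base):
    __tablename__ = 'fetcher_hash'
    fetcher_uuid = Column(String, primary_key=True)
    fetcher_hash = Column(Integer, primary_key=True)


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    for uuid, h in [('a', 10), ('b', 20), ('a', 30)]:
        session.add(FetcherHash(fetcher_uuid=uuid, fetcher_hash=h))
    session.commit()

    fh0 = aliased(FetcherHash)  # cross join: supplies the global minimum
    fh1 = aliased(FetcherHash)  # the hash whose range we are computing
    fh2 = aliased(FetcherHash)  # candidates for "next hash on the ring"
    ranges = (
        session.query(
            fh1.fetcher_hash,
            func.coalesce(func.min(fh2.fetcher_hash),
                          func.min(fh0.fetcher_hash)))
        .join(fh2, fh2.fetcher_hash > fh1.fetcher_hash, isouter=True)
        .filter(fh1.fetcher_uuid == 'a')
        .group_by(fh1.fetcher_hash)
        .order_by(fh1.fetcher_hash))
    print(ranges.all())  # [(10, 20), (30, 10)] -- 30 wraps around to 10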
def scoreboard(self, format='html'):
    """
    A list of users ordered by their score. The score is computed by
    assigning every dataset a score (10 divided by no. of maintainers) and
    then adding that score up for all maintainers.

    This does give users who maintain a single dataset a higher score than
    those who are a part of a maintenance team, which is not really what we
    want (since that rewards single points of failure in the system). But
    this is an adequate initial score and this will only be accessible to
    administrators (who may be interested in finding these single points
    of failure).
    """
    # If user is not an administrator we abort
    if not (c.account and c.account.admin):
        abort(403, _("You are not authorized to view this page"))

    # Assign scores to each dataset based on number of maintainers
    score = db.session.query(Dataset.id,
                             (10 / func.count(Account.id)).label('sum'))
    score = score.join('managers').group_by(Dataset.id).subquery()

    # Order users based on their score which is the sum of the dataset
    # scores they maintain
    user_score = db.session.query(
        Account.name, Account.email,
        func.coalesce(func.sum(score.c.sum), 0).label('score'))
    user_score = user_score.outerjoin(Account.datasets).outerjoin(score)
    user_score = user_score.group_by(Account.name, Account.email)
    # We exclude the system user
    user_score = user_score.filter(Account.name != 'system')
    user_score = user_score.order_by(desc('score'))

    # Fetch all and assign to a context variable score and paginate them
    # We paginate 42 users per page, just because that's an awesome number
    scores = user_score.all()
    c.page = templating.Page(scores, items_per_page=42,
                             item_count=len(scores),
                             **request.params)
    return templating.render('account/scoreboard.html')
def getLanguoids(name=False,
                 iso=False,
                 namequerytype='part',
                 country=False,
                 multilingual=False,
                 inactive=False):
    """return an array of languoids responding to the specified criterion."""
    if not (name or iso or country):
        return []

    query = DBSession.query(Languoid)\
        .options(joinedload(Languoid.family))\
        .order_by(Languoid.name)

    if not inactive:
        query = query.filter(Language.active == True)

    if name:
        namequeryfilter = {
            "part": or_(
                func.lower(Identifier.name).contains(name.lower()),
                func.unaccent(Identifier.name).contains(func.unaccent(name))),
            "whole": or_(
                func.lower(Identifier.name) == name.lower(),
                func.unaccent(Identifier.name) == func.unaccent(name)),
        }[namequerytype if namequerytype == 'whole' else 'part']

        crit = [Identifier.type == 'name', namequeryfilter]
        if not multilingual:
            crit.append(
                func.coalesce(Identifier.lang, '').in_((u'', u'eng', u'en')))
        query = query.filter(Language.identifiers.any(and_(*crit)))
    elif country:
        return []  # pragma: no cover
    else:
        query = query.join(LanguageIdentifier, Identifier)\
            .filter(Identifier.name.contains(iso.lower()))\
            .filter(Identifier.type == IdentifierType.iso.value)
    return query
def getLanguoids(name=False,
                 iso=False,
                 namequerytype='part',
                 country=False,
                 multilingual=False,
                 inactive=False):
    """return an array of languoids responding to the specified criterion."""
    if not (name or iso or country):
        return []

    query = DBSession.query(Languoid)\
        .options(joinedload(Languoid.family))\
        .order_by(Languoid.name)

    if not inactive:
        query = query.filter(Language.active == True)

    if name:
        crit = [Identifier.type == 'name']
        ul_iname = func.unaccent(func.lower(Identifier.name))
        ul_name = func.unaccent(name.lower())
        if namequerytype == 'whole':
            crit.append(ul_iname == ul_name)
        else:
            crit.append(ul_iname.contains(ul_name))
        if not multilingual:
            crit.append(
                func.coalesce(Identifier.lang, '').in_((u'', u'eng', u'en')))
        crit = Language.identifiers.any(and_(*crit))
        query = query.filter(or_(icontains(Languoid.name, name), crit))
    elif country:
        return []  # pragma: no cover
    else:
        query = query.join(LanguageIdentifier, Identifier)\
            .filter(Identifier.type == IdentifierType.iso.value)\
            .filter(Identifier.name.contains(iso.lower()))
    return query
def getLanguoids(name=False,
                 iso=False,
                 namequerytype='part',
                 country=False,
                 multilingual=False,
                 inactive=False):
    """return an array of languoids responding to the specified criterion."""
    if not (name or iso or country):
        return []

    query = DBSession.query(Languoid)\
        .options(joinedload(Languoid.family))\
        .order_by(Languoid.name)

    if not inactive:
        query = query.filter(Language.active == True)

    if name:
        namequeryfilter = {
            "regex": func.lower(Identifier.name).like(name.lower()),
            "part": func.lower(Identifier.name).contains(name.lower()),
            "whole": func.lower(Identifier.name) == name.lower(),
        }[namequerytype if namequerytype in ('regex', 'whole') else 'part']

        crit = [Identifier.type == 'name', namequeryfilter]
        if not multilingual:
            crit.append(
                func.coalesce(Identifier.lang, '').in_((u'', u'eng', u'en')))
        query = query.filter(Language.identifiers.any(and_(*crit)))
    elif country:
        return []  # pragma: no cover
    else:
        query = query.join(LanguageIdentifier, Identifier)\
            .filter(Identifier.name.contains(iso.lower()))\
            .filter(Identifier.type == IdentifierType.iso.value)
    return query
def get_similarity_query(self, query):
    return (func.coalesce(func.similarity(self.column, query), 0)
            .label('first_name_rank') * self.rank)
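# --- Illustration (assumed model and weight): the ranking expression above,
# compiled on its own. func.similarity renders PostgreSQL's pg_trgm
# similarity(); COALESCE maps "no trigram match" (NULL) to 0 so the weighted
# rank stays numeric. Executing it requires Postgres with pg_trgm installed;
# here we only print the generated SQL.
from sqlalchemy import Column, String, func
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class Person(Base):
    __tablename__ = 'person'
    first_name = Column(String, primary_key=True)


rank_weight = 2.0  # stand-in for self.rank
expr = (func.coalesce(func.similarity(Person.first_name, 'jon'), 0)
        .label('first_name_rank') * rank_weight)
print(expr)  # renders coalesce(similarity(...), ...) * ... with bound params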
def generate_legs(keepto=None, maxtime=None, repair=False):
    """Record legs from stops and mobile activity found in device telemetry.

    keepto -- keep legs before this time, except last two or so for restart
    maxtime -- process device data up to this time
    repair -- re-evaluate and replace all changed legs"""

    now = datetime.datetime.now()
    if not keepto:
        keepto = now
    if not maxtime:
        maxtime = now

    print "generate_legs up to", maxtime

    dd = db.metadata.tables["device_data"]
    legs = db.metadata.tables["legs"]

    # Find first and last point sent from each device.
    devmax = select(
        [dd.c.device_id,
         func.min(dd.c.time).label("firstpoint"),
         func.max(dd.c.time).label("lastpoint")],
        dd.c.time < maxtime,
        group_by=dd.c.device_id).alias("devmax")

    # The last recorded leg transition may be to a phantom move that, given
    # more future context, will be merged into a preceding stop. Go back two
    # legs for the rewrite start point.
    # Due to activity summing window context and stabilization, and stop
    # entry/exit refinement, the first transition after starting the filter
    # process is not necessarily yet in sync with the previous run. Go back
    # another two legs to start the process.
    # (The window bounds expression is not supported until sqlalchemy 1.1 so
    # sneak it in in the order expression...)
    order = text("""time_start DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING""")
    rrlegs = select(
        [legs.c.device_id,
         func.nth_value(legs.c.time_start, 4)
             .over(partition_by=legs.c.device_id, order_by=order)
             .label("rewind"),
         func.nth_value(legs.c.time_start, 2)
             .over(partition_by=legs.c.device_id, order_by=order)
             .label("rewrite")],
        and_(legs.c.activity != None, legs.c.time_start <= keepto),
        distinct=True).alias("rrlegs")

    # Find end of processed legs, including terminator for each device.
    lastleg = select(
        [legs.c.device_id, func.max(legs.c.time_end).label("time_end")],
        legs.c.time_start < keepto,
        group_by=legs.c.device_id).alias("lastleg")

    # If trailing points exist, start from rewind leg, or first point
    starts = select(
        [devmax.c.device_id,
         func.coalesce(rrlegs.c.rewind, devmax.c.firstpoint),
         func.coalesce(rrlegs.c.rewrite, devmax.c.firstpoint)],
        or_(lastleg.c.time_end == None,
            devmax.c.lastpoint > lastleg.c.time_end),
        devmax
            .outerjoin(rrlegs, devmax.c.device_id == rrlegs.c.device_id)
            .outerjoin(lastleg, devmax.c.device_id == lastleg.c.device_id))

    # In repair mode, just start from the top.
    if repair:
        starts = select([
            devmax.c.device_id,
            devmax.c.firstpoint.label("rewind"),
            devmax.c.firstpoint.label("start")])

    starts = starts.order_by(devmax.c.device_id)

    for device, rewind, start in db.engine.execute(starts):
        query = select(
            [func.ST_AsGeoJSON(dd.c.coordinate).label("geojson"),
             dd.c.accuracy,
             dd.c.time,
             dd.c.device_id,
             dd.c.activity_1, dd.c.activity_1_conf,
             dd.c.activity_2, dd.c.activity_2_conf,
             dd.c.activity_3, dd.c.activity_3_conf],
            and_(dd.c.device_id == device,
                 dd.c.time >= rewind,
                 dd.c.time < maxtime),
            order_by=dd.c.time)
        points = db.engine.execute(query).fetchall()

        print "d"+str(device), "resume", str(start)[:19], \
            "rewind", str(rewind)[:19], str(len(points))+"p"

        filterer = DeviceDataFilterer()  # not very objecty rly
        lastend = None
        newlegs = filterer.generate_device_legs(points, start)

        for (prevleg, _), (leg, legmodes) in pairwise(
                chain([(None, None)], newlegs)):

            with db.engine.begin() as t:

                lastend = leg["time_end"]

                print " ".join([
                    "d"+str(device),
                    str(leg["time_start"])[:19],
                    str(leg["time_end"])[:19],
                    leg["activity"]]),

                # Adjust leg for db entry
                gj0 = leg.pop("geojson_start", None)
                gj1 = leg.pop("geojson_end", None)
                leg.update({
                    "device_id": device,
                    "coordinate_start": gj0 and func.ST_GeomFromGeoJSON(gj0),
                    "coordinate_end": gj1 and func.ST_GeomFromGeoJSON(gj1)})

                # Deal with overlapping legs on rewind/repair
                legid = t.execute(select(
                    [legs.c.id],
                    and_(*(legs.c[c] == leg[c] for c in leg.keys())))).scalar()
                if legid:
                    print "-> unchanged",
                else:
                    overlapstart = prevleg and prevleg["time_end"] or start
                    overlaps = [x[0] for x in t.execute(select(
                        [legs.c.id],
                        and_(legs.c.device_id == leg["device_id"],
                             legs.c.time_start < leg["time_end"],
                             legs.c.time_end > overlapstart),
                        order_by=legs.c.time_start))]
                    if overlaps:
                        legid, dels = overlaps[0], overlaps[1:]
                        t.execute(legs.update(legs.c.id == legid, leg))
                        print "-> update",
                        if dels:
                            t.execute(legs.delete(legs.c.id.in_(dels)))
                            print "-> delete %d" % len(dels)
                    else:
                        ins = legs.insert(leg).returning(legs.c.id)
                        legid = t.execute(ins).scalar()
                        print "-> insert",

                # Delete mismatching modes, add new modes
                modes = db.metadata.tables["modes"]
                exmodes = {x[0]: x[1:] for x in t.execute(select(
                    [modes.c.source, modes.c.mode, modes.c.line],
                    legs.c.id == legid,
                    legs.join(modes)))}
                for src in set(exmodes).union(legmodes):
                    ex, nu = exmodes.get(src), legmodes.get(src)
                    if nu == ex:
                        continue
                    if ex is not None:
                        print "-> del", src, ex,
                        t.execute(modes.delete(and_(
                            modes.c.leg == legid, modes.c.source == src)))
                    if nu is not None:
                        print "-> ins", src, nu,
                        t.execute(modes.insert().values(
                            leg=legid, source=src, mode=nu[0], line=nu[1]))

                print

        # Emit null activity terminator leg to mark trailing undecided
        # points, if any, to avoid unnecessary reprocessing on resume.
        rejects = [x for x in points if not lastend or x["time"] > lastend]
        if rejects:
            db.engine.execute(legs.delete(and_(
                legs.c.device_id == device,
                legs.c.time_start <= rejects[-1]["time"],
                legs.c.time_end >= rejects[0]["time"])))
            db.engine.execute(legs.insert({
                "device_id": device,
                "time_start": rejects[0]["time"],
                "time_end": rejects[-1]["time"],
                "activity": None}))

    # Attach device legs to users.
    devices = db.metadata.tables["devices"]

    # Real legs from devices with the owner added in, also when unattached
    owned = select(
        [devices.c.user_id.label("owner"),
         legs.c.id, legs.c.user_id, legs.c.time_start, legs.c.time_end],
        and_(legs.c.activity != None, legs.c.time_end < maxtime),
        devices.join(legs, devices.c.id == legs.c.device_id))
    detached = owned.where(legs.c.user_id.is_(None)).alias("detached")
    attached = owned.where(legs.c.user_id.isnot(None)).alias("attached")
    owned = owned.alias("owned")

    # Find most recently received leg attached per user
    maxattached = select(
        [attached.c.owner, func.max(attached.c.id).label("id")],
        group_by=attached.c.owner).alias("maxattached")

    # Find start of earliest unattached leg received later
    mindetached = select(
        [detached.c.owner,
         func.min(detached.c.time_start).label("time_start")],
        or_(maxattached.c.id.is_(None), detached.c.id > maxattached.c.id),
        detached.outerjoin(
            maxattached, detached.c.owner == maxattached.c.owner),
        group_by=detached.c.owner).alias("mindetached")

    # Find start of attached overlapping leg to make it visible to the process
    overattached = select(
        [attached.c.owner,
         func.min(attached.c.time_start).label("time_start")],
        from_obj=attached.join(mindetached, and_(
            attached.c.owner == mindetached.c.owner,
            attached.c.time_end > mindetached.c.time_start)),
        group_by=attached.c.owner).alias("overattached")

    # Find restart point
    starts = select(
        [mindetached.c.owner,
         func.least(mindetached.c.time_start, overattached.c.time_start)],
        from_obj=mindetached.outerjoin(
            overattached, mindetached.c.owner == overattached.c.owner))

    # In repair mode, just start from the top.
    if repair:
        starts = select(
            [owned.c.owner, func.min(owned.c.time_start)],
            group_by=owned.c.owner)

    for user, start in db.engine.execute(starts.order_by(column("owner"))):
        # Ignore the special legacy user linking userless data
        if user == 0:
            continue

        print "u"+str(user), "start attach", start

        # Get legs from user's devices in end time order, so shorter
        # legs get attached in favor of longer legs from a more idle device.
        s = select(
            [owned.c.id, owned.c.time_start, owned.c.time_end,
             owned.c.user_id],
            and_(owned.c.owner == user, owned.c.time_start >= start),
            order_by=owned.c.time_end)
        lastend = None
        for lid, lstart, lend, luser in db.engine.execute(s):
            print " ".join(["u"+str(user), str(lstart)[:19],
                            str(lend)[:19]]),
            if lastend and lstart < lastend:
                if luser is None:
                    print "-> detached"
                    continue
                db.engine.execute(legs.update(
                    legs.c.id == lid).values(user_id=None))  # detach
                print "-> detach"
                continue
            lastend = lend
            if luser == user:
                print "-> attached"
                continue
            db.engine.execute(legs.update(
                legs.c.id == lid).values(user_id=user))  # attach
            print "-> attach"

    # Cluster backlog in batches
    cluster_legs(1000)

    # Reverse geocode labels for places created or shifted by new legs
    label_places(60)
class Post(Base):
    __tablename__ = 'post'

    SAFETY_SAFE = 'safe'
    SAFETY_SKETCHY = 'sketchy'
    SAFETY_UNSAFE = 'unsafe'
    TYPE_IMAGE = 'image'
    TYPE_ANIMATION = 'animation'
    TYPE_VIDEO = 'video'
    TYPE_FLASH = 'flash'
    FLAG_LOOP = 'loop'

    # basic meta
    post_id = Column('id', Integer, primary_key=True)
    user_id = Column('user_id', Integer, ForeignKey('user.id'), index=True)
    creation_time = Column('creation_time', DateTime, nullable=False)
    last_edit_time = Column('last_edit_time', DateTime)
    safety = Column('safety', Unicode(32), nullable=False)
    source = Column('source', Unicode(200))
    flags = Column('flags', PickleType, default=None)

    # content description
    type = Column('type', Unicode(32), nullable=False)
    checksum = Column('checksum', Unicode(64), nullable=False)
    file_size = Column('file_size', Integer)
    canvas_width = Column('image_width', Integer)
    canvas_height = Column('image_height', Integer)
    mime_type = Column('mime-type', Unicode(32), nullable=False)

    # foreign tables
    user = relationship('User')
    tags = relationship('Tag', backref='posts', secondary='post_tag')
    relations = relationship(
        'Post',
        secondary='post_relation',
        primaryjoin=post_id == PostRelation.parent_id,
        secondaryjoin=post_id == PostRelation.child_id)
    features = relationship(
        'PostFeature', cascade='all, delete-orphan', lazy='joined')
    scores = relationship(
        'PostScore', cascade='all, delete-orphan', lazy='joined')
    favorited_by = relationship(
        'PostFavorite', cascade='all, delete-orphan', lazy='joined')
    notes = relationship(
        'PostNote', cascade='all, delete-orphan', lazy='joined')
    comments = relationship('Comment')

    # dynamic columns
    tag_count = column_property(
        select([func.count(PostTag.tag_id)])
        .where(PostTag.post_id == post_id)
        .correlate_except(PostTag))

    canvas_area = column_property(canvas_width * canvas_height)

    @property
    def is_featured(self):
        featured_post = object_session(self) \
            .query(PostFeature) \
            .order_by(PostFeature.time.desc()) \
            .first()
        return featured_post and featured_post.post_id == self.post_id

    score = column_property(
        select([func.coalesce(func.sum(PostScore.score), 0)])
        .where(PostScore.post_id == post_id)
        .correlate_except(PostScore))

    favorite_count = column_property(
        select([func.count(PostFavorite.post_id)])
        .where(PostFavorite.post_id == post_id)
        .correlate_except(PostFavorite))

    last_favorite_time = column_property(
        select([func.max(PostFavorite.time)])
        .where(PostFavorite.post_id == post_id)
        .correlate_except(PostFavorite))

    feature_count = column_property(
        select([func.count(PostFeature.post_id)])
        .where(PostFeature.post_id == post_id)
        .correlate_except(PostFeature))

    last_feature_time = column_property(
        select([func.max(PostFeature.time)])
        .where(PostFeature.post_id == post_id)
        .correlate_except(PostFeature))

    comment_count = column_property(
        select([func.count(Comment.post_id)])
        .where(Comment.post_id == post_id)
        .correlate_except(Comment))

    last_comment_creation_time = column_property(
        select([func.max(Comment.creation_time)])
        .where(Comment.post_id == post_id)
        .correlate_except(Comment))

    last_comment_edit_time = column_property(
        select([func.max(Comment.last_edit_time)])
        .where(Comment.post_id == post_id)
        .correlate_except(Comment))

    note_count = column_property(
        select([func.count(PostNote.post_id)])
        .where(PostNote.post_id == post_id)
        .correlate_except(PostNote))
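# --- Illustration (invented two-table schema): the score column_property
# above, reduced to a runnable sketch on current SQLAlchemy. A correlated
# scalar subquery sums per-post votes; COALESCE makes a post with no votes
# read as score 0 rather than None.
from sqlalchemy import Column, ForeignKey, Integer, create_engine, func, select
from sqlalchemy.orm import Session, column_property, declarative_base

Base = declarative_base()


class PostScore(Base):
    __tablename__ = 'post_score'
    id = Column(Integer, primary_key=True)
    post_id = Column(Integer, ForeignKey('post.id'))
    score = Column(Integer)


class Post(Base):
    __tablename__ = 'post'
    id = Column(Integer, primary_key=True)
    score = column_property(
        select(func.coalesce(func.sum(PostScore.score), 0))
        .where(PostScore.post_id == id)
        .correlate_except(PostScore)
        .scalar_subquery())


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add(Post(id=1))
    session.commit()
    print(session.query(Post.score).scalar())  # 0 -- no votes yet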
def visit_hierarchy(element, compiler, **kw):
    """visit compilation idiom for pgsql"""
    if compiler.dialect.server_version_info < supported_db['postgresql']:
        raise HierarchyLesserError(compiler.dialect.name,
                                   supported_db['postgresql'])
    else:
        if element.fk_type == String:
            element.fk_type = String(element.type_length)
            val = "a"
        else:
            element.fk_type = Integer
            val = "0"
        rec = _build_table_clause(element.select, 'rec', element.fk_type)
        # documentation used for pgsql >= 8.4.0
        #
        # * http://www.postgresql.org/docs/8.4/static/queries-with.html
        # * http://explainextended.com/2009/07/17/
        #   postgresql-8-4-preserving-order-for-hierarchical-query/
        #
        # build the first select
        sel1 = element.select
        # if the user wants to start from a given node, they pass the
        # starting_node option in the query
        if hasattr(element, 'starting_node') and \
                getattr(element, 'starting_node') is not False:
            sel1 = sel1.where(func.coalesce(
                # GregM: Inverted to child to walk up the tree
                literal_column(element.child, type_=String),
                literal(val, type_=String)) ==
                literal(element.starting_node, type_=String))
        # the same select submitted by the user plus a 1 as the first level
        # and an array with the current id
        sel1.append_column(literal_column('1', type_=Integer).label('level'))
        # GregM: Didn't need inverting as path building stays the same
        sel1.append_column(literal_column('ARRAY[%s]' % (element.child),
                                          type_=ARRAY(element.fk_type)).
                           label('connect_path'))
        # the non-recursive part of the with query must return false for the
        # first values
        sel1.append_column(literal_column("false", type_=Boolean).
                           label('cycle'))
        # build the second select
        # the same select as above plus the level column is summing a 1 for
        # each iteration on the same branch. We also append the current id to
        # the array of ids we're building as connect_path
        sel2 = element.select
        sel2.append_column(
            label('level', rec.c.level + literal_column("1", type_=Integer)))
        # GregM: Didn't need inverting as path building stays the same
        sel2.append_column(
            label('connect_path',
                  func.array_append(rec.c.connect_path,
                                    getattr(element.table.c, element.child))))
        # check if any member of connect_path has already been visited and
        # return true in that case, preventing an infinite loop (see where
        # section)
        # GregM: Inverted to parent to walk up the tree
        sel2.append_column(
            literal_column("%s=ANY(connect_path)" %
                           getattr(element.table.c, element.parent)).
            label('cycle'))
        # GregM: Inverted to child/parent to walk up the tree
        sel2 = sel2.where(
            and_(getattr(element.table.c, element.child) ==
                 getattr(rec.c, element.parent),
                 "not cycle"))
        # union_all the previous queries so we can wrap them in the
        # 'with recursive .. ()' idiom
        sel3 = sel1.union_all(sel2)
        # adding comparison in connect_path to build the is_leaf param
        new_sel = select([rec])
        # we know if a given node is a leaf by checking if the current
        # connect_path (an array of ids from the root to the present id) is
        # contained by the next row connect_path (we use the lead windowing
        # function for that). If it's contained it means the current id is
        # not a leaf, otherwise it is.
        new_sel.append_column(
            literal_column("case connect_path <@ lead(connect_path, 1) over "
                           "(order by connect_path) when true then false "
                           "else true end").label('is_leaf'))
        qry = "with recursive rec as (%s) %s order by connect_path" % \
            (compiler.process(sel3), new_sel)
        if element.return_leaf:
            qry = "SELECT tempqry.* FROM (%s) AS tempqry WHERE is_leaf = TRUE" % qry
        return qry
def get_list_meaning(self):
    return db.session.query(
        func.coalesce(Tag.meaning, 'null').label('meaning')).group_by(
            Tag.meaning).order_by(Tag.meaning).all()
def visit_hierarchy(element, compiler, **kw):
    """visit compilation idiom for pgsql"""
    if compiler.dialect.server_version_info < supported_db['postgresql']:
        raise HierarchyLesserError(compiler.dialect.name,
                                   supported_db['postgresql'])
    else:
        if element.fk_type == String:
            element.fk_type = String(element.type_length)
            val = "a"
        else:
            element.fk_type = Integer
            val = "0"
        ordering_colname = element.ordering_colname
        if not ordering_colname or ordering_colname not in element.table.c \
                and ordering_colname not in element.select.c:
            ordering_colname = None
        # FIXME: pass type of ordering column in following call if it's not
        # Integer
        is_ordering = ordering_colname and ordering_colname in \
            element.select.columns
        rec = _build_table_clause(element.select, 'rec', element.fk_type,
                                  ordering_colname if is_ordering else None)
        # documentation used for pgsql >= 8.4.0
        #
        # * http://www.postgresql.org/docs/8.4/static/queries-with.html
        # * http://explainextended.com/2009/07/17/
        #   postgresql-8-4-preserving-order-for-hierarchical-query/
        #
        # build the first select
        sel1 = element.select._clone()
        sel1._copy_internals()
        # if the user wants to start from a given node, they pass the
        # starting_node option in the query
        if hasattr(element, 'starting_node') and \
                getattr(element, 'starting_node') is not False:
            sel1 = sel1.where(func.coalesce(
                literal_column(element.parent, type_=String),
                literal(val, type_=String)) ==
                literal(element.starting_node, type_=String))
        # the same select submitted by the user plus a 1 as the first level
        # and an array with the current id
        sel1.append_column(literal_column('1', type_=Integer).label('level'))
        sel1.append_column(literal_column('ARRAY[%s]' % (element.child),
                                          type_=ARRAY(element.fk_type)).
                           label('connect_path'))
        if is_ordering:
            ordering_col = sel1.c.get(ordering_colname, None)
            if ordering_col is None:
                ordering_col = element.table.c[ordering_colname]
            sel1.append_column(
                literal_column('ARRAY[%s]' % (ordering_colname,),
                               type_=ARRAY(ordering_col.type)).
                label('%s_path' % (ordering_colname,)))
        # the non-recursive part of the with query must return false for the
        # first values
        sel1.append_column(literal_column("false", type_=Boolean).
                           label('cycle'))
        # build the second select
        # the same select as above plus the level column is summing a 1 for
        # each iteration on the same branch. We also append the current id to
        # the array of ids we're building as connect_path
        sel2 = element.select._clone()
        sel2._copy_internals()
        sel2.append_column(label('level',
                                 rec.c.level +
                                 literal_column("1", type_=Integer)))
        sel2.append_column(label('connect_path',
                                 func.array_append(
                                     rec.c.connect_path,
                                     getattr(element.table.c, element.child))))
        if is_ordering:
            sel2.append_column(
                label('%s_path' % (ordering_colname,),
                      func.array_append(
                          rec.c['%s_path' % (ordering_colname,)],
                          getattr(element.table.c, ordering_colname))))
        # check if any member of connect_path has already been visited and
        # return true in that case, preventing an infinite loop (see where
        # section)
        sel2.append_column(
            literal_column("%s=ANY(connect_path)" %
                           getattr(element.table.c, element.child)).
            label('cycle'))
        sel2 = sel2.where(and_(
            getattr(element.table.c, element.parent) ==
            getattr(rec.c, element.child),
            "not cycle"))
        # union_all the previous queries so we can wrap them in the
        # 'with recursive .. ()' idiom
        sel3 = sel1.union_all(sel2)
        # adding comparison in connect_path to build the is_leaf param
        new_sel = select([rec])
        # we know if a given node is a leaf by checking if the current
        # connect_path (an array of ids from the root to the present id) is
        # contained by the next row connect_path (we use the lead windowing
        # function for that). If it's contained it means the current id is
        # not a leaf, otherwise it is.
        new_sel.append_column(
            literal_column("case connect_path <@ lead(connect_path, 1) over "
                           "(order by connect_path) when true then false "
                           "else true end").label('is_leaf'))
        qry = "with recursive rec as (%s)\n%s\norder by %s_path" % \
            (compiler.process(sel3), new_sel,
             ordering_colname if is_ordering else 'connect')
        if kw.get('asfrom', False):
            qry = '(%s)' % qry
        return qry
def get_status_by_entity():
    result = {}
    # FIXME: use user.status_level instead of duplicating the levels scale
    q_users = (session.query(
        UserData.uid,
        UserData.corporation_id.label('corporation_id'),
        UserData.direction_id.label('direction_id'),
        func.sum(func.coalesce(PointData.nb_points, 0)).label('points'))
        .filter(or_(PointData.label == None,
                    PointData.label != u'GIFT_BOUGHT')))
    q_users = (q_users.filter(UserData.enabled == True)
               .group_by(UserData.corporation_id,
                         UserData.direction_id,
                         UserData.uid)
               .outerjoin(UserData.points)
               .subquery())

    q = (session.query(q_users.c.corporation_id,
                       q_users.c.direction_id,
                       func.count(q_users.c.uid).label('user_count'))
         .group_by(q_users.c.corporation_id, q_users.c.direction_id)
         .order_by(q_users.c.corporation_id, q_users.c.direction_id))

    q0 = q.filter(q_users.c.points == 0)
    for elt in q0:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level0=elt[2])
        result[(elt[0], elt[1])] = t

    q1 = q.filter(q_users.c.points > 0).filter(q_users.c.points <= 100)
    for elt in q1:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level1=elt[2])
        result[(elt[0], elt[1])] = t

    q2 = q.filter(q_users.c.points > 100).filter(q_users.c.points <= 200)
    for elt in q2:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level2=elt[2])
        result[(elt[0], elt[1])] = t

    q3 = q.filter(q_users.c.points > 200).filter(q_users.c.points <= 2000)
    for elt in q3:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level3=elt[2])
        result[(elt[0], elt[1])] = t

    q4 = q.filter(q_users.c.points > 2000).filter(q_users.c.points <= 10000)
    for elt in q4:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level4=elt[2])
        result[(elt[0], elt[1])] = t

    q5 = q.filter(q_users.c.points > 10000)
    for elt in q5:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level5=elt[2])
        result[(elt[0], elt[1])] = t

    return result
def recs_root_tree_query(cls, pid=0):
    """Init select condition of index.

    :return: the query of db.session.
    """
    lang = current_i18n.language
    if lang == 'ja':
        recursive_t = db.session.query(
            Index.parent.label("pid"),
            Index.id.label("cid"),
            func.cast(Index.id, db.Text).label("path"),
            func.coalesce(Index.index_name,
                          Index.index_name_english).label("name"),
            Index.position,
            Index.public_state,
            Index.public_date,
            Index.browsing_role,
            Index.contribute_role,
            Index.browsing_group,
            Index.contribute_group,
            Index.more_check,
            Index.display_no,
            Index.coverpage_state,
            Index.recursive_coverpage_check,
            literal_column("1", db.Integer).label("lev")).filter(
                Index.id == pid).cte(name="recursive_t", recursive=True)

        rec_alias = aliased(recursive_t, name="rec")
        test_alias = aliased(Index, name="t")
        recursive_t = recursive_t.union_all(
            db.session.query(
                test_alias.parent,
                test_alias.id,
                rec_alias.c.path + '/' + func.cast(test_alias.id, db.Text),
                func.coalesce(test_alias.index_name,
                              test_alias.index_name_english),
                test_alias.position,
                test_alias.public_state,
                test_alias.public_date,
                test_alias.browsing_role,
                test_alias.contribute_role,
                test_alias.browsing_group,
                test_alias.contribute_group,
                test_alias.more_check,
                test_alias.display_no,
                test_alias.coverpage_state,
                test_alias.recursive_coverpage_check,
                rec_alias.c.lev + 1).filter(
                    test_alias.parent == rec_alias.c.cid))
    else:
        recursive_t = db.session.query(
            Index.parent.label("pid"),
            Index.id.label("cid"),
            func.cast(Index.id, db.Text).label("path"),
            Index.index_name_english.label("name"),
            Index.position,
            Index.public_state,
            Index.public_date,
            Index.browsing_role,
            Index.contribute_role,
            Index.browsing_group,
            Index.contribute_group,
            Index.more_check,
            Index.display_no,
            Index.coverpage_state,
            Index.recursive_coverpage_check,
            literal_column("1", db.Integer).label("lev")).filter(
                Index.id == pid).cte(name="recursive_t", recursive=True)

        rec_alias = aliased(recursive_t, name="rec")
        test_alias = aliased(Index, name="t")
        recursive_t = recursive_t.union_all(
            db.session.query(
                test_alias.parent,
                test_alias.id,
                rec_alias.c.path + '/' + func.cast(test_alias.id, db.Text),
                test_alias.index_name_english,
                test_alias.position,
                test_alias.public_state,
                test_alias.public_date,
                test_alias.browsing_role,
                test_alias.contribute_role,
                test_alias.browsing_group,
                test_alias.contribute_group,
                test_alias.more_check,
                test_alias.display_no,
                test_alias.coverpage_state,
                test_alias.recursive_coverpage_check,
                rec_alias.c.lev + 1).filter(
                    test_alias.parent == rec_alias.c.cid))

    return recursive_t
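# --- Illustration (invented self-referential table): the recursive-CTE shape
# used by recs_root_tree_query, in miniature. The anchor member selects the
# root, the recursive member appends each child's id to a '/'-separated path,
# and COALESCE falls back to the English name when the localized one is NULL.
from sqlalchemy import (Column, Integer, String, Text, cast, create_engine,
                        func, select)
from sqlalchemy.orm import Session, aliased, declarative_base

Base = declarative_base()


class Node(Base):
    __tablename__ = 'node'
    id = Column(Integer, primary_key=True)
    parent = Column(Integer)
    index_name = Column(String)           # localized name, may be NULL
    index_name_english = Column(String)   # fallback name


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)

with Session(engine) as session:
    session.add_all([
        Node(id=1, parent=0, index_name=None, index_name_english='root'),
        Node(id=2, parent=1, index_name='子', index_name_english='child')])
    session.commit()

    tree = (
        select(Node.id.label('cid'),
               cast(Node.id, Text).label('path'),
               func.coalesce(Node.index_name,
                             Node.index_name_english).label('name'))
        .where(Node.parent == 0)
        .cte(name='recursive_t', recursive=True))
    child = aliased(Node, name='t')
    tree = tree.union_all(
        select(child.id,
               tree.c.path + '/' + cast(child.id, Text),
               func.coalesce(child.index_name, child.index_name_english))
        .where(child.parent == tree.c.cid))

    print(session.execute(select(tree)).all())
    # expected: [(1, '1', 'root'), (2, '1/2', '子')]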
def list_audits():
    page = get_valid_page_or_1()
    try:
        per_page = int(request.args.get(
            'per_page', current_app.config['DM_API_SERVICES_PAGE_SIZE']))
    except ValueError:
        abort(400, 'invalid page size supplied')

    earliest_for_each_object = convert_to_boolean(
        request.args.get('earliest_for_each_object'))

    if earliest_for_each_object:
        # the rest of the filters we add will be added against a subquery
        # which we will join back onto the main table to retrieve the rest of
        # the row. this allows the potentially expensive DISTINCT ON pass to
        # be performed against an absolutely minimal subset of rows which can
        # probably be pulled straight from an index
        audits = db.session.query(AuditEvent.id)
    else:
        audits = AuditEvent.query

    audit_date = request.args.get('audit-date', None)
    if audit_date:
        try:
            filter_test = compare_sql_datetime_with_string(
                AuditEvent.created_at, audit_date)
        except ValueError:
            abort(400, 'invalid audit date supplied')
        audits = audits.filter(filter_test)

    audit_type = request.args.get('audit-type')
    if audit_type:
        if AuditTypes.is_valid_audit_type(audit_type):
            audits = audits.filter(AuditEvent.type == audit_type)
        else:
            abort(400, "Invalid audit type")

    user = request.args.get('user')
    if user:
        audits = audits.filter(AuditEvent.user == user)

    # note in the following that even though the supplier and draft ids *are*
    # integers, we're doing the searches as strings because of the postgres
    # static type system's awkwardness with json types. we first let args.get
    # normalize them into actual integers though
    data_supplier_id = request.args.get('data-supplier-id', type=int)
    if data_supplier_id:
        # This filter relies on index `idx_audit_events_data_supplier_id`.
        # See `app.models.main` for its definition.
        audits = audits.filter(
            func.coalesce(
                AuditEvent.data['supplierId'].astext,
                AuditEvent.data['supplier_id'].astext,
            ) == str(data_supplier_id))

    data_draft_service_id = request.args.get('data-draft-service-id', type=int)
    if data_draft_service_id:
        # This filter relies on index `idx_audit_events_data_draft_id`.
        # See `app.models.main` for its definition.
        audits = audits.filter(
            AuditEvent.data['draftId'].astext == str(data_draft_service_id))

    acknowledged = request.args.get('acknowledged', None)
    if acknowledged and acknowledged != 'all':
        if is_valid_acknowledged_state(acknowledged):
            if convert_to_boolean(acknowledged):
                audits = audits.filter(AuditEvent.acknowledged == true())
            elif not convert_to_boolean(acknowledged):
                audits = audits.filter(AuditEvent.acknowledged == false())
        else:
            abort(400, 'invalid acknowledged state supplied')

    object_type = request.args.get('object-type')
    object_id = request.args.get('object-id')
    if object_type:
        if object_type not in AUDIT_OBJECT_TYPES:
            abort(400, 'invalid object-type supplied')

        ref_model = AUDIT_OBJECT_TYPES[object_type]
        ext_id_field = AUDIT_OBJECT_ID_FIELDS[object_type]

        audits = audits.filter(AuditEvent.object.is_type(ref_model))

        # "object_id" here is the *external* object_id
        if object_id:
            ref_object = ref_model.query.filter(
                ext_id_field == object_id).first()
            if ref_object is None:
                abort(404,
                      "Object with given object-type and object-id doesn't exist")

            # this `.identity_key_from_instance(...)[1][0]` is exactly the
            # method used by sqlalchemy_utils' generic relationship code to
            # extract an object's pk value, so *should* be relatively stable,
            # API-wise. the `[1]` is to select the pk's *value* rather than
            # the `Column` object and the `[0]` simply fetches the first of
            # any pk values - generic relationships are already assuming that
            # compound pks aren't in use by the target.
            ref_object_pk = class_mapper(ref_model).identity_key_from_instance(
                ref_object)[1][0]

            audits = audits.filter(AuditEvent.object_id == ref_object_pk)
    elif object_id:
        abort(400, 'object-id cannot be provided without object-type')

    if earliest_for_each_object:
        if not (acknowledged and convert_to_boolean(acknowledged) is False
                and audit_type == "update_service"
                and object_type == "services"):
            current_app.logger.warning(
                "earliest_for_each_object option currently intended for use on "
                "acknowledged update_service events. If use with any other "
                "events is to be regular, the scope of the corresponding "
                "partial index should be expanded to cover it.")

        # we need to join the built-up subquery back onto the AuditEvent
        # table to retrieve the rest of the row
        audits_subquery = audits.order_by(
            AuditEvent.object_type,
            AuditEvent.object_id,
            AuditEvent.created_at,
            AuditEvent.id,
        ).distinct(
            AuditEvent.object_type,
            AuditEvent.object_id,
        ).subquery()
        audits = AuditEvent.query.join(
            audits_subquery, audits_subquery.c.id == AuditEvent.id)

    sort_order = db.desc if convert_to_boolean(
        request.args.get('latest_first')) else db.asc
    audits = audits.order_by(sort_order(AuditEvent.created_at),
                             sort_order(AuditEvent.id))

    return paginated_result_response(
        result_name=RESOURCE_NAME,
        results_query=audits,
        page=page,
        per_page=per_page,
        endpoint='.list_audits',
        request_args=request.args
    ), 200
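# --- Illustration (assumed model): the dual-key JSON filter above, compiled
# on its own. Historical audit events stored the supplier id under either
# 'supplierId' or 'supplier_id', so COALESCE over the two ->> extractions
# matches whichever key is present. Compiled for the PostgreSQL dialect; the
# comparison is against a string for the reason given in the comment above.
from sqlalchemy import Column, Integer, func
from sqlalchemy.dialects import postgresql
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import declarative_base

Base = declarative_base()


class AuditEvent(Base):
    __tablename__ = 'audit_event'
    id = Column(Integer, primary_key=True)
    data = Column(JSONB)


criterion = func.coalesce(
    AuditEvent.data['supplierId'].astext,
    AuditEvent.data['supplier_id'].astext,
) == str(12345)
print(criterion.compile(dialect=postgresql.dialect()))
# coalesce(audit_event.data ->> %(data_1)s,
#          audit_event.data ->> %(data_2)s) = %(param_1)s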
def generate_legs(keepto=None, maxtime=None, repair=False):
    """Record legs from stops and mobile activity found in device telemetry.

    keepto -- keep legs before this time, except last two or so for restart
    maxtime -- process device data up to this time
    repair -- re-evaluate and replace all changed legs"""

    now = datetime.datetime.now()
    if not keepto:
        keepto = now
    if not maxtime:
        maxtime = now

    print("generate_legs up to", maxtime)

    dd = db.metadata.tables["device_data"]
    legs = db.metadata.tables["legs"]

    # Find first and last point sent from each device.
    devmax = select(
        [dd.c.device_id,
         func.min(dd.c.time).label("firstpoint"),
         func.max(dd.c.time).label("lastpoint")],
        dd.c.time < maxtime,
        group_by=dd.c.device_id).alias("devmax")

    # The last recorded leg transition may be to a phantom move that, given
    # more future context, will be merged into a preceding stop. Go back two
    # legs for the rewrite start point.
    # Due to activity summing window context and stabilization, and stop
    # entry/exit refinement, the first transition after starting the filter
    # process is not necessarily yet in sync with the previous run. Go back
    # another two legs to start the process.
    # (The window bounds expression is not supported until sqlalchemy 1.1 so
    # sneak it in in the order expression...)
    order = text("""time_start DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING""")
    rrlegs = select(
        [legs.c.device_id,
         func.nth_value(legs.c.time_start, 4)
             .over(partition_by=legs.c.device_id, order_by=order)
             .label("rewind"),
         func.nth_value(legs.c.time_start, 2)
             .over(partition_by=legs.c.device_id, order_by=order)
             .label("rewrite")],
        and_(legs.c.activity != None, legs.c.time_start <= keepto),
        distinct=True).alias("rrlegs")

    # Find end of processed legs, including terminator for each device.
    lastleg = select(
        [legs.c.device_id, func.max(legs.c.time_end).label("time_end")],
        legs.c.time_start < keepto,
        group_by=legs.c.device_id).alias("lastleg")

    # If trailing points exist, start from rewind leg, or first point
    starts = select(
        [devmax.c.device_id,
         func.coalesce(rrlegs.c.rewind, devmax.c.firstpoint),
         func.coalesce(rrlegs.c.rewrite, devmax.c.firstpoint)],
        or_(lastleg.c.time_end == None,
            devmax.c.lastpoint > lastleg.c.time_end),
        devmax
            .outerjoin(rrlegs, devmax.c.device_id == rrlegs.c.device_id)
            .outerjoin(lastleg, devmax.c.device_id == lastleg.c.device_id))

    # In repair mode, just start from the top.
    if repair:
        starts = select([
            devmax.c.device_id,
            devmax.c.firstpoint.label("rewind"),
            devmax.c.firstpoint.label("start")])

    starts = starts.order_by(devmax.c.device_id)

    for device, rewind, start in db.engine.execute(starts):
        query = select(
            [func.ST_AsGeoJSON(dd.c.coordinate).label("geojson"),
             dd.c.accuracy,
             dd.c.time,
             dd.c.device_id,
             dd.c.activity_1, dd.c.activity_1_conf,
             dd.c.activity_2, dd.c.activity_2_conf,
             dd.c.activity_3, dd.c.activity_3_conf],
            and_(
                dd.c.device_id == device,
                dd.c.time >= rewind,
                dd.c.time < maxtime),
            order_by=dd.c.time)
        points = db.engine.execute(query).fetchall()

        print("d"+str(device), "resume", str(start)[:19],
              "rewind", str(rewind)[:19], str(len(points))+"p")

        filterer = DeviceDataFilterer()  # not very objecty rly
        lastend = None
        newlegs = filterer.generate_device_legs(points, start)

        for (prevleg, _), (leg, legmodes) in pairwise(
                chain([(None, None)], newlegs)):

            with db.engine.begin() as t:

                lastend = leg["time_end"]

                print(" ".join([
                    "d"+str(device),
                    str(leg["time_start"])[:19],
                    str(leg["time_end"])[:19],
                    leg["activity"]]), end=' ')

                # Adjust leg for db entry
                gj0 = leg.pop("geojson_start", None)
                gj1 = leg.pop("geojson_end", None)
                leg.update({
                    "device_id": device,
                    "coordinate_start": gj0 and func.ST_GeomFromGeoJSON(gj0),
                    "coordinate_end": gj1 and func.ST_GeomFromGeoJSON(gj1)})

                # Deal with overlapping legs on rewind/repair
                legid = t.execute(select(
                    [legs.c.id],
                    and_(*(legs.c[c] == leg[c]
                           for c in list(leg.keys()))))).scalar()
                if legid:
                    print("-> unchanged", end=' ')
                else:
                    overlapstart = prevleg and prevleg["time_end"] or start
                    overlaps = [x[0] for x in t.execute(select(
                        [legs.c.id],
                        and_(
                            legs.c.device_id == leg["device_id"],
                            legs.c.time_start < leg["time_end"],
                            legs.c.time_end > overlapstart),
                        order_by=legs.c.time_start))]
                    if overlaps:
                        legid, dels = overlaps[0], overlaps[1:]
                        t.execute(legs.update(legs.c.id == legid, leg))
                        print("-> update", end=' ')
                        if dels:
                            t.execute(legs.delete(legs.c.id.in_(dels)))
                            print("-> delete %d" % len(dels))
                    else:
                        ins = legs.insert(leg).returning(legs.c.id)
                        legid = t.execute(ins).scalar()
                        print("-> insert", end=' ')

                # Delete mismatching modes, add new modes
                modes = db.metadata.tables["modes"]
                exmodes = {x[0]: x[1:] for x in t.execute(select(
                    [modes.c.source, modes.c.mode, modes.c.line],
                    legs.c.id == legid,
                    legs.join(modes)))}
                for src in set(exmodes).union(legmodes):
                    ex, nu = exmodes.get(src), legmodes.get(src)
                    if nu == ex:
                        continue
                    if ex is not None:
                        print("-> del", src, ex, end=' ')
                        t.execute(modes.delete(and_(
                            modes.c.leg == legid, modes.c.source == src)))
                    if nu is not None:
                        print("-> ins", src, nu, end=' ')
                        t.execute(modes.insert().values(
                            leg=legid, source=src, mode=nu[0], line=nu[1]))

                print()

        # Emit null activity terminator leg to mark trailing undecided
        # points, if any, to avoid unnecessary reprocessing on resume.
        rejects = [x for x in points if not lastend or x["time"] > lastend]
        if rejects:
            db.engine.execute(legs.delete(and_(
                legs.c.device_id == device,
                legs.c.time_start <= rejects[-1]["time"],
                legs.c.time_end >= rejects[0]["time"])))
            db.engine.execute(legs.insert({
                "device_id": device,
                "time_start": rejects[0]["time"],
                "time_end": rejects[-1]["time"],
                "activity": None}))

    # Attach device legs to users.
    devices = db.metadata.tables["devices"]

    # Real legs from devices with the owner added in, also when unattached
    owned = select(
        [devices.c.user_id.label("owner"),
         legs.c.id, legs.c.user_id, legs.c.time_start, legs.c.time_end],
        and_(legs.c.activity != None, legs.c.time_end < maxtime),
        devices.join(legs, devices.c.id == legs.c.device_id))
    detached = owned.where(legs.c.user_id.is_(None)).alias("detached")
    attached = owned.where(legs.c.user_id.isnot(None)).alias("attached")
    owned = owned.alias("owned")

    # Find most recently received leg attached per user
    maxattached = select(
        [attached.c.owner, func.max(attached.c.id).label("id")],
        group_by=attached.c.owner).alias("maxattached")

    # Find start of earliest unattached leg received later
    mindetached = select(
        [detached.c.owner,
         func.min(detached.c.time_start).label("time_start")],
        or_(maxattached.c.id.is_(None), detached.c.id > maxattached.c.id),
        detached.outerjoin(
            maxattached, detached.c.owner == maxattached.c.owner),
        group_by=detached.c.owner).alias("mindetached")

    # Find start of attached overlapping leg to make it visible to the process
    overattached = select(
        [attached.c.owner,
         func.min(attached.c.time_start).label("time_start")],
        from_obj=attached.join(mindetached, and_(
            attached.c.owner == mindetached.c.owner,
            attached.c.time_end > mindetached.c.time_start)),
        group_by=attached.c.owner).alias("overattached")

    # Find restart point
    starts = select(
        [mindetached.c.owner,
         func.least(mindetached.c.time_start, overattached.c.time_start)],
        from_obj=mindetached.outerjoin(
            overattached, mindetached.c.owner == overattached.c.owner))

    # In repair mode, just start from the top.
    if repair:
        starts = select(
            [owned.c.owner, func.min(owned.c.time_start)],
            group_by=owned.c.owner)

    for user, start in db.engine.execute(starts.order_by(column("owner"))):
        # Ignore the special legacy user linking userless data
        if user == 0:
            continue

        print("u"+str(user), "start attach", start)

        # Get legs from user's devices in end time order, so shorter
        # legs get attached in favor of longer legs from a more idle device.
        s = select(
            [owned.c.id, owned.c.time_start, owned.c.time_end,
             owned.c.user_id],
            and_(owned.c.owner == user, owned.c.time_start >= start),
            order_by=owned.c.time_end)
        lastend = None
        for lid, lstart, lend, luser in db.engine.execute(s):
            print(" ".join(["u"+str(user), str(lstart)[:19],
                            str(lend)[:19]]), end=' ')
            if lastend and lstart < lastend:
                if luser is None:
                    print("-> detached")
                    continue
                db.engine.execute(legs.update(
                    legs.c.id == lid).values(user_id=None))  # detach
                print("-> detach")
                continue
            lastend = lend
            if luser == user:
                print("-> attached")
                continue
            db.engine.execute(legs.update(
                legs.c.id == lid).values(user_id=user))  # attach
            print("-> attach")

    # Cluster backlog in batches
    cluster_legs(1000)

    # Reverse geocode labels for places created or shifted by new legs
    label_places(60)
def get_status_by_entity():
    result = {}
    # FIXME: use user.status_level instead of duplicating the levels scale
    q_users = (session.query(
        UserData.uid,
        UserData.corporation_id.label('corporation_id'),
        UserData.direction_id.label('direction_id'),
        func.sum(func.coalesce(PointData.nb_points, 0)).label('points')
    ).filter(or_(PointData.label == None,
                 PointData.label != u'GIFT_BOUGHT')))
    q_users = (
        q_users.filter(UserData.enabled == True)
        .group_by(UserData.corporation_id, UserData.direction_id,
                  UserData.uid)
        .outerjoin(UserData.points)
        .subquery())

    q = (session.query(q_users.c.corporation_id,
                       q_users.c.direction_id,
                       func.count(q_users.c.uid).label('user_count'))
         .group_by(q_users.c.corporation_id, q_users.c.direction_id)
         .order_by(q_users.c.corporation_id, q_users.c.direction_id))

    q0 = q.filter(q_users.c.points == 0)
    for elt in q0:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level0=elt[2])
        result[(elt[0], elt[1])] = t

    q1 = q.filter(q_users.c.points > 0).filter(q_users.c.points <= 100)
    for elt in q1:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level1=elt[2])
        result[(elt[0], elt[1])] = t

    q2 = q.filter(q_users.c.points > 100).filter(q_users.c.points <= 200)
    for elt in q2:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level2=elt[2])
        result[(elt[0], elt[1])] = t

    q3 = q.filter(q_users.c.points > 200).filter(q_users.c.points <= 2000)
    for elt in q3:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level3=elt[2])
        result[(elt[0], elt[1])] = t

    q4 = q.filter(q_users.c.points > 2000).filter(q_users.c.points <= 10000)
    for elt in q4:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level4=elt[2])
        result[(elt[0], elt[1])] = t

    q5 = q.filter(q_users.c.points > 10000)
    for elt in q5:
        t = result.get((elt[0], elt[1]), {})
        t.update(status_level5=elt[2])
        result[(elt[0], elt[1])] = t

    return result
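The six near-identical level queries above invite a data-driven loop. A minimal sketch, reusing the q and q_users objects built in get_status_by_entity() (the bounds are copied from the filters above; the helper name is hypothetical):

LEVEL_BOUNDS = [(None, 0), (0, 100), (100, 200), (200, 2000),
                (2000, 10000), (10000, None)]

def status_counts(q, q_users):
    result = {}
    for level, (lo, hi) in enumerate(LEVEL_BOUNDS):
        ql = q
        if lo is None:
            ql = ql.filter(q_users.c.points == 0)  # level 0: exactly zero
        else:
            ql = ql.filter(q_users.c.points > lo)
            if hi is not None:
                ql = ql.filter(q_users.c.points <= hi)
        for corporation_id, direction_id, user_count in ql:
            key = (corporation_id, direction_id)
            result.setdefault(key, {})['status_level%d' % level] = user_count
    return result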
def visit_hierarchy(element, compiler, **kw):
    """visit compilation idiom for pgsql"""
    if compiler.dialect.server_version_info < supported_db['postgresql']:
        raise HierarchyLesserError(compiler.dialect.name,
                                   supported_db['postgresql'])
    else:
        if element.fk_type == String:
            element.fk_type = String(element.type_length)
            val = "a"
        else:
            element.fk_type = Integer
            val = "0"
        ordering_colname = element.ordering_colname
        if not ordering_colname or \
                (ordering_colname not in element.table.c and
                 ordering_colname not in element.select.c):
            ordering_colname = None
        # FIXME: pass the type of the ordering column in the following call
        # if it's not Integer
        is_ordering = ordering_colname and \
            ordering_colname in element.select.columns
        rec = _build_table_clause(element.select, 'rec', element.fk_type,
                                  ordering_colname if is_ordering else None)
        # documentation used for pgsql >= 8.4.0
        #
        # * http://www.postgresql.org/docs/8.4/static/queries-with.html
        # * http://explainextended.com/2009/07/17/
        #   postgresql-8-4-preserving-order-for-hierarchical-query/
        #
        # build the first select
        sel1 = element.select._clone()
        sel1._copy_internals()
        # if the user wants to start from a given node, they pass the
        # starting_node option in the query
        if hasattr(element, 'starting_node') and \
                getattr(element, 'starting_node') is not False:
            sel1 = sel1.where(
                func.coalesce(
                    literal_column(element.parent, type_=String),
                    literal(val, type_=String)) ==
                literal(element.starting_node, type_=String))
        # the same select submitted by the user, plus a 1 as the first level
        # and an array with the current id
        sel1.append_column(literal_column('1', type_=Integer).label('level'))
        sel1.append_column(
            literal_column('ARRAY[%s]' % (element.child,),
                           type_=ARRAY(element.fk_type)).
            label('connect_path'))
        if is_ordering:
            ordering_col = sel1.c.get(ordering_colname, None)
            if ordering_col is None:
                ordering_col = element.table.c[ordering_colname]
            sel1.append_column(
                literal_column('ARRAY[%s]' % (ordering_colname,),
                               type_=ARRAY(ordering_col.type)).
                label('%s_path' % (ordering_colname,)))
        # the non-recursive part of the with query must return false for the
        # first values
        sel1.append_column(
            literal_column("false", type_=Boolean).label('cycle'))
        # build the second select
        # the same select as above, except the level column sums a 1 for
        # each iteration on the same branch. We also append the current id
        # to the array of ids we're building as connect_path
        sel2 = element.select._clone()
        sel2._copy_internals()
        sel2.append_column(label(
            'level', rec.c.level + literal_column("1", type_=Integer)))
        sel2.append_column(label(
            'connect_path',
            func.array_append(rec.c.connect_path,
                              getattr(element.table.c, element.child))))
        if is_ordering:
            sel2.append_column(label(
                '%s_path' % (ordering_colname,),
                func.array_append(rec.c['%s_path' % (ordering_colname,)],
                                  getattr(element.table.c,
                                          ordering_colname))))
        # check if any member of connect_path has already been visited and
        # return true in that case, preventing an infinite loop (see the
        # where clause)
        sel2.append_column(literal_column(
            "%s=ANY(connect_path)" % getattr(element.table.c, element.child)
        ).label('cycle'))
        sel2 = sel2.where(and_(
            getattr(element.table.c, element.parent) == getattr(
                rec.c, element.child),
            "not cycle"))
        # union_all the previous queries so we can wrap them in the
        # 'with recursive .. ()' idiom
        sel3 = sel1.union_all(sel2)
        # add a comparison on connect_path to build the is_leaf param
        new_sel = select([rec])
        # we know whether a given node is a leaf by checking if the current
        # connect_path (an array of ids from the root to the present id) is
        # contained by the next row's connect_path (we use the lead windowing
        # function for that). If it's contained, the current id is not a
        # leaf; otherwise it is.
        new_sel.append_column(
            literal_column("case connect_path <@ lead(connect_path, 1) over "
                           "(order by connect_path) when true then false "
                           "else true end").label('is_leaf'))
        qry = "with recursive rec as (%s)\n%s\norder by %s_path" % \
            (compiler.process(sel3), new_sel,
             ordering_colname if is_ordering else 'connect')
        if kw.get('asfrom', False):
            qry = '(%s)' % qry
        return qry
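For reference, a minimal sketch of the recursive-CTE shape the compiler above builds, written against a hypothetical nodes table and seeded from the roots rather than a starting_node: the non-recursive arm carries level 1, a one-element connect_path array, and cycle = false; the recursive arm increments the level, appends the child id, and flags a cycle when the id already appears in the path.

HIERARCHY_SQL = """
with recursive rec as (
    select id, parent, 1 as level,
           array[id] as connect_path,
           false as cycle
    from nodes
    where parent is null
    union all
    select n.id, n.parent, rec.level + 1,
           array_append(rec.connect_path, n.id),
           n.id = any(rec.connect_path)
    from nodes n
    join rec on n.parent = rec.id
    where not rec.cycle
)
select rec.*,
       case connect_path <@ lead(connect_path, 1) over (order by connect_path)
            when true then false else true end as is_leaf
from rec
order by connect_path
"""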
def _filter_atl(query):
    """
    Filter raw ATs (action types) by whether they can be created for the
    selected event type and by the existence of matching entries in the
    service price list.

    The final result must also include the internal nodes of the tree, so
    that the tree itself can be built in the interface. Therefore all
    possible intermediate nodes are first added to the filtered ATs, and
    the superfluous ones are then removed.
    """
    at_2 = aliased(ActionType, name='AT2')
    internal_nodes_q = db.session.query(
        ActionType.id.distinct().label('id'),
        ActionType.group_id).join(
            at_2, ActionType.id == at_2.group_id).filter(
                ActionType.deleted == 0, at_2.deleted == 0,
                ActionType.hidden == 0, at_2.hidden == 0,
                ActionType.class_ == at_class,
                at_2.class_ == at_class).subquery('AllInternalNodes')

    # 1) filter atl query by EventType_Action reference and include *all*
    # internal AT tree nodes in the result
    ats = query.outerjoin(
        internal_nodes_q, ActionType.id == internal_nodes_q.c.id).outerjoin(
        EventType_Action,
        db.and_(EventType_Action.actionType_id == ActionType.id,
                EventType_Action.eventType_id == event_type_id))

    # 2) filter atl query by contract tariffs if necessary
    need_price_list = EventType.query.get(
        event_type_id).createOnlyActionsWithinPriceList
    if contract_id and need_price_list:
        ats = ats.outerjoin(
            ActionType_Service,
            db.and_(
                ActionType.id == ActionType_Service.master_id,
                between(func.curdate(),
                        ActionType_Service.begDate,
                        func.coalesce(ActionType_Service.endDate,
                                      func.curdate())))
        ).outerjoin(
            ContractTariff,
            db.and_(
                ActionType_Service.service_id == ContractTariff.service_id,
                ContractTariff.master_id == contract_id,
                ContractTariff.eventType_id == event_type_id,
                ContractTariff.deleted == 0,
                between(func.curdate(),
                        ContractTariff.begDate,
                        ContractTariff.endDate))
        ).outerjoin(
            Contract,
            db.and_(Contract.id == ContractTariff.master_id,
                    Contract.deleted == 0)
        ).filter(
            db.or_(
                db.and_(EventType_Action.id != None,
                        ContractTariff.id != None,
                        Contract.id != None,
                        ActionType_Service.id != None),
                internal_nodes_q.c.id != None))
    else:
        # filter for 1)
        ats = ats.filter(
            db.or_(internal_nodes_q.c.id != None,
                   EventType_Action.id != None))

    # materialize: map() is a one-shot iterator on py3, but the result is
    # iterated more than once below
    result = list(map(schwing, ats))

    # remove unnecessary internal nodes
    all_internal_nodes = dict(
        (at_id, gid) for at_id, gid in db.session.query(internal_nodes_q))
    used_internal_nodes = set()
    for item in result:
        at_id = item[0]
        gid = item[4]
        if at_id not in all_internal_nodes and gid:
            used_internal_nodes.add(gid)
    while used_internal_nodes:
        at_id = used_internal_nodes.pop()
        if at_id in all_internal_nodes:
            used_internal_nodes.add(all_internal_nodes[at_id])
            del all_internal_nodes[at_id]
    exclude_ids = list(all_internal_nodes.keys())
    result = [item for item in result if item[0] not in exclude_ids]
    # and from the external reference
    for at_id in exclude_ids:
        del id_list[at_id]
    return result
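The pruning pass above reads more clearly on plain data. A minimal sketch with hypothetical ids: walk upward from the group ids of the kept leaves, and whatever internal node is never reached remains in the exclusion set.

all_internal_nodes = {1: None, 2: 1, 3: 1, 4: 3}  # node id -> group (parent) id
used_internal_nodes = {3}                         # groups of the kept leaves
while used_internal_nodes:
    at_id = used_internal_nodes.pop()
    if at_id in all_internal_nodes:
        # mark the parent reachable, drop this node from the exclusion set
        used_internal_nodes.add(all_internal_nodes[at_id])
        del all_internal_nodes[at_id]
print(sorted(all_internal_nodes))  # [2, 4]: never reached from a kept leaf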
def list_audits():
    page = get_valid_page_or_1()
    try:
        per_page = int(request.args.get(
            'per_page', current_app.config['DM_API_SERVICES_PAGE_SIZE']))
    except ValueError:
        abort(400, 'invalid page size supplied')

    earliest_for_each_object = convert_to_boolean(
        request.args.get('earliest_for_each_object'))

    if earliest_for_each_object:
        # the rest of the filters we add will be added against a subquery
        # which we will join back onto the main table to retrieve the rest
        # of the row. this allows the potentially expensive DISTINCT ON pass
        # to be performed against an absolutely minimal subset of rows which
        # can probably be pulled straight from an index
        audits = db.session.query(AuditEvent.id)
    else:
        audits = AuditEvent.query

    audit_date = request.args.get('audit-date', None)
    if audit_date:
        if is_valid_date(audit_date):
            audit_datetime = datetime.strptime(audit_date, DATE_FORMAT)
            audits = audits.filter(
                AuditEvent.created_at.between(
                    audit_datetime, audit_datetime + timedelta(days=1))
            )
        else:
            abort(400, 'invalid audit date supplied')

    audit_type = request.args.get('audit-type')
    if audit_type:
        if AuditTypes.is_valid_audit_type(audit_type):
            audits = audits.filter(AuditEvent.type == audit_type)
        else:
            abort(400, "Invalid audit type")

    user = request.args.get('user')
    if user:
        audits = audits.filter(AuditEvent.user == user)

    data_supplier_id = request.args.get('data-supplier-id')
    if data_supplier_id:
        # This filter relies on index `idx_audit_events_data_supplier_id`.
        # See `app.models.main` for its definition.
        audits = audits.filter(
            func.coalesce(
                AuditEvent.__table__.c.data['supplierId'].astext,
                AuditEvent.__table__.c.data['supplier_id'].astext,
            ) == data_supplier_id
        )

    acknowledged = request.args.get('acknowledged', None)
    if acknowledged and acknowledged != 'all':
        if is_valid_acknowledged_state(acknowledged):
            if convert_to_boolean(acknowledged):
                audits = audits.filter(AuditEvent.acknowledged == true())
            elif not convert_to_boolean(acknowledged):
                audits = audits.filter(AuditEvent.acknowledged == false())
        else:
            abort(400, 'invalid acknowledged state supplied')

    object_type = request.args.get('object-type')
    object_id = request.args.get('object-id')
    if object_type:
        if object_type not in AUDIT_OBJECT_TYPES:
            abort(400, 'invalid object-type supplied')

        ref_model = AUDIT_OBJECT_TYPES[object_type]
        ext_id_field = AUDIT_OBJECT_ID_FIELDS[object_type]

        audits = audits.filter(AuditEvent.object.is_type(ref_model))

        # "object_id" here is the *external* object_id
        if object_id:
            ref_object = ref_model.query.filter(
                ext_id_field == object_id
            ).first()

            if ref_object is None:
                abort(404,
                      "Object with given object-type and object-id doesn't exist")

            # this `.identity_key_from_instance(...)[1][0]` is exactly the
            # method used by sqlalchemy_utils' generic relationship code to
            # extract an object's pk value, so *should* be relatively stable,
            # API-wise. the `[1]` is to select the pk's *value* rather than
            # the `Column` object and the `[0]` simply fetches the first of
            # any pk values - generic relationships are already assuming that
            # compound pks aren't in use by the target.
            ref_object_pk = class_mapper(
                ref_model).identity_key_from_instance(ref_object)[1][0]

            audits = audits.filter(AuditEvent.object_id == ref_object_pk)
    elif object_id:
        abort(400, 'object-id cannot be provided without object-type')

    if earliest_for_each_object:
        if not (
            acknowledged
            and convert_to_boolean(acknowledged) is False
            and audit_type == "update_service"
            and object_type == "services"
        ):
            current_app.logger.warning(
                "earliest_for_each_object option currently intended for use "
                "on acknowledged update_service events. If use with any "
                "other events is to be regular, the scope of the "
                "corresponding partial index should be expanded to cover it."
            )

        # we need to join the built-up subquery back onto the AuditEvent
        # table to retrieve the rest of the row
        audits_subquery = audits.order_by(
            AuditEvent.object_type,
            AuditEvent.object_id,
            AuditEvent.created_at,
            AuditEvent.id,
        ).distinct(
            AuditEvent.object_type,
            AuditEvent.object_id,
        ).subquery()

        audits = AuditEvent.query.join(
            audits_subquery, audits_subquery.c.id == AuditEvent.id)

    sort_order = db.desc if convert_to_boolean(
        request.args.get('latest_first')) else db.asc
    audits = audits.order_by(
        sort_order(AuditEvent.created_at), sort_order(AuditEvent.id))

    return paginated_result_response(
        result_name=RESOURCE_NAME,
        results_query=audits,
        page=page,
        per_page=per_page,
        endpoint='.list_audits',
        request_args=request.args
    ), 200
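A minimal sketch (hypothetical audit_events table) of the SQL shape the earliest_for_each_object branch produces: Postgres DISTINCT ON keeps the first row per (object_type, object_id) group in created_at order, and joining the id-only subquery back onto the full table keeps the expensive pass narrow enough to be served from an index.

EARLIEST_SQL = """
select ae.*
from audit_events ae
join (
    select distinct on (object_type, object_id) id
    from audit_events
    order by object_type, object_id, created_at, id
) earliest on earliest.id = ae.id
"""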