def overlaps(lhs, rhs, min_overlap=None, rhs_start=None, rhs_end=None):
    """Returns term of ``lhs`` overlapping with ``rhs`` based on the start/end fields.

    :param lhs: record whose ``sa`` columns (``release``, ``chromosome``, ``bin``,
        ``start``, ``end``) are tested for overlap.
    :param rhs: record providing the reference interval; its ``sa.start`` / ``sa.end``
        are used unless overridden by ``rhs_start`` / ``rhs_end``.
    :param min_overlap: if given, additionally require a reciprocal overlap fraction
        strictly greater than this value on both intervals.
    :param rhs_start: optional override for the interval start (e.g. a plain integer).
    :param rhs_end: optional override for the interval end.
    :return: SQLAlchemy boolean clause usable in a WHERE condition.
    """
    if rhs_start is None:
        rhs_start = rhs.sa.start
    if rhs_end is None:
        rhs_end = rhs.sa.end
    if min_overlap is None:
        return and_(
            lhs.sa.release == rhs.sa.release,
            lhs.sa.chromosome == rhs.sa.chromosome,
            # Bin index lookup: restrict lhs rows to bins overlapping the rhs
            # interval (note the -1: overlapping_bins takes a 0-based start).
            lhs.sa.bin.in_(
                select([column("bin")]).select_from(func.overlapping_bins(rhs_start - 1, rhs_end))
            ),
            lhs.sa.end >= rhs_start,
            lhs.sa.start <= rhs_end,
        )
    else:
        # Length of the intersection of the two intervals (inclusive coordinates).
        term_overlap = func.least(lhs.sa.end, rhs_end) - func.greatest(lhs.sa.start, rhs_start) + 1
        return and_(
            lhs.sa.release == rhs.sa.release,
            lhs.sa.chromosome == rhs.sa.chromosome,
            # NOTE(review): asymmetric to the branch above -- here ``rhs.sa.bin``
            # is checked against bins overlapping the *lhs* interval instead of
            # ``lhs.sa.bin`` against the rhs interval; confirm this is intended.
            rhs.sa.bin.in_(
                select([column("bin")]).select_from(
                    func.overlapping_bins(lhs.sa.start - 1, lhs.sa.end)
                )
            ),
            lhs.sa.end >= rhs_start,
            lhs.sa.start <= rhs_end,
            # Reciprocal overlap: intersection must exceed ``min_overlap`` of both
            # interval lengths; greatest(..., 1) guards against division by zero.
            cast(term_overlap, Float) / func.greatest((rhs_end - rhs_start + 1), 1) > min_overlap,
            cast(term_overlap, Float) / func.greatest((lhs.sa.end - lhs.sa.start + 1), 1)
            > min_overlap,
        )
def best_matching_flags(sa_engine, case_id, sv_uuid, min_overlap=0.95):
    """Find best matching ``StructuralVariantFlags`` object for the given case and SV.

    Returns ``None`` if none could be found.
    """
    sv = StructuralVariant.objects.get(case_id=case_id, sv_uuid=sv_uuid)
    flags = StructuralVariantFlags.sa
    # Length of the intersection between the flags record and the SV (inclusive).
    overlap_len = func.least(flags.end, sv.end) - func.greatest(flags.start, sv.start) + 1
    # Overlap as a fraction of each interval's length, guarded against division by zero.
    sv_fraction = cast(overlap_len, Float) / func.greatest((sv.end - sv.start + 1), 1)
    flags_fraction = cast(overlap_len, Float) / func.greatest(
        (flags.end - flags.start + 1), 1
    )
    conditions = [
        flags.case_id == case_id,
        flags.release == sv.release,
        flags.chromosome == sv.chromosome,
        flags.bin.in_(
            select([column("bin")]).select_from(func.overlapping_bins(sv.start - 1, sv.end))
        ),
        flags.end >= sv.start,
        flags.start <= sv.end,
        flags.sv_type == sv.sv_type,
        sv_fraction > min_overlap,
        flags_fraction > min_overlap,
    ]
    query = (
        select(
            [
                flags.sodar_uuid.label("flags_uuid"),
                func.least(sv_fraction, flags_fraction).label("reciprocal_overlap"),
            ]
        )
        .select_from(flags)
        .where(and_(*conditions))
    )
    # Best match first: order by descending reciprocal overlap.
    return sa_engine.execute(query.order_by(query.c.reciprocal_overlap.desc()))
def __call__(self, var, minval=0, label=None):
    """Build a labelled expression for the increase of ``sum(var)`` over the
    previous row (per ``self.order_by``), floored at ``minval``.

    :param var: column name whose per-row sums are differenced.
    :param minval: lower bound applied via ``greatest``.
    :param label: result label; defaults to ``var``.
    """
    col_sum = func.sum(column(var))
    previous = func.lag(col_sum).over(order_by=self.order_by)
    return func.greatest(col_sum - previous, minval).label(label or var)
def __call__(self, var, minval=0, label=None):
    """Build a labelled expression for the next row's ``var`` minus the current
    row's ``var`` (ordered by ``self.order_by``, partitioned by
    ``self.base_columns``), floored at ``minval``.

    :param var: column name to difference against the following row.
    :param minval: lower bound applied via ``greatest``.
    :param label: result label; defaults to ``var``.
    """
    current = column(var)
    following = func.lead(current).over(
        order_by=self.order_by, partition_by=self.base_columns
    )
    return func.greatest(following - current, minval).label(label or var)
def process_update_recommendation_scores(payload):
    """Recompute and store ``User.recommendation_score`` for all users.

    Builds one large SQL expression combining profile completeness, references,
    activeness, verification and response-rate signals (plus a small random
    jitter), then issues a single UPDATE joining the per-signal subqueries.

    NOTE: ``payload`` is not used by this implementation (presumably part of a
    job-queue calling convention -- confirm against the dispatcher).
    """
    # Free-text profile fields considered for the "profile" signal.
    text_fields = [
        User.hometown,
        User.occupation,
        User.education,
        User.about_me,
        User.my_travels,
        User.things_i_like,
        User.about_place,
        User.additional_information,
        User.pet_details,
        User.kid_details,
        User.housemate_details,
        User.other_host_info,
        User.sleeping_details,
        User.area,
        User.house_rules,
    ]
    # Subset of fields describing the user's home specifically.
    home_fields = [User.about_place, User.other_host_info, User.sleeping_details, User.area, User.house_rules]

    def poor_man_gaussian():
        """Produces an approximately std normal random variate (sum of uniforms,
        centered and scaled via the CLT)."""
        trials = 5
        return (sum([func.random() for _ in range(trials)]) - trials / 2) / sqrt(trials / 12)

    def int_(stmt):
        # Cast a boolean/numeric SQL expression to Integer, NULL -> 0.
        return func.coalesce(cast(stmt, Integer), 0)

    def float_(stmt):
        # Cast a SQL expression to Float, NULL -> 0.0.
        return func.coalesce(cast(stmt, Float), 0.0)

    with session_scope() as session:
        # profile
        # Concatenate all text fields (SQL-side) and score by total length.
        profile_text = ""
        for field in text_fields:
            profile_text += func.coalesce(field, "")
        text_length = func.length(profile_text)
        home_text = ""
        for field in home_fields:
            home_text += func.coalesce(field, "")
        home_length = func.length(home_text)
        has_text = int_(text_length > 500)
        long_text = int_(text_length > 2000)
        has_pic = int_(User.avatar_key != None)
        can_host = int_(User.hosting_status == HostingStatus.can_host)
        cant_host = int_(User.hosting_status == HostingStatus.cant_host)
        # Home counts as "filled" only if last_minute is set AND home text is long enough.
        filled_home = int_(User.last_minute != None) * int_(home_length > 200)
        profile_points = 2 * has_text + 3 * long_text + 2 * has_pic + 3 * can_host + 2 * filled_home - 5 * cant_host

        # references
        # 1 for any user who has authored at least one reference (grouped by author).
        left_ref_expr = int_(1).label("left_reference")
        left_refs_subquery = (
            select(Reference.from_user_id.label("user_id"), left_ref_expr).group_by(Reference.from_user_id).subquery()
        )
        left_reference = int_(left_refs_subquery.c.left_reference)
        has_reference_expr = int_(func.count(Reference.id) >= 1).label("has_reference")
        ref_count_expr = int_(func.count(Reference.id)).label("ref_count")
        # Rescaled average rating; constants 1.4 / 0.3 recenter the rating scale.
        ref_avg_expr = func.avg(1.4 * (Reference.rating - 0.3)).label("ref_avg")
        has_multiple_types_expr = int_(func.count(distinct(Reference.reference_type)) >= 2).label("has_multiple_types")
        # A "bad" reference is very low-rated or flagged as not appropriate.
        has_bad_ref_expr = int_(func.sum(int_((Reference.rating <= 0.2) | (~Reference.was_appropriate))) >= 1).label(
            "has_bad_ref"
        )
        received_ref_subquery = (
            select(
                Reference.to_user_id.label("user_id"),
                has_reference_expr,
                has_multiple_types_expr,
                has_bad_ref_expr,
                ref_count_expr,
                ref_avg_expr,
            )
            .group_by(Reference.to_user_id)
            .subquery()
        )
        has_multiple_types = int_(received_ref_subquery.c.has_multiple_types)
        has_reference = int_(received_ref_subquery.c.has_reference)
        has_bad_reference = int_(received_ref_subquery.c.has_bad_ref)
        # Weight the average rating by reference count: first 5 references count
        # double, further ones count once.
        rating_score = float_(
            received_ref_subquery.c.ref_avg
            * (
                2 * func.least(received_ref_subquery.c.ref_count, 5)
                + func.greatest(received_ref_subquery.c.ref_count - 5, 0)
            )
        )
        ref_score = 2 * has_reference + has_multiple_types + left_reference - 5 * has_bad_reference + rating_score

        # activeness
        recently_active = int_(User.last_active >= now() - timedelta(days=180))
        very_recently_active = int_(User.last_active >= now() - timedelta(days=14))
        recently_messaged = int_(func.max(Message.time) > now() - timedelta(days=14))
        messaged_lots = int_(func.count(Message.id) > 5)
        messaging_points_subquery = (recently_messaged + messaged_lots).label("messaging_points")
        messaging_subquery = (
            select(Message.author_id.label("user_id"), messaging_points_subquery)
            .where(Message.message_type == MessageType.text)
            .group_by(Message.author_id)
            .subquery()
        )
        activeness_points = recently_active + 2 * very_recently_active + int_(messaging_subquery.c.messaging_points)

        # verification
        # NOTE(review): ``phone_verified`` is computed but never used below.
        phone_verified = int_(User.phone_is_verified)
        # For each user: the smallest parent node id among official clusters they admin.
        cb_subquery = (
            select(ClusterSubscription.user_id.label("user_id"), func.min(Cluster.parent_node_id).label("min_node_id"))
            .join(Cluster, Cluster.id == ClusterSubscription.cluster_id)
            .where(ClusterSubscription.role == ClusterRole.admin)
            .where(Cluster.is_official_cluster)
            .group_by(ClusterSubscription.user_id)
            .subquery()
        )
        min_node_id = cb_subquery.c.min_node_id
        cb = int_(min_node_id >= 1)
        # Users with id <= 2 get a large fixed bonus (presumably founders -- confirm).
        f = int_(User.id <= 2)
        wcb = int_(min_node_id == 1)
        verification_points = 0.0 + 100 * f + 10 * wcb + 5 * cb

        # response rate
        # t: time each host request conversation was created.
        t = (
            select(Message.conversation_id, Message.time)
            .where(Message.message_type == MessageType.chat_created)
            .subquery()
        )
        # s: first message time per (conversation, author) -- the response time source.
        s = (
            select(Message.conversation_id, Message.author_id, func.min(Message.time).label("time"))
            .group_by(Message.conversation_id, Message.author_id)
            .subquery()
        )
        hr_subquery = (
            select(
                HostRequest.host_user_id.label("user_id"),
                func.avg(s.c.time - t.c.time).label("avg_response_time"),
                func.count(t.c.time).label("received"),
                func.count(s.c.time).label("responded"),
                # Unanswered requests are treated as a 1000-day response time.
                float_(
                    extract(
                        "epoch",
                        percentile_disc(0.33).within_group(func.coalesce(s.c.time - t.c.time, timedelta(days=1000))),
                    )
                    / 60.0
                ).label("response_time_33p"),
                float_(
                    extract(
                        "epoch",
                        percentile_disc(0.66).within_group(func.coalesce(s.c.time - t.c.time, timedelta(days=1000))),
                    )
                    / 60.0
                ).label("response_time_66p"),
            )
            .join(t, t.c.conversation_id == HostRequest.conversation_id)
            .outerjoin(
                s, and_(s.c.conversation_id == HostRequest.conversation_id, s.c.author_id == HostRequest.host_user_id)
            )
            .group_by(HostRequest.host_user_id)
            .subquery()
        )
        avg_response_time = hr_subquery.c.avg_response_time
        # NOTE(review): ``avg_response_time_hr`` is computed but never used below.
        avg_response_time_hr = float_(extract("epoch", avg_response_time) / 60.0)
        received = hr_subquery.c.received
        responded = hr_subquery.c.responded
        response_time_33p = hr_subquery.c.response_time_33p
        response_time_66p = hr_subquery.c.response_time_66p
        response_rate = float_(responded / (1.0 * func.greatest(received, 1)))
        # be careful with nulls
        response_rate_points = -10 * int_(response_time_33p > 60 * 48.0) + 5 * int_(response_time_66p < 60 * 48.0)

        # Final score: sum of all signals plus random jitter (2 * ~N(0,1)).
        recommendation_score = (
            profile_points
            + ref_score
            + activeness_points
            + verification_points
            + response_rate_points
            + 2 * poor_man_gaussian()
        )

        scores = (
            select(User.id.label("user_id"), recommendation_score.label("score"))
            .outerjoin(messaging_subquery, messaging_subquery.c.user_id == User.id)
            .outerjoin(left_refs_subquery, left_refs_subquery.c.user_id == User.id)
            .outerjoin(received_ref_subquery, received_ref_subquery.c.user_id == User.id)
            .outerjoin(cb_subquery, cb_subquery.c.user_id == User.id)
            .outerjoin(hr_subquery, hr_subquery.c.user_id == User.id)
        ).subquery()

        # Single UPDATE joining the computed scores back onto the users table.
        session.execute(
            User.__table__.update().values(recommendation_score=scores.c.score).where(User.id == scores.c.user_id)
        )

        logger.info("Updated recommendation scores")
def get_query_column(self, entity):
    """Return a ``greatest(...)`` expression over this instance's configured
    ``entity_columns`` of ``entity``."""
    columns = [getattr(entity, name) for name in self.entity_columns]
    return func.greatest(*columns)
def query_work_day_stats(
    company_id,
    start_date=None,
    end_date=None,
    first=None,
    after=None,
    tzname="Europe/Paris",
):
    """Aggregate per-user, per-day work statistics for a company.

    Pipeline: raw activities (expanded to one row per local calendar day via
    ``generate_series``) -> per (user, day, mission) aggregation -> per
    (user, day) aggregation -> final projection with service/total durations.

    :param company_id: company whose missions are considered.
    :param start_date: optional inclusive lower bound (local dates).
    :param end_date: optional inclusive upper bound (local dates).
    :param first: page size; when set, results are truncated and a look-ahead
        detects whether more pages exist.
    :param after: cursor combining a datetime and a user id (exclusive bound).
    :param tzname: IANA timezone used to bucket activities into local days.
    :return: tuple ``(results, has_next_page)``.
    """
    tz = gettz(tzname)
    if after:
        # Cursor pagination: only fetch days at or before the cursor's date.
        max_time, user_id_ = parse_datetime_plus_id_cursor(after)
        max_date = max_time.date()
        end_date = min(max_date, end_date) if end_date else max_date
    # Stage 1: one row per (activity, expenditure, local day the activity spans).
    query = (
        Activity.query.join(Mission)
        .join(
            Expenditure,
            and_(
                Activity.user_id == Expenditure.user_id,
                Activity.mission_id == Expenditure.mission_id,
            ),
            isouter=True,
        )
        .with_entities(
            Activity.id,
            Activity.user_id,
            Activity.mission_id,
            Mission.name,
            Activity.start_time,
            Activity.end_time,
            Activity.type,
            Expenditure.id.label("expenditure_id"),
            Expenditure.type.label("expenditure_type"),
            # Series of local-day timestamps covering the activity's span;
            # open-ended activities run until now().
            func.generate_series(
                func.date_trunc(
                    "day",
                    func.timezone(
                        tzname,
                        func.timezone("UTC", Activity.start_time),
                    ),
                ),
                func.timezone(
                    tzname,
                    func.coalesce(
                        func.timezone("UTC", Activity.end_time),
                        func.now(),
                    ),
                ),
                "1 day",
            ).label("day"),
        )
        .filter(
            Mission.company_id == company_id,
            ~Activity.is_dismissed,
            # Exclude zero-length activities.
            Activity.start_time != Activity.end_time,
        )
    )
    query = _apply_time_range_filters(
        query,
        to_datetime(start_date, tz_for_date=tz),
        to_datetime(end_date, tz_for_date=tz, convert_dates_to_end_of_day_times=True),
    )
    has_next_page = False
    if first:
        # Over-fetch activities (a work day aggregates several) and look one
        # row ahead to detect a further page.
        activity_first = max(first * 5, 200)
        query = query.order_by(desc("day"), desc(Activity.user_id)).limit(activity_first + 1)
        has_next_page = query.count() > activity_first
    query = query.subquery()
    # Stage 2: aggregate per (user, day, mission), clipping each activity to
    # the local-day window [day, day + 1 day) expressed in UTC.
    query = (
        db.session.query(query)
        .group_by(query.c.user_id, query.c.day, query.c.mission_id, query.c.name)
        .with_entities(
            query.c.user_id.label("user_id"),
            query.c.day,
            func.timezone("UTC", func.timezone(tzname, query.c.day)).label("utc_day_start"),
            query.c.mission_id.label("mission_id"),
            query.c.name.label("mission_name"),
            func.min(
                func.greatest(
                    query.c.start_time,
                    func.timezone("UTC", func.timezone(tzname, query.c.day)),
                )
            ).label("start_time"),
            func.max(
                func.least(
                    func.timezone(
                        "UTC",
                        func.timezone(tzname, query.c.day + func.cast("1 day", Interval)),
                    ),
                    func.coalesce(query.c.end_time, func.now()),
                )
            ).label("end_time"),
            # Running = has an open activity today.
            func.bool_or(
                and_(
                    query.c.end_time.is_(None),
                    query.c.day == func.current_date(),
                )
            ).label("is_running"),
            # Per activity type: summed day-clipped duration in seconds.
            *[
                func.sum(
                    case(
                        [
                            (
                                query.c.type == a_type.value,
                                extract(
                                    "epoch",
                                    func.least(
                                        func.timezone(
                                            "UTC",
                                            func.timezone(
                                                tzname,
                                                query.c.day + func.cast("1 day", Interval),
                                            ),
                                        ),
                                        func.coalesce(query.c.end_time, func.now()),
                                    )
                                    - func.greatest(
                                        query.c.start_time,
                                        func.timezone(
                                            "UTC",
                                            func.timezone(tzname, query.c.day),
                                        ),
                                    ),
                                ),
                            )
                        ],
                        else_=0,
                    )
                ).label(f"{a_type.value}_duration")
                for a_type in ActivityType
            ],
            # Row-duplication factors introduced by the activity/expenditure
            # outer join; used below to undo the fan-out.
            func.greatest(func.count(distinct(query.c.expenditure_id)), 1).label("n_exp_dups"),
            func.count(distinct(query.c.id)).label("n_act_dups"),
            *[
                func.sum(
                    case(
                        [(query.c.expenditure_type == e_type.value, 1)],
                        else_=0,
                    )
                ).label(f"n_{e_type.value}_expenditures")
                for e_type in ExpenditureType
            ],
        )
        .subquery()
    )
    # Stage 3: aggregate per (user, day) across missions, correcting durations
    # and expenditure counts for the join fan-out counted above.
    query = (
        db.session.query(query)
        .group_by(query.c.user_id, query.c.day)
        .with_entities(
            query.c.user_id.label("user_id"),
            query.c.day,
            func.array_agg(distinct(query.c.mission_name)).label("mission_names"),
            func.min(query.c.start_time).label("start_time"),
            func.max(query.c.end_time).label("end_time"),
            func.bool_or(query.c.is_running).label("is_running"),
            *[
                func.sum(
                    getattr(query.c, f"{a_type.value}_duration") / query.c.n_exp_dups
                )
                .cast(Integer)
                .label(f"{a_type.value}_duration")
                for a_type in ActivityType
            ],
            *[
                func.sum(
                    getattr(query.c, f"n_{e_type.value}_expenditures") / query.c.n_act_dups
                )
                .cast(Integer)
                .label(f"n_{e_type.value}_expenditures")
                for e_type in ExpenditureType
            ],
        )
        .order_by(desc("day"), desc("user_id"))
        .subquery()
    )
    # Final projection: add total service span and summed work duration.
    query = db.session.query(query).with_entities(
        *query.c,
        extract("epoch", query.c.end_time - query.c.start_time).label("service_duration"),
        reduce(
            lambda a, b: a + b,
            [getattr(query.c, f"{a_type.value}_duration") for a_type in ActivityType],
        ).label("total_work_duration"),
    )
    results = query.all()
    if after:
        # Apply the exclusive cursor bound (day desc, then user id desc).
        results = [
            r
            for r in results
            if r.day.date() < max_date or (r.day.date() == max_date and r.user_id < user_id_)
        ]
    if first:
        if has_next_page:
            # The last work day may be incomplete because we didn't fetch all the activities => remove it
            results = results[:-1]
        if len(results) > first:
            results = results[:first]
            has_next_page = True
    return results, has_next_page
class Stock(PcObject, Model, ProvidableMixin, SoftDeletableMixin, VersionedMixin):
    """A stock of an offer: price, availability window and remaining quantity."""

    # We redefine this so we can reference it in the baseScore column_property
    id = Column(BigInteger, primary_key=True, autoincrement=True)

    # Last modification time; also used to decide which used bookings still
    # count against the remaining quantity.
    dateModified = Column(DateTime, nullable=False, default=datetime.utcnow)

    beginningDatetime = Column(DateTime, index=True, nullable=True)

    # End must come after the beginning (enforced at the database level).
    endDatetime = Column(
        DateTime,
        CheckConstraint('"endDatetime" > "beginningDatetime"',
                        name='check_end_datetime_is_after_beginning_datetime'),
        nullable=True)

    offerId = Column(BigInteger, ForeignKey('offer.id'), index=True, nullable=False)

    offer = relationship('Offer', foreign_keys=[offerId], backref='stocks')

    price = Column(Numeric(10, 2),
                   CheckConstraint('price >= 0', name='check_price_is_not_negative'),
                   nullable=False)

    # Total quantity offered; NULL means unlimited.
    available = Column(Integer, index=True, nullable=True)

    # Computed column: available minus quantities of active bookings (not
    # cancelled/unused, or used after the stock was last modified), floored at 0.
    remainingQuantity = column_property(
        select([
            func.greatest(
                available - func.coalesce(func.sum(Booking.quantity), 0),
                0)
        ]).where(
            and_(
                Booking.stockId == id,
                or_(
                    and_(Booking.isUsed == False, Booking.isCancelled == False),
                    and_(Booking.isUsed == True, Booking.dateUsed > dateModified)))))

    groupSize = Column(Integer, nullable=False, default=1)

    bookingLimitDatetime = Column(DateTime, nullable=True)

    bookingRecapSent = Column(DateTime, nullable=True)

    def errors(self):
        """Validate the stock, extending the parent's errors with checks on
        ``available`` (non-negative) and datetime ordering."""
        api_errors = super(Stock, self).errors()
        if self.available is not None and self.available < 0:
            api_errors.add_error('available', 'Le stock doit être positif')
        if self.endDatetime \
                and self.beginningDatetime \
                and self.endDatetime <= self.beginningDatetime:
            api_errors.add_error(
                'endDatetime',
                'La date de fin de l\'événement doit être postérieure à la date de début'
            )
        return api_errors

    @property
    def isBookable(self):
        # Bookable while there is no booking limit or the limit is still in the future.
        return self.bookingLimitDatetime is None \
               or self.bookingLimitDatetime >= datetime.utcnow()

    @property
    def resolvedOffer(self):
        # Fall back to the event occurrence's offer when no direct offer is set.
        return self.offer or self.eventOccurrence.offer

    @classmethod
    def queryNotSoftDeleted(cls):
        """Query over stocks that have not been soft-deleted."""
        return Stock.query.filter_by(isSoftDeleted=False)

    @staticmethod
    def restize_internal_error(ie):
        """Translate a database integrity error into an API error payload.

        Maps known ``check_stock`` trigger messages to field-specific errors;
        anything else is logged and delegated to the parent implementation.
        """
        if 'check_stock' in str(ie.orig):
            if 'available_too_low' in str(ie.orig):
                return [
                    'available',
                    'la quantité pour cette offre'
                    + ' ne peut pas être inférieure'
                    + ' au nombre de réservations existantes.'
                ]
            elif 'bookingLimitDatetime_too_late' in str(ie.orig):
                return [
                    'bookingLimitDatetime',
                    'La date limite de réservation pour cette offre est postérieure à la date de début de l\'évènement'
                ]
        else:
            logger.error("Unexpected error in patch stocks: " + pformat(ie))
        return PcObject.restize_internal_error(ie)
def GetResponseRate(self, request, context):
    """Return the host-request response-rate bucket (and coarsened percentile
    response times) for the requested user.

    Aborts with NOT_FOUND when the user is not visible/does not exist; returns
    ``insufficient_data`` for users with fewer than 3 received requests.
    """
    with session_scope() as session:
        # this subquery gets the time that the request was sent
        t = (select(Message.conversation_id, Message.time).where(
            Message.message_type == MessageType.chat_created).subquery())
        # this subquery gets the time that the user responded to the request
        s = (select(Message.conversation_id, func.min(Message.time).label("time")).where(
            Message.author_id == request.user_id).group_by(
            Message.conversation_id).subquery())
        res = session.execute(
            select(
                User.id,
                # number of requests received
                func.count().label("n"),
                # percentage of requests responded to
                # (greatest(..., 1.0) guards against division by zero)
                (func.count(s.c.time) /
                 func.cast(func.greatest(func.count(t.c.time), 1.0), Float)
                 ).label("response_rate"),
                # the 33rd percentile response time
                # (unanswered requests count as a 1000-day response time)
                percentile_disc(0.33).within_group(
                    func.coalesce(s.c.time - t.c.time, timedelta(days=1000))
                ).label("response_time_p33"),
                # the 66th percentile response time
                percentile_disc(0.66).within_group(
                    func.coalesce(s.c.time - t.c.time, timedelta(days=1000))
                ).label("response_time_p66"),
            ).where_users_visible(context).where(
                User.id == request.user_id).outerjoin(
                HostRequest, HostRequest.host_user_id == User.id).outerjoin(
                t, t.c.conversation_id == HostRequest.conversation_id).outerjoin(
                s, s.c.conversation_id == HostRequest.conversation_id).group_by(
                User.id)).one_or_none()

        if not res:
            context.abort(grpc.StatusCode.NOT_FOUND, errors.USER_NOT_FOUND)

        _, n, response_rate, response_time_p33, response_time_p66 = res

        # Too few requests to say anything meaningful.
        if n < 3:
            return requests_pb2.GetResponseRateRes(
                insufficient_data=requests_pb2.ResponseRateInsufficientData(),
            )

        if response_rate <= 0.33:
            return requests_pb2.GetResponseRateRes(
                low=requests_pb2.ResponseRateLow(),
            )

        # Coarsen percentile times to whole minutes before exposing them.
        response_time_p33_coarsened = Duration_from_timedelta(
            timedelta(
                seconds=round(response_time_p33.total_seconds() / 60) * 60))

        if response_rate <= 0.66:
            return requests_pb2.GetResponseRateRes(
                some=requests_pb2.ResponseRateSome(
                    response_time_p33=response_time_p33_coarsened),
            )

        response_time_p66_coarsened = Duration_from_timedelta(
            timedelta(
                seconds=round(response_time_p66.total_seconds() / 60) * 60))

        if response_rate <= 0.90:
            return requests_pb2.GetResponseRateRes(
                most=requests_pb2.ResponseRateMost(
                    response_time_p33=response_time_p33_coarsened,
                    response_time_p66=response_time_p66_coarsened),
            )
        else:
            return requests_pb2.GetResponseRateRes(
                almost_all=requests_pb2.ResponseRateAlmostAll(
                    response_time_p33=response_time_p33_coarsened,
                    response_time_p66=response_time_p66_coarsened),
            )