Esempio n. 1
0
def overlaps(lhs, rhs, min_overlap=None, rhs_start=None, rhs_end=None):
    """Returns term of ``lhs`` overlapping with ``rhs`` based on the start/end fields."""
    if rhs_start is None:
        rhs_start = rhs.sa.start
    if rhs_end is None:
        rhs_end = rhs.sa.end
    if min_overlap is None:
        return and_(
            lhs.sa.release == rhs.sa.release,
            lhs.sa.chromosome == rhs.sa.chromosome,
            lhs.sa.bin.in_(
                select([column("bin")]).select_from(func.overlapping_bins(rhs_start - 1, rhs_end))
            ),
            lhs.sa.end >= rhs_start,
            lhs.sa.start <= rhs_end,
        )
    else:
        term_overlap = func.least(lhs.sa.end, rhs_end) - func.greatest(lhs.sa.start, rhs_start) + 1
        return and_(
            lhs.sa.release == rhs.sa.release,
            lhs.sa.chromosome == rhs.sa.chromosome,
            rhs.sa.bin.in_(
                select([column("bin")]).select_from(
                    func.overlapping_bins(lhs.sa.start - 1, lhs.sa.end)
                )
            ),
            lhs.sa.end >= rhs_start,
            lhs.sa.start <= rhs_end,
            cast(term_overlap, Float) / func.greatest((rhs_end - rhs_start + 1), 1) > min_overlap,
            cast(term_overlap, Float) / func.greatest((lhs.sa.end - lhs.sa.start + 1), 1)
            > min_overlap,
        )
Esempio n. 2
0
def best_matching_flags(sa_engine, case_id, sv_uuid, min_overlap=0.95):
    """Find best matching ``StructuralVariantFlags`` object for the given case and SV.

    Returns ``None`` if none could be found.
    """
    sv = StructuralVariant.objects.get(case_id=case_id, sv_uuid=sv_uuid)
    term_overlap = (
        func.least(StructuralVariantFlags.sa.end, sv.end)
        - func.greatest(StructuralVariantFlags.sa.start, sv.start)
        + 1
    )
    query = (
        select(
            [
                StructuralVariantFlags.sa.sodar_uuid.label("flags_uuid"),
                func.least(
                    cast(term_overlap, Float) / func.greatest((sv.end - sv.start + 1), 1),
                    cast(term_overlap, Float)
                    / func.greatest(
                        (StructuralVariantFlags.sa.end - StructuralVariantFlags.sa.start + 1), 1
                    ),
                ).label("reciprocal_overlap"),
            ]
        )
        .select_from(StructuralVariantFlags.sa)
        .where(
            and_(
                StructuralVariantFlags.sa.case_id == case_id,
                StructuralVariantFlags.sa.release == sv.release,
                StructuralVariantFlags.sa.chromosome == sv.chromosome,
                StructuralVariantFlags.sa.bin.in_(
                    select([column("bin")]).select_from(func.overlapping_bins(sv.start - 1, sv.end))
                ),
                StructuralVariantFlags.sa.end >= sv.start,
                StructuralVariantFlags.sa.start <= sv.end,
                StructuralVariantFlags.sa.sv_type == sv.sv_type,
                cast(term_overlap, Float) / func.greatest((sv.end - sv.start + 1), 1) > min_overlap,
                cast(term_overlap, Float)
                / func.greatest(
                    (StructuralVariantFlags.sa.end - StructuralVariantFlags.sa.start + 1), 1
                )
                > min_overlap,
            )
        )
    )
    return sa_engine.execute(query.order_by(query.c.reciprocal_overlap.desc()))
Esempio n. 3
0
 def __call__(self, var, minval=0, label=None):
     label = label or var
     return func.greatest(
         func.sum(column(var)) -
         func.lag(func.sum(column(var)))
         .over(order_by=self.order_by),
         minval
     ).label(label)
Esempio n. 4
0
 def __call__(self, var, minval=0, label=None):
     label = label or var
     return func.greatest(
         func.lead(column(var))
         .over(order_by=self.order_by,
               partition_by=self.base_columns)
         - column(var),
         minval).label(label)
Esempio n. 5
0
def process_update_recommendation_scores(payload):
    text_fields = [
        User.hometown,
        User.occupation,
        User.education,
        User.about_me,
        User.my_travels,
        User.things_i_like,
        User.about_place,
        User.additional_information,
        User.pet_details,
        User.kid_details,
        User.housemate_details,
        User.other_host_info,
        User.sleeping_details,
        User.area,
        User.house_rules,
    ]
    home_fields = [User.about_place, User.other_host_info, User.sleeping_details, User.area, User.house_rules]

    def poor_man_gaussian():
        """
        Produces an approximatley std normal random variate
        """
        trials = 5
        return (sum([func.random() for _ in range(trials)]) - trials / 2) / sqrt(trials / 12)

    def int_(stmt):
        return func.coalesce(cast(stmt, Integer), 0)

    def float_(stmt):
        return func.coalesce(cast(stmt, Float), 0.0)

    with session_scope() as session:
        # profile
        profile_text = ""
        for field in text_fields:
            profile_text += func.coalesce(field, "")
        text_length = func.length(profile_text)
        home_text = ""
        for field in home_fields:
            home_text += func.coalesce(field, "")
        home_length = func.length(home_text)

        has_text = int_(text_length > 500)
        long_text = int_(text_length > 2000)
        has_pic = int_(User.avatar_key != None)
        can_host = int_(User.hosting_status == HostingStatus.can_host)
        cant_host = int_(User.hosting_status == HostingStatus.cant_host)
        filled_home = int_(User.last_minute != None) * int_(home_length > 200)
        profile_points = 2 * has_text + 3 * long_text + 2 * has_pic + 3 * can_host + 2 * filled_home - 5 * cant_host

        # references
        left_ref_expr = int_(1).label("left_reference")
        left_refs_subquery = (
            select(Reference.from_user_id.label("user_id"), left_ref_expr).group_by(Reference.from_user_id).subquery()
        )
        left_reference = int_(left_refs_subquery.c.left_reference)
        has_reference_expr = int_(func.count(Reference.id) >= 1).label("has_reference")
        ref_count_expr = int_(func.count(Reference.id)).label("ref_count")
        ref_avg_expr = func.avg(1.4 * (Reference.rating - 0.3)).label("ref_avg")
        has_multiple_types_expr = int_(func.count(distinct(Reference.reference_type)) >= 2).label("has_multiple_types")
        has_bad_ref_expr = int_(func.sum(int_((Reference.rating <= 0.2) | (~Reference.was_appropriate))) >= 1).label(
            "has_bad_ref"
        )
        received_ref_subquery = (
            select(
                Reference.to_user_id.label("user_id"),
                has_reference_expr,
                has_multiple_types_expr,
                has_bad_ref_expr,
                ref_count_expr,
                ref_avg_expr,
            )
            .group_by(Reference.to_user_id)
            .subquery()
        )
        has_multiple_types = int_(received_ref_subquery.c.has_multiple_types)
        has_reference = int_(received_ref_subquery.c.has_reference)
        has_bad_reference = int_(received_ref_subquery.c.has_bad_ref)
        rating_score = float_(
            received_ref_subquery.c.ref_avg
            * (
                2 * func.least(received_ref_subquery.c.ref_count, 5)
                + func.greatest(received_ref_subquery.c.ref_count - 5, 0)
            )
        )
        ref_score = 2 * has_reference + has_multiple_types + left_reference - 5 * has_bad_reference + rating_score

        # activeness
        recently_active = int_(User.last_active >= now() - timedelta(days=180))
        very_recently_active = int_(User.last_active >= now() - timedelta(days=14))
        recently_messaged = int_(func.max(Message.time) > now() - timedelta(days=14))
        messaged_lots = int_(func.count(Message.id) > 5)
        messaging_points_subquery = (recently_messaged + messaged_lots).label("messaging_points")
        messaging_subquery = (
            select(Message.author_id.label("user_id"), messaging_points_subquery)
            .where(Message.message_type == MessageType.text)
            .group_by(Message.author_id)
            .subquery()
        )
        activeness_points = recently_active + 2 * very_recently_active + int_(messaging_subquery.c.messaging_points)

        # verification
        phone_verified = int_(User.phone_is_verified)
        cb_subquery = (
            select(ClusterSubscription.user_id.label("user_id"), func.min(Cluster.parent_node_id).label("min_node_id"))
            .join(Cluster, Cluster.id == ClusterSubscription.cluster_id)
            .where(ClusterSubscription.role == ClusterRole.admin)
            .where(Cluster.is_official_cluster)
            .group_by(ClusterSubscription.user_id)
            .subquery()
        )
        min_node_id = cb_subquery.c.min_node_id
        cb = int_(min_node_id >= 1)
        f = int_(User.id <= 2)
        wcb = int_(min_node_id == 1)
        verification_points = 0.0 + 100 * f + 10 * wcb + 5 * cb

        # response rate
        t = (
            select(Message.conversation_id, Message.time)
            .where(Message.message_type == MessageType.chat_created)
            .subquery()
        )
        s = (
            select(Message.conversation_id, Message.author_id, func.min(Message.time).label("time"))
            .group_by(Message.conversation_id, Message.author_id)
            .subquery()
        )
        hr_subquery = (
            select(
                HostRequest.host_user_id.label("user_id"),
                func.avg(s.c.time - t.c.time).label("avg_response_time"),
                func.count(t.c.time).label("received"),
                func.count(s.c.time).label("responded"),
                float_(
                    extract(
                        "epoch",
                        percentile_disc(0.33).within_group(func.coalesce(s.c.time - t.c.time, timedelta(days=1000))),
                    )
                    / 60.0
                ).label("response_time_33p"),
                float_(
                    extract(
                        "epoch",
                        percentile_disc(0.66).within_group(func.coalesce(s.c.time - t.c.time, timedelta(days=1000))),
                    )
                    / 60.0
                ).label("response_time_66p"),
            )
            .join(t, t.c.conversation_id == HostRequest.conversation_id)
            .outerjoin(
                s, and_(s.c.conversation_id == HostRequest.conversation_id, s.c.author_id == HostRequest.host_user_id)
            )
            .group_by(HostRequest.host_user_id)
            .subquery()
        )
        avg_response_time = hr_subquery.c.avg_response_time
        avg_response_time_hr = float_(extract("epoch", avg_response_time) / 60.0)
        received = hr_subquery.c.received
        responded = hr_subquery.c.responded
        response_time_33p = hr_subquery.c.response_time_33p
        response_time_66p = hr_subquery.c.response_time_66p
        response_rate = float_(responded / (1.0 * func.greatest(received, 1)))
        # be careful with nulls
        response_rate_points = -10 * int_(response_time_33p > 60 * 48.0) + 5 * int_(response_time_66p < 60 * 48.0)

        recommendation_score = (
            profile_points
            + ref_score
            + activeness_points
            + verification_points
            + response_rate_points
            + 2 * poor_man_gaussian()
        )

        scores = (
            select(User.id.label("user_id"), recommendation_score.label("score"))
            .outerjoin(messaging_subquery, messaging_subquery.c.user_id == User.id)
            .outerjoin(left_refs_subquery, left_refs_subquery.c.user_id == User.id)
            .outerjoin(received_ref_subquery, received_ref_subquery.c.user_id == User.id)
            .outerjoin(cb_subquery, cb_subquery.c.user_id == User.id)
            .outerjoin(hr_subquery, hr_subquery.c.user_id == User.id)
        ).subquery()

        session.execute(
            User.__table__.update().values(recommendation_score=scores.c.score).where(User.id == scores.c.user_id)
        )

    logger.info("Updated recommendation scores")
Esempio n. 6
0
 def get_query_column(self, entity):
     return func.greatest(*(getattr(entity, c) for c in self.entity_columns))
Esempio n. 7
0
def query_work_day_stats(
    company_id,
    start_date=None,
    end_date=None,
    first=None,
    after=None,
    tzname="Europe/Paris",
):
    tz = gettz(tzname)
    if after:
        max_time, user_id_ = parse_datetime_plus_id_cursor(after)
        max_date = max_time.date()
        end_date = min(max_date, end_date) if end_date else max_date

    query = (Activity.query.join(Mission).join(
        Expenditure,
        and_(
            Activity.user_id == Expenditure.user_id,
            Activity.mission_id == Expenditure.mission_id,
        ),
        isouter=True,
    ).with_entities(
        Activity.id,
        Activity.user_id,
        Activity.mission_id,
        Mission.name,
        Activity.start_time,
        Activity.end_time,
        Activity.type,
        Expenditure.id.label("expenditure_id"),
        Expenditure.type.label("expenditure_type"),
        func.generate_series(
            func.date_trunc(
                "day",
                func.timezone(
                    tzname,
                    func.timezone("UTC", Activity.start_time),
                ),
            ),
            func.timezone(
                tzname,
                func.coalesce(
                    func.timezone("UTC", Activity.end_time),
                    func.now(),
                ),
            ),
            "1 day",
        ).label("day"),
    ).filter(
        Mission.company_id == company_id,
        ~Activity.is_dismissed,
        Activity.start_time != Activity.end_time,
    ))

    query = _apply_time_range_filters(
        query,
        to_datetime(start_date, tz_for_date=tz),
        to_datetime(end_date,
                    tz_for_date=tz,
                    convert_dates_to_end_of_day_times=True),
    )

    has_next_page = False
    if first:
        activity_first = max(first * 5, 200)
        query = query.order_by(desc("day"), desc(
            Activity.user_id)).limit(activity_first + 1)
        has_next_page = query.count() > activity_first

    query = query.subquery()

    query = (db.session.query(query).group_by(
        query.c.user_id, query.c.day, query.c.mission_id,
        query.c.name).with_entities(
            query.c.user_id.label("user_id"),
            query.c.day,
            func.timezone("UTC",
                          func.timezone(tzname,
                                        query.c.day)).label("utc_day_start"),
            query.c.mission_id.label("mission_id"),
            query.c.name.label("mission_name"),
            func.min(
                func.greatest(
                    query.c.start_time,
                    func.timezone("UTC", func.timezone(tzname, query.c.day)),
                )).label("start_time"),
            func.max(
                func.least(
                    func.timezone(
                        "UTC",
                        func.timezone(
                            tzname,
                            query.c.day + func.cast("1 day", Interval)),
                    ),
                    func.coalesce(query.c.end_time, func.now()),
                )).label("end_time"),
            func.bool_or(
                and_(
                    query.c.end_time.is_(None),
                    query.c.day == func.current_date(),
                )).label("is_running"),
            *[
                func.sum(
                    case(
                        [(
                            query.c.type == a_type.value,
                            extract(
                                "epoch",
                                func.least(
                                    func.timezone(
                                        "UTC",
                                        func.timezone(
                                            tzname,
                                            query.c.day +
                                            func.cast("1 day", Interval),
                                        ),
                                    ),
                                    func.coalesce(query.c.end_time,
                                                  func.now()),
                                ) - func.greatest(
                                    query.c.start_time,
                                    func.timezone(
                                        "UTC",
                                        func.timezone(tzname, query.c.day),
                                    ),
                                ),
                            ),
                        )],
                        else_=0,
                    )).label(f"{a_type.value}_duration")
                for a_type in ActivityType
            ],
            func.greatest(func.count(distinct(query.c.expenditure_id)),
                          1).label("n_exp_dups"),
            func.count(distinct(query.c.id)).label("n_act_dups"),
            *[
                func.sum(
                    case(
                        [(query.c.expenditure_type == e_type.value, 1)],
                        else_=0,
                    )).label(f"n_{e_type.value}_expenditures")
                for e_type in ExpenditureType
            ],
        ).subquery())

    query = (db.session.query(query).group_by(
        query.c.user_id, query.c.day).with_entities(
            query.c.user_id.label("user_id"),
            query.c.day,
            func.array_agg(distinct(
                query.c.mission_name)).label("mission_names"),
            func.min(query.c.start_time).label("start_time"),
            func.max(query.c.end_time).label("end_time"),
            func.bool_or(query.c.is_running).label("is_running"),
            *[
                func.sum(
                    getattr(query.c, f"{a_type.value}_duration") /
                    query.c.n_exp_dups).cast(Integer).label(
                        f"{a_type.value}_duration") for a_type in ActivityType
            ],
            *[
                func.sum(
                    getattr(query.c, f"n_{e_type.value}_expenditures") /
                    query.c.n_act_dups).cast(Integer).label(
                        f"n_{e_type.value}_expenditures")
                for e_type in ExpenditureType
            ],
        ).order_by(desc("day"), desc("user_id")).subquery())

    query = db.session.query(query).with_entities(
        *query.c,
        extract("epoch", query.c.end_time -
                query.c.start_time).label("service_duration"),
        reduce(
            lambda a, b: a + b,
            [
                getattr(query.c, f"{a_type.value}_duration")
                for a_type in ActivityType
            ],
        ).label("total_work_duration"),
    )

    results = query.all()
    if after:
        results = [
            r for r in results if r.day.date() < max_date or (
                r.day.date() == max_date and r.user_id < user_id_)
        ]

    if first:
        if has_next_page:
            # The last work day may be incomplete because we didn't fetch all the activities => remove it
            results = results[:-1]
        if len(results) > first:
            results = results[:first]
            has_next_page = True

    return results, has_next_page
Esempio n. 8
0
class Stock(PcObject, Model, ProvidableMixin, SoftDeletableMixin,
            VersionedMixin):
    # We redefine this so we can reference it in the baseScore column_property
    id = Column(BigInteger, primary_key=True, autoincrement=True)

    dateModified = Column(DateTime, nullable=False, default=datetime.utcnow)

    beginningDatetime = Column(DateTime, index=True, nullable=True)

    endDatetime = Column(
        DateTime,
        CheckConstraint('"endDatetime" > "beginningDatetime"',
                        name='check_end_datetime_is_after_beginning_datetime'),
        nullable=True)

    offerId = Column(BigInteger,
                     ForeignKey('offer.id'),
                     index=True,
                     nullable=False)

    offer = relationship('Offer', foreign_keys=[offerId], backref='stocks')

    price = Column(Numeric(10, 2),
                   CheckConstraint('price >= 0',
                                   name='check_price_is_not_negative'),
                   nullable=False)

    available = Column(Integer, index=True, nullable=True)

    remainingQuantity = column_property(
        select([
            func.greatest(
                available - func.coalesce(func.sum(Booking.quantity), 0), 0)
        ]).where(
            and_(
                Booking.stockId == id,
                or_(
                    and_(Booking.isUsed == False,
                         Booking.isCancelled == False),
                    and_(Booking.isUsed == True,
                         Booking.dateUsed > dateModified)))))

    groupSize = Column(Integer, nullable=False, default=1)

    bookingLimitDatetime = Column(DateTime, nullable=True)

    bookingRecapSent = Column(DateTime, nullable=True)

    def errors(self):
        api_errors = super(Stock, self).errors()
        if self.available is not None and self.available < 0:
            api_errors.add_error('available', 'Le stock doit être positif')

        if self.endDatetime \
                and self.beginningDatetime \
                and self.endDatetime <= self.beginningDatetime:
            api_errors.add_error(
                'endDatetime',
                'La date de fin de l\'événement doit être postérieure à la date de début'
            )

        return api_errors

    @property
    def isBookable(self):
        return self.bookingLimitDatetime is None \
               or self.bookingLimitDatetime >= datetime.utcnow()

    @property
    def resolvedOffer(self):
        return self.offer or self.eventOccurrence.offer

    @classmethod
    def queryNotSoftDeleted(cls):
        return Stock.query.filter_by(isSoftDeleted=False)

    @staticmethod
    def restize_internal_error(ie):
        if 'check_stock' in str(ie.orig):
            if 'available_too_low' in str(ie.orig):
                return [
                    'available', 'la quantité pour cette offre' +
                    ' ne peut pas être inférieure' +
                    ' au nombre de réservations existantes.'
                ]
            elif 'bookingLimitDatetime_too_late' in str(ie.orig):
                return [
                    'bookingLimitDatetime',
                    'La date limite de réservation pour cette offre est postérieure à la date de début de l\'évènement'
                ]
            else:
                logger.error("Unexpected error in patch stocks: " +
                             pformat(ie))
        return PcObject.restize_internal_error(ie)
Esempio n. 9
0
 def get_query_column(self, entity):
     return func.greatest(*(getattr(entity, c) for c in self.entity_columns))
Esempio n. 10
0
    def GetResponseRate(self, request, context):
        with session_scope() as session:
            # this subquery gets the time that the request was sent
            t = (select(Message.conversation_id, Message.time).where(
                Message.message_type == MessageType.chat_created).subquery())
            # this subquery gets the time that the user responded to the request
            s = (select(Message.conversation_id,
                        func.min(Message.time).label("time")).where(
                            Message.author_id == request.user_id).group_by(
                                Message.conversation_id).subquery())

            res = session.execute(
                select(
                    User.id,
                    # number of requests received
                    func.count().label("n"),
                    # percentage of requests responded to
                    (func.count(s.c.time) /
                     func.cast(func.greatest(func.count(t.c.time), 1.0), Float)
                     ).label("response_rate"),
                    # the 33rd percentile response time
                    percentile_disc(0.33).within_group(
                        func.coalesce(s.c.time - t.c.time,
                                      timedelta(days=1000))
                    ).label("response_time_p33"),
                    # the 66th percentile response time
                    percentile_disc(0.66).within_group(
                        func.coalesce(s.c.time - t.c.time,
                                      timedelta(days=1000))
                    ).label("response_time_p66"),
                ).where_users_visible(context).where(
                    User.id == request.user_id).outerjoin(
                        HostRequest,
                        HostRequest.host_user_id == User.id).outerjoin(
                            t, t.c.conversation_id ==
                            HostRequest.conversation_id).outerjoin(
                                s, s.c.conversation_id ==
                                HostRequest.conversation_id).group_by(
                                    User.id)).one_or_none()

            if not res:
                context.abort(grpc.StatusCode.NOT_FOUND, errors.USER_NOT_FOUND)

            _, n, response_rate, response_time_p33, response_time_p66 = res

            if n < 3:
                return requests_pb2.GetResponseRateRes(
                    insufficient_data=requests_pb2.
                    ResponseRateInsufficientData(), )

            if response_rate <= 0.33:
                return requests_pb2.GetResponseRateRes(
                    low=requests_pb2.ResponseRateLow(), )

            response_time_p33_coarsened = Duration_from_timedelta(
                timedelta(
                    seconds=round(response_time_p33.total_seconds() / 60) *
                    60))

            if response_rate <= 0.66:
                return requests_pb2.GetResponseRateRes(
                    some=requests_pb2.ResponseRateSome(
                        response_time_p33=response_time_p33_coarsened), )

            response_time_p66_coarsened = Duration_from_timedelta(
                timedelta(
                    seconds=round(response_time_p66.total_seconds() / 60) *
                    60))

            if response_rate <= 0.90:
                return requests_pb2.GetResponseRateRes(
                    most=requests_pb2.ResponseRateMost(
                        response_time_p33=response_time_p33_coarsened,
                        response_time_p66=response_time_p66_coarsened), )
            else:
                return requests_pb2.GetResponseRateRes(
                    almost_all=requests_pb2.ResponseRateAlmostAll(
                        response_time_p33=response_time_p33_coarsened,
                        response_time_p66=response_time_p66_coarsened), )