Esempio n. 1
0
 def _build_subquery(self):
     """Build a lateral subquery giving, per structural variant, the distance
     to the center of the nearest overlapping TAD boundary interval.

     Returns ``None`` when no ``tad_set_uuid`` was passed in ``self.kwargs``;
     otherwise a labelled lateral subquery exposing ``distance_to_center``
     (0 if the boundary center lies inside the variant, -1 if no boundary
     overlaps at all, else the distance in bases).
     """
     if not self.kwargs.get("tad_set_uuid"):
         return None
     else:
         # Resolve the TAD set UUID to its primary key for the SQL filter.
         set_pk = TadSet.objects.get(
             sodar_uuid=self.kwargs["tad_set_uuid"]).pk
         # Midpoint of the boundary interval (+1 makes the length inclusive).
         term_interval_center = (TadBoundaryInterval.sa.start +
                                 (TadBoundaryInterval.sa.end -
                                  TadBoundaryInterval.sa.start + 1) / 2)
         # True when the boundary center falls within the variant itself.
         term_overlaps = and_(
             term_interval_center >= StructuralVariant.sa.start,
             term_interval_center <= StructuralVariant.sa.end,
         )
         fields = [
             # MIN over all overlapping boundaries; COALESCE(-1) covers the
             # no-overlap case so callers get a sentinel instead of NULL.
             func.coalesce(
                 func.min(
                     case(
                         [(term_overlaps, 0)],
                         else_=func.least(
                             func.abs(term_interval_center -
                                      StructuralVariant.sa.start),
                             func.abs(term_interval_center -
                                      StructuralVariant.sa.end),
                         ),
                     )),
                 -1,
             ).label("distance_to_center")
         ]
         # LATERAL so the subquery can reference StructuralVariant columns
         # from the outer query row-by-row.
         return (select(fields).select_from(TadBoundaryInterval.sa).where(
             and_(
                 TadBoundaryInterval.sa.tad_set_id == set_pk,
                 overlaps(TadBoundaryInterval, StructuralVariant),
             )).alias("subquery_tad_boundaries_inner").lateral(
                 "subquery_tad_boundaries_outer"))
Esempio n. 2
0
 def order_distance(query, latlon):
     """Order *query* by a crude Manhattan-style distance from *latlon*.

     Rows lacking either coordinate are filtered out; the longitude delta
     is weighted by ``Address.scale`` to roughly compensate for latitude.
     """
     # Very simplistic
     latitude, longitude = latlon
     delta_lat = func.abs(latitude - Address.latitude)
     delta_lon = func.abs(longitude - Address.longitude)
     lon_weight = Address.scale(latitude)
     located = query.filter(Address.latitude != None)
     located = located.filter(Address.longitude != None)
     return located.order_by(delta_lat + delta_lon * lon_weight)
Esempio n. 3
0
    def load_similar(self, delta=0.05):
        """Return this user's ingredients whose sugar, protein and fat are
        each within ``delta`` of this ingredient's values."""
        from sqlalchemy.sql import func

        close_in_macros = and_(
            func.abs(Ingredient.sugar - self.sugar) <= delta,
            func.abs(Ingredient.protein - self.protein) <= delta,
            func.abs(Ingredient.fat - self.fat) <= delta,
        )
        query = Ingredient.query.filter(close_in_macros)
        query = query.filter(Ingredient.is_current_user_author)
        return query.all()
Esempio n. 4
0
    def get_analytics(cls, ticker_name, date_from, date_to):
        """Return a query of absolute open/close/low/high price deltas for
        *ticker_name* between the rows dated *date_from* and *date_to*.

        Each labelled column is ``abs(price_at_date_from - price_at_date_to)``
        for the corresponding OHLC field.
        """
        PricesA = aliased(cls)
        PricesB = aliased(cls)

        result = db.session.query(
            label('open', func.abs(PricesA.open - PricesB.open)),
            label('close', func.abs(PricesA.close - PricesB.close)),
            label('low', func.abs(PricesA.low - PricesB.low)),
            # BUG FIX: 'high' previously compared PricesA.close against
            # PricesB.high; compare the high column on both sides, matching
            # the open/close/low pattern above.
            label('high',
                  func.abs(PricesA.high - PricesB.high))).join(Ticker).join(
                      PricesB, and_(PricesB.ticker_id == Ticker.id)).filter(
                          Ticker.name == ticker_name,
                          PricesA.date == date_from, PricesB.date == date_to)
        return result
Esempio n. 5
0
    def max_distance(orm, query, latitude, longitude, default=1):
        """Return the largest per-row 'dist' value for *query*, measured from
        (*latitude*, *longitude*), or *default* when no row exists."""
        delta_lat = func.abs(latitude - Address.latitude)
        delta_lon = func.abs(longitude - Address.longitude)
        lon_weight = Address.scale(latitude)

        # MySQL spells row-wise max-of-two-values as GREATEST, not MAX.
        if orm.connection().dialect.name == "mysql":
            func_max = func.greatest
        else:
            func_max = func.max

        with_dist = query.add_columns(
            func_max(delta_lat, delta_lon * lon_weight).label("dist"))
        inner = with_dist.subquery()
        row = orm.query(inner).order_by("dist desc").first()
        if row:
            return row.dist
        return default
Esempio n. 6
0
    def select(self, unit):
        """Return the Unit whose feature vector is closest (L1 distance in
        SQL) to *unit*'s, restricted to this selector's mediafiles."""
        target = unit.features

        # Accumulate |delta| over every feature column.
        distance = func.abs(Features.feat_0 - target.feat_0)
        for slot in range(1, FEATURE_SLOTS):
            column = "feat_{}".format(slot)
            distance += func.abs(getattr(Features, column) -
                                 getattr(target, column))

        pk = (self.session.query(Features.unit_id)
              .filter(Features.mediafile_id.in_(self.mediafiles))
              .order_by(distance).limit(1).one())

        return self.session.query(Unit).get(pk)
Esempio n. 7
0
    def nearest_height(cls):
        """Scalar SQL expression: this row's height, or — when it is NULL —
        the average height of the same person's rows nearest in time that do
        have a height.

        Built from three chained subqueries: time deltas to every other
        measured row, the minimum delta per row, then the average height at
        that minimum delta.
        """

        other = aliased(cls)

        # For each row: every other height-bearing row of the same person,
        # with the absolute time difference to it.
        time_deltas = sa.select([
            cls.id.label('this_id'),
            other.height.label('other_height'),
            func.abs(func.timediff(other.time, cls.time)).label('time_delta')
        ]).where(other.height != None).where(
            other.person_id == cls.person_id).alias('time_deltas')

        # Smallest time difference per row.
        min_time_delta = sa.select([
            func.min(time_deltas.c.time_delta).label('min_time_delta'),
            time_deltas.c.this_id.label('min_id')
        ]).group_by(time_deltas.c.this_id).alias('min_time_deltas')

        # Average height over the rows at exactly that smallest difference
        # (averaging handles ties in the time delta).
        avg_other_height = sa.select([
            time_deltas.c.this_id.label('avg_id'),
            func.avg(time_deltas.c.other_height).label('avg_height')
        ]).where(time_deltas.c.this_id == min_time_delta.c.min_id).where(
            time_deltas.c.time_delta ==
            min_time_delta.c.min_time_delta).group_by(
                time_deltas.c.this_id).alias('avg_other_height')

        # Prefer the row's own height when present.
        nearest_height = sa.select([
            case([(cls.height != None, cls.height)],
                 else_=avg_other_height.c.avg_height).label('nearest_height'),
        ]).where(avg_other_height.c.avg_id == cls.id).as_scalar()

        return nearest_height
Esempio n. 8
0
 def _read_data(self, s, start, finish):
     """Read monitor statistics for the day [start, finish).

     Returns ``(input_source_ids, (rest_heart_rate, daily_steps))`` where
     ``rest_heart_rate`` is the 10th-percentile heart rate (or ``None`` when
     sampling coverage looks insufficient) and ``daily_steps`` the summed
     step count.
     """
     midpt = start + dt.timedelta(hours=12)
     # Mean |time - midday|; for evenly sampled data this is ~QUARTER_DAY,
     # so it serves as a cheap coverage/evenness check below.
     m0 = s.query(func.avg(func.abs(StatisticJournalInteger.time - midpt))).join(StatisticName). \
         filter(StatisticName.name == HEART_RATE,
                StatisticName.owner == self.owner_in,
                StatisticJournalInteger.time < finish,
                StatisticJournalInteger.time >= start,
                StatisticJournalInteger.value > 0).scalar()
     log.debug('M0: %s' % m0)
     if m0 and abs(m0 -
                   QUARTER_DAY) < 0.25 * QUARTER_DAY:  # not evenly sampled
         all_hr = [row[0] for row in s.query(StatisticJournalInteger.value).join(StatisticName). \
             filter(StatisticName.name == HEART_RATE,
                    StatisticName.owner == self.owner_in,
                    StatisticJournalInteger.time < finish,
                    StatisticJournalInteger.time >= start,
                    StatisticJournalInteger.value > 0).all()]
         n = len(all_hr)
         rest_heart_rate = sorted(all_hr)[n // 10]  # 10th percentile
     else:
         log.info(
             f'Insufficient coverage for {REST_HR} for {start} - {finish}')
         rest_heart_rate = None
     daily_steps = s.query(func.sum(StatisticJournalInteger.value)).join(StatisticName). \
         filter(StatisticName.name == STEPS,
                StatisticName.owner == self.owner_in,
                StatisticJournalInteger.time < finish,
                StatisticJournalInteger.time >= start).scalar()
     # Monitor journals overlapping the window; returned so the caller can
     # track which sources fed this calculation.
     input_source_ids = [
         row[0] for row in s.query(MonitorJournal.id).filter(
             MonitorJournal.start <= finish,
             MonitorJournal.finish >= start).all()
     ]
     return input_source_ids, (rest_heart_rate, daily_steps)
Esempio n. 9
0
 def _read_data(self, s, start, finish):
     """Read monitor statistics for the window [start, finish).

     Returns ``(input_source_ids, (rest_heart_rate, daily_steps))`` where
     ``rest_heart_rate`` is a list of ``(name, value)`` percentile pairs
     (empty when coverage is insufficient) and ``daily_steps`` the summed
     step count.
     """
     rest_heart_rate = []
     midpt = start + 0.5 * (finish - start)
     # Mean |time - midpoint|; for evenly sampled data this is ~QUARTER_DAY,
     # so it serves as a cheap coverage/evenness check below.
     m0 = s.query(func.avg(func.abs(StatisticJournalInteger.time - midpt))).join(StatisticName). \
         filter(StatisticName.name == HEART_RATE,
                StatisticName.owner == self.owner_in,
                StatisticJournalInteger.time < finish,
                StatisticJournalInteger.time >= start,
                StatisticJournalInteger.value > 0).scalar()
     log.debug('M0: %s' % m0)
     if m0 and abs(m0 - QUARTER_DAY) < 0.25 * QUARTER_DAY:  # not evenly sampled
         all_hr = sorted([row[0] for row in s.query(StatisticJournalInteger.value).join(StatisticName). \
             filter(StatisticName.name == HEART_RATE,
                    StatisticName.owner == self.owner_in,
                    StatisticJournalInteger.time < finish,
                    StatisticJournalInteger.time >= start,
                    StatisticJournalInteger.value > 0).all()])
         n = len(all_hr)
         if n > 10:
             # One value per configured percentile (index n // pc).
             rest_heart_rate = [(name, all_hr[n // pc]) for name, pc in REST_HR_PERCENTILES.items()]
     if not rest_heart_rate:
         log.info(f'Insufficient coverage for {REST_HR} for {start} - {finish}')
     daily_steps = s.query(func.sum(StatisticJournalInteger.value)).join(StatisticName). \
         filter(StatisticName.name == STEPS,
                StatisticName.owner == self.owner_in,
                StatisticJournalInteger.time < finish,
                StatisticJournalInteger.time >= start).scalar()
     # Monitor journals overlapping the window; returned so the caller can
     # track which sources fed this calculation.
     input_source_ids = [row[0] for row in s.query(MonitorJournal.id).
         filter(MonitorJournal.start < finish,
                MonitorJournal.finish >= start).all()]
     return input_source_ids, (rest_heart_rate, daily_steps)
Esempio n. 10
0
def getRate(querydate):
    """Return the stored rate whose date (epoch seconds) is closest to
    *querydate* (a datetime or a parseable date string)."""
    if type(querydate) is not datetime:
        querydate = parser.parse(querydate)
    epoch = int(querydate.strftime("%s"))
    print(epoch)
    nearness = func.abs(epoch - models.Rates.date)
    nearest = db.session.query(models.Rates).order_by(nearness).first()
    return nearest.rate
Esempio n. 11
0
def main():
    """Look up a user by id and print the athletes closest to them in
    birthdate and in height."""
    session = users.connect_db()
    user_id = request_data()
    user = session.query(users.User).filter(users.User.id == user_id).first()
    if not user:
        print('Пользовалель с таким id не найден')
        return
    by_birthdate = (session.query(Athelete)
                    .filter(Athelete.birthdate.isnot(None))
                    .order_by(func.abs(func.julianday(Athelete.birthdate) -
                                       func.julianday(user.birthdate)))
                    .first())
    by_height = (session.query(Athelete)
                 .filter(Athelete.height.isnot(None))
                 .order_by(func.abs(Athelete.height - user.height))
                 .first())
    print(
        f'Ближайший по дате рождения атлет: id: {by_birthdate.id}, имя: {by_birthdate.name}, дата рождения: {by_birthdate.birthdate}'
    )
    print(
        f'Ближайший по росту атлет: id: {by_height.id}, имя: {by_height.name}, рост: {by_height.height}'
    )
Esempio n. 12
0
def nearest_by_height_athelete(user_height, session):
    """Find the athlete nearest in height to *user_height*, in the DBMS.

    Selects the row with the minimal absolute difference between the
    athlete's height and the user's height.  (The original docstring spoke
    of birthdates, but this function compares heights.)

    NOTE(review): mixing the aggregate ``func.min`` with bare columns relies
    on SQLite's permissive handling — verify against other backends.
    """
    query = session.query(Athelete.name, Athelete.height,
                          func.min(func.abs(Athelete.height -
                                            user_height))).first()
    return query
Esempio n. 13
0
def getRate(querydate):
    """Return the rate of the record dated closest to *querydate*.

    Accepts a datetime or anything ``dateutil.parser`` can parse.
    """
    if type(querydate) is not datetime:
        querydate = parser.parse(querydate)
    querydate = int(querydate.strftime("%s"))
    print(querydate)
    closest_price = (db.session.query(models.Rates)
                     .order_by(func.abs(querydate - models.Rates.date))
                     .first())
    return closest_price.rate
Esempio n. 14
0
 def match(self, session=None):
     """Match to a pair sequence.

     Finds rows with the same sequence attributes but a different id and
     loop, ordered by closeness of their dataset sequence number to ours.

     Returns the dataset of the closest match, or ``None`` when there is
     no candidate.
     """
     cls = self.__class__
     session = session or object_session(self)
     match_query = session.query(cls).filter_by(**self.sequence_attributes())
     match_query = match_query.filter(cls.id != self.id, cls.loop != self.loop)
     matches = match_query.join(Dataset).order_by(func.abs(Dataset.sequence - self.dataset.sequence)).all()
     if not len(matches):
         return None
     closest = min(abs(s.dataset.sequence - self.dataset.sequence) for s in matches)
     # BUG FIX: on Python 3 ``filter`` returns an iterator, which is not
     # subscriptable — ``matches[0]`` raised TypeError.  Materialise the
     # filtered candidates as a list before indexing.
     matches = [s for s in matches if abs(s.dataset.sequence - self.dataset.sequence) <= closest]
     return matches[0].dataset
Esempio n. 15
0
def get_controversial_stories(
        timediff=timedelta(hours=48), contro_threshold=5, contro_min=10):
    """
    Get a story list that is suitable for display as "controversial".
    @param timediff: timedelta representing controversial eligibility age
    @param contro_threshold: the maximum difference between up and down votes to be considered "controversial"
    @param contro_min: the minimum number of points required for something to appear as controversial
    @return: SA query ready to get list of stuff
    """
    cutoff = general.now_in_utc() - timediff
    vote_gap = func.abs(Submission.upvote_tally - Submission.downvote_tally)
    stories = (dbsession.query(Submission)
               .options(joinedload('submitter'))
               .filter(Submission.deleted == False)
               .filter(Submission.added_on > cutoff)
               .filter(vote_gap <= contro_threshold)
               .filter(Submission.total_vote_tally > contro_min)
               .order_by(Submission.added_on.desc()))
    return stories
Esempio n. 16
0
def athlets_by_height(sess, height):
    """Return all Athlete rows sharing the stored height closest to
    *height*, or ``None`` when no athlete has a height at all."""
    nearest_height = (sess.query(Athlete.height)
                      .filter(Athlete.height.isnot(None))
                      .order_by(func.abs(Athlete.height - height))
                      .limit(1).first())

    if not nearest_height:
        return None
    # Several athletes may share the closest height; return them all.
    return (sess.query(Athlete)
            .filter(Athlete.height.in_(nearest_height))
            .all())
Esempio n. 17
0
    def matchCalcMoney(self, u, dealerid):
        """Settle yesterday's and today's results for user *u*, then assign a
        new match via *dealerid*.

        Errors out when the user has no target-amount data, or (before
        19:00) when any of today's matches still lacks a result.
        """
        udata = self.session.query(UserData).filter(
            UserData.userid == u.userid).first()
        if udata is None:
            self.errorReturn(GLBConfig.API_ERROR, '目标金额未设置.')

        self.matchresult(u, udata, SysUtil.getYesterday())
        self.matchresult(u, udata, SysUtil.getToday())

        # Count today's matches whose result money is still effectively zero.
        pending = (self.session.query(func.count('*'))
                   .filter(MatchData.userid == u.userid)
                   .filter(MatchData.date == SysUtil.getToday())
                   .filter(func.abs(MatchData.ResultMoney) < 0.001)
                   .scalar())

        if datetime.datetime.now().time() < datetime.time(19, 0, 0):
            if pending != 0:
                self.errorReturn(GLBConfig.API_ERROR, '有比赛结果未出.')

        self.getMatch(u, udata, dealerid)
Esempio n. 18
0
def athlets_by_birthdate(sess, birthdate):
    """Return all Athlete rows sharing the stored birthdate closest to
    *birthdate*, or ``None`` when the table is empty."""
    # Compare birthdates as epoch seconds via SQLite's strftime('%s', ...).
    seconds_apart = func.abs(
        func.strftime('%s', Athlete.birthdate) -
        func.strftime('%s', birthdate))
    nearest_birthdate = (sess.query(Athlete.birthdate)
                         .order_by(seconds_apart)
                         .limit(1).first())

    if not nearest_birthdate:
        return None
    # Several athletes may share the closest birthdate; return them all.
    return (sess.query(Athlete)
            .filter(Athlete.birthdate.in_(nearest_birthdate))
            .all())
Esempio n. 19
0
    def nearest_height(self):
        """Return this row's height, or — when it is missing — the average
        height of the same person's rows measured nearest in time.

        Issues two queries: one for the minimum time delta to any
        height-bearing row, then one averaging heights at exactly that delta
        (averaging handles ties).
        """

        if self.height is not None:
            return self.height

        other = aliased(HeightWeight)

        # SQL expression: |time difference| between this row and the other.
        time_delta = func.abs(func.timediff(other.time, self.time))

        min_time_delta_query = object_session(self).query(
            func.min(time_delta).label('min_time_delta')).filter(
                other.height != None, other.person_id == self.person_id)

        min_time_delta = min_time_delta_query.one().min_time_delta

        nearest_height_query = object_session(self).query(
            func.avg(other.height).label('height'), ).filter(
                other.height != None, time_delta == min_time_delta,
                other.person_id == self.person_id)

        return nearest_height_query.one().height
Esempio n. 20
0
    def matchCalcMoney(self, u, udata, req_para):
        """Settle recent match results for user *u*, then allocate tomorrow's
        matches according to the user's account type and mode.

        Errors out before 19:00 when any of today's matches still has no
        result.  ``req_para`` carries mode-specific keys (``dealerid``,
        ``matchA``/``AResult``/``matchB``/``BResult``, ``matchid``).
        """
        # Settle the last three days' results.
        self.matchresult(u, udata, SysUtil.getLast2())
        self.matchresult(u, udata, SysUtil.getYesterday())
        self.matchresult(u, udata, SysUtil.getToday())

        # Count today's matches whose result money is still effectively zero,
        # i.e. whose result has not come out yet.
        ct = self.session.query(func.count('*')).\
            filter(MatchData.userid == u.userid).\
            filter(MatchData.date == SysUtil.getToday()).\
            filter(func.abs(MatchData.ResultMoney) < 0.001).scalar()

        if datetime.datetime.now().time() < datetime.time(19, 0, 0):
            if ct != 0:
                self.errorReturn(GLBConfig.API_ERROR, '有比赛结果未出.')

        self.session.commit()

        # Clear any previously assigned matches for tomorrow before
        # re-allocating below.
        self.session.query(MatchData).\
            filter(MatchData.userid == u.userid).\
            filter(MatchData.date == SysUtil.getTomorrow()).delete()

        self.session.flush()
        # Dispatch on account type, then on the user's configured mode.
        if u.accounttype == GLBConfig.ATYPE_GROUP:
            self.getMatchGroup(u, udata)
        elif u.accounttype == GLBConfig.ATYPE_PERSION:
            if udata.mode == GLBConfig.MODE_A:
                if 'dealerid' in req_para.keys():
                    self.getMatchModeA(u, udata, req_para['dealerid'])
                if 'matchA' in req_para.keys():
                    self.getMatchModeASelfChoice(u, udata, req_para['matchA'],
                                                 req_para['AResult'],
                                                 req_para['matchB'],
                                                 req_para['BResult'])
            if udata.mode == GLBConfig.MODE_B:
                self.getMatchModeB(u, udata)
            if udata.mode == GLBConfig.MODE_C:
                self.getMatchModeC(u, udata, req_para['matchid'])
            if udata.mode == GLBConfig.MODE_D:
                self.getMatchModeD(u, udata, req_para['matchid'])
Esempio n. 21
0
    def updateAllocation(self, settings, question_cap=DEFAULT_QUESTION_CAP):
        """Refresh this student's question allocations for the lecture.

        Deactivates stale allocations, enforces a per-type question cap,
        optionally re-allocates a fraction of existing questions, and fills
        any shortfall with randomly chosen questions (biased towards a
        target difficulty when one is set).  Yields
        ``(question_url, allocType, question)`` tuples for every active
        allocation.

        NOTE(review): uses ``xrange`` — Python 2 code.
        """
        # Get all existing allocations from the DB and their questions
        allocsByType = dict()
        hist_sel = float(settings.get('hist_sel', '0'))
        if hist_sel > 0.001:
            allocsByType['historical'] = []
            # Only get half the question cap if there's not much chance of the questions being used
            if hist_sel < 0.5 and 'question_cap_historical' not in settings:
                settings['question_cap_historical'] = int(
                    settings.get('question_cap', DEFAULT_QUESTION_CAP)) / 2
        if hist_sel < 0.999:
            # NB: Need to add rows for each distinct question type, otherwise won't try and assign them
            allocsByType['regular'] = []
            allocsByType['template'] = []

        # Fetch all existing allocations, divide by allocType
        for (dbAlloc, dbQn) in (Session.query(db.Allocation, db.Question).join(
                db.Question
        ).filter(db.Allocation.studentId == self.student.studentId).filter(
                db.Allocation.active == True).filter(
                    db.Allocation.lectureId == self.dbLec.lectureId).order_by(
                        db.Allocation.allocationId)):
            if not (dbQn.active) or (dbAlloc.allocationTime < dbQn.lastUpdate):
                # Question has been removed or is stale
                dbAlloc.active = False
            else:
                # Still around, so save it
                if (dbAlloc.allocType or dbQn.defAllocType) in allocsByType:
                    # NB: If hist_sel has changed, we might not want some types any more
                    allocsByType[dbAlloc.allocType
                                 or dbQn.defAllocType].append(
                                     dict(alloc=dbAlloc, question=dbQn))

        # Each question type should have at most question_cap questions
        for (allocType, allocs) in allocsByType.items():
            questionCap = int(
                settings.get(
                    'question_cap_' + allocType,
                    settings.get('question_cap', DEFAULT_QUESTION_CAP)))

            # If there's too many allocs, throw some away
            for i in sorted(random.sample(xrange(len(allocs)),
                                          max(len(allocs) - questionCap, 0)),
                            reverse=True):
                allocs[i]['alloc'].active = False
                del allocs[i]

            # If there's questions to spare, and requested to do so, throw away oldest questions
            if len(allocs) == questionCap and self.reAllocQuestions:
                for i in reversed(
                        xrange(
                            int(questionCap * float(
                                settings.get('allocation_realloc_perc', 20)) /
                                100))):
                    allocs[i]['alloc'].active = False
                    del allocs[i]

            # Assign required questions randomly
            if len(allocs) < questionCap:
                query = Session.query(db.Question).filter_by(
                    qnType='tw_questiontemplate' if allocType ==
                    'template' else 'tw_latexquestion').filter_by(active=True)
                if allocType == 'historical':
                    # Get questions from lectures "before" the current one
                    targetQuestions = (Session.query(
                        db.LectureQuestion.questionId).join(db.Lecture).filter(
                            db.Lecture.plonePath.startswith(
                                re.sub(r'/[^/]+/?$', '/',
                                       self.dbLec.plonePath))).filter(
                                           db.Lecture.plonePath <
                                           self.dbLec.plonePath).subquery())
                    query = query.filter(
                        db.Question.questionId.in_(targetQuestions))
                else:
                    # Git questions from current lecture
                    query = query.filter(
                        db.Question.lectures.contains(self.dbLec))

                # Filter out anything already allocated
                allocIds = [a['alloc'].questionId for a in allocs]
                if len(allocIds) > 0:
                    query = query.filter(~db.Question.questionId.in_(allocIds))

                # Give a target difficulty
                if self.targetDifficulty is not None:
                    query = query.order_by(
                        func.abs(
                            round(self.targetDifficulty * 50) -
                            func.round((50.0 * db.Question.timesCorrect) /
                                       db.Question.timesAnswered)))

                for dbQn in query.order_by(func.random()).limit(
                        max(questionCap - len(allocs), 0)):
                    dbAlloc = db.Allocation(
                        studentId=self.student.studentId,
                        questionId=dbQn.questionId,
                        lectureId=self.dbLec.lectureId,
                        allocationTime=datetime.datetime.utcnow(),
                        allocType='historical'
                        if allocType == 'historical' else None,
                    )
                    Session.add(dbAlloc)
                    allocs.append(dict(alloc=dbAlloc, question=dbQn, new=True))

        Session.flush()
        # Emit every surviving (and newly created) allocation.
        for allocType, allocs in allocsByType.items():
            for a in allocs:
                yield (
                    self._questionUrl(a['alloc'].publicId),
                    allocType,
                    a['question'],
                )
Esempio n. 22
0
def update_device_stats_jumps(session, date, logger=None):
    """Update device stats jumps.

    For every device, pairs consecutive beacons on *date* (window LEAD over
    timestamp), derives horizontal/vertical speeds between the pairs, counts
    implausible "jumps" above the speed limits, and writes the jump count
    (and an 'ambiguous' flag above ``max_jumps``) into ``DeviceStats``.

    :param session: SQLAlchemy session.
    :param date: day to process (converted to a timestamp range).
    :param logger: optional logger; falls back to ``app.logger``.
    :return: summary string with the number of updated rows.
    """

    if logger is None:
        logger = app.logger

    (start, end) = date_to_timestamps(date)

    # speed limits in m/s (values above indicates a unplausible position / jump)
    max_horizontal_speed = 1000
    max_vertical_speed = 100
    max_jumps = 10  # threshold for an 'ambiguous' device

    # find consecutive positions for a device
    sq = (session.query(
        AircraftBeacon.device_id,
        AircraftBeacon.timestamp,
        func.lead(AircraftBeacon.timestamp).over(
            partition_by=AircraftBeacon.device_id,
            order_by=AircraftBeacon.timestamp).label("timestamp_next"),
        AircraftBeacon.location_wkt,
        func.lead(AircraftBeacon.location_wkt).over(
            partition_by=AircraftBeacon.device_id,
            order_by=AircraftBeacon.timestamp).label("location_next"),
        AircraftBeacon.altitude,
        func.lead(AircraftBeacon.altitude).over(
            partition_by=AircraftBeacon.device_id,
            order_by=AircraftBeacon.timestamp).label("altitude_next"),
    ).filter(
        and_(between(AircraftBeacon.timestamp, start, end),
             AircraftBeacon.error_count == 0)).subquery())

    # calc vertial and horizontal speed between points
    sq2 = (session.query(
        sq.c.device_id,
        (func.st_distancesphere(sq.c.location_next, sq.c.location) /
         (func.extract("epoch", sq.c.timestamp_next) -
          func.extract("epoch", sq.c.timestamp))).label("horizontal_speed"),
        ((sq.c.altitude_next - sq.c.altitude) /
         (func.extract("epoch", sq.c.timestamp_next) -
          func.extract("epoch", sq.c.timestamp))).label("vertical_speed"),
    ).filter(
        and_(sq.c.timestamp != null(), sq.c.timestamp_next != null(),
             sq.c.timestamp < sq.c.timestamp_next)).subquery())

    # ... and find and count 'jumps'
    sq3 = (session.query(
        sq2.c.device_id,
        func.sum(
            case([(or_(
                func.abs(sq2.c.horizontal_speed) > max_horizontal_speed,
                func.abs(sq2.c.vertical_speed) > max_vertical_speed), 1)],
                 else_=0)).label("jumps")).group_by(
                     sq2.c.device_id).subquery())

    # Persist jump counts; a device above max_jumps is marked ambiguous.
    upd = update(DeviceStats).where(
        and_(DeviceStats.date == date,
             DeviceStats.device_id == sq3.c.device_id)).values({
                 "ambiguous":
                 sq3.c.jumps > max_jumps,
                 "jumps":
                 sq3.c.jumps
             })

    result = session.execute(upd)
    update_counter = result.rowcount
    session.commit()
    logger.warn("Updated {} DeviceStats jumps".format(update_counter))

    return "DeviceStats jumps for {}: {} updated".format(date, update_counter)
Esempio n. 23
0
    def updateAllocation(self, settings, question_cap=DEFAULT_QUESTION_CAP):
        """Refresh this student's question allocations for the lecture.

        Deactivates stale allocations, caps each question type, optionally
        re-allocates the least-suitable tenth (ranked by closeness to
        ``self.targetDifficulty``), and fills any shortfall with randomly
        chosen questions.  Yields ``(question_url, allocType, question)``
        tuples for every active allocation.

        NOTE(review): uses ``xrange`` and integer ``/`` in a slice — Python 2
        code; the ``len(allocs) / 10`` slice would break under Python 3.
        """
        # Get all existing allocations from the DB and their questions
        allocsByType = dict()
        hist_sel = float(settings.get('hist_sel', '0'))
        if hist_sel > 0.001:
            allocsByType['historical'] = []
            # Only get half the question cap if there's not much chance of the questions being used
            if hist_sel < 0.5 and 'question_cap_historical' not in settings:
                settings['question_cap_historical'] = int(settings.get('question_cap', DEFAULT_QUESTION_CAP)) / 2
        if hist_sel < 0.999:
            # NB: Need to add rows for each distinct question type, otherwise won't try and assign them
            allocsByType['regular'] = []
            allocsByType['template'] = []

        # Fetch all existing allocations, divide by allocType
        for (dbAlloc, dbQn) in (Session.query(db.Allocation, db.Question)
                .join(db.Question)
                .filter(db.Allocation.studentId == self.student.studentId)
                .filter(db.Allocation.active == True)
                .filter(db.Allocation.lectureId == self.dbLec.lectureId)):
            if not(dbQn.active) or (dbAlloc.allocationTime < dbQn.lastUpdate):
                # Question has been removed or is stale
                dbAlloc.active = False
            else:
                # Still around, so save it
                if (dbAlloc.allocType or dbQn.defAllocType) in allocsByType:
                    # NB: If hist_sel has changed, we might not want some types any more
                    allocsByType[dbAlloc.allocType or dbQn.defAllocType].append(dict(alloc=dbAlloc, question=dbQn))

        # Each question type should have at most question_cap questions
        for (allocType, allocs) in allocsByType.items():
            questionCap = int(settings.get('question_cap_' + allocType, settings.get('question_cap', DEFAULT_QUESTION_CAP)))

            # If there's too many allocs, throw some away
            for i in sorted(random.sample(xrange(len(allocs)), max(len(allocs) - questionCap, 0)), reverse=True):
                allocs[i]['alloc'].active = False
                del allocs[i]

            # If there's questions to spare, and requested to do so, reallocate questions
            if len(allocs) == questionCap and self.reAllocQuestions:
                if self.targetDifficulty is None:
                    raise ValueError("Must have a target difficulty to know what to remove")

                # Make ranking how likely questions are, based on targetDifficulty
                suitability = []
                for a in allocs:
                    if a['question'].timesAnswered == 0:
                        # New questions should be added regardless
                        suitability.append(1)
                    else:
                        suitability.append(1 - abs(self.targetDifficulty - float(a['question'].timesCorrect) / a['question'].timesAnswered))
                ranking = sorted(range(len(allocs)), key=lambda k: suitability[k])

                # Remove the least likely tenth
                for i in sorted(ranking[0:len(allocs) / 10 + 1], reverse=True):
                    allocs[i]['alloc'].active = False
                    del allocs[i]

            # Assign required questions randomly
            if len(allocs) < questionCap:
                query = Session.query(db.Question).filter_by(qnType='tw_questiontemplate' if allocType == 'template' else 'tw_latexquestion').filter_by(active=True)
                if allocType == 'historical':
                    # Get questions from lectures "before" the current one
                    targetQuestions = (Session.query(db.LectureQuestion.questionId)
                        .join(db.Lecture)
                        .filter(db.Lecture.plonePath.startswith(re.sub(r'/[^/]+/?$', '/', self.dbLec.plonePath)))
                        .filter(db.Lecture.plonePath < self.dbLec.plonePath)
                        .subquery())
                    query = query.filter(db.Question.questionId.in_(targetQuestions))
                else:
                    # Git questions from current lecture
                    query = query.filter(db.Question.lectures.contains(self.dbLec))

                # Filter out anything already allocated
                allocIds = [a['alloc'].questionId for a in allocs]
                if len(allocIds) > 0:
                    query = query.filter(~db.Question.questionId.in_(allocIds))

                # Give a target difficulty
                if self.targetDifficulty is not None:
                    query = query.order_by(func.abs(round(self.targetDifficulty * 50) - func.round((50.0 * db.Question.timesCorrect) / db.Question.timesAnswered)))

                for dbQn in query.order_by(func.random()).limit(max(questionCap - len(allocs), 0)):
                    dbAlloc = db.Allocation(
                        studentId=self.student.studentId,
                        questionId=dbQn.questionId,
                        lectureId=self.dbLec.lectureId,
                        allocationTime=datetime.datetime.utcnow(),
                        allocType='historical' if allocType == 'historical' else None,
                    )
                    Session.add(dbAlloc)
                    allocs.append(dict(alloc=dbAlloc, question=dbQn, new=True))

        Session.flush()
        # Emit every surviving (and newly created) allocation.
        for allocType, allocs in allocsByType.items():
            for a in allocs:
                yield (
                    self._questionUrl(a['alloc'].publicId),
                    allocType,
                    a['question'],
                )
Esempio n. 24
0
 def lat_dist(cls, lat, n):
     """SQL expression: true when this row's latitude is within *n* of *lat*."""
     delta = func.abs(cls.lat - lat)
     return delta < n
Esempio n. 25
0
def cluster_units(session, clusters, max_iterations=10000):
    """Cluster all units with a naive k-means over the Cluster/Features tables.

    Args:
      session: SQLAlchemy database session.
      clusters (int): Number of clusters to create.
      max_iterations (int): Upper bound on k-means passes.

    Returns:
      int: The number of iterations actually performed.
    """
    # Seed the centroids with `clusters` distinct random units.
    # NOTE(review): the `- 2` / `+ 1` offsets look like an attempt to stay
    # inside 1-based primary keys -- confirm against how Unit rows are keyed.
    random_pks = random.sample(
        xrange(session.query(Unit).count() - 2), clusters)
    random_features = [session.query(Unit).get(pk + 1).features
                       for pk in random_pks]

    # Re-initialize centers. Collect the new Cluster rows in a separate
    # list instead of rebinding the int parameter `clusters` (the original
    # shadowed it, which made the function confusing to read).
    session.query(Cluster).delete()
    cluster_rows = []
    for features in random_features:
        cluster = Cluster()
        for index, _, value in features:
            setattr(cluster, "feat_{}".format(index), value)
        session.add(cluster)
        cluster_rows.append(cluster)
    session.commit()

    iterations = 0
    feature_labels = ["feat_{}".format(slot)
                      for slot in xrange(FEATURE_SLOTS)]
    while True:
        # Assignment step: attach each feature row to the nearest centroid,
        # with the L1 (Manhattan) distance computed inside SQL.
        for features in session.query(Features).all():
            dist_func = func.abs(Cluster.feat_0 - features.feat_0)

            # TODO: Use euclidean distance?
            for slot in xrange(FEATURE_SLOTS - 1):
                col_name = feature_labels[slot + 1]
                dist_func += func.abs(getattr(Cluster, col_name) -
                                      getattr(features, col_name))

            cluster = session.query(Cluster.id) \
                .order_by(dist_func).limit(1).one()
            features.cluster = cluster[0]

        session.flush()

        # Update step: move each centroid to the mean of its members.
        for cluster in cluster_rows:
            query = []
            for slot in xrange(FEATURE_SLOTS):
                col_name = feature_labels[slot]
                query.append(func.avg(getattr(Features, col_name)))

            averages = session.query(*query).filter(
                Features.cluster == cluster.id).one()

            for index, value in enumerate(averages):
                col_name = feature_labels[index]
                setattr(cluster, col_name, value)

        session.flush()
        iterations += 1

        # Convergence test: every centroid unchanged since the last pass.
        equal = True
        for cluster in cluster_rows:
            for slot in xrange(FEATURE_SLOTS):
                col_name = feature_labels[slot]
                prv_name = "previous_feat_{}".format(slot)
                current = getattr(cluster, col_name)
                if current != getattr(cluster, prv_name):
                    equal = False
                    break
            if equal is False:
                break

        # Remember the current centroids for the next convergence test.
        for cluster in cluster_rows:
            for slot in xrange(FEATURE_SLOTS):
                col_name = feature_labels[slot]
                prv_name = "previous_feat_{}".format(slot)
                setattr(cluster, prv_name, getattr(cluster, col_name))

        session.flush()
        if iterations == max_iterations or equal is True:
            break

    return iterations
Esempio n. 26
0
 def lng_dist(cls, lng, n):
     """Return a filter criterion: absolute longitude distance below *n*."""
     delta = func.abs(cls.lng - lng)
     return delta < n
Esempio n. 27
0

# CUSTOM SELECT FOR ACTIVITY CLIFFS THAT WILL BE MAPPED AGAINST A CLASS
# Self-join activities (A1, A2) within an assay and pair each side with its
# compound's fingerprint row (FP1, FP2).
A1, A2 = metadata.tables['chembl.activities'].alias(
), metadata.tables['chembl.activities'].alias()
FP1, FP2 = metadata.tables['chembl.compound_rdfps'].alias(
), metadata.tables['chembl.compound_rdfps'].alias()

join = A1.join(A2, A2.c.assay_id == A1.c.assay_id).join(
    FP1, FP1.c.molregno == A1.c.molregno).join(FP2,
                                               FP2.c.molregno == A2.c.molregno)

# Structural dissimilarity (1 - Tanimoto) and activity difference in log space.
delta_tanimoto = 1 - func.rdkit.tanimoto_sml(FP1.c.circular_fp,
                                             FP2.c.circular_fp)
delta_activity = func.abs(
    func.log(A1.c.standard_value) -
    func.log(A2.c.standard_value)).label('delta_activity')
# SALI: structure-activity landscape index for the pair.
sali = (delta_activity / delta_tanimoto).label('sali')

whereclause = and_(A1.c.activity_id != A2.c.activity_id,
                   A1.c.molregno < A2.c.molregno,
                   A1.c.standard_type == A2.c.standard_type,
                   A1.c.standard_value > 0, A2.c.standard_value > 0,
                   A1.c.standard_flag > 0, A2.c.standard_flag > 0,
                   A1.c.standard_units == 'nM', A2.c.standard_units == 'nM',
                   # BUG FIX: the original repeated A1.c.relation twice;
                   # both sides of the pair must have an exact ('=') relation,
                   # matching the symmetric A1/A2 pattern of every other term.
                   A1.c.relation == '=', A2.c.relation == '=', sali >= 1.5)

activity_cliffs = select([
    A1.c.assay_id,
    A1.c.activity_id.label('activity_bgn_id'),
    A2.c.activity_id.label('activity_end_id'),
Esempio n. 28
0
def get_controversial_stories(timediff = timedelta(hours = 48), contro_threshold = 5, contro_min = 10):
    """
    Get a story list that is suitable for display as "controversial".
    @param timediff: timedelta representing controversial eligibility age
    @param contro_threshold: the maximum difference between up and down votes to be considered "controversial"
    @param contro_min: the minimum number of points required for something to appear as controversial
    @return: SA query ready to get list of stuff
    """
    # Build the query step by step instead of one long chain.
    cutoff = now_in_utc() - timediff
    tally_gap = func.abs(Submission.upvote_tally - Submission.downvote_tally)
    query = dbsession.query(Submission).options(joinedload('submitter'))
    query = query.filter(Submission.deleted == False)
    query = query.filter(Submission.added_on > cutoff)
    query = query.filter(tally_gap <= contro_threshold)
    query = query.filter(Submission.total_vote_tally > contro_min)
    return query.order_by(Submission.added_on.desc())