def _build_subquery(self):
    if not self.kwargs.get("tad_set_uuid"):
        return None
    set_pk = TadSet.objects.get(sodar_uuid=self.kwargs["tad_set_uuid"]).pk
    term_interval_center = (
        TadBoundaryInterval.sa.start
        + (TadBoundaryInterval.sa.end - TadBoundaryInterval.sa.start + 1) / 2
    )
    term_overlaps = and_(
        term_interval_center >= StructuralVariant.sa.start,
        term_interval_center <= StructuralVariant.sa.end,
    )
    fields = [
        func.coalesce(
            func.min(
                case(
                    [(term_overlaps, 0)],
                    else_=func.least(
                        func.abs(term_interval_center - StructuralVariant.sa.start),
                        func.abs(term_interval_center - StructuralVariant.sa.end),
                    ),
                )
            ),
            -1,
        ).label("distance_to_center")
    ]
    return (
        select(fields)
        .select_from(TadBoundaryInterval.sa)
        .where(
            and_(
                TadBoundaryInterval.sa.tad_set_id == set_pk,
                overlaps(TadBoundaryInterval, StructuralVariant),
            )
        )
        .alias("subquery_tad_boundaries_inner")
        .lateral("subquery_tad_boundaries_outer")
    )
def order_distance(query, latlon):
    # Very simplistic
    latitude, longitude = latlon
    lat = func.abs(latitude - Address.latitude)
    lon = func.abs(longitude - Address.longitude)
    scale = Address.scale(latitude)
    return query \
        .filter(Address.latitude != None) \
        .filter(Address.longitude != None) \
        .order_by(lat + lon * scale)
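# Hedged usage sketch (not part of the original snippet): order_distance above takes an
# existing query over Address rows plus a (latitude, longitude) tuple, so a caller might
# look roughly like this, assuming a standard SQLAlchemy session and the same Address model.
def nearest_addresses(session, latlon, limit=10):
    query = session.query(Address)
    # Returns the `limit` addresses with the smallest crude lat/lon distance.
    return order_distance(query, latlon).limit(limit).all()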
def load_similar(self, delta=0.05):
    from sqlalchemy.sql import func

    ingredients = (
        Ingredient.query.filter(
            and_(
                func.abs(Ingredient.sugar - self.sugar) <= delta,
                func.abs(Ingredient.protein - self.protein) <= delta,
                func.abs(Ingredient.fat - self.fat) <= delta,
            )
        )
        .filter(Ingredient.is_current_user_author)
        .all()
    )
    return ingredients
def get_analytics(cls, ticker_name, date_from, date_to):
    PricesA = aliased(cls)
    PricesB = aliased(cls)
    result = db.session.query(
        label('open', func.abs(PricesA.open - PricesB.open)),
        label('close', func.abs(PricesA.close - PricesB.close)),
        label('low', func.abs(PricesA.low - PricesB.low)),
        label('high', func.abs(PricesA.high - PricesB.high)),
    ).join(
        Ticker
    ).join(
        PricesB, and_(PricesB.ticker_id == Ticker.id)
    ).filter(
        Ticker.name == ticker_name,
        PricesA.date == date_from,
        PricesB.date == date_to,
    )
    return result
def max_distance(orm, query, latitude, longitude, default=1):
    lat = func.abs(latitude - Address.latitude)
    lon = func.abs(longitude - Address.longitude)
    scale = Address.scale(latitude)
    func_max = func.max
    if orm.connection().dialect.name == "mysql":
        func_max = func.greatest
    query = query.add_columns(func_max(lat, lon * scale).label("dist"))
    sq = query.subquery()
    result = orm.query(sq).order_by("dist desc").first()
    if result:
        return result.dist
    return default
def select(self, unit):
    target_features = unit.features
    dist_func = func.abs(Features.feat_0 - target_features.feat_0)
    for slot in range(FEATURE_SLOTS - 1):
        col_name = "feat_{}".format(slot + 1)
        dist_func += func.abs(getattr(Features, col_name) -
                              getattr(target_features, col_name))
    pk = self.session.query(Features.unit_id) \
        .filter(Features.mediafile_id.in_(self.mediafiles)) \
        .order_by(dist_func).limit(1).one()
    return self.session.query(Unit).get(pk)
def nearest_height(cls):
    other = aliased(cls)
    time_deltas = sa.select([
        cls.id.label('this_id'),
        other.height.label('other_height'),
        func.abs(func.timediff(other.time, cls.time)).label('time_delta'),
    ]).where(other.height != None).where(
        other.person_id == cls.person_id).alias('time_deltas')

    min_time_delta = sa.select([
        func.min(time_deltas.c.time_delta).label('min_time_delta'),
        time_deltas.c.this_id.label('min_id'),
    ]).group_by(time_deltas.c.this_id).alias('min_time_deltas')

    avg_other_height = sa.select([
        time_deltas.c.this_id.label('avg_id'),
        func.avg(time_deltas.c.other_height).label('avg_height'),
    ]).where(time_deltas.c.this_id == min_time_delta.c.min_id).where(
        time_deltas.c.time_delta == min_time_delta.c.min_time_delta).group_by(
            time_deltas.c.this_id).alias('avg_other_height')

    nearest_height = sa.select([
        case([(cls.height != None, cls.height)],
             else_=avg_other_height.c.avg_height).label('nearest_height'),
    ]).where(avg_other_height.c.avg_id == cls.id).as_scalar()
    return nearest_height
def _read_data(self, s, start, finish):
    midpt = start + dt.timedelta(hours=12)
    m0 = s.query(func.avg(func.abs(StatisticJournalInteger.time - midpt))).join(StatisticName). \
        filter(StatisticName.name == HEART_RATE,
               StatisticName.owner == self.owner_in,
               StatisticJournalInteger.time < finish,
               StatisticJournalInteger.time >= start,
               StatisticJournalInteger.value > 0).scalar()
    log.debug('M0: %s' % m0)
    if m0 and abs(m0 - QUARTER_DAY) < 0.25 * QUARTER_DAY:  # not evenly sampled
        all_hr = [row[0] for row in
                  s.query(StatisticJournalInteger.value).join(StatisticName).
                  filter(StatisticName.name == HEART_RATE,
                         StatisticName.owner == self.owner_in,
                         StatisticJournalInteger.time < finish,
                         StatisticJournalInteger.time >= start,
                         StatisticJournalInteger.value > 0).all()]
        n = len(all_hr)
        rest_heart_rate = sorted(all_hr)[n // 10]  # 10th percentile
    else:
        log.info(f'Insufficient coverage for {REST_HR} for {start} - {finish}')
        rest_heart_rate = None
    daily_steps = s.query(func.sum(StatisticJournalInteger.value)).join(StatisticName). \
        filter(StatisticName.name == STEPS,
               StatisticName.owner == self.owner_in,
               StatisticJournalInteger.time < finish,
               StatisticJournalInteger.time >= start).scalar()
    input_source_ids = [row[0] for row in
                        s.query(MonitorJournal.id).
                        filter(MonitorJournal.start <= finish,
                               MonitorJournal.finish >= start).all()]
    return input_source_ids, (rest_heart_rate, daily_steps)
def _read_data(self, s, start, finish):
    rest_heart_rate = []
    midpt = start + 0.5 * (finish - start)
    m0 = s.query(func.avg(func.abs(StatisticJournalInteger.time - midpt))).join(StatisticName). \
        filter(StatisticName.name == HEART_RATE,
               StatisticName.owner == self.owner_in,
               StatisticJournalInteger.time < finish,
               StatisticJournalInteger.time >= start,
               StatisticJournalInteger.value > 0).scalar()
    log.debug('M0: %s' % m0)
    if m0 and abs(m0 - QUARTER_DAY) < 0.25 * QUARTER_DAY:  # not evenly sampled
        all_hr = sorted([row[0] for row in
                         s.query(StatisticJournalInteger.value).join(StatisticName).
                         filter(StatisticName.name == HEART_RATE,
                                StatisticName.owner == self.owner_in,
                                StatisticJournalInteger.time < finish,
                                StatisticJournalInteger.time >= start,
                                StatisticJournalInteger.value > 0).all()])
        n = len(all_hr)
        if n > 10:
            rest_heart_rate = [(name, all_hr[n // pc]) for name, pc in REST_HR_PERCENTILES.items()]
    if not rest_heart_rate:
        log.info(f'Insufficient coverage for {REST_HR} for {start} - {finish}')
    daily_steps = s.query(func.sum(StatisticJournalInteger.value)).join(StatisticName). \
        filter(StatisticName.name == STEPS,
               StatisticName.owner == self.owner_in,
               StatisticJournalInteger.time < finish,
               StatisticJournalInteger.time >= start).scalar()
    input_source_ids = [row[0] for row in
                        s.query(MonitorJournal.id).
                        filter(MonitorJournal.start < finish,
                               MonitorJournal.finish >= start).all()]
    return input_source_ids, (rest_heart_rate, daily_steps)
def getRate(querydate):
    if type(querydate) is not datetime:
        querydate = parser.parse(querydate)
    querydate = int(querydate.strftime("%s"))
    print(querydate)
    closest_price = db.session \
        .query(models.Rates) \
        .order_by(func.abs(querydate - models.Rates.date)) \
        .first()
    return closest_price.rate
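# Hedged usage sketch (assumption, not from the original project): getRate above converts its
# argument to a unix timestamp before comparing against models.Rates.date, so it should accept
# either a datetime or any string that dateutil's parser understands, e.g.:
#
#     rate_now = getRate(datetime.now())
#     rate_then = getRate("2020-01-01 12:00:00")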
def main():
    session = users.connect_db()
    user_id = request_data()
    user = session.query(users.User).filter(users.User.id == user_id).first()
    if user:
        nearest_birth_athelete = session.query(Athelete)\
            .filter(Athelete.birthdate.isnot(None))\
            .order_by(func.abs(func.julianday(Athelete.birthdate) -
                               func.julianday(user.birthdate))).first()
        nearest_height_athelete = session.query(Athelete)\
            .filter(Athelete.height.isnot(None))\
            .order_by(func.abs(Athelete.height - user.height)).first()
        print(
            f'Closest athlete by birthdate: id: {nearest_birth_athelete.id}, '
            f'name: {nearest_birth_athelete.name}, '
            f'birthdate: {nearest_birth_athelete.birthdate}'
        )
        print(
            f'Closest athlete by height: id: {nearest_height_athelete.id}, '
            f'name: {nearest_height_athelete.name}, '
            f'height: {nearest_height_athelete.height}'
        )
    else:
        print('No user found with that id')
def nearest_by_height_athelete(user_height, session):
    """
    The closest athlete is found by the DBMS: the record with the minimum
    absolute difference between the athlete's height and the user's height
    is selected.
    """
    query = session.query(Athelete.name, Athelete.height,
                          func.min(func.abs(Athelete.height - user_height))).first()
    return query
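# Minimal alternative sketch (an assumption, not part of the original code): the same
# "closest by height" lookup can also be expressed by ordering on the absolute difference
# and taking the first row, mirroring the order_by pattern used in the other examples here.
def nearest_by_height_athelete_ordered(user_height, session):
    return (session.query(Athelete)
            .filter(Athelete.height.isnot(None))
            .order_by(func.abs(Athelete.height - user_height))
            .first())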
def match(self, session=None):
    """Match to a pair sequence"""
    cls = self.__class__
    session = session or object_session(self)
    match_query = session.query(cls).filter_by(**self.sequence_attributes())
    match_query = match_query.filter(cls.id != self.id, cls.loop != self.loop)
    matches = match_query.join(Dataset).order_by(
        func.abs(Dataset.sequence - self.dataset.sequence)).all()
    if not len(matches):
        return None
    closest = min(abs(s.dataset.sequence - self.dataset.sequence) for s in matches)
    # Materialize the filtered matches so indexing also works on Python 3,
    # where filter() returns an iterator.
    matches = [s for s in matches
               if abs(s.dataset.sequence - self.dataset.sequence) <= closest]
    return matches[0].dataset
def get_controversial_stories(timediff=timedelta(hours=48),
                              contro_threshold=5,
                              contro_min=10):
    """
    Get a story list that is suitable for display as "controversial".

    @param timediff: timedelta representing controversial eligibility age
    @param contro_threshold: the maximum difference between up and down votes
        to be considered "controversial"
    @param contro_min: the minimum number of points required for something to
        appear as controversial
    @return: SA query ready to get list of stuff
    """
    stories = (dbsession.query(Submission)
               .options(joinedload('submitter'))
               .filter(Submission.deleted == False)
               .filter(Submission.added_on > (general.now_in_utc() - timediff))
               .filter(func.abs(Submission.upvote_tally - Submission.downvote_tally) <= contro_threshold)
               .filter(Submission.total_vote_tally > contro_min)
               .order_by(Submission.added_on.desc()))
    return stories
def athlets_by_height(sess, height):
    """
    Return all athletes (as Athlete instances) closest by 'height'.
    """
    athlete_height = \
        sess.query(Athlete.height)\
            .filter(Athlete.height.isnot(None))\
            .order_by(func.abs(Athlete.height - height))\
            .limit(1).first()
    if athlete_height:
        return sess.query(Athlete)\
            .filter(Athlete.height.in_(athlete_height))\
            .all()
    else:
        return None
def matchCalcMoney(self, u, dealerid):
    udata = self.session.query(UserData).filter(
        UserData.userid == u.userid).first()
    if udata is None:
        self.errorReturn(GLBConfig.API_ERROR, 'Target amount is not set.')
    self.matchresult(u, udata, SysUtil.getYesterday())
    self.matchresult(u, udata, SysUtil.getToday())
    ct = self.session.query(func.count('*')).\
        filter(MatchData.userid == u.userid).\
        filter(MatchData.date == SysUtil.getToday()).\
        filter(func.abs(MatchData.ResultMoney) < 0.001).scalar()
    if datetime.datetime.now().time() < datetime.time(19, 0, 0):
        if ct != 0:
            self.errorReturn(GLBConfig.API_ERROR, 'Some match results are not yet available.')
    self.getMatch(u, udata, dealerid)
def athlets_by_birthdate(sess, birthdate):
    """
    Return all athletes (as Athlete instances) closest by 'birthdate'.
    """
    # func.strftime('%s', ...) converts to a unix timestamp; this assumes an SQLite backend.
    athlet_birthdate = \
        sess.query(Athlete.birthdate)\
            .order_by(func.abs(func.strftime('%s', Athlete.birthdate) -
                               func.strftime('%s', birthdate)))\
            .limit(1).first()
    if athlet_birthdate:
        return sess.query(Athlete)\
            .filter(Athlete.birthdate.in_(athlet_birthdate))\
            .all()
    else:
        return None
def nearest_height(self):
    if self.height is not None:
        return self.height
    other = aliased(HeightWeight)
    time_delta = func.abs(func.timediff(other.time, self.time))
    min_time_delta_query = object_session(self).query(
        func.min(time_delta).label('min_time_delta')).filter(
            other.height != None,
            other.person_id == self.person_id)
    min_time_delta = min_time_delta_query.one().min_time_delta
    nearest_height_query = object_session(self).query(
        func.avg(other.height).label('height'),
    ).filter(
        other.height != None,
        time_delta == min_time_delta,
        other.person_id == self.person_id)
    return nearest_height_query.one().height
def matchCalcMoney(self, u, udata, req_para):
    self.matchresult(u, udata, SysUtil.getLast2())
    self.matchresult(u, udata, SysUtil.getYesterday())
    self.matchresult(u, udata, SysUtil.getToday())
    ct = self.session.query(func.count('*')).\
        filter(MatchData.userid == u.userid).\
        filter(MatchData.date == SysUtil.getToday()).\
        filter(func.abs(MatchData.ResultMoney) < 0.001).scalar()
    if datetime.datetime.now().time() < datetime.time(19, 0, 0):
        if ct != 0:
            self.errorReturn(GLBConfig.API_ERROR, 'Some match results are not yet available.')
    self.session.commit()
    self.session.query(MatchData).\
        filter(MatchData.userid == u.userid).\
        filter(MatchData.date == SysUtil.getTomorrow()).delete()
    self.session.flush()
    if u.accounttype == GLBConfig.ATYPE_GROUP:
        self.getMatchGroup(u, udata)
    elif u.accounttype == GLBConfig.ATYPE_PERSION:
        if udata.mode == GLBConfig.MODE_A:
            if 'dealerid' in req_para.keys():
                self.getMatchModeA(u, udata, req_para['dealerid'])
            if 'matchA' in req_para.keys():
                self.getMatchModeASelfChoice(u, udata,
                                             req_para['matchA'], req_para['AResult'],
                                             req_para['matchB'], req_para['BResult'])
        if udata.mode == GLBConfig.MODE_B:
            self.getMatchModeB(u, udata)
        if udata.mode == GLBConfig.MODE_C:
            self.getMatchModeC(u, udata, req_para['matchid'])
        if udata.mode == GLBConfig.MODE_D:
            self.getMatchModeD(u, udata, req_para['matchid'])
def updateAllocation(self, settings, question_cap=DEFAULT_QUESTION_CAP):
    # Get all existing allocations from the DB and their questions
    allocsByType = dict()
    hist_sel = float(settings.get('hist_sel', '0'))
    if hist_sel > 0.001:
        allocsByType['historical'] = []
        # Only get half the question cap if there's not much chance of the questions being used
        if hist_sel < 0.5 and 'question_cap_historical' not in settings:
            settings['question_cap_historical'] = int(settings.get('question_cap', DEFAULT_QUESTION_CAP)) / 2
    if hist_sel < 0.999:
        # NB: Need to add rows for each distinct question type, otherwise won't try and assign them
        allocsByType['regular'] = []
        allocsByType['template'] = []

    # Fetch all existing allocations, divide by allocType
    for (dbAlloc, dbQn) in (Session.query(db.Allocation, db.Question)
                            .join(db.Question)
                            .filter(db.Allocation.studentId == self.student.studentId)
                            .filter(db.Allocation.active == True)
                            .filter(db.Allocation.lectureId == self.dbLec.lectureId)
                            .order_by(db.Allocation.allocationId)):
        if not (dbQn.active) or (dbAlloc.allocationTime < dbQn.lastUpdate):
            # Question has been removed or is stale
            dbAlloc.active = False
        else:
            # Still around, so save it
            if (dbAlloc.allocType or dbQn.defAllocType) in allocsByType:
                # NB: If hist_sel has changed, we might not want some types any more
                allocsByType[dbAlloc.allocType or dbQn.defAllocType].append(dict(alloc=dbAlloc, question=dbQn))

    # Each question type should have at most question_cap questions
    for (allocType, allocs) in allocsByType.items():
        questionCap = int(settings.get('question_cap_' + allocType,
                                       settings.get('question_cap', DEFAULT_QUESTION_CAP)))

        # If there's too many allocs, throw some away
        for i in sorted(random.sample(xrange(len(allocs)), max(len(allocs) - questionCap, 0)), reverse=True):
            allocs[i]['alloc'].active = False
            del allocs[i]

        # If there's questions to spare, and requested to do so, throw away oldest questions
        if len(allocs) == questionCap and self.reAllocQuestions:
            for i in reversed(xrange(int(questionCap * float(settings.get('allocation_realloc_perc', 20)) / 100))):
                allocs[i]['alloc'].active = False
                del allocs[i]

        # Assign required questions randomly
        if len(allocs) < questionCap:
            query = Session.query(db.Question).filter_by(
                qnType='tw_questiontemplate' if allocType == 'template' else 'tw_latexquestion'
            ).filter_by(active=True)
            if allocType == 'historical':
                # Get questions from lectures "before" the current one
                targetQuestions = (Session.query(db.LectureQuestion.questionId)
                                   .join(db.Lecture)
                                   .filter(db.Lecture.plonePath.startswith(re.sub(r'/[^/]+/?$', '/', self.dbLec.plonePath)))
                                   .filter(db.Lecture.plonePath < self.dbLec.plonePath)
                                   .subquery())
                query = query.filter(db.Question.questionId.in_(targetQuestions))
            else:
                # Get questions from current lecture
                query = query.filter(db.Question.lectures.contains(self.dbLec))

            # Filter out anything already allocated
            allocIds = [a['alloc'].questionId for a in allocs]
            if len(allocIds) > 0:
                query = query.filter(~db.Question.questionId.in_(allocIds))

            # Give a target difficulty
            if self.targetDifficulty is not None:
                query = query.order_by(func.abs(
                    round(self.targetDifficulty * 50) -
                    func.round((50.0 * db.Question.timesCorrect) / db.Question.timesAnswered)))

            for dbQn in query.order_by(func.random()).limit(max(questionCap - len(allocs), 0)):
                dbAlloc = db.Allocation(
                    studentId=self.student.studentId,
                    questionId=dbQn.questionId,
                    lectureId=self.dbLec.lectureId,
                    allocationTime=datetime.datetime.utcnow(),
                    allocType='historical' if allocType == 'historical' else None,
                )
                Session.add(dbAlloc)
                allocs.append(dict(alloc=dbAlloc, question=dbQn, new=True))

    Session.flush()

    for allocType, allocs in allocsByType.items():
        for a in allocs:
            yield (
                self._questionUrl(a['alloc'].publicId),
                allocType,
                a['question'],
            )
def update_device_stats_jumps(session, date, logger=None):
    """Update device stats jumps."""
    if logger is None:
        logger = app.logger

    (start, end) = date_to_timestamps(date)

    # speed limits in m/s (values above indicate an unplausible position / jump)
    max_horizontal_speed = 1000
    max_vertical_speed = 100
    max_jumps = 10  # threshold for an 'ambiguous' device

    # find consecutive positions for a device
    sq = (session.query(
        AircraftBeacon.device_id,
        AircraftBeacon.timestamp,
        func.lead(AircraftBeacon.timestamp).over(
            partition_by=AircraftBeacon.device_id,
            order_by=AircraftBeacon.timestamp).label("timestamp_next"),
        AircraftBeacon.location_wkt,
        func.lead(AircraftBeacon.location_wkt).over(
            partition_by=AircraftBeacon.device_id,
            order_by=AircraftBeacon.timestamp).label("location_next"),
        AircraftBeacon.altitude,
        func.lead(AircraftBeacon.altitude).over(
            partition_by=AircraftBeacon.device_id,
            order_by=AircraftBeacon.timestamp).label("altitude_next"),
    ).filter(
        and_(between(AircraftBeacon.timestamp, start, end),
             AircraftBeacon.error_count == 0)).subquery())

    # calc vertical and horizontal speed between points
    sq2 = (session.query(
        sq.c.device_id,
        (func.st_distancesphere(sq.c.location_next, sq.c.location) /
         (func.extract("epoch", sq.c.timestamp_next) -
          func.extract("epoch", sq.c.timestamp))).label("horizontal_speed"),
        ((sq.c.altitude_next - sq.c.altitude) /
         (func.extract("epoch", sq.c.timestamp_next) -
          func.extract("epoch", sq.c.timestamp))).label("vertical_speed"),
    ).filter(
        and_(sq.c.timestamp != null(),
             sq.c.timestamp_next != null(),
             sq.c.timestamp < sq.c.timestamp_next)).subquery())

    # ... and find and count 'jumps'
    sq3 = (session.query(
        sq2.c.device_id,
        func.sum(
            case([(or_(func.abs(sq2.c.horizontal_speed) > max_horizontal_speed,
                       func.abs(sq2.c.vertical_speed) > max_vertical_speed), 1)],
                 else_=0)).label("jumps")).group_by(sq2.c.device_id).subquery())

    upd = update(DeviceStats).where(
        and_(DeviceStats.date == date,
             DeviceStats.device_id == sq3.c.device_id)).values({
                 "ambiguous": sq3.c.jumps > max_jumps,
                 "jumps": sq3.c.jumps
             })

    result = session.execute(upd)
    update_counter = result.rowcount
    session.commit()
    logger.warn("Updated {} DeviceStats jumps".format(update_counter))

    return "DeviceStats jumps for {}: {} updated".format(date, update_counter)
def updateAllocation(self, settings, question_cap=DEFAULT_QUESTION_CAP):
    # Get all existing allocations from the DB and their questions
    allocsByType = dict()
    hist_sel = float(settings.get('hist_sel', '0'))
    if hist_sel > 0.001:
        allocsByType['historical'] = []
        # Only get half the question cap if there's not much chance of the questions being used
        if hist_sel < 0.5 and 'question_cap_historical' not in settings:
            settings['question_cap_historical'] = int(settings.get('question_cap', DEFAULT_QUESTION_CAP)) / 2
    if hist_sel < 0.999:
        # NB: Need to add rows for each distinct question type, otherwise won't try and assign them
        allocsByType['regular'] = []
        allocsByType['template'] = []

    # Fetch all existing allocations, divide by allocType
    for (dbAlloc, dbQn) in (Session.query(db.Allocation, db.Question)
                            .join(db.Question)
                            .filter(db.Allocation.studentId == self.student.studentId)
                            .filter(db.Allocation.active == True)
                            .filter(db.Allocation.lectureId == self.dbLec.lectureId)):
        if not (dbQn.active) or (dbAlloc.allocationTime < dbQn.lastUpdate):
            # Question has been removed or is stale
            dbAlloc.active = False
        else:
            # Still around, so save it
            if (dbAlloc.allocType or dbQn.defAllocType) in allocsByType:
                # NB: If hist_sel has changed, we might not want some types any more
                allocsByType[dbAlloc.allocType or dbQn.defAllocType].append(dict(alloc=dbAlloc, question=dbQn))

    # Each question type should have at most question_cap questions
    for (allocType, allocs) in allocsByType.items():
        questionCap = int(settings.get('question_cap_' + allocType,
                                       settings.get('question_cap', DEFAULT_QUESTION_CAP)))

        # If there's too many allocs, throw some away
        for i in sorted(random.sample(xrange(len(allocs)), max(len(allocs) - questionCap, 0)), reverse=True):
            allocs[i]['alloc'].active = False
            del allocs[i]

        # If there's questions to spare, and requested to do so, reallocate questions
        if len(allocs) == questionCap and self.reAllocQuestions:
            if self.targetDifficulty is None:
                raise ValueError("Must have a target difficulty to know what to remove")

            # Make ranking how likely questions are, based on targetDifficulty
            suitability = []
            for a in allocs:
                if a['question'].timesAnswered == 0:
                    # New questions should be added regardless
                    suitability.append(1)
                else:
                    suitability.append(1 - abs(
                        self.targetDifficulty -
                        float(a['question'].timesCorrect) / a['question'].timesAnswered))
            ranking = sorted(range(len(allocs)), key=lambda k: suitability[k])

            # Remove the least likely tenth
            for i in sorted(ranking[0:len(allocs) / 10 + 1], reverse=True):
                allocs[i]['alloc'].active = False
                del allocs[i]

        # Assign required questions randomly
        if len(allocs) < questionCap:
            query = Session.query(db.Question).filter_by(
                qnType='tw_questiontemplate' if allocType == 'template' else 'tw_latexquestion'
            ).filter_by(active=True)
            if allocType == 'historical':
                # Get questions from lectures "before" the current one
                targetQuestions = (Session.query(db.LectureQuestion.questionId)
                                   .join(db.Lecture)
                                   .filter(db.Lecture.plonePath.startswith(re.sub(r'/[^/]+/?$', '/', self.dbLec.plonePath)))
                                   .filter(db.Lecture.plonePath < self.dbLec.plonePath)
                                   .subquery())
                query = query.filter(db.Question.questionId.in_(targetQuestions))
            else:
                # Get questions from current lecture
                query = query.filter(db.Question.lectures.contains(self.dbLec))

            # Filter out anything already allocated
            allocIds = [a['alloc'].questionId for a in allocs]
            if len(allocIds) > 0:
                query = query.filter(~db.Question.questionId.in_(allocIds))

            # Give a target difficulty
            if self.targetDifficulty is not None:
                query = query.order_by(func.abs(
                    round(self.targetDifficulty * 50) -
                    func.round((50.0 * db.Question.timesCorrect) / db.Question.timesAnswered)))

            for dbQn in query.order_by(func.random()).limit(max(questionCap - len(allocs), 0)):
                dbAlloc = db.Allocation(
                    studentId=self.student.studentId,
                    questionId=dbQn.questionId,
                    lectureId=self.dbLec.lectureId,
                    allocationTime=datetime.datetime.utcnow(),
                    allocType='historical' if allocType == 'historical' else None,
                )
                Session.add(dbAlloc)
                allocs.append(dict(alloc=dbAlloc, question=dbQn, new=True))

    Session.flush()

    for allocType, allocs in allocsByType.items():
        for a in allocs:
            yield (
                self._questionUrl(a['alloc'].publicId),
                allocType,
                a['question'],
            )
def lat_dist(cls, lat, n):
    return func.abs(cls.lat - lat) < n
def cluster_units(session, clusters, max_iterations=10000):
    """Cluster all units. Returns the number of iterations

    Kwargs:
        session: Sqlalchemy database session
        clusters (int): Number of clusters
    """
    random_pks = random.sample(xrange(session.query(Unit).count() - 2), clusters)
    random_features = [session.query(Unit).get(pk + 1).features
                       for pk in random_pks]

    # initialize centers
    session.query(Cluster).delete()
    clusters = []
    for features in random_features:
        cluster = Cluster()
        for index, _, value in features:
            setattr(cluster, "feat_{}".format(index), value)
        session.add(cluster)
        clusters.append(cluster)
    session.commit()

    iterations = 0
    feature_labels = ["feat_{}".format(slot) for slot in xrange(FEATURE_SLOTS)]
    while True:
        # Assign each unit to nearest cluster
        for features in session.query(Features).all():
            dist_func = func.abs(Cluster.feat_0 - features.feat_0)
            # TODO: Use euclidean distance?
            for slot in xrange(FEATURE_SLOTS - 1):
                col_name = feature_labels[slot + 1]
                dist_func += func.abs(getattr(Cluster, col_name) -
                                      getattr(features, col_name))
            cluster = session.query(Cluster.id) \
                .order_by(dist_func).limit(1).one()
            features.cluster = cluster[0]
        session.flush()

        # Calculate new centroids for each cluster
        for cluster in clusters:
            query = []
            for slot in xrange(FEATURE_SLOTS):
                col_name = feature_labels[slot]
                query.append(func.avg(getattr(Features, col_name)))
            averages = session.query(*query).filter(
                Features.cluster == cluster.id).one()
            for index, value in enumerate(averages):
                col_name = feature_labels[index]
                setattr(cluster, col_name, value)
        session.flush()
        iterations += 1

        # Check if centroids have converged
        equal = True
        for cluster in clusters:
            for slot in xrange(FEATURE_SLOTS):
                col_name = feature_labels[slot]
                prv_name = "previous_feat_{}".format(slot)
                current = getattr(cluster, col_name)
                if current != getattr(cluster, prv_name):
                    equal = False
                    break
            if equal is False:
                break

        # Update previous values
        for cluster in clusters:
            for slot in xrange(FEATURE_SLOTS):
                col_name = feature_labels[slot]
                prv_name = "previous_feat_{}".format(slot)
                setattr(cluster, prv_name, getattr(cluster, col_name))
        session.flush()

        if iterations == max_iterations or equal is True:
            break

    return iterations
def lng_dist(cls, lng, n):
    return func.abs(cls.lng - lng) < n
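# Hedged usage sketch (assumption): lat_dist and lng_dist above look like class-level helpers
# (e.g. hybrid methods) on a model with lat/lng columns. A crude "within n degrees" prefilter
# could combine them like this; the Place model name and the session are placeholders.
def places_near(session, lat, lng, n=0.5):
    return (session.query(Place)
            .filter(Place.lat_dist(lat, n), Place.lng_dist(lng, n))
            .all())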
# CUSTOM SELECT FOR ACTIVITY CLIFFS THAT WILL BE MAPPED AGAINST A CLASS
A1, A2 = (metadata.tables['chembl.activities'].alias(),
          metadata.tables['chembl.activities'].alias())
FP1, FP2 = (metadata.tables['chembl.compound_rdfps'].alias(),
            metadata.tables['chembl.compound_rdfps'].alias())

join = (A1.join(A2, A2.c.assay_id == A1.c.assay_id)
          .join(FP1, FP1.c.molregno == A1.c.molregno)
          .join(FP2, FP2.c.molregno == A2.c.molregno))

delta_tanimoto = 1 - func.rdkit.tanimoto_sml(FP1.c.circular_fp, FP2.c.circular_fp)
delta_activity = func.abs(func.log(A1.c.standard_value) -
                          func.log(A2.c.standard_value)).label('delta_activity')
sali = (delta_activity / delta_tanimoto).label('sali')

whereclause = and_(A1.c.activity_id != A2.c.activity_id,
                   A1.c.molregno < A2.c.molregno,
                   A1.c.standard_type == A2.c.standard_type,
                   A1.c.standard_value > 0,
                   A2.c.standard_value > 0,
                   A1.c.standard_flag > 0,
                   A2.c.standard_flag > 0,
                   A1.c.standard_units == 'nM',
                   A2.c.standard_units == 'nM',
                   A1.c.relation == '=',
                   A2.c.relation == '=',
                   sali >= 1.5)

activity_cliffs = select([
    A1.c.assay_id,
    A1.c.activity_id.label('activity_bgn_id'),
    A2.c.activity_id.label('activity_end_id'),
def get_controversial_stories(timediff=timedelta(hours=48),
                              contro_threshold=5,
                              contro_min=10):
    """
    Get a story list that is suitable for display as "controversial".

    @param timediff: timedelta representing controversial eligibility age
    @param contro_threshold: the maximum difference between up and down votes
        to be considered "controversial"
    @param contro_min: the minimum number of points required for something to
        appear as controversial
    @return: SA query ready to get list of stuff
    """
    stories = (dbsession.query(Submission)
               .options(joinedload('submitter'))
               .filter(Submission.deleted == False)
               .filter(Submission.added_on > (now_in_utc() - timediff))
               .filter(func.abs(Submission.upvote_tally - Submission.downvote_tally) <= contro_threshold)
               .filter(Submission.total_vote_tally > contro_min)
               .order_by(Submission.added_on.desc()))
    return stories