def get_mis_subastas_disponibles(self, idUsuario):
    misOfertasMin = db.session.query(func.min(Pujas.precioPuja).label("miMinOferta"), Subastas.idSubasta).\
        join(Subastas, Subastas.idSubasta == Pujas.idSubasta).\
        join(Estado, Estado.idEstado == Subastas.idEstado).\
        join(Usuarios, Usuarios.idUsuario == Pujas.idUsuario).\
        filter(Pujas.idUsuario == idUsuario).\
        filter(Estado.codEstado == AdditionalConfig.ESTADO2).\
        group_by(Pujas.idSubasta, Subastas.idSubasta).all()
    ofertaMin = db.session.query(func.min(Pujas.precioPuja).label("ofertaMin"), Subastas.idSubasta).\
        join(Subastas, Subastas.idSubasta == Pujas.idSubasta).\
        join(Estado, Estado.idEstado == Subastas.idEstado).\
        filter(Estado.codEstado == AdditionalConfig.ESTADO2).\
        group_by(Pujas.idSubasta, Subastas.idSubasta).all()
    misSubastasP = db.session.query(Subastas.idSubasta, Usuarios.nombreUsuario,
                                    Usuarios.apellidoPatUsuario, Subastas.fechaSubasta,
                                    Estado.nombreEstado).\
        join(Usuarios, Usuarios.idUsuario == Subastas.idUsuario).\
        join(Estado, Estado.idEstado == Subastas.idEstado).\
        filter(Estado.codEstado == AdditionalConfig.ESTADO2).all()
    arr = []
    for data in misOfertasMin:
        dicc = {}
        miOfertaMinima = data[0]
        idSubasta = data[1]
        for ofertaMinima in ofertaMin:
            if ofertaMinima[1] == idSubasta:
                ofertaMinimaSubasta = ofertaMinima[0]
        for subasta in misSubastasP:
            if subasta[0] == idSubasta:
                nombreUsuario = subasta[1]
                apellidoPatUsuario = subasta[2]
                fechaSubasta = subasta[3].strftime("%Y-%m-%d %H:%M:%S")
                estadoSubasta = subasta[4]
                dicc = {"Subastas.idSubasta": idSubasta,
                        "Usuarios.nombreUsuario": nombreUsuario,
                        "Usuarios.apellidoPatUsuario": apellidoPatUsuario,
                        "Subastas.fechaSubasta": fechaSubasta,
                        "Pujas.miOferta": miOfertaMinima,
                        "Pujas.ofertaMinimaSubasta": ofertaMinimaSubasta,
                        "Estado.nombreEstado": estadoSubasta}
        arr.append(dicc)
    return arr
def post_process(cls, db, **kwargs):
    '''
    delete all 'departure_time' values that appear for the last stop time of a
    given trip (e.g., the trip ends there, so there isn't a further vehicle
    departure for that stop time / trip pair)...
    NOTE: we know this breaks the current GTFS spec, which states that departure &
          arrival times must both exist for every stop time.  Sadly, GTFS is wrong...
    '''
    log.debug('{0}.post_process'.format(cls.__name__))

    # remove the departure times at the end of a trip
    log.info("QUERY StopTime")
    sq = db.session.query(
        StopTime.trip_id,
        func.max(StopTime.stop_sequence).label('end_sequence'))
    sq = sq.group_by(StopTime.trip_id).subquery()
    q = db.session.query(StopTime)
    q = q.filter_by(trip_id=sq.c.trip_id, stop_sequence=sq.c.end_sequence)
    for r in q:
        r.departure_time = None

    # remove the arrival times at the start of a trip
    log.info("QUERY StopTime")
    sq = db.session.query(
        StopTime.trip_id,
        func.min(StopTime.stop_sequence).label('start_sequence'))
    sq = sq.group_by(StopTime.trip_id).subquery()
    q = db.session.query(StopTime)
    q = q.filter_by(trip_id=sq.c.trip_id, stop_sequence=sq.c.start_sequence)
    for r in q:
        r.arrival_time = None

    db.session.commit()
def set_leg_waypoints():
    t = time.time()
    dd = db.metadata.tables["device_data"]
    legs = db.metadata.tables["legs"]
    glue = db.metadata.tables["leg_waypoints"]

    legpoints = select(
        [legs.c.id, dd.c.waypoint_id, dd.c.time, dd.c.snapping_time],
        from_obj=dd.join(legs, and_(
            dd.c.device_id == legs.c.device_id,
            dd.c.time.between(legs.c.time_start, legs.c.time_end)))) \
        .alias("legpoints")

    done = select([glue.c.leg], distinct=True)

    nounsnapped = select(
        [legpoints.c.id],
        legpoints.c.id.notin_(done),
        group_by=legpoints.c.id,
        having=func.bool_and(legpoints.c.snapping_time.isnot(None)))

    newitems = select(
        [legpoints.c.id, legpoints.c.waypoint_id, func.min(legpoints.c.time)],
        legpoints.c.id.in_(nounsnapped),
        group_by=[legpoints.c.id, legpoints.c.waypoint_id]).alias("newitems")

    ins = glue.insert().from_select(["leg", "waypoint", "first"], newitems)
    rowcount = db.engine.execute(ins).rowcount

    print "set_leg_waypoints on %d rows in %.2g seconds" % (
        rowcount, time.time() - t)
def get_detalle_subasta1(self, idSubasta):
    filtro = db.session.query(func.min(Productos_Supermercados.precioOnline).label("precioMin"),
                              Productos.idProducto).\
        join(Productos, Productos.idProducto == Productos_Supermercados.idProducto).\
        join(Subastas_Productos, Subastas_Productos.idProducto == Productos.idProducto).\
        filter(Subastas_Productos.idSubasta == idSubasta).group_by(Productos.idProducto).all()
    filtro1 = db.session.query(Subastas_Productos, Productos).\
        join(Productos, Productos.idProducto == Subastas_Productos.idProducto).\
        filter(Subastas_Productos.idSubasta == idSubasta).all()
    arr = []
    for data in filtro:
        dicc = {}
        precioMin = data[0]
        idProducto = data[1]
        for subasta_productos in filtro1:
            if idProducto == subasta_productos[0].idProducto:
                cantidad = subasta_productos[0].Cantidad
                nombreProducto = subasta_productos[1].nombreProducto
                dicc = {
                    "Subastas_Productos.Cantidad": cantidad,
                    "Productos_Supermercados.idSupermercado": 3,
                    "Subastas_Productos.idSubasta": idSubasta,
                    "Productos_Supermercados.precioOnline": precioMin,
                    "Productos.nombreProducto": nombreProducto,
                    "Productos.idProducto": idProducto
                }
        arr.append(dicc)
    return arr
def get_earliest_block_in_database(self):
    """
    Obtains the datetime (in UTC) of the earliest block in the database.

    Returns blocktime.
    """
    results = self.session.query(func.min(Block.timestamp)).all()
    return results[0][0]
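# A minimal sketch (not part of the original class) of the same lookup using
# Query.scalar(), which skips the intermediate list of result rows; it assumes
# the same Block model and self.session attribute as the method above.
def get_earliest_block_in_database_scalar(self):
    # scalar() returns the single aggregate value, or None for an empty table
    return self.session.query(func.min(Block.timestamp)).scalar()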
def null_out_last_stop_departures(cls, db):
    """
    delete all 'departure_time' values that appear for the last stop time of a
    given trip (e.g., the trip ends there, so there isn't a further vehicle
    departure / customer pickup for that stop time / trip pair)...

    -- query below shows null'd out stop times
    select * from ott.stop_times
    where COALESCE(arrival_time,'')='' or COALESCE(departure_time,'')=''

    NOTE: we know this breaks the current GTFS spec, which states that departure &
          arrival times must both exist for every stop time.  Sadly, GTFS is kinda wrong...
    """
    # step 1: remove the departure times at the end of a trip
    log.info("QUERY StopTime for all trip end times")
    sq = db.session.query(StopTime.trip_id, func.max(StopTime.stop_sequence).label('end_sequence'))
    sq = sq.group_by(StopTime.trip_id).subquery()
    q = db.session.query(StopTime)
    q = q.filter_by(trip_id=sq.c.trip_id, stop_sequence=sq.c.end_sequence)
    for st in q:
        if st.pickup_type == 1:
            st.departure_time = None

    # remove the arrival times at the start of a trip
    log.info("QUERY StopTime for all trip start times")
    sq = db.session.query(StopTime.trip_id, func.min(StopTime.stop_sequence).label('start_sequence'))
    sq = sq.group_by(StopTime.trip_id).subquery()
    q = db.session.query(StopTime)
    q = q.filter_by(trip_id=sq.c.trip_id, stop_sequence=sq.c.start_sequence)
    for st in q:
        if st.drop_off_type == 1:
            st.arrival_time = None

    db.session.flush()
    db.session.commit()
    db.session.close()
def getReplyStats(username, start_time_t):
    retval = {}

    retval["min_reply_time_sec"] = session.query(
        func.min(Tweets.reply_age).label("min")).filter(
        Tweets.username == username).filter(
        Tweets.time_t >= start_time_t).filter(
        Tweets.reply_tweet_id != None).filter(
        Tweets.reply_age != 0).first().min
    retval["min_reply_time"] = round(retval["min_reply_time_sec"] / 60, 0)

    retval["max_reply_time_sec"] = session.query(
        func.max(Tweets.reply_age).label("max")).filter(
        Tweets.username == username).filter(
        Tweets.time_t >= start_time_t).filter(
        Tweets.reply_tweet_id != None).filter(
        Tweets.reply_age != 0).first().max
    retval["max_reply_time"] = round(retval["max_reply_time_sec"] / 60, 0)

    retval["avg_reply_time_sec"] = session.query(
        func.avg(Tweets.reply_age).label("avg")).filter(
        Tweets.username == username).filter(
        Tweets.time_t >= start_time_t).filter(
        Tweets.reply_tweet_id != None).filter(
        Tweets.reply_age != 0).first().avg
    retval["avg_reply_time_sec"] = round(retval["avg_reply_time_sec"], 2)
    retval["avg_reply_time"] = round(retval["avg_reply_time_sec"] / 60, 0)

    median_stats = getReplyStatsMedian(start_time_t, retval)
    retval = {**retval, **median_stats}

    return (retval)
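# A minimal sketch (not part of the original module) showing how the three
# separate aggregate queries above could be collapsed into one round trip by
# selecting min/max/avg together; the Tweets model, session object, and filter
# columns are assumed to match the function above.
def getReplyAggregates(username, start_time_t):
    row = session.query(
        func.min(Tweets.reply_age).label("min"),
        func.max(Tweets.reply_age).label("max"),
        func.avg(Tweets.reply_age).label("avg")).filter(
        Tweets.username == username).filter(
        Tweets.time_t >= start_time_t).filter(
        Tweets.reply_tweet_id != None).filter(
        Tweets.reply_age != 0).first()
    # row.min / row.max / row.avg are all None when no matching tweets exist
    return row.min, row.max, row.avg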
def _attributes(request):
    ''' This service exposes preview values based on a layer Id and an attribute name
    (mapped in the model) '''
    MAX_ATTR_VALUES = 50
    attributesValues = []
    params = _get_attributes_params(request)
    models = models_from_bodid(params.layerId)
    if models is None:
        raise exc.HTTPBadRequest('No Vector Table was found for %s' % params.layerId)

    # Check that the attribute provided is found at least in one model
    modelToQuery = None
    for model in models:
        attributes = model().getAttributesKeys()
        if params.attribute in attributes:
            modelToQuery = model
            break
    if modelToQuery is None:
        raise exc.HTTPBadRequest('No attribute %s was found for %s' % (params.attribute, params.layerId))

    col = modelToQuery.get_column_by_property_name(params.attribute)
    colType = str(col.type)
    if colType in ['DATE', 'INTEGER', 'NUMERIC']:
        query = request.db.query(func.max(col).label('max'), func.min(col).label('min'))
        res = query.one()
        return {'values': [res.min, res.max]}
    else:
        query = request.db.query(col).distinct().order_by(col)
        query = query.limit(MAX_ATTR_VALUES)
        for attr in query:
            if len(attr):
                attributesValues.append(attr[0])
        return {'values': sorted(attributesValues)}
def get_product_for_catgeory_view(self, category_id_):
    log.info('\n\n I am here, searching for Category ID: %r', category_id_)
    product = db.session.query(
        TagProduct.tag_product_def_id.label('product_id'),
        func.string_agg(Tag.value, ' ').label('title')).join(
            TagProduct, TagProduct.tag_id == Tag.id).group_by(
                TagProduct.tag_product_def_id).cte('product')
    max_date = db.session.query(
        func.max(Ofert.creation_date.cast(Date)).cast(Date))
    return db.session.query(
        product.c.product_id,
        product.c.title,
        func.max(Ofert.price).label('max_price'),
        func.min(Ofert.price).label('min_price'),
        func.count(Ofert.id).label('count'),
        func.max(Image.image).label('image'),
        Brand.name.label('brand')).join(
            TagOfert, TagOfert.tag_product_def_id == product.c.product_id
        ).join(Ofert, Ofert.id == TagOfert.ofert_id).join(
            Image, Image.image == Ofert.image).join(
                TagProductDef,
                and_(TagProductDef.id == TagOfert.tag_product_def_id,
                     TagProductDef.category_id == category_id_)).join(
                         Brand, Brand.id == TagProductDef.brand_id).filter(
                             and_(
                                 TagOfert.creation_date.cast(Date) == max_date,  # func.current_date(),
                                 Ofert.creation_date.cast(Date) == max_date  # func.current_date()
                             )).group_by(product.c.product_id, product.c.title,
                                         Brand.name).order_by(
                                             desc(func.count(Ofert.id)))
def upgrade():
    # create the users table and the artpieces.user_id column
    UserModel.__table__.create(op.get_bind())
    op.add_column(
        'artpieces',
        sa.Column('user_id', sa.Integer, sa.ForeignKey('users.id'), nullable=True))

    # create users from unique email addresses
    with session_scope() as session:
        artpieces_with_unique_email = (session.query(
            ArtpieceModel.email,
            func.min(ArtpieceModel.submit_date).label('submit_date')).group_by(
                ArtpieceModel.email))
        users = {
            artpiece.email: UserModel(email=artpiece.email,
                                      created_at=artpiece.submit_date,
                                      verified=True)
            for artpiece in artpieces_with_unique_email
        }
        session.add_all(users.values())

        # set artpiece user(/creator) based on email address
        for artpiece in session.query(ArtpieceModel):
            artpiece.user = users[artpiece.email]

    op.alter_column('artpieces', 'user_id', nullable=False)
    # don't need email now that user relationship is set up
    op.drop_column('artpieces', 'email')
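# A hedged sketch of what a matching Alembic downgrade could look like; the
# original migration is not shown with one, so the column type, the backfill
# path, and the assumption that ArtpieceModel still maps both email and user
# in this revision are all assumptions, not part of the original code.
def downgrade():
    # restore the email column and backfill it from the owning user
    op.add_column('artpieces', sa.Column('email', sa.String(), nullable=True))
    with session_scope() as session:
        for artpiece in session.query(ArtpieceModel):
            artpiece.email = artpiece.user.email
    op.alter_column('artpieces', 'email', nullable=False)
    # drop the relationship column and the users table created by upgrade()
    op.drop_column('artpieces', 'user_id')
    UserModel.__table__.drop(op.get_bind())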
def set_leg_waypoints():
    t = time.time()
    dd = db.metadata.tables["device_data"]
    legs = db.metadata.tables["legs"]
    glue = db.metadata.tables["leg_waypoints"]

    legpoints = select(
        [legs.c.id, dd.c.waypoint_id, dd.c.time, dd.c.snapping_time],
        from_obj=dd.join(legs, and_(
            dd.c.device_id == legs.c.device_id,
            dd.c.time.between(legs.c.time_start, legs.c.time_end)))) \
        .alias("legpoints")

    done = select([glue.c.leg], distinct=True)

    nounsnapped = select(
        [legpoints.c.id],
        legpoints.c.id.notin_(done),
        group_by=legpoints.c.id,
        having=func.bool_and(legpoints.c.snapping_time.isnot(None)))

    newitems = select(
        [legpoints.c.id, legpoints.c.waypoint_id, func.min(legpoints.c.time)],
        legpoints.c.id.in_(nounsnapped),
        group_by=[legpoints.c.id, legpoints.c.waypoint_id]).alias("newitems")

    ins = glue.insert().from_select(["leg", "waypoint", "first"], newitems)
    rowcount = db.engine.execute(ins).rowcount

    print("set_leg_waypoints on %d rows in %.2g seconds" % (
        rowcount, time.time() - t))
def St_End(start, end):
    start_end_date = session.query(
        func.max(Measurement.tobs),
        func.min(Measurement.tobs),
        func.avg(Measurement.tobs)).filter(Measurement.date >= start).filter(
            Measurement.date <= end).all()

    Start_End = list(np.ravel(start_end_date))
    return jsonify(Start_End)
def shape(self):
    try:
        if self.data.type.python_type is list:
            df = self.ctx.read_sql(sa.select([
                func.min(func.array_length(self.data, 1)).label("min"),
                func.max(func.array_length(self.data, 1)).label("max")]))
            return df.values[0]
    except:
        return 1
def get_least_loaded(self, sess, servertype):
    '''
    This method finds the least loaded server of the given type

    :param servertype: the server type for which we want to find the least loaded server.
    '''
    min_loaded_server = None
    min_load = sess.query(func.min(self.Server.load)).filter(
        self.Server.server_type == servertype).one()
    # min() is None when no server of this type exists, so guard before comparing
    if min_load[0] is not None and min_load[0] < 100:
        min_loaded_server = sess.query(self.Server).filter(
            self.Server.load == min_load[0]).first()
    return min_loaded_server
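# A minimal alternative sketch (assuming the same self.Server mapping): the
# two-step min()-then-lookup above can usually be expressed as a single query
# ordered by load. Illustrative only, not part of the original class.
def get_least_loaded_single_query(self, sess, servertype):
    return sess.query(self.Server).filter(
        self.Server.server_type == servertype,
        self.Server.load < 100).order_by(self.Server.load).first()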
def get_min_tweet_id(session, username):
    retval = None
    row = session.query(func.min(Tweets.tweet_id).label("min")).filter(
        Tweets.username == username).first()
    if row:
        retval = row.min
    return(retval)
def get_fetcher_hash_ranges(db, uuid):
    """ ordered by start_hash, ascending """
    fh0 = aliased(db_models.FetcherHash)
    fh1 = aliased(db_models.FetcherHash)
    fh2 = aliased(db_models.FetcherHash)
    return (
        db.query(
            fh1.fetcher_uuid,
            fh1.fetcher_hash,
            func.coalesce(func.min(fh2.fetcher_hash),
                          func.min(fh0.fetcher_hash)),
        )
        .join(fh2, fh2.fetcher_hash > fh1.fetcher_hash, isouter=True)
        .filter(fh1.fetcher_uuid == uuid)
        .group_by(fh1.fetcher_uuid, fh1.fetcher_hash)
        .order_by(fh1.fetcher_hash)
    ).all()
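# A hedged usage sketch: each returned row reads as (fetcher_uuid, start_hash,
# end_hash), where the end of a range is the next-larger hash and appears to
# wrap around to the smallest hash for the last range. The db session and uuid
# value below are placeholders, not values from the original project.
for fetcher_uuid, start_hash, end_hash in get_fetcher_hash_ranges(db, uuid):
    print(fetcher_uuid, start_hash, end_hash)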
def check_reserva(session, bk_code: str) -> Reserva:
    """
    Checks whether there is a reservation for the book.
    If there is one, returns the reservation; otherwise returns None.
    """
    bk = session.query(Livro).filter_by(ID=bk_code).first()
    if bk is None:
        raise ValueError("Livro não encontrado")
    reserva = session.query(Reserva, func.min(
        Reserva.data)).filter_by(livro_ID=bk_code).first()
    return reserva[0]
def last_rated(session, limit=10):
    sq = session.query(Rating.movie_id.label('movie_id'),
                       func.min(Rating.rated).label('min_rated'))\
        .group_by(Rating.movie_id).subquery()
    res = session.query(User, Movie.id, Movie.title, Rating.rating)\
        .join((Rating, Rating.user_id==User.id))\
        .join((Movie, Movie.id==Rating.movie_id))\
        .join((sq, and_(sq.c.movie_id==Movie.id, sq.c.min_rated==Rating.rated)))\
        .order_by(sq.c.min_rated.desc()).limit(limit).all()
    return res
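# A small usage sketch, assuming the User/Movie/Rating models above; each row
# unpacks into the rating user, the movie id and title, and the rating that
# matches the movie's earliest rated timestamp. Purely illustrative.
for user, movie_id, title, rating in last_rated(session, limit=5):
    print(user, movie_id, title, rating)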
def calc_temps(start_date, end_date):
    """TMIN, TAVG, and TMAX for a list of dates.

    Args:
        start_date (string): A date string in the format %Y-%m-%d
        end_date (string): A date string in the format %Y-%m-%d

    Returns:
        TMIN, TAVG, and TMAX
    """
    return db.session.query(func.min(Hawaii.tobs), func.avg(Hawaii.tobs), func.max(Hawaii.tobs)).\
        filter(Hawaii.date >= start_date).filter(Hawaii.date <= end_date).all()
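# A small usage sketch, assuming the Hawaii measurements model above; the query
# returns a single row shaped like [(TMIN, TAVG, TMAX)]. The dates below are
# placeholders, not values from the original application.
tmin, tavg, tmax = calc_temps('2017-01-01', '2017-01-07')[0]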
def db_check_for_expiring_submissions(self, session): for tid in self.state.tenant_state: threshold = datetime_now() + timedelta( hours=self.state.tenant_cache[tid].notification. tip_expiration_threshold) for user in session.query(models.User).filter( models.User.role == u'receiver', models.UserTenant.user_id == models.User.id, models.UserTenant.tenant_id == tid): itip_ids = [ id[0] for id in session.query(models.InternalTip.id).filter( models.InternalTip.tid == tid, models.ReceiverTip. internaltip_id == models.InternalTip.id, models.InternalTip.expiration_date < threshold, models.ReceiverTip.receiver_id == user.id) ] if not len(itip_ids): continue earliest_expiration_date = session.query(func.min(models.InternalTip.expiration_date)) \ .filter(models.InternalTip.id.in_(itip_ids)).one()[0] user_desc = user_serialize_user(session, user, user.language) data = { 'type': u'tip_expiration_summary', 'node': db_admin_serialize_node(session, tid, user.language), 'notification': db_get_notification(session, tid, user.language), 'user': user_desc, 'expiring_submission_count': len(itip_ids), 'earliest_expiration_date': datetime_to_ISO8601(earliest_expiration_date) } subject, body = Templating().get_mail_subject_and_body(data) session.add( models.Mail({ 'tid': tid, 'address': user_desc['mail_address'], 'subject': subject, 'body': body }))
def insert_feed(cls, feed, source):
    link = source['link']
    title = source['title']
    subtitle = source['subtitle']
    new_source = None
    if Source.query.filter(Source.feed==feed).count() < 1:
        new_source = Source(feed=feed, link=link, title=title, subtitle=subtitle)
        db.session.add(new_source)
        db.session.commit()
    count = db.session.query(func.count(Source.title)).scalar()
    if count > 10:
        # a bulk delete on an aggregate query is not valid; fetch the oldest
        # date_added first, then delete the matching row(s)
        oldest_date = db.session.query(func.min(Source.date_added)).scalar()
        db.session.query(Source).filter(Source.date_added == oldest_date).delete()
        db.session.commit()
    return new_source
def get_rps_for_test_id(self, test_id):
    min_time_stamp_1 = select([
        func.min(func.date_trunc(
            'minute', tests_overall_data.c.timestamp)).label('min_ts')
    ]).where(tests_overall_data.c.test_id == test_id)
    mts1 = min_time_stamp_1.alias('mts1')
    statement = select([
        (func.date_trunc('minute', tests_overall_data.c.timestamp) -
         mts1.c.min_ts).label('timestamp'),
        (tests_overall_data.c.count / 60).label('rps')
    ]).where(tests_overall_data.c.test_id == test_id).order_by(
        asc(tests_overall_data.c.timestamp))
    return self.execute_statement(statement, True)
def get_column_min(self, table_name: str, column: str):
    """
    Return the minimum value of a column in a database table

    :param table_name: table name
    :param column: column name
    :return: the minimum value of the column
    """
    assert table_name.lower() in self.meta.tables.keys(), f'No table named {table_name} exists in the database'
    table = self.meta.tables[table_name.lower()]
    if 'DateTime' in table.columns.keys():
        session = Session(self.engine)
        q = session.query(func.min(table.c[column]))
        ret = q.one()[0]
        session.close()
        return ret
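# A short usage sketch; the wrapper instance name, table name, and column name
# below are placeholders, not part of the original code.
earliest = db_helper.get_column_min('bar_data', 'DateTime')
print(earliest)  # None is returned implicitly if the table has no DateTime column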
def extract_from_db(topic_id=''):
    '''Pulls the latest comment for the given news item from today's database'''
    date = datetime.datetime.now().date()
    from sqlalchemy import create_engine
    engine = create_engine('sqlite:///comments.db', echo=False)  # the database we will work with
    sqlites.Base.metadata.create_all(engine)
    from sqlalchemy.orm import sessionmaker
    # create the session
    Session = sessionmaker(bind=engine)
    # bind the session to the database
    session = Session()
    #result = session.query(sqlites.comment).filter_by(_seconds=u'007').first()
    result_max = session.query(func.max(sqlites.comment._seconds)).filter(
        sqlites.comment._url.like('%'+topic_id+'%'))
    result_min = session.query(func.min(sqlites.comment._seconds))
    result_max = session.query(sqlites.comment).filter_by(_seconds=result_max)
    #result_min = session.query(sqlites.comment).filter_by(_seconds=result_min[0])
    session.close()
    return result_max  #,result_min
def timerange(self):
    """ Get the timerange of the dataset (based on the time attribute).

    Returns a tuple of (first timestamp, last timestamp) where timestamp
    is a datetime object
    """
    try:
        # Get the time column
        time = self.key('time')
        # We use SQL's min and max functions to get the timestamps
        query = db.session.query(func.min(time), func.max(time))
        # We just need one result to get min and max time
        return [datetime.strptime(date, '%Y-%m-%d') if date else None
                for date in query.one()]
    except:
        return (None, None)
def db_check_for_expiring_submissions(self, session, tid): threshold = datetime_now() + timedelta( hours=self.state.tenant_cache[tid].notification. tip_expiration_threshold) result = session.query(models.User, func.count(models.InternalTip.id), func.min(models.InternalTip.expiration_date)) \ .filter(models.InternalTip.tid == tid, models.ReceiverTip.internaltip_id == models.InternalTip.id, models.InternalTip.expiration_date < threshold, models.User.id == models.ReceiverTip.receiver_id) \ .group_by(models.User.id) \ .having(func.count(models.InternalTip.id) > 0) \ .all() for x in result: user = x[0] expiring_submission_count = x[1] earliest_expiration_date = x[2] user_desc = user_serialize_user(session, user, user.language) data = { 'type': u'tip_expiration_summary', 'node': db_admin_serialize_node(session, tid, user.language), 'notification': db_get_notification(session, tid, user.language), 'user': user_desc, 'expiring_submission_count': expiring_submission_count, 'earliest_expiration_date': datetime_to_ISO8601(earliest_expiration_date) } subject, body = Templating().get_mail_subject_and_body(data) session.add( models.Mail({ 'tid': tid, 'address': user_desc['mail_address'], 'subject': subject, 'body': body }))
def get_rtot(self, test_id):
    '''Return response times over time for test_id'''
    min_time_stamp_2 = select([
        func.min(func.date_trunc(
            'minute', tests_overall_data.c.timestamp)).label('min_ts')
    ]).where(tests_overall_data.c.test_id == test_id)
    mts2 = min_time_stamp_2.alias('mts2')
    stmt2 = select([
        (func.date_trunc('minute', tests_overall_data.c.timestamp) -
         mts2.c.min_ts).label('timestamp'),
        tests_overall_data.c.avg.label('average'),
        tests_overall_data.c.median.label('median'),
        (tests_overall_data.c.count / 60).label('rps')
    ]).where(tests_overall_data.c.test_id == test_id).order_by(
        asc(tests_overall_data.c.timestamp))
    #.where(tests_data.c.test_id == test_id)
    # where(tests_data.c.metric == metric)
    return self.execute_statement(stmt2, True)
def find_activity(
        self, employee_id: int, start_date: date, end_date: date
) -> KeyedTuplesSequence([date, float], ['day', 'duration']):
    """ Activity (work done, not presence) between two dates, as a list of tuples.
    Each tuple represents a day.

    * tuple.day = the day
    * tuple.first_time : the time of the first time record
    * tuple.duration : the duration of the activity
    """
    mainlog.debug("find_activity from {} to {}".format(start_date, end_date))

    begin = datetime(start_date.year, start_date.month, start_date.day, 0, 0, 0)
    end = datetime(end_date.year, end_date.month, end_date.day, 23, 59, 59, 999999)

    presence_task = dao.task_action_report_dao.presence_task()

    t = session().query(func.DATE(TimeTrack.start_time).label('day'),
                        func.min(TimeTrack.start_time).label('first_time'),
                        func.sum(TimeTrack.duration).label('duration')).\
        filter(
            and_(
                TimeTrack.task_id != presence_task.task_id,
                TimeTrack.employee_id == employee_id,
                TimeTrack.start_time.between(begin, end))).\
        group_by('day').order_by("day").subquery()

    vdays = session().query(
        (date(begin.year, begin.month, begin.day) +
         func.generate_series(0, (end - begin).days)).label("day")).subquery()

    t = session().query(vdays.c.day, t.c.duration).select_from(vdays).outerjoin(
        t, vdays.c.day == t.c.day).order_by(vdays.c.day).all()

    session().commit()
    return t
def insert_feed(cls, source_id, feed_articles):
    insert = Article.__table__.insert().prefix_with('IGNORE')
    article_list = []
    for position in feed_articles:
        distress = filter_dir.binary_filter.classify_bert([position['title']])
        article_list.append({
            'title': position['title'],
            'body': position['summary'],
            'link': position['link'],
            'guid': position['id'],
            'distress': int(distress),
            'source_id': source_id,
            'date_published': position['published'],
            'img_link': position['img_link'],
            'img_credit': position['img_credit'],
            'tags': position['tags']
        })
    db.engine.execute(insert, article_list)
    count = db.session.query(func.count(Article.title)).scalar()
    if count > 100:
        # a row returned by .one() has no .delete(); fetch the oldest
        # date_added first, then delete the matching article(s)
        oldest_date = db.session.query(func.min(Article.date_added)).scalar()
        db.session.query(Article).filter(Article.date_added == oldest_date).delete()
        db.session.commit()
def convert_evidence(old_session_maker, new_session_maker, chunk_size): from model_new_schema.phenotype import Phenotypeevidence as NewPhenotypeevidence from model_new_schema.reference import Reference as NewReference from model_new_schema.evelement import Experiment as NewExperiment, Strain as NewStrain from model_new_schema.bioentity import Bioentity as NewBioentity from model_new_schema.misc import Allele as NewAllele from model_new_schema.phenotype import Phenotype as NewPhenotype from model_old_schema.reference import Reflink as OldReflink from model_old_schema.phenotype import PhenotypeFeature as OldPhenotypeFeature log = logging.getLogger('convert.phenotype.evidence') log.info('begin') output_creator = OutputCreator(log) try: new_session = new_session_maker() old_session = old_session_maker() #Values to check values_to_check = ['experiment_id', 'reference_id', 'strain_id', 'source', 'bioentity_id', 'bioconcept_id', 'date_created', 'created_by', 'reporter', 'reporter_desc', 'strain_details', 'conditions', 'details', 'experiment_details', 'allele_info', 'allele_id'] #Grab cached dictionaries key_to_experiment = dict([(x.unique_key(), x) for x in new_session.query(NewExperiment).all()]) key_to_phenotype = dict([(x.unique_key(), x) for x in new_session.query(NewPhenotype).all()]) key_to_strain = dict([(x.unique_key(), x) for x in new_session.query(NewStrain).all()]) key_to_allele = dict([(x.unique_key(), x) for x in new_session.query(NewAllele).all()]) bioent_ids = set([x.id for x in new_session.query(NewBioentity).all()]) reference_ids = set([x.id for x in new_session.query(NewReference).all()]) old_reflinks = old_session.query(OldReflink).all() key_to_reflink = dict([((x.col_name, x.primary_key), x) for x in old_reflinks]) min_id = old_session.query(func.min(OldPhenotypeFeature.id)).first()[0] count = old_session.query(func.max(OldPhenotypeFeature.id)).first()[0] - min_id num_chunks = ceil(1.0*count/chunk_size) for i in range(0, num_chunks): #Grab all current objects current_objs = new_session.query(NewPhenotypeevidence).filter(NewPhenotypeevidence.id >= create_evidence_id(min_id)).filter(NewPhenotypeevidence.id < create_evidence_id(min_id+chunk_size)).all() id_to_current_obj = dict([(x.id, x) for x in current_objs]) key_to_current_obj = dict([(x.unique_key(), x) for x in current_objs]) untouched_obj_ids = set(id_to_current_obj.keys()) #Grab old objects old_objs = old_session.query(OldPhenotypeFeature).filter( OldPhenotypeFeature.id >= min_id).filter( OldPhenotypeFeature.id < min_id+chunk_size).options( joinedload('experiment')).all() for old_obj in old_objs: #Convert old objects into new ones newly_created_objs = create_evidence(old_obj, key_to_reflink, key_to_phenotype, reference_ids, bioent_ids, key_to_strain, key_to_experiment, key_to_allele) if newly_created_objs is not None: #Edit or add new objects for newly_created_obj in newly_created_objs: current_obj_by_id = None if newly_created_obj.id not in id_to_current_obj else id_to_current_obj[newly_created_obj.id] current_obj_by_key = None if newly_created_obj.unique_key() not in key_to_current_obj else key_to_current_obj[newly_created_obj.unique_key()] create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator) if current_obj_by_id is not None and current_obj_by_id.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_id.id) if current_obj_by_key is not None and current_obj_by_key.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_key.id) #Delete 
untouched objs for untouched_obj_id in untouched_obj_ids: new_session.delete(id_to_current_obj[untouched_obj_id]) output_creator.removed() #Commit output_creator.finished(str(i+1) + "/" + str(int(num_chunks))) new_session.commit() min_id = min_id+chunk_size except Exception: log.exception('Unexpected error:' + str(sys.exc_info()[0])) finally: new_session.close() old_session.close() log.info('complete')
def convert_bioentity_reference(new_session_maker, evidence_class, class_type, label, chunk_size, get_bioent_ids_f, filter_f=None): from model_new_schema.auxiliary import BioentityReference from model_new_schema.bioentity import Paragraph log = logging.getLogger(label) log.info('begin') output_creator = OutputCreator(log) try: new_session = new_session_maker() #Values to check values_to_check = [] #Grab all current objects current_objs = new_session.query(BioentityReference).filter(BioentityReference.class_type == class_type).all() id_to_current_obj = dict([(x.id, x) for x in current_objs]) key_to_current_obj = dict([(x.unique_key(), x) for x in current_objs]) untouched_obj_ids = set(id_to_current_obj.keys()) used_unique_keys = set() min_id = new_session.query(func.min(evidence_class.id)).first()[0] count = new_session.query(func.max(evidence_class.id)).first()[0] - min_id num_chunks = ceil(1.0*count/chunk_size) for i in range(0, num_chunks): old_objs = new_session.query(evidence_class).filter(evidence_class.id >= min_id, evidence_class.id <= min_id+chunk_size).all() for old_obj in old_objs: if filter_f is None or filter_f(old_obj): #Convert old objects into new ones newly_created_objs = create_bioentity_reference(old_obj, get_bioent_ids_f, class_type) if newly_created_objs is not None: #Edit or add new objects for newly_created_obj in newly_created_objs: unique_key = newly_created_obj.unique_key() if unique_key not in used_unique_keys: current_obj_by_id = None if newly_created_obj.id not in id_to_current_obj else id_to_current_obj[newly_created_obj.id] current_obj_by_key = None if unique_key not in key_to_current_obj else key_to_current_obj[unique_key] create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator) used_unique_keys.add(unique_key) if current_obj_by_id is not None and current_obj_by_id.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_id.id) if current_obj_by_key is not None and current_obj_by_key.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_key.id) output_creator.finished(str(i+1) + "/" + str(int(num_chunks))) new_session.commit() min_id = min_id+chunk_size #Add paragraph-related bioent_references. 
old_objs = new_session.query(Paragraph).filter(Paragraph.class_type == class_type).options(joinedload('paragraph_references')).all() for old_obj in old_objs: if filter_f is None or filter_f(old_obj): #Convert old objects into new ones newly_created_objs = create_bioentity_reference_from_paragraph(old_obj, class_type) if newly_created_objs is not None: #Edit or add new objects for newly_created_obj in newly_created_objs: unique_key = newly_created_obj.unique_key() if unique_key not in used_unique_keys: current_obj_by_id = None if newly_created_obj.id not in id_to_current_obj else id_to_current_obj[newly_created_obj.id] current_obj_by_key = None if unique_key not in key_to_current_obj else key_to_current_obj[unique_key] create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator) used_unique_keys.add(unique_key) if current_obj_by_id is not None and current_obj_by_id.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_id.id) if current_obj_by_key is not None and current_obj_by_key.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_key.id) #Delete untouched objs for untouched_obj_id in untouched_obj_ids: new_session.delete(id_to_current_obj[untouched_obj_id]) output_creator.removed() #Commit output_creator.finished() new_session.commit() except Exception: log.exception('Unexpected error:' + str(sys.exc_info()[0])) finally: new_session.close() log.info('complete')
def generate_legs(keepto=None, maxtime=None, repair=False): """Record legs from stops and mobile activity found in device telemetry. keepto -- keep legs before this time, except last two or so for restart maxtime -- process device data up to this time repair -- re-evaluate and replace all changed legs""" now = datetime.datetime.now() if not keepto: keepto = now if not maxtime: maxtime = now print "generate_legs up to", maxtime dd = db.metadata.tables["device_data"] legs = db.metadata.tables["legs"] # Find first and last point sent from each device. devmax = select([ dd.c.device_id, func.min(dd.c.time).label("firstpoint"), func.max(dd.c.time).label("lastpoint") ], dd.c.time < maxtime, group_by=dd.c.device_id).alias("devmax") # The last recorded leg transition may be to phantom move that, given more # future context, will be merged into a preceding stop. Go back two legs # for the rewrite start point. # Due to activity summing window context and stabilization, and stop # entry/exit refinement, the first transition after starting the filter # process is not necessarily yet in sync with the previous run. Go back # another two legs to start the process. # (The window bounds expression is not supported until sqlalchemy 1.1 so # sneak it in in the order expression...) order = text("""time_start DESC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING""") rrlegs = select( [ legs.c.device_id, func.nth_value(legs.c.time_start, 4) \ .over(partition_by=legs.c.device_id, order_by=order) \ .label("rewind"), func.nth_value(legs.c.time_start, 2) \ .over(partition_by=legs.c.device_id, order_by=order) \ .label("rewrite")], and_(legs.c.activity != None, legs.c.time_start <= keepto), distinct=True).alias("rrlegs") # Find end of processed legs, including terminator for each device. lastleg = select( [legs.c.device_id, func.max(legs.c.time_end).label("time_end")], legs.c.time_start < keepto, group_by=legs.c.device_id).alias("lastleg") # If trailing points exist, start from rewind leg, or first point starts = select( [ devmax.c.device_id, func.coalesce(rrlegs.c.rewind, devmax.c.firstpoint), func.coalesce(rrlegs.c.rewrite, devmax.c.firstpoint)], or_(lastleg.c.time_end == None, devmax.c.lastpoint > lastleg.c.time_end), devmax \ .outerjoin(rrlegs, devmax.c.device_id == rrlegs.c.device_id) \ .outerjoin(lastleg, devmax.c.device_id == lastleg.c.device_id)) # In repair mode, just start from the top. 
if repair: starts = select([ devmax.c.device_id, devmax.c.firstpoint.label("rewind"), devmax.c.firstpoint.label("start") ]) starts = starts.order_by(devmax.c.device_id) for device, rewind, start in db.engine.execute(starts): query = select([ func.ST_AsGeoJSON(dd.c.coordinate).label("geojson"), dd.c.accuracy, dd.c.time, dd.c.device_id, dd.c.activity_1, dd.c.activity_1_conf, dd.c.activity_2, dd.c.activity_2_conf, dd.c.activity_3, dd.c.activity_3_conf ], and_(dd.c.device_id == device, dd.c.time >= rewind, dd.c.time < maxtime), order_by=dd.c.time) points = db.engine.execute(query).fetchall() print "d"+str(device), "resume", str(start)[:19], \ "rewind", str(rewind)[:19], str(len(points))+"p" filterer = DeviceDataFilterer() # not very objecty rly lastend = None newlegs = filterer.generate_device_legs(points, start) for (prevleg, _), (leg, legmodes) in pairwise(chain([(None, None)], newlegs)): with db.engine.begin() as t: lastend = leg["time_end"] print " ".join([ "d" + str(device), str(leg["time_start"])[:19], str(leg["time_end"])[:19], leg["activity"] ]), # Adjust leg for db entry gj0 = leg.pop("geojson_start", None) gj1 = leg.pop("geojson_end", None) leg.update({ "device_id": device, "coordinate_start": gj0 and func.ST_GeomFromGeoJSON(gj0), "coordinate_end": gj1 and func.ST_GeomFromGeoJSON(gj1) }) # Deal with overlapping legs on rewind/repair legid = t.execute( select([legs.c.id], and_(*(legs.c[c] == leg[c] for c in leg.keys())))).scalar() if legid: print "-> unchanged", else: overlapstart = prevleg and prevleg["time_end"] or start overlaps = [ x[0] for x in t.execute( select([legs.c.id], and_(legs.c.device_id == leg["device_id"], legs.c.time_start < leg["time_end"], legs.c.time_end > overlapstart), order_by=legs.c.time_start)) ] if overlaps: legid, dels = overlaps[0], overlaps[1:] t.execute(legs.update(legs.c.id == legid, leg)) print "-> update", if dels: t.execute(legs.delete(legs.c.id.in_(dels))) print "-> delete %d" % len(dels) else: ins = legs.insert(leg).returning(legs.c.id) legid = t.execute(ins).scalar() print "-> insert", # Delete mismatching modes, add new modes modes = db.metadata.tables["modes"] exmodes = { x[0]: x[1:] for x in t.execute( select([modes.c.source, modes.c.mode, modes.c.line], legs.c.id == legid, legs.join(modes))) } for src in set(exmodes).union(legmodes): ex, nu = exmodes.get(src), legmodes.get(src) if nu == ex: continue if ex is not None: print "-> del", src, ex, t.execute( modes.delete( and_(modes.c.leg == legid, modes.c.source == src))) if nu is not None: print "-> ins", src, nu, t.execute(modes.insert().values(leg=legid, source=src, mode=nu[0], line=nu[1])) print # Emit null activity terminator leg to mark trailing undecided points, # if any, to avoid unnecessary reprocessing on resume. rejects = [x for x in points if not lastend or x["time"] > lastend] if rejects: db.engine.execute( legs.delete( and_(legs.c.device_id == device, legs.c.time_start <= rejects[-1]["time"], legs.c.time_end >= rejects[0]["time"]))) db.engine.execute( legs.insert({ "device_id": device, "time_start": rejects[0]["time"], "time_end": rejects[-1]["time"], "activity": None })) # Attach device legs to users. 
devices = db.metadata.tables["devices"] # Real legs from devices with the owner added in, also when unattached owned = select([ devices.c.user_id.label("owner"), legs.c.id, legs.c.user_id, legs.c.time_start, legs.c.time_end ], and_(legs.c.activity != None, legs.c.time_end < maxtime), devices.join(legs, devices.c.id == legs.c.device_id)) detached = owned.where(legs.c.user_id.is_(None)).alias("detached") attached = owned.where(legs.c.user_id.isnot(None)).alias("attached") owned = owned.alias("owned") # Find most recently received leg attached per user maxattached = select( [attached.c.owner, func.max(attached.c.id).label("id")], group_by=attached.c.owner).alias("maxattached") # Find start of earliest unattached leg received later mindetached = select([ detached.c.owner, func.min(detached.c.time_start).label("time_start") ], or_(maxattached.c.id.is_(None), detached.c.id > maxattached.c.id), detached.outerjoin( maxattached, detached.c.owner == maxattached.c.owner), group_by=detached.c.owner).alias("mindetached") # Find start of attached overlapping leg to make it visible to the process overattached = select( [ attached.c.owner, func.min(attached.c.time_start).label("time_start") ], from_obj=attached.join( mindetached, and_(attached.c.owner == mindetached.c.owner, attached.c.time_end > mindetached.c.time_start)), group_by=attached.c.owner).alias("overattached") # Find restart point starts = select([ mindetached.c.owner, func.least(mindetached.c.time_start, overattached.c.time_start) ], from_obj=mindetached.outerjoin( overattached, mindetached.c.owner == overattached.c.owner)) # In repair mode, just start from the top. if repair: starts = select( [owned.c.owner, func.min(owned.c.time_start)], group_by=owned.c.owner) for user, start in db.engine.execute(starts.order_by(column("owner"))): # Ignore the special legacy user linking userless data if user == 0: continue print "u" + str(user), "start attach", start # Get legs from user's devices in end time order, so shorter # legs get attached in favor of longer legs from a more idle device. s = select([ owned.c.id, owned.c.time_start, owned.c.time_end, owned.c.user_id ], and_(owned.c.owner == user, owned.c.time_start >= start), order_by=owned.c.time_end) lastend = None for lid, lstart, lend, luser in db.engine.execute(s): print " ".join(["u" + str(user), str(lstart)[:19], str(lend)[:19]]), if lastend and lstart < lastend: if luser is None: print "-> detached" continue db.engine.execute( legs.update(legs.c.id == lid).values( user_id=None)) # detach print "-> detach" continue lastend = lend if luser == user: print "-> attached" continue db.engine.execute( legs.update(legs.c.id == lid).values(user_id=user)) # attach print "-> attach" # Cluster backlog in batches cluster_legs(1000) # Reverse geocode labels for places created or shifted by new legs label_places(60)
def convert_evidence(old_session_maker, new_session_maker, chunk_size): from model_new_schema.go import Goevidence as NewGoevidence from model_new_schema.reference import Reference as NewReference from model_new_schema.bioentity import Bioentity as NewBioentity from model_new_schema.go import Go as NewGo from model_old_schema.go import GoRef as OldGoRef log = logging.getLogger('convert.go.evidence') log.info('begin') output_creator = OutputCreator(log) try: new_session = new_session_maker() old_session = old_session_maker() #Values to check values_to_check = ['experiment_id', 'reference_id', 'strain_id', 'source', 'go_evidence', 'annotation_type', 'date_last_reviewed', 'qualifier', 'bioentity_id', 'bioconcept_id', 'date_created', 'created_by'] #Grab cached dictionaries bioent_ids = set([x.id for x in new_session.query(NewBioentity).all()]) reference_ids = set([x.id for x in new_session.query(NewReference).all()]) key_to_go = dict([(x.unique_key(), x) for x in new_session.query(NewGo).all()]) already_used_keys = set() min_id = old_session.query(func.min(OldGoRef.id)).first()[0] count = old_session.query(func.max(OldGoRef.id)).first()[0] - min_id num_chunks = ceil(1.0*count/chunk_size) for i in range(0, num_chunks): #Grab all current objects current_objs = new_session.query(NewGoevidence).filter(NewGoevidence.id >= create_evidence_id(min_id)).filter(NewGoevidence.id < create_evidence_id(min_id+chunk_size)).all() id_to_current_obj = dict([(x.id, x) for x in current_objs]) key_to_current_obj = dict([(x.unique_key(), x) for x in current_objs]) untouched_obj_ids = set(id_to_current_obj.keys()) #Grab old objects old_objs = old_session.query(OldGoRef).filter( OldGoRef.id >= min_id).filter( OldGoRef.id < min_id+chunk_size).options( joinedload('go_annotation')).all() for old_obj in old_objs: #Convert old objects into new ones newly_created_objs = create_evidence(old_obj, key_to_go, reference_ids, bioent_ids) if newly_created_objs is not None: #Edit or add new objects for newly_created_obj in newly_created_objs: key = newly_created_obj.unique_key() if key not in already_used_keys: current_obj_by_id = None if newly_created_obj.id not in id_to_current_obj else id_to_current_obj[newly_created_obj.id] current_obj_by_key = None if key not in key_to_current_obj else key_to_current_obj[key] create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator) already_used_keys.add(key) if current_obj_by_id is not None and current_obj_by_id.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_id.id) if current_obj_by_key is not None and current_obj_by_key.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_key.id) #Delete untouched objs for untouched_obj_id in untouched_obj_ids: new_session.delete(id_to_current_obj[untouched_obj_id]) output_creator.removed() #Commit output_creator.finished(str(i+1) + "/" + str(int(num_chunks))) new_session.commit() min_id = min_id+chunk_size except Exception: log.exception('Unexpected error:' + str(sys.exc_info()[0])) finally: new_session.close() old_session.close() log.info('complete')
def lowest_order(self):
    return DBSession.query(func.min(
        Image.order)).filter(Image.gallery_id == self.id).scalar()
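# A hedged companion sketch (not from the original gallery class): computing
# the next order value when appending an image, using func.max the same way
# lowest_order() uses func.min. Column and session names mirror the method above.
def next_order(self):
    highest = DBSession.query(func.max(
        Image.order)).filter(Image.gallery_id == self.id).scalar()
    return (highest + 1) if highest is not None else 0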
def convert_interaction(new_session_maker, evidence_class, class_type, label, chunk_size, directed): from model_new_schema.auxiliary import Interaction from model_new_schema.bioentity import Bioentity log = logging.getLogger(label) log.info('begin') output_creator = OutputCreator(log) try: new_session = new_session_maker() #Values to check values_to_check = ['display_name', 'bioentity1_id', 'bioentity2_id', 'evidence_count'] #Grab all current objects current_objs = new_session.query(Interaction).filter(Interaction.class_type == class_type).all() id_to_current_obj = dict([(x.id, x) for x in current_objs]) key_to_current_obj = dict([(x.unique_key(), x) for x in current_objs]) #Grab cached dictionaries id_to_bioent = dict([(x.id, x) for x in new_session.query(Bioentity).all()]) untouched_obj_ids = set(id_to_current_obj.keys()) used_unique_keys = set() #Precomp evidence count format_name_to_evidence_count = {} min_id = new_session.query(func.min(evidence_class.id)).first()[0] count = new_session.query(func.max(evidence_class.id)).first()[0] - min_id num_chunks = ceil(1.0*count/chunk_size) for i in range(0, num_chunks): more_old_objs = new_session.query(evidence_class).filter(evidence_class.id >= min_id).filter(evidence_class.id < min_id+chunk_size).all() interaction_precomp(format_name_to_evidence_count, more_old_objs, id_to_bioent, directed) min_id = min_id + chunk_size #Create interactions min_id = new_session.query(func.min(evidence_class.id)).first()[0] count = new_session.query(func.max(evidence_class.id)).first()[0] - min_id num_chunks = ceil(1.0*count/chunk_size) for i in range(0, num_chunks): old_objs = new_session.query(evidence_class).filter(evidence_class.id >= min_id).filter(evidence_class.id < min_id+chunk_size).all() for old_obj in old_objs: #Convert old objects into new ones if directed: format_name = create_directed_key(old_obj) else: format_name = create_undirected_interaction_format_name(old_obj, id_to_bioent) evidence_count = format_name_to_evidence_count[format_name] newly_created_objs = create_interaction(old_obj, evidence_count, id_to_bioent, directed) if newly_created_objs is not None: #Edit or add new objects for newly_created_obj in newly_created_objs: unique_key = newly_created_obj.unique_key() if unique_key not in used_unique_keys: current_obj_by_id = None if newly_created_obj.id not in id_to_current_obj else id_to_current_obj[newly_created_obj.id] current_obj_by_key = None if unique_key not in key_to_current_obj else key_to_current_obj[unique_key] create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator) used_unique_keys.add(unique_key) if current_obj_by_id is not None and current_obj_by_id.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_id.id) if current_obj_by_key is not None and current_obj_by_key.id in untouched_obj_ids: untouched_obj_ids.remove(current_obj_by_key.id) output_creator.finished(str(i+1) + "/" + str(int(num_chunks))) new_session.commit() min_id = min_id+chunk_size #Delete untouched objs for untouched_obj_id in untouched_obj_ids: new_session.delete(id_to_current_obj[untouched_obj_id]) output_creator.removed() #Commit output_creator.finished() new_session.commit() except Exception: log.exception('Unexpected error:' + str(sys.exc_info()[0])) finally: new_session.close() log.info('complete')
def persist_session(self, session_type, session_json): db_session = database_setup.get_session() if self.max_session_count == 0: return elif db_session.query(Session).count() == self.max_session_count: session_to_delete = db_session.query(Session, func.min(Session.timestamp)).first()[0] db_session.delete(session_to_delete) try: data = json.loads(session_json) except UnicodeDecodeError: data = json.loads(unicode(session_json, "ISO-8859-1")) logger.debug('Persisting {0} session: {1}'.format(session_type, data)) classification = db_session.query(Classification).filter(Classification.type == 'pending').one() assert data['honeypot_id'] is not None _honeypot = db_session.query(Honeypot).filter(Honeypot.id == data['honeypot_id']).one() if session_type == Messages.SESSION_HONEYPOT.value: session = Session() for entry in data['transcript']: transcript_timestamp = datetime.strptime(entry['timestamp'], '%Y-%m-%dT%H:%M:%S.%f') transcript = Transcript(timestamp=transcript_timestamp, direction=entry['direction'], data=entry['data']) session.transcript.append(transcript) for auth in data['login_attempts']: authentication = self.extract_auth_entity(auth) session.authentication.append(authentication) elif session_type == Messages.SESSION_CLIENT.value: ignore_failed_bait_sessions = self.send_config_request('{0} {1}'.format(Messages.GET_CONFIG_ITEM.value, 'ignore_failed_bait_session')) if not data['did_complete'] and ignore_failed_bait_sessions: logger.debug('Ignore failed bait session.') return session = BaitSession() client = db_session.query(Client).filter(Client.id == data['client_id']).one() client.last_activity = datetime.now() session.did_connect = data['did_connect'] session.did_login = data['did_login'] session.did_complete = data['did_complete'] session.client = client for auth in data['login_attempts']: authentication = self.extract_auth_entity(auth) session.authentication.append(authentication) else: logger.warn('Unknown message type: {0}'.format(session_type)) return session.id = data['id'] session.classification = classification session.timestamp = datetime.strptime(data['timestamp'], '%Y-%m-%dT%H:%M:%S.%f') session.received = datetime.utcnow() session.protocol = data['protocol'] session.destination_ip = data['destination_ip'] session.destination_port = data['destination_port'] session.source_ip = data['source_ip'] session.source_port = data['source_port'] session.honeypot = _honeypot db_session.add(session) db_session.commit() matching_session = self.get_matching_session(session, db_session) if session_type == Messages.SESSION_HONEYPOT.value: if matching_session: self.merge_bait_and_session(session, matching_session, db_session) elif session_type == Messages.SESSION_CLIENT.value: if matching_session: self.merge_bait_and_session(matching_session, session, db_session) else: assert False
def generate_legs(keepto=None, maxtime=None, repair=False):
    """Record legs from stops and mobile activity found in device telemetry.

    keepto -- keep legs before this time, except last two or so for restart
    maxtime -- process device data up to this time
    repair -- re-evaluate and replace all changed legs"""

    now = datetime.datetime.now()
    if not keepto:
        keepto = now
    if not maxtime:
        maxtime = now

    print("generate_legs up to", maxtime)

    dd = db.metadata.tables["device_data"]
    legs = db.metadata.tables["legs"]

    # Find first and last point sent from each device.
    devmax = select(
        [   dd.c.device_id,
            func.min(dd.c.time).label("firstpoint"),
            func.max(dd.c.time).label("lastpoint")],
        dd.c.time < maxtime,
        group_by=dd.c.device_id).alias("devmax")

    # The last recorded leg transition may be to a phantom move that, given
    # more future context, will be merged into a preceding stop. Go back two
    # legs for the rewrite start point.

    # Due to activity summing window context and stabilization, and stop
    # entry/exit refinement, the first transition after starting the filter
    # process is not necessarily yet in sync with the previous run. Go back
    # another two legs to start the process.

    # (The window bounds expression is not supported until sqlalchemy 1.1, so
    # sneak it in in the order expression...)
    order = text("""time_start DESC
        ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING""")
    rrlegs = select(
        [   legs.c.device_id,
            func.nth_value(legs.c.time_start, 4) \
                .over(partition_by=legs.c.device_id, order_by=order) \
                .label("rewind"),
            func.nth_value(legs.c.time_start, 2) \
                .over(partition_by=legs.c.device_id, order_by=order) \
                .label("rewrite")],
        and_(legs.c.activity != None, legs.c.time_start <= keepto),
        distinct=True).alias("rrlegs")

    # Find end of processed legs, including terminator for each device.
    lastleg = select(
        [legs.c.device_id, func.max(legs.c.time_end).label("time_end")],
        legs.c.time_start < keepto,
        group_by=legs.c.device_id).alias("lastleg")

    # If trailing points exist, start from rewind leg, or first point
    starts = select(
        [   devmax.c.device_id,
            func.coalesce(rrlegs.c.rewind, devmax.c.firstpoint),
            func.coalesce(rrlegs.c.rewrite, devmax.c.firstpoint)],
        or_(lastleg.c.time_end == None, devmax.c.lastpoint > lastleg.c.time_end),
        devmax \
            .outerjoin(rrlegs, devmax.c.device_id == rrlegs.c.device_id) \
            .outerjoin(lastleg, devmax.c.device_id == lastleg.c.device_id))

    # In repair mode, just start from the top.
    if repair:
        starts = select([
            devmax.c.device_id,
            devmax.c.firstpoint.label("rewind"),
            devmax.c.firstpoint.label("start")])

    starts = starts.order_by(devmax.c.device_id)

    for device, rewind, start in db.engine.execute(starts):
        query = select(
            [   func.ST_AsGeoJSON(dd.c.coordinate).label("geojson"),
                dd.c.accuracy,
                dd.c.time,
                dd.c.device_id,
                dd.c.activity_1, dd.c.activity_1_conf,
                dd.c.activity_2, dd.c.activity_2_conf,
                dd.c.activity_3, dd.c.activity_3_conf],
            and_(
                dd.c.device_id == device,
                dd.c.time >= rewind,
                dd.c.time < maxtime),
            order_by=dd.c.time)

        points = db.engine.execute(query).fetchall()

        print("d"+str(device), "resume", str(start)[:19], \
            "rewind", str(rewind)[:19], str(len(points))+"p")

        filterer = DeviceDataFilterer()  # not very objecty rly
        lastend = None
        newlegs = filterer.generate_device_legs(points, start)

        for (prevleg, _), (leg, legmodes) in pairwise(
                chain([(None, None)], newlegs)):

            with db.engine.begin() as t:

                lastend = leg["time_end"]

                print(" ".join([
                    "d"+str(device),
                    str(leg["time_start"])[:19],
                    str(leg["time_end"])[:19],
                    leg["activity"]]), end=' ')

                # Adjust leg for db entry
                gj0 = leg.pop("geojson_start", None)
                gj1 = leg.pop("geojson_end", None)
                leg.update({
                    "device_id": device,
                    "coordinate_start": gj0 and func.ST_GeomFromGeoJSON(gj0),
                    "coordinate_end": gj1 and func.ST_GeomFromGeoJSON(gj1)})

                # Deal with overlapping legs on rewind/repair
                legid = t.execute(select(
                    [legs.c.id],
                    and_(*(legs.c[c] == leg[c] for c in list(leg.keys()))))).scalar()
                if legid:
                    print("-> unchanged", end=' ')
                else:
                    overlapstart = prevleg and prevleg["time_end"] or start
                    overlaps = [x[0] for x in t.execute(select(
                        [legs.c.id],
                        and_(
                            legs.c.device_id == leg["device_id"],
                            legs.c.time_start < leg["time_end"],
                            legs.c.time_end > overlapstart),
                        order_by=legs.c.time_start))]
                    if overlaps:
                        legid, dels = overlaps[0], overlaps[1:]
                        t.execute(legs.update(legs.c.id == legid, leg))
                        print("-> update", end=' ')
                        if dels:
                            t.execute(legs.delete(legs.c.id.in_(dels)))
                            print("-> delete %d" % len(dels))
                    else:
                        ins = legs.insert(leg).returning(legs.c.id)
                        legid = t.execute(ins).scalar()
                        print("-> insert", end=' ')

                # Delete mismatching modes, add new modes
                modes = db.metadata.tables["modes"]
                exmodes = {x[0]: x[1:] for x in t.execute(select(
                    [modes.c.source, modes.c.mode, modes.c.line],
                    legs.c.id == legid,
                    legs.join(modes)))}
                for src in set(exmodes).union(legmodes):
                    ex, nu = exmodes.get(src), legmodes.get(src)
                    if nu == ex:
                        continue
                    if ex is not None:
                        print("-> del", src, ex, end=' ')
                        t.execute(modes.delete(and_(
                            modes.c.leg == legid, modes.c.source == src)))
                    if nu is not None:
                        print("-> ins", src, nu, end=' ')
                        t.execute(modes.insert().values(
                            leg=legid, source=src, mode=nu[0], line=nu[1]))

                print()

        # Emit null activity terminator leg to mark trailing undecided points,
        # if any, to avoid unnecessary reprocessing on resume.
        rejects = [x for x in points if not lastend or x["time"] > lastend]
        if rejects:
            db.engine.execute(legs.delete(and_(
                legs.c.device_id == device,
                legs.c.time_start <= rejects[-1]["time"],
                legs.c.time_end >= rejects[0]["time"])))
            db.engine.execute(legs.insert({
                "device_id": device,
                "time_start": rejects[0]["time"],
                "time_end": rejects[-1]["time"],
                "activity": None}))

    # Attach device legs to users.
    devices = db.metadata.tables["devices"]

    # Real legs from devices with the owner added in, also when unattached
    owned = select(
        [   devices.c.user_id.label("owner"),
            legs.c.id,
            legs.c.user_id,
            legs.c.time_start,
            legs.c.time_end],
        and_(legs.c.activity != None, legs.c.time_end < maxtime),
        devices.join(legs, devices.c.id == legs.c.device_id))

    detached = owned.where(legs.c.user_id.is_(None)).alias("detached")
    attached = owned.where(legs.c.user_id.isnot(None)).alias("attached")
    owned = owned.alias("owned")

    # Find most recently received leg attached per user
    maxattached = select(
        [attached.c.owner, func.max(attached.c.id).label("id")],
        group_by=attached.c.owner).alias("maxattached")

    # Find start of earliest unattached leg received later
    mindetached = select(
        [   detached.c.owner,
            func.min(detached.c.time_start).label("time_start")],
        or_(maxattached.c.id.is_(None), detached.c.id > maxattached.c.id),
        detached.outerjoin(
            maxattached, detached.c.owner == maxattached.c.owner),
        group_by=detached.c.owner).alias("mindetached")

    # Find start of attached overlapping leg to make it visible to the process
    overattached = select(
        [   attached.c.owner,
            func.min(attached.c.time_start).label("time_start")],
        from_obj=attached.join(mindetached, and_(
            attached.c.owner == mindetached.c.owner,
            attached.c.time_end > mindetached.c.time_start)),
        group_by=attached.c.owner).alias("overattached")

    # Find restart point
    starts = select(
        [   mindetached.c.owner,
            func.least(mindetached.c.time_start, overattached.c.time_start)],
        from_obj=mindetached.outerjoin(
            overattached, mindetached.c.owner == overattached.c.owner))

    # In repair mode, just start from the top.
    if repair:
        starts = select(
            [owned.c.owner, func.min(owned.c.time_start)],
            group_by=owned.c.owner)

    for user, start in db.engine.execute(starts.order_by(column("owner"))):
        # Ignore the special legacy user linking userless data
        if user == 0:
            continue

        print("u"+str(user), "start attach", start)

        # Get legs from user's devices in end time order, so shorter
        # legs get attached in favor of longer legs from a more idle device.
        s = select(
            [   owned.c.id,
                owned.c.time_start,
                owned.c.time_end,
                owned.c.user_id],
            and_(owned.c.owner == user, owned.c.time_start >= start),
            order_by=owned.c.time_end)

        lastend = None
        for lid, lstart, lend, luser in db.engine.execute(s):
            print(" ".join(["u"+str(user), str(lstart)[:19], str(lend)[:19]]), end=' ')
            if lastend and lstart < lastend:
                if luser is None:
                    print("-> detached")
                    continue
                db.engine.execute(legs.update(
                    legs.c.id==lid).values(user_id=None))  # detach
                print("-> detach")
                continue
            lastend = lend
            if luser == user:
                print("-> attached")
                continue
            db.engine.execute(legs.update(
                legs.c.id==lid).values(user_id=user))  # attach
            print("-> attach")

    # Cluster backlog in batches
    cluster_legs(1000)

    # Reverse geocode labels for places created or shifted by new legs
    label_places(60)
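# The generate_legs loop above walks new legs as (previous, current) pairs via
# pairwise(chain([(None, None)], newlegs)), so the first real leg sees a None
# predecessor. A minimal self-contained sketch of that idiom, with pairwise
# written as the usual itertools recipe (illustration only, not part of the
# original source):
from itertools import chain, tee

def pairwise(iterable):
    # s -> (s0, s1), (s1, s2), (s2, s3), ...
    a, b = tee(iterable)
    next(b, None)
    return zip(a, b)

newlegs = [("leg1", {}), ("leg2", {}), ("leg3", {})]
for (prev, _), (cur, modes) in pairwise(chain([(None, None)], newlegs)):
    # prev is None on the first iteration, then the preceding leg.
    print(prev, "->", cur)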
def convert_evidence_chemical(old_session_maker, new_session_maker, chunk_size):
    from model_new_schema.phenotype import Phenotypeevidence as NewPhenotypeevidence
    from model_new_schema.chemical import Chemical as NewChemical
    from model_new_schema.evidence import EvidenceChemical as NewEvidenceChemical
    from model_old_schema.phenotype import PhenotypeFeature as OldPhenotypeFeature

    log = logging.getLogger('convert.phenotype.evidence_chemical')
    log.info('begin')
    output_creator = OutputCreator(log)

    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        #Values to check
        values_to_check = ['chemical_amt']

        #Grab cached dictionaries
        key_to_chemical = dict([(x.unique_key(), x) for x in new_session.query(NewChemical).all()])

        min_id = old_session.query(func.min(OldPhenotypeFeature.id)).first()[0]
        count = old_session.query(func.max(OldPhenotypeFeature.id)).first()[0] - min_id
        num_chunks = ceil(1.0*count/chunk_size)
        for i in range(0, num_chunks):
            #Grab all current objects
            current_objs = new_session.query(NewEvidenceChemical).filter(
                NewEvidenceChemical.evidence_id >= create_evidence_id(min_id)).filter(
                NewEvidenceChemical.evidence_id < create_evidence_id(min_id+chunk_size)).all()
            id_to_current_obj = dict([(x.id, x) for x in current_objs])
            key_to_current_obj = dict([(x.unique_key(), x) for x in current_objs])

            id_to_evidence = dict([(x.id, x) for x in new_session.query(NewPhenotypeevidence).filter(
                NewPhenotypeevidence.id >= create_evidence_id(min_id)).filter(
                NewPhenotypeevidence.id < create_evidence_id(min_id+chunk_size)).all()])

            untouched_obj_ids = set(id_to_current_obj.keys())

            #Grab old objects
            old_objs = old_session.query(OldPhenotypeFeature).filter(
                OldPhenotypeFeature.id >= min_id).filter(
                OldPhenotypeFeature.id < min_id+chunk_size).options(
                joinedload('experiment')).all()

            for old_obj in old_objs:
                #Convert old objects into new ones
                newly_created_objs = create_evidence_chemical(old_obj, key_to_chemical, id_to_evidence)

                if newly_created_objs is not None:
                    #Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        current_obj_by_id = None if newly_created_obj.id not in id_to_current_obj else id_to_current_obj[newly_created_obj.id]
                        current_obj_by_key = None if newly_created_obj.unique_key() not in key_to_current_obj else key_to_current_obj[newly_created_obj.unique_key()]
                        create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                        if current_obj_by_id is not None and current_obj_by_id.id in untouched_obj_ids:
                            untouched_obj_ids.remove(current_obj_by_id.id)
                        if current_obj_by_key is not None and current_obj_by_key.id in untouched_obj_ids:
                            untouched_obj_ids.remove(current_obj_by_key.id)

            #Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            #Commit
            output_creator.finished(str(i+1) + "/" + str(int(num_chunks)))
            new_session.commit()
            min_id = min_id+chunk_size

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        new_session.close()
        old_session.close()

    log.info('complete')
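# convert_evidence_chemical above, and convert_litevidence and
# convert_genetic_interevidence further down, all follow the same chunked
# upsert pattern: scan the old table's id range in fixed windows, build new
# objects, update or insert them, and delete anything in the window that was
# not touched. A stripped-down sketch of just the windowing arithmetic
# (names are placeholders; illustration only, not part of the original source):
from math import ceil

def iter_id_windows(min_id, max_id, chunk_size):
    # Yield [lo, hi) id windows covering min_id..max_id.
    count = max_id - min_id
    num_chunks = int(ceil(1.0 * count / chunk_size))
    lo = min_id
    for _ in range(num_chunks):
        yield lo, lo + chunk_size
        lo += chunk_size

# e.g. list(iter_id_windows(10, 35, 10)) -> [(10, 20), (20, 30), (30, 40)]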
def lowest_order(self):
    return DBSession.query(func.min(Image.order)).filter(Image.gallery_id == self.id).scalar()
def get_lowest_vendor_price(product_id):
    vendors = db.session.query(User).filter(User.type=='Vendor').all()
    vendor_ids = [vendor.id for vendor in vendors]
    return db.session.query(func.min(UserProduct.price)).filter(
        and_(UserProduct.product_id==product_id, UserProduct.user_id.in_(vendor_ids))).scalar()
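# Both lowest_order and get_lowest_vendor_price end in .scalar() on a
# func.min(...) query; when no rows match, the SQL aggregate is NULL and
# scalar() returns None rather than a number. A minimal guard sketch (the
# default value is an assumption; illustration only, not part of the original
# source):
def get_lowest_vendor_price_or_default(product_id, default=0):
    price = get_lowest_vendor_price(product_id)
    return default if price is None else price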
def getStatements(self, subject=None, predicate=None, object=None,
                  objecttype=None, context=None, asQuad=True, hints=None):
    """
    Return all the statements in the model that match the given arguments.
    Any of the subject, predicate, object, objecttype, and context arguments
    can be None, and any None slot is treated as a wildcard that matches any
    value in the model.
    """
    fs = subject is not None
    fp = predicate is not None
    fo = object is not None
    fot = objecttype is not None
    fc = context is not None
    hints = hints or {}
    limit = hints.get("limit")
    offset = hints.get("offset")

    log.debug("s p o ot c quad lim offset: ", fs, fp, fo, fot, fc, asQuad, limit, offset)

    if fo:
        if isinstance(object, ResourceUri):
            object = object.uri
            fot = True
            objecttype = OBJECT_TYPE_RESOURCE
        elif not fot:
            objecttype = OBJECT_TYPE_LITERAL

    if not asQuad and not fc:
        query = select(
            [   self.vesper_stmts.c.subject,
                self.vesper_stmts.c.predicate,
                self.vesper_stmts.c.object,
                self.vesper_stmts.c.objecttype,
                func.min(self.vesper_stmts.c.context).label("context")])
    else:  # asQuad is True
        query = self.vesper_stmts.select()
    if fs:
        query = query.where(self.vesper_stmts.c.subject == subject)
    if fp:
        query = query.where(self.vesper_stmts.c.predicate == predicate)
    if fo:
        query = query.where(self.vesper_stmts.c.object == object)
    if fot:
        query = query.where(self.vesper_stmts.c.objecttype == objecttype)
    if fc:
        query = query.where(self.vesper_stmts.c.context == context)
    if not asQuad and not fc:
        query = query.group_by(
            self.vesper_stmts.c.subject,
            self.vesper_stmts.c.predicate,
            self.vesper_stmts.c.object,
            self.vesper_stmts.c.objecttype)
    if limit is not None:
        query = query.limit(limit)
    if offset is not None:
        query = query.offset(offset)

    stmts = []
    self._checkConnection()
    result = self.conn.execute(query)
    for r in result:
        stmts.append(Statement(r["subject"], r["predicate"], r["object"], r["objecttype"], r["context"]))

    log.debug("stmts returned: ", len(stmts), stmts)
    return stmts
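# Usage sketch for getStatements: with asQuad=False and no context filter, the
# query above groups by (subject, predicate, object, objecttype) and keeps
# min(context), so duplicate statements from different contexts collapse to a
# single row. A hypothetical call, assuming a store instance and paging hints
# (illustration only, not part of the original source):
# stmts = store.getStatements(
#     subject="http://example.org/s1",
#     asQuad=False,
#     hints={"limit": 100, "offset": 0})
# for s in stmts:
#     print(s)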
async def add_missing_logs(bot, sess):
    try:
        channels = Cs.auto_cleanup_targets.gets()
    except KeyError:
        return

    all_logs: LOGS = set()
    for channel in channels:
        logs: LOGS = set()
        try:
            latest = sess.execute(
                Select([func.min(EventLog.ts)
                        ]).where(EventLog.channel == channel.id)).scalar()
        except NoResultFound:
            latest = None
        has_more = True
        cursor = None
        while has_more and len(logs) < 1600:
            try:
                resp = await bot.api.conversations.history(
                    channel,
                    cursor=cursor,
                    latest=latest,
                )
            except APICallError as e:
                await report(bot, exception=e)
                break
            history = resp.body
            if not history['ok']:
                break
            has_more = history['has_more']
            if has_more:
                cursor = history['response_metadata']['next_cursor']
            messages = {(m.get('reply_count', 0), m['ts'])
                        for m in history['messages']}
            while messages:
                reply_count, ts = messages.pop()
                if reply_count:
                    has_more_replies = True
                    replies_cursor = None
                    while has_more_replies:
                        try:
                            r = await bot.api.conversations.replies(
                                channel,
                                cursor=replies_cursor,
                                ts=ts,
                            )
                        except APICallError as e:
                            await report(bot, exception=e)
                            break
                        replies = r.body
                        if not replies['ok']:
                            break
                        has_more_replies = replies['has_more']
                        if has_more_replies:
                            replies_cursor = replies['response_metadata'][
                                'next_cursor']
                        messages |= {(m.get('reply_count', 0), m['ts'])
                                     for m in replies.get('messages', [])}
                logs.add((channel.id, ts))

        all_logs |= logs

    if all_logs:
        with sess.begin():
            sess.execute(
                Insert(EventLog).values([{
                    'channel': c,
                    'ts': t
                } for c, t in all_logs]).on_conflict_do_nothing())
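# add_missing_logs pages through Slack history with the has_more flag and
# response_metadata.next_cursor, using the same loop shape for both
# conversations.history and conversations.replies. A generic sketch of that
# cursor loop with a hypothetical fetch_page coroutine (illustration only, not
# part of the original source):
async def collect_all(fetch_page):
    items = []
    cursor = None
    has_more = True
    while has_more:
        body = await fetch_page(cursor)  # e.g. wraps one API call
        if not body['ok']:
            break
        items.extend(body.get('messages', []))
        has_more = body['has_more']
        if has_more:
            cursor = body['response_metadata']['next_cursor']
    return items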
def persist_session(self, session_type, session_json):
    db_session = database_setup.get_session()
    if self.max_session_count == 0:
        return
    elif db_session.query(Session).count() == self.max_session_count:
        session_to_delete = db_session.query(Session, func.min(Session.timestamp)).first()[0]
        db_session.delete(session_to_delete)
    try:
        data = json.loads(session_json)
    except UnicodeDecodeError:
        data = json.loads(unicode(session_json, "ISO-8859-1"))
    logger.debug('Persisting {0} session: {1}'.format(session_type, data))

    classification = db_session.query(Classification).filter(Classification.type == 'pending').one()
    assert data['honeypot_id'] is not None
    _honeypot = db_session.query(Honeypot).filter(Honeypot.id == data['honeypot_id']).one()

    if session_type == Messages.SESSION_HONEYPOT.value:
        session = Session()
        for entry in data['transcript']:
            transcript_timestamp = isoformatToDatetime(entry['timestamp'])
            transcript = Transcript(timestamp=transcript_timestamp, direction=entry['direction'],
                                    data=entry['data'])
            session.transcript.append(transcript)
        for auth in data['login_attempts']:
            authentication = self.extract_auth_entity(auth)
            session.authentication.append(authentication)
    elif session_type == Messages.SESSION_CLIENT.value:
        ignore_failed_bait_sessions = self.send_config_request('{0} {1}'.format(Messages.GET_CONFIG_ITEM.value,
                                                                                'ignore_failed_bait_session'))
        if not data['did_complete'] and ignore_failed_bait_sessions:
            logger.debug('Ignore failed bait session.')
            return
        session = BaitSession()
        client = db_session.query(Client).filter(Client.id == data['client_id']).one()
        client.last_activity = datetime.now()
        session.did_connect = data['did_connect']
        session.did_login = data['did_login']
        session.did_complete = data['did_complete']
        session.client = client
        for auth in data['login_attempts']:
            authentication = self.extract_auth_entity(auth)
            session.authentication.append(authentication)
    else:
        logger.warn('Unknown message type: {0}'.format(session_type))
        return

    session.id = data['id']
    session.classification = classification
    session.timestamp = isoformatToDatetime(data['timestamp'])
    session.received = datetime.utcnow()
    session.protocol = data['protocol']
    session.destination_ip = data['destination_ip']
    session.destination_port = data['destination_port']
    session.source_ip = data['source_ip']
    session.source_port = data['source_port']
    session.honeypot = _honeypot
    _dronename = db_session.query(Drone).filter(Drone.id==_honeypot.id).first().name

    db_session.add(session)
    db_session.commit()

    matching_session = self.get_matching_session(session, db_session)
    if session_type == Messages.SESSION_HONEYPOT.value:
        if matching_session:
            matching_session.name = _dronename
            self.merge_bait_and_session(session, matching_session, db_session)
    elif session_type == Messages.SESSION_CLIENT.value:
        if matching_session:
            session.name = _dronename
            self.merge_bait_and_session(matching_session, session, db_session)
    else:
        assert False
def convert_litevidence(old_session_maker, new_session_maker, chunk_size):
    from model_new_schema.literature import Literatureevidence as NewLiteratureevidence
    from model_new_schema.reference import Reference as NewReference
    from model_new_schema.bioentity import Bioentity as NewBioentity
    from model_old_schema.reference import LitguideFeat as OldLitguideFeat

    log = logging.getLogger('convert.literature.evidence')
    log.info('begin')
    output_creator = OutputCreator(log)

    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        #Values to check
        values_to_check = ['experiment_id', 'reference_id', 'class_type', 'strain_id',
                           'source', 'topic', 'bioentity_id', 'date_created', 'created_by']

        #Grab cached dictionaries
        bioent_ids = set([x.id for x in new_session.query(NewBioentity).all()])
        reference_ids = set([x.id for x in new_session.query(NewReference).all()])

        min_id = old_session.query(func.min(OldLitguideFeat.id)).first()[0]
        count = old_session.query(func.max(OldLitguideFeat.id)).first()[0] - min_id
        num_chunks = ceil(1.0*count/chunk_size)
        for i in range(0, num_chunks):
            #Grab all current objects
            current_objs = new_session.query(NewLiteratureevidence).filter(
                NewLiteratureevidence.id >= create_litevidence_id(min_id)).filter(
                NewLiteratureevidence.id < create_litevidence_id(min_id+chunk_size)).all()
            id_to_current_obj = dict([(x.id, x) for x in current_objs])
            key_to_current_obj = dict([(x.unique_key(), x) for x in current_objs])

            untouched_obj_ids = set(id_to_current_obj.keys())

            #Grab old objects
            old_objs = old_session.query(OldLitguideFeat).filter(
                OldLitguideFeat.id >= min_id).filter(
                OldLitguideFeat.id < min_id+chunk_size).filter(
                or_(OldLitguideFeat.topic=='Additional Literature',
                    OldLitguideFeat.topic=='Primary Literature',
                    OldLitguideFeat.topic=='Omics',
                    OldLitguideFeat.topic=='Reviews')).options(
                joinedload('litguide')).all()

            for old_obj in old_objs:
                #Convert old objects into new ones
                newly_created_objs = create_litevidence(old_obj, reference_ids, bioent_ids)

                if newly_created_objs is not None:
                    #Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        current_obj_by_id = None if newly_created_obj.id not in id_to_current_obj else id_to_current_obj[newly_created_obj.id]
                        current_obj_by_key = None if newly_created_obj.unique_key() not in key_to_current_obj else key_to_current_obj[newly_created_obj.unique_key()]
                        create_or_update(newly_created_obj, current_obj_by_id, current_obj_by_key, values_to_check, new_session, output_creator)

                        if current_obj_by_id is not None and current_obj_by_id.id in untouched_obj_ids:
                            untouched_obj_ids.remove(current_obj_by_id.id)
                        if current_obj_by_key is not None and current_obj_by_key.id in untouched_obj_ids:
                            untouched_obj_ids.remove(current_obj_by_key.id)

            #Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            #Commit
            output_creator.finished(str(i+1) + "/" + str(int(num_chunks)))
            new_session.commit()
            min_id = min_id+chunk_size

    except Exception:
        log.exception('Unexpected error:' + str(sys.exc_info()[0]))
    finally:
        new_session.close()
        old_session.close()

    log.info('complete')
def convert_genetic_interevidence(old_session_maker, new_session_maker, chunk_size):
    from model_new_schema.interaction import Geninteractionevidence as NewGeninteractionevidence
    from model_new_schema.reference import Reference as NewReference
    from model_new_schema.evelement import Experiment as NewExperiment
    from model_new_schema.bioentity import Bioentity as NewBioentity
    from model_new_schema.phenotype import Phenotype as NewPhenotype
    from model_old_schema.interaction import Interaction as OldInteraction

    log = logging.getLogger("convert.genetic_interaction.evidence")
    log.info("begin")
    output_creator = OutputCreator(log)

    try:
        new_session = new_session_maker()
        old_session = old_session_maker()

        # Values to check
        values_to_check = [
            "experiment_id",
            "reference_id",
            "strain_id",
            "source",
            "bioentity1_id",
            "bioentity2_id",
            "phenotype_id",
            "note",
            "annotation_type",
            "date_created",
            "created_by",
        ]

        # Grab cached dictionaries
        key_to_experiment = dict([(x.unique_key(), x) for x in new_session.query(NewExperiment).all()])
        key_to_phenotype = dict([(x.unique_key(), x) for x in new_session.query(NewPhenotype).all()])
        bioent_ids = dict([(x.unique_key(), x) for x in new_session.query(NewBioentity).all()])
        reference_ids = set([x.id for x in new_session.query(NewReference).all()])

        min_id = old_session.query(func.min(OldInteraction.id)).first()[0]
        count = old_session.query(func.max(OldInteraction.id)).first()[0] - min_id
        num_chunks = ceil(1.0 * count / chunk_size)
        for i in range(0, num_chunks):
            # Grab all current objects
            current_objs = (
                new_session.query(NewGeninteractionevidence)
                .filter(NewGeninteractionevidence.id >= create_genetic_evidence_id(min_id))
                .filter(NewGeninteractionevidence.id < create_genetic_evidence_id(min_id + chunk_size))
                .all()
            )
            id_to_current_obj = dict([(x.id, x) for x in current_objs])
            key_to_current_obj = dict([(x.unique_key(), x) for x in current_objs])

            untouched_obj_ids = set(id_to_current_obj.keys())

            # Grab old objects
            old_objs = (
                old_session.query(OldInteraction)
                .filter(OldInteraction.id >= min_id)
                .filter(OldInteraction.id < min_id + chunk_size)
                .options(
                    joinedload("interaction_references"),
                    joinedload("interaction_phenotypes"),
                    joinedload("feature_interactions"),
                )
            )

            for old_obj in old_objs:
                # Convert old objects into new ones
                newly_created_objs = create_genetic_interevidence(
                    old_obj, key_to_experiment, key_to_phenotype, reference_ids, bioent_ids
                )

                if newly_created_objs is not None:
                    # Edit or add new objects
                    for newly_created_obj in newly_created_objs:
                        current_obj_by_id = (
                            None
                            if newly_created_obj.id not in id_to_current_obj
                            else id_to_current_obj[newly_created_obj.id]
                        )
                        current_obj_by_key = (
                            None
                            if newly_created_obj.unique_key() not in key_to_current_obj
                            else key_to_current_obj[newly_created_obj.unique_key()]
                        )
                        create_or_update(
                            newly_created_obj,
                            current_obj_by_id,
                            current_obj_by_key,
                            values_to_check,
                            new_session,
                            output_creator,
                        )

                        if current_obj_by_id is not None and current_obj_by_id.id in untouched_obj_ids:
                            untouched_obj_ids.remove(current_obj_by_id.id)
                        if current_obj_by_key is not None and current_obj_by_key.id in untouched_obj_ids:
                            untouched_obj_ids.remove(current_obj_by_key.id)

            # Delete untouched objs
            for untouched_obj_id in untouched_obj_ids:
                new_session.delete(id_to_current_obj[untouched_obj_id])
                output_creator.removed()

            # Commit
            output_creator.finished(str(i + 1) + "/" + str(int(num_chunks)))
            new_session.commit()
            min_id = min_id + chunk_size

    except Exception:
        log.exception("Unexpected error:" + str(sys.exc_info()[0]))
    finally:
        new_session.close()
        old_session.close()

    log.info("complete")
def perform(self, *args, **kwargs):
    is_dry_run: bool = kwargs.get('is_dry_run') or False

    tweeter = TrafficViolationsTweeter()

    repeat_camera_offender: Optional[
        RepeatCameraOffender] = RepeatCameraOffender.query.filter(
            and_(RepeatCameraOffender.times_featured == 0,
                 RepeatCameraOffender.total_camera_violations >= 25)
        ).order_by(func.random()).first()

    if repeat_camera_offender:
        # get the number of vehicles that have the same number
        # of violations
        tied_with = RepeatCameraOffender.query.filter(
            RepeatCameraOffender.total_camera_violations ==
            repeat_camera_offender.total_camera_violations).count()

        # since the vehicles are in descending order of violations,
        # we find the record that has the same number of violations
        # and the lowest id...
        min_id = RepeatCameraOffender.query.session.query(
            func.min(RepeatCameraOffender.id)).filter(
                RepeatCameraOffender.total_camera_violations ==
                repeat_camera_offender.total_camera_violations).one()[0]

        # nth place is simply the sum of the two values minus one.
        nth_place = tied_with + min_id - 1

        red_light_camera_violations = \
            repeat_camera_offender.red_light_camera_violations
        speed_camera_violations = \
            repeat_camera_offender.speed_camera_violations

        vehicle_hashtag: str = L10N.VEHICLE_HASHTAG.format(
            repeat_camera_offender.state,
            repeat_camera_offender.plate_id)

        # one of 'st', 'nd', 'rd', 'th'
        suffix: str = string_utils.determine_ordinal_indicator(nth_place)

        # how bad is this vehicle?
        worst_substring: str = (f'{nth_place}{suffix}-worst'
                                if nth_place > 1 else 'worst')

        tied_substring: str = ' tied for' if tied_with != 1 else ''

        spaces_needed: int = twitter_utils.padding_spaces_needed(
            red_light_camera_violations, speed_camera_violations)

        featured_string = L10N.REPEAT_CAMERA_OFFENDER_STRING.format(
            vehicle_hashtag,
            repeat_camera_offender.total_camera_violations,
            str(red_light_camera_violations).ljust(
                spaces_needed - len(str(red_light_camera_violations))),
            str(speed_camera_violations).ljust(
                spaces_needed - len(str(speed_camera_violations))),
            vehicle_hashtag,
            tied_substring,
            worst_substring)

        messages: List[str] = [featured_string]

        if not is_dry_run:
            success: bool = tweeter.send_status(
                message_parts=messages,
                on_error_message=(f'Error printing featured plate. '
                                  f'Tagging @bdhowald.'))
            if success:
                repeat_camera_offender.times_featured += 1
                RepeatCameraOffender.query.session.commit()

        LOG.debug('Featured job plate ran successfully.')
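# Worked example of the nth_place arithmetic above: RepeatCameraOffender rows
# are id'd in descending order of total_camera_violations, so if a tie group
# of 3 vehicles starts at id 7, the group occupies ranks 7, 8 and 9, and every
# member is "tied for 9th-worst" (hypothetical numbers; illustration only, not
# part of the original source):
tied_with = 3   # vehicles sharing the same violation count
min_id = 7      # lowest (best-ranked) id within the tie group
nth_place = tied_with + min_id - 1
assert nth_place == 9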