def migrate_db(sql_script):
    """Migrate database using sql_script."""
    logger.info('Start migration %r', __file__)
    with ENGINE.connect() as conn:
        # Run each statement in its own implicit transaction. Keep the
        # return value of `execution_options`: on SQLAlchemy<1.4 it is a
        # branched copy, so discarding it would drop the AUTOCOMMIT option.
        conn = conn.execution_options(isolation_level='AUTOCOMMIT')
        for stmt in convert_to_sqlalchemy_statement(sql_script):
            logger.debug('Executing sql statement %r', stmt)
            conn.execute(stmt)
    logger.info('Migration finished!')
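# `convert_to_sqlalchemy_statement` is used above but not defined in this
# section. A minimal sketch of such a helper, assuming the script is plain
# SQL with `;`-terminated statements (hypothetical, not necessarily the
# project's actual implementation):
def convert_to_sqlalchemy_statement(sql_script):
    """Yield one executable statement per ``;``-terminated chunk."""
    from sqlalchemy import text
    for raw in sql_script.split(';'):
        raw = raw.strip()
        if raw:  # skip empty fragments, e.g., after a trailing semicolon
            yield text(raw)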
def prepare_queries(engine, w_per_q=5):
    """Build random search queries, `w_per_q` words each, sampled from
    article titles."""
    q = """
    SELECT title FROM article ORDER BY RANDOM() LIMIT 1000
    """
    rs = engine.execute(text(q))
    words = []
    for title, in rs:
        words += title.split(' ')
    np.random.shuffle(words)
    # Chunk the shuffled word pool into queries of `w_per_q` words each.
    return [
        ' '.join(words[x:x + w_per_q])
        for x in range(0, len(words), w_per_q)
    ]
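# Usage sketch, assuming `ENGINE` is the module-level SQLAlchemy engine used
# elsewhere in this file:
#
#     np.random.seed(42)                    # optional: reproducible shuffle
#     queries = prepare_queries(ENGINE, w_per_q=3)
#     print(queries[:2])                    # two 3-word search queries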
def streaming_start_at(engine):
    """Return starting datetime of twitter tracking.

    Parameters
    ----------
    engine : sqlalchemy connection
        The database connection, e.g., engine or session.

    Returns
    -------
    datetime
        The created_at datetime of the first tweet in the database.
    """
    q = 'SELECT created_at FROM tweet ORDER BY id ASC LIMIT 1'
    return engine.execute(q).scalar()
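# Usage sketch (note that `scalar()` returns None when the tweet table is
# empty, so guard before using the result):
#
#     started = streaming_start_at(ENGINE)
#     if started is not None:
#         logger.info('Tweet tracking started at %s UTC', started)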
def before_request():
    """Before request, try to refresh content of searcher."""
    now_dt = datetime.utcnow()
    now_str = now_dt.strftime('%Y-%m-%d %H:%M:%S')
    q0 = "SELECT value FROM meta_info WHERE name='searcher_refresh_date'"
    q1 = "UPDATE meta_info SET value=:now WHERE name='searcher_refresh_date'"
    try:
        recent_refresh = engine.execute(q0).scalar()
    except SQLAlchemyError:
        # Retry once, e.g., when the pooled connection has gone stale.
        recent_refresh = engine.execute(q0).scalar()
    if recent_refresh:
        recent_refresh = dateutil.parser.parse(recent_refresh)
        if now_dt - recent_refresh > REFRESH_INTERVAL:
            searcher.refresh()
            engine.execute(sqlalchemy.text(q1), now=now_str)
    else:
        # First run: seed the refresh timestamp in the meta_info table.
        engine.execute(MetaInfo.__table__.insert().values(
            name='searcher_refresh_date',
            value=now_str,
            value_type='datetime',
            description="in api, recent refresh datetime of searcher"))
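# This hook takes effect only once it is registered with the web framework.
# A sketch, assuming the API is Flask-based (`app` is hypothetical here):
#
#     app.before_request(before_request)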
def run(cls, args):
    """Overriding method as the entry point of this command."""
    session = Session()
    if args['--volume'] is True:
        configure_logging('report.volume',
                          console_level=args['--console-log-level'],
                          file_level='WARNING')
        table_names = ['tweet', 'url', 'article']
        table = args['--table']
        if table not in table_names:
            logger.critical('Available tables are: %s', table_names)
            sys.exit(2)
        interval_names = [
            'minute', 'hour', 'day', 'week', 'month', 'quarter', 'year'
        ]
        interval = args['--interval']
        if interval not in interval_names:
            logger.critical('Available intervals are: %s', interval_names)
            sys.exit(2)
        limit = int(args['--limit'])
        if limit <= 0:
            logger.critical('%r should be larger than 0', limit)
            sys.exit(2)
        sql = """
        SELECT count(id) AS agg_num,
               date_trunc(:interval, created_at) AS interval
        FROM %s
        GROUP BY interval
        ORDER BY interval DESC
        LIMIT :limit""" % table
        stmt = text(sql).bindparams(interval=interval, limit=limit)
        strf = '%Y-%m-%d %H:%M:%S'
        with ENGINE.connect() as conn:
            result = conn.execute(stmt).fetchall()
        print('-' * 35)
        print('{0:^20s} | {1:12s}'.format('Timeline (%s)' % interval,
                                          'Aggregation'))
        print('-' * 35)
        for v, t in result:
            print('{0:^20s} | {1:8d}'.format(t.strftime(strf), v))
        print('-' * 35)
    elif args['--status']:
        configure_logging('report.streaming-status',
                          console_level=args['--console-log-level'])
        table_name = None
        if args['--status'] == 'twitter':
            table_name = 'tweet'
        if table_name is None:
            logger.critical('SNS %r has not been implemented!',
                            args['--status'])
            sys.exit(2)
        sql = 'SELECT created_at FROM {} ORDER BY id DESC LIMIT 1'.format(
            table_name)
        with ENGINE.connect() as conn:
            most_recent, = conn.execute(text(sql)).fetchone()
        delta_minutes = 30
        delta = timedelta(minutes=delta_minutes)
        current_utc = datetime.utcnow()
        if current_utc - most_recent > delta:
            logger.critical('No %s streaming update in the past %s minutes!',
                            args['--status'], delta_minutes)
        else:
            logger.info('Most recent %s streaming update is %s',
                        args['--status'], str(most_recent) + ' (UTC)')
    elif args['--top-spreader'] is True:
        configure_logging('report.top-spreaders',
                          console_level=args['--console-log-level'],
                          file_level='WARNING')
        # Create the table if it does not exist yet.
        if not Top20SpreaderMonthly.__table__.exists(bind=ENGINE):
            Top20SpreaderMonthly.__table__.create(bind=ENGINE)
        if args['--force-today'] is True:
            upper_day = datetime.utcnow().date()
        elif args['--upper-day'] is None:
            upper_day = datetime.utcnow().date() - timedelta(days=1)
        else:
            try:
                upper_day = parse(args['--upper-day']).date()
            except Exception:
                raise ValueError('Invalid date: %s' % args['--upper-day'])
        if args['--generate'] is True:
            logger.warning('Generating top spreaders for upper_day=%r ...',
                           upper_day)
            cls.generate_top_spreaders(session, upper_day)
        elif args['--look-up'] is True:
            cls.look_up_top_spreaders(session, upper_day,
                                      args['--most-recent'])
    elif args['--top-article'] is True:
        configure_logging('report.top-article',
                          console_level=args['--console-log-level'],
                          file_level='WARNING')
        # Create the table if it does not exist yet.
        if not Top20ArticleMonthly.__table__.exists(bind=ENGINE):
            Top20ArticleMonthly.__table__.create(bind=ENGINE)
        if args['--force-today'] is True:
            upper_day = datetime.utcnow().date()
        elif args['--upper-day'] is None:
            upper_day = datetime.utcnow().date() - timedelta(days=1)
        else:
            try:
                upper_day = parse(args['--upper-day']).date()
            except Exception:
                raise ValueError('Invalid date: %s' % args['--upper-day'])
        if args['--generate'] is True:
            logger.warning('Generating top articles for upper_day=%r ...',
                           upper_day)
            cls.generate_top_articles(session, upper_day)
        elif args['--look-up'] is True:
            cls.look_up_top_articles(session, upper_day,
                                     args['--most-recent'])
    session.close()
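# Usage sketch for this docopt-style subcommand. The flag names are taken
# from the `args` lookups above; the `hoaxy report` invocation itself is an
# assumption about how the CLI is wired up:
#
#     hoaxy report --volume --table=tweet --interval=day --limit=30
#     hoaxy report --status=twitter
#     hoaxy report --top-spreader --generate --upper-day=2017-05-01
#     hoaxy report --top-article --look-up --most-recent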