Beispiel #1
0
def migrate_db(sql_script):
    """Migrate the database by executing every statement in *sql_script*.

    Each statement produced by ``convert_to_sqlalchemy_statement`` is run
    on a connection in AUTOCOMMIT isolation, so statements are not wrapped
    in an enclosing transaction.

    Parameters
    ----------
    sql_script : str
        Raw SQL script to be split into individual statements.
    """
    logger.info('Start migration %r', __file__)
    with ENGINE.connect() as conn:
        # AUTOCOMMIT isolation via execution_options() is the supported
        # SQLAlchemy mechanism. The previous `conn.autocommit = True`
        # assignment was removed: it only set an unused instance
        # attribute on the Connection and had no effect.
        conn = conn.execution_options(isolation_level='AUTOCOMMIT')
        for stmt in convert_to_sqlalchemy_statement(sql_script):
            logger.debug('Executing sql statement %r', stmt)
            conn.execute(stmt)
    logger.info('Migration finished!')
Beispiel #2
0
def prepare_queries(engine, w_per_q=5):
    """Build random search queries from article titles.

    Fetches up to 1000 random article titles, splits them into a word
    pool, shuffles the pool in place, and joins consecutive words into
    queries of ``w_per_q`` words each.

    Parameters
    ----------
    engine : sqlalchemy engine or connection
        Database handle used to run the title query.
    w_per_q : int
        Number of words per generated query.

    Returns
    -------
    list of str
        The generated queries.
    """
    sql = """ SELECT title
            FROM article
            ORDER BY RANDOM()
            LIMIT 1000
        """
    pool = []
    for row in engine.execute(text(sql)):
        pool.extend(row[0].split(' '))
    np.random.shuffle(pool)
    queries = []
    for start in range(0, len(pool), w_per_q):
        queries.append(' '.join(pool[start:start + w_per_q]))
    return queries
Beispiel #3
0
def streaming_start_at(engine):
    """Return the datetime at which twitter tracking began.

    Parameters
    ----------
    engine : sqlalchemy connection
        Database connection (engine or session) used to run the query.

    Returns
    -------
    datetime
        ``created_at`` of the earliest tweet (lowest id) in the database.
    """
    sql = 'SELECT created_at FROM tweet ORDER BY id ASC LIMIT 1'
    result = engine.execute(sql)
    return result.scalar()
Beispiel #4
0
def before_request():
    """Refresh the searcher before handling a request, if it is stale."""
    current = datetime.utcnow()
    current_str = current.strftime('%Y-%m-%d %H:%M:%S')
    select_q = "SELECT value from meta_info WHERE name='searcher_refresh_date'"
    update_q = "UPDATE meta_info SET value=:now WHERE name='searcher_refresh_date'"
    # Retry once: the first execute on a stale pooled connection may
    # raise; the second attempt gets a fresh connection.
    try:
        last_refresh = engine.execute(select_q).scalar()
    except SQLAlchemyError:
        last_refresh = engine.execute(select_q).scalar()
    if not last_refresh:
        # No record yet: seed the refresh timestamp and return.
        engine.execute(MetaInfo.__table__.insert().values(
            name='searcher_refresh_date',
            value=current_str,
            value_type='datetime',
            description="in api, recent refresh datetime of searcher"))
        return
    last_refresh = dateutil.parser.parse(last_refresh)
    if current - last_refresh > REFRESH_INTERVAL:
        searcher.refresh()
        engine.execute(sqlalchemy.text(update_q), now=current_str)
Beispiel #5
0
 def run(cls, args):
     """Overriding method as the entry point of this command.

     Parameters
     ----------
     args : dict
         Parsed docopt arguments. Exactly one of ``--volume``,
         ``--status``, ``--top-spreader`` or ``--top-article`` selects
         which report to produce; the remaining options parameterize it.
     """
     session = Session()
     if args['--volume'] is True:
         configure_logging('report.volume',
                           console_level=args['--console-log-level'],
                           file_level='WARNING')
         table_names = ['tweet', 'url', 'article']
         table = args['--table']
         if table not in table_names:
             logger.critical('Available tables are: %s', table_names)
             sys.exit(2)
         interval_names = [
             'minute', 'hour', 'day', 'week', 'month', 'quarter', 'year'
         ]
         interval = args['--interval']
         if interval not in interval_names:
             logger.critical('Available intervals are: %s', interval_names)
             sys.exit(2)
         limit = args['--limit']
         if int(limit) <= 0:
             logger.critical('%r should be larger than 0', limit)
             sys.exit(2)
         # `table` is validated against the whitelist above, so the
         # %-interpolation cannot inject SQL; interval and limit are
         # bound as real parameters.
         sql = """
         SELECT count(id) as agg_num,
             date_trunc(:interval, created_at) as interval
         FROM %s
         GROUP BY interval
         ORDER BY interval DESC
         LIMIT :limit""" % table
         stmt = text(sql).bindparams(interval=interval, limit=limit)
         strf = '%Y-%m-%d %H:%M:%S'
         with ENGINE.connect() as conn:
             result = conn.execute(stmt).fetchall()
             print(('-' * 35))
             print(('{0:^20s} | {1:12s}'.format('Timeline (%s)' % interval,
                                                'Aggregation')))
             print(('-' * 35))
             for v, t in result:
                 print(('{0:^20s} | {1:8d}'.format(t.strftime(strf), v)))
             print(('-' * 35))
     elif args['--status']:
         configure_logging('report.streaming-status',
                           console_level=args['--console-log-level'])
         table_name = None
         if args['--status'] == 'twitter':
             table_name = 'tweet'
         if table_name is None:
             logger.critical('SNS %r has not been implemented!',
                             args['--status'])
             sys.exit(2)
         # Use the resolved table_name (was hard-coded to 'tweet', which
         # would silently query the wrong table for any new SNS).
         sql = 'SELECT created_at FROM {} ORDER BY id DESC LIMIT 1'.format(
             table_name)
         with ENGINE.connect() as conn:
             most_recent, = conn.execute(text(sql)).fetchone()
             delta_minutes = 30
             delta = timedelta(minutes=delta_minutes)
             current_utc = datetime.utcnow()
             if current_utc - most_recent > delta:
                 logger.critical(
                     'No %s streaming update in the past %s minutes!',
                     args['--status'], delta_minutes)
             else:
                 logger.info('Most recent %s streaming update is %s',
                             args['--status'],
                             str(most_recent) + ' (UTC)')
     elif args['--top-spreader'] is True:
         configure_logging('report.top-spreaders',
                           console_level=args['--console-log-level'],
                           file_level='WARNING')
         # try to create table
         if (Top20SpreaderMonthly.__table__.exists(bind=ENGINE)) is False:
             Top20SpreaderMonthly.__table__.create(bind=ENGINE)
         if args['--force-today'] is True:
             upper_day = datetime.utcnow().date()
         elif args['--upper-day'] is None:
             upper_day = datetime.utcnow().date() - timedelta(days=1)
         else:
             try:
                 upper_day = parse(args['--upper-day']).date()
             except Exception:
                 # %-format eagerly: ValueError does not interpolate a
                 # second positional argument the way logging calls do.
                 raise ValueError('Invalid date: %s' % args['--upper-day'])
         if args['--generate'] is True:
             logger.warning(
                 'Generating top spreaders for upper_day=%r ...',
                 upper_day)
             cls.generate_top_spreaders(session, upper_day)
         elif args['--look-up'] is True:
             cls.look_up_top_spreaders(session, upper_day,
                                       args['--most-recent'])
     elif args['--top-article'] is True:
         configure_logging('report.top-article',
                           console_level=args['--console-log-level'],
                           file_level='WARNING')
         # try to create table
         if (Top20ArticleMonthly.__table__.exists(bind=ENGINE)) is False:
             Top20ArticleMonthly.__table__.create(bind=ENGINE)
         if args['--force-today'] is True:
             upper_day = datetime.utcnow().date()
         elif args['--upper-day'] is None:
             upper_day = datetime.utcnow().date() - timedelta(days=1)
         else:
             try:
                 upper_day = parse(args['--upper-day']).date()
             except Exception:
                 raise ValueError('Invalid date: %s' % args['--upper-day'])
         if args['--generate'] is True:
             logger.warning('Generating top articles for upper_day=%r ...',
                            upper_day)
             cls.generate_top_articles(session, upper_day)
         elif args['--look-up'] is True:
             cls.look_up_top_articles(session, upper_day,
                                      args['--most-recent'])
     session.close()