def get_timeline(entity_ids, ref_time, interval_value=4, interval_unit='hours'):
    """Fetch per-hour timeline stats for the given entities over the past 24 hours.

    For each of the 25 hourly data points ending at ``ref_time`` (truncated to
    the hour), runs the ``timeline_fetch`` query and collects one list of stat
    dicts per timestamp.

    Args:
        entity_ids: iterable of entity ids to match in the query.
        ref_time: datetime anchoring the most recent data point.
        interval_value: base interval length substituted into the query.
        interval_unit: unit for the interval (e.g. 'hours').

    Returns:
        dict mapping 'YYYY-MM-DD HH:MM:SS' timestamp strings to either a list
        of stat dicts (keys: twt_cnt, acc_cnt, trend, mean_bs) or ``None`` when
        the query returned no rows for that hour (sparse data).

    Raises:
        Re-raises any exception from the underlying query after logging it.
    """
    format_dict = {
        'entity_id_matching_str': ' OR '.join('entity_id=%s' % eid for eid in entity_ids),
        'interval_unit': interval_unit,
        'interval_value': interval_value,
        'double_interval_value': 2 * interval_value
    }
    conn_args = {'timeout': 60, 'command_timeout': 20}
    results = dict()
    try:
        for i in range(25):  # past 24 hours, inclusive of the anchor hour
            data_point_time = ref_time - timedelta(hours=i)
            # force hourly data points
            data_point_time = data_point_time.replace(minute=0, second=0, microsecond=0)
            data_point_time = data_point_time.strftime('%Y-%m-%d %H:%M:%S')
            format_dict['ref_time'] = "(TIMESTAMP '%s')" % data_point_time
            hour_stats = asyncio.run(
                run_query(timeline_fetch, {'format_dict': format_dict}, conn_args=conn_args)
            )
            # NOTE(review): the original code had an unreachable `else: raise
            # ValueError(...)` after checking len > 0 and len == 0; len() can
            # never be negative, so that branch is dead and has been removed.
            if hour_stats:
                line_stats = results.get(data_point_time, list())
                for record in hour_stats:
                    line_stats.append({
                        'twt_cnt': record['twt_cnt'],
                        'acc_cnt': record['acc_cnt'],
                        'trend': float(record['trend']),
                        'mean_bs': float(record['mean_bs'])
                    })
                results[data_point_time] = line_stats
            else:
                # sparsity in data: no rows for this hour
                results[data_point_time] = None
    except Exception as e:
        logging.exception("transaction failed")
        logging.error("failed at fetching timeline")
        logging.error("query args : {}".format(format_dict))
        raise e
    return results
def get_entity_count(interval_value=4, interval_unit='hours'):
    """Run the ``entity_count_fetch`` query and return its result.

    Args:
        interval_value: base interval length; the query receives twice this value.
        interval_unit: unit for the interval (e.g. 'hours').

    Returns:
        The query result on success, or ``None`` if the transaction failed
        (the failure is logged, not raised).
    """
    query_params = {
        'format_dict': {
            'interval_unit': interval_unit,
            'double_interval_value': interval_value * 2,
        }
    }
    try:
        return asyncio.run(
            run_query(
                entity_count_fetch,
                query_params,
                conn_args={'timeout': 60, 'command_timeout': 20},
            )
        )
    except Exception:
        logging.exception("transaction failed")
        logging.error("failed at fetching entity count")
        return None
def check_disk_space():
    """Log free-space percentage for '/' and, when below 20%, purge old tweets.

    Runs ``batch_psql_tweet_delete`` best-effort (failure is logged, not
    raised), logs how many tweets were deleted, and logs the free-space
    percentage again afterwards.
    """
    def _free_percent():
        # shutil.disk_usage returns (total, used, free)
        usage = shutil.disk_usage("/")
        return usage[2] * 1.0 / usage[0] * 100.0

    percent = _free_percent()
    logging.info("currently {} percent of space left".format(percent))
    if percent < 20.0:
        deleted_tweets = list()
        try:
            deleted_tweets = asyncio.run(run_query(batch_psql_tweet_delete, {}))
        except Exception:
            # best-effort cleanup: log and continue
            logging.exception("transaction failed")
            logging.error("failed at deleting data")
        logging.info("{} tweets deleted".format(len(deleted_tweets)))
        logging.info("now {} percent of space left".format(_free_percent()))
def get_coord_score_report_extended(target_entity_ids, interval_value=4, interval_unit='hours'):
    """Fetch the extended coordination-score report for the given entities.

    Args:
        target_entity_ids: entity ids passed through to the query.
        interval_value: base interval length; the query receives twice this value.
        interval_unit: unit for the interval (e.g. 'hours').

    Returns:
        The query result on success, or ``None`` if the transaction failed
        (the failure is logged, not raised).
    """
    format_dict = {
        'interval_unit': interval_unit,
        'double_interval_value': 2 * interval_value
    }
    args = {
        'target_entity_ids': target_entity_ids,
        'format_dict': format_dict
    }
    conn_args = {'timeout': 60, 'command_timeout': 20}
    try:
        results = asyncio.run(
            run_query(coord_score_fetching_extended, args, conn_args=conn_args)
        )
    except Exception:
        logging.exception("transaction failed")
        logging.error("failed at fetching coord scores extension")
        # BUG FIX: the original logged the undefined name `target_entities`,
        # which raised NameError inside the handler and masked the real error.
        logging.error("query args : {}".format(target_entity_ids))
        return None
    return results
def get_hoaxy_data(entity_ids, ref_time, interval_value=4, interval_unit='hours'):
    """Fetch hoaxy network data for the given entities around ``ref_time``.

    Builds OR-joined entity-id match strings for the three table aliases used
    by the query (entitytwt, from_user_et, to_user_et) and runs
    ``hoaxy_fetching``.

    Args:
        entity_ids: iterable of entity ids to match.
        ref_time: timestamp (string or datetime) anchoring the query window.
        interval_value: interval length substituted into the query.
        interval_unit: unit for the interval (e.g. 'hours').

    Returns:
        The query result on success, or ``None`` if the transaction failed
        (the failure is logged, not raised).
    """
    def _id_match(column):
        return ' OR '.join('%s=%s' % (column, eid) for eid in entity_ids)

    format_dict = {
        'entitytwt_entity_id_matching_str': _id_match('entitytwt.entity_id'),
        'from_user_et_entity_id_matching_str': _id_match('from_user_et.entity_id'),
        'to_user_et_entity_id_matching_str': _id_match('to_user_et.entity_id'),
        'ref_time': "(TIMESTAMP '%s')" % ref_time,
        'interval_unit': interval_unit,
        'interval_value': interval_value,
    }
    try:
        return asyncio.run(
            run_query(
                hoaxy_fetching,
                {'format_dict': format_dict},
                conn_args={'timeout': 60, 'command_timeout': 20},
            )
        )
    except Exception:
        logging.exception("transaction failed")
        logging.error("failed at fetching hoaxy data")
        logging.error("query args : {}".format(format_dict))
        return None
def get_coord_score_report(ref_time, interval_value=4, interval_unit='hours', nrows=1000):
    """Fetch the coordination-score report anchored at ``ref_time``.

    Args:
        ref_time: timestamp (string or datetime) anchoring the query window.
        interval_value: base interval length; the query also receives twice
            this value.
        interval_unit: unit for the interval (e.g. 'hours').
        nrows: maximum number of rows the query should return.

    Returns:
        The query result.

    Raises:
        asyncpg.exceptions.DivisionByZeroError: propagated unchanged to the
            caller without the generic failure logging.
        Exception: any other query failure is logged, then re-raised.
    """
    query_params = {
        'format_dict': {
            'ref_time': "(TIMESTAMP '%s')" % ref_time,
            'interval_unit': interval_unit,
            'interval_value': interval_value,
            'double_interval_value': interval_value * 2,
            'nrows': nrows,
        }
    }
    try:
        return asyncio.run(
            run_query(
                coord_score_fetching,
                query_params,
                conn_args={'timeout': 60, 'command_timeout': 20},
            )
        )
    except asyncpg.exceptions.DivisionByZeroError:
        # pass the error upward without the generic logging below
        raise
    except Exception as err:
        logging.exception("transaction failed")
        logging.error("failed at fetching coord scores")
        logging.error("query args : {}".format(query_params['format_dict']))
        raise err