def trends():
    full_db = load_db(database_path=DATABASE_PATH)
    db_init_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['initial_timestamp'],
                                       input_format=time_format_full_with_timezone)
    db_update_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['timestamp'],
                                         input_format=time_format_full_with_timezone)

    print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
    print("time since database init: {}".format(datetime.datetime.now(tz=pytz.utc) - db_init_timestamp))
    # total_seconds() rather than .seconds: .seconds wraps at one day
    print("time since last update: {:.2f} minutes".format(
        (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).total_seconds() / 60))

    print('\ndebug:')
    print('time now: {}'.format(datetime.datetime.now(tz=pytz.utc)))
    print('db init time: {}'.format(db_init_timestamp))
    print('diff: {}'.format(datetime.datetime.now(tz=pytz.utc) - db_init_timestamp))

    trends_output = {
        "results": full_db['trends']['include_hashtags'],
        "status": 'ok'
    }
    return jsonify(trends_output)

def get_top_hashtags_from_twitter(country='Japan', debug=False, cache_duration_mins=15, append_db=True):
    cache_db = load_db(database_path=db_path, debug=False)
    hashtags_cache = cache_db['hashtags']

    # compare db timestamp with now
    db_timestamp = str_2_datetime(hashtags_cache['timestamp'],
                                  input_format=time_format_full_with_timezone)
    db_timestamp = db_timestamp.astimezone(tz=pytz.utc)
    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last hashtags API call: {}'.format(time_diff))

    if time_diff.total_seconds() < cache_duration_mins * 60:
        # cache is still fresh: serve from the DB
        output_json = json.dumps(hashtags_cache['content'], ensure_ascii=False)
        return output_json
    else:
        # cache expired: hit the API and update the DB
        output_json = get_top_hashtags_from_twitter_api(country=country, debug=debug)

        output_list = json.loads(output_json)
        if append_db:
            output_list = hashtags_cache['content'] + output_list

        cache_db['hashtags']['content'] = output_list
        cache_db['hashtags']['timestamp'] = datetime_2_str(rq_timestamp,
                                                           output_format=time_format_full_with_timezone)
        update_db(cache_db, database_path=db_path, debug=debug)
        return output_json

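# A minimal, self-contained demo of the timedelta pitfall the cache checks
# above guard against. This is standard-library behavior only; no project
# helpers are needed.
def _demo_timedelta_seconds_pitfall():
    # timedelta.seconds is only the seconds *component* (0-86399) and wraps
    # for gaps longer than a day, which would make a stale cache look fresh
    # again; total_seconds() gives the true elapsed time.
    gap = datetime.timedelta(days=1, minutes=5)
    print(gap.seconds)          # 300 -> looks like 5 minutes
    print(gap.total_seconds())  # 86700.0 -> the real elapsed time
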
def images():
    full_db = load_db(database_path=TRENDS_DATABASE_PATH)

    # from trends content: send back only the media portion of the db
    contents = full_db['trends']
    del full_db

    output_content = []
    for c in contents:
        output_media_url = []
        try:
            for t in c['tweets']:
                if t['media']:
                    output_media_url.append({
                        "url": t['url'],
                        "images": t['media']
                    })
        except (KeyError, TypeError):
            # trend entry has no usable tweets; skip it
            continue

        output_content.append({
            "label": c['label'],
            "time": c['time'],
            "media": output_media_url
        })

    output_results = {"content": output_content}
    trends_output = {"results": output_results, "status": 'ok'}
    return jsonify(trends_output)

def all():
    """
    the full database. use to get updated db before updating container
    ---
    responses:
      200:
        description: returns database
        schema:
          id: databaseGet
          properties:
            results:
              type: json
              default: {trends: {include_hashtags: {}, exclude_hashtags: {}}, hashtags: {}}
            status:
              type: number
              default: 200
    """
    args = request.args.get('q')
    if not args:
        args = "main"

    if args == "main":
        full_db = load_db(database_path=DATABASE_PATH)
        db_init_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['initial_timestamp'],
                                           input_format=time_format_full_with_timezone)
        db_update_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['timestamp'],
                                             input_format=time_format_full_with_timezone)

        print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
        print("time since database init: {:.2f} hours".format(
            (datetime.datetime.now(tz=pytz.utc) - db_init_timestamp).total_seconds() / 3600))
        print("time since last update: {:.2f} minutes".format(
            (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).total_seconds() / 60))
    elif args == "trends":
        full_db = load_db(database_path=TRENDS_DATABASE_PATH)
    elif args == "top_posts":
        full_db = load_db(database_path=TOP_RETWEETS_DATABASE_PATH)
    else:
        # unknown ?q= value: fall back to the main db instead of raising a
        # NameError on the return below
        full_db = load_db(database_path=DATABASE_PATH)

    return jsonify(full_db)

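# Hedged usage sketch for the ?q switch above. The route path is not shown in
# this file, so the '/all' URL is an assumption; adjust it to wherever this
# view is actually registered.
def _demo_query_all():
    import requests  # sketch-only dependency

    for q in ('main', 'trends', 'top_posts'):
        resp = requests.get('http://localhost:5000/all', params={'q': q})
        print(q, '->', resp.status_code)
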
def check_db(db_path=top_retweets_db_path):
    db = load_db(db_path)
    c = db['top_posts']
    for x in c:
        print(repr(x['text']))
    print('number of posts: ', len(c))
    del db
    del c

def daily():
    print("time since app start: {} minutes".format((time.time() - start_time) / 60))
    print("time since last update: {} minutes".format((time.time() - update_start) / 60))

    full_db = load_db(database_path=DATABASE_PATH)
    # print(full_db)

    return "hello {}".format('WAIT')

def all():
    full_db = load_db(database_path=DATABASE_PATH)
    db_init_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['initial_timestamp'],
                                       input_format=time_format_full_with_timezone)
    db_update_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['timestamp'],
                                         input_format=time_format_full_with_timezone)

    print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
    print("time since database init: {:.2f} hours".format(
        (datetime.datetime.now(tz=pytz.utc) - db_init_timestamp).total_seconds() / 3600))
    print("time since last update: {:.2f} minutes".format(
        (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).total_seconds() / 60))

    return jsonify(full_db)

def get_top_trends_from_twitter(country='Japan', exclude_hashtags=False, debug=False,
                                cache_duration_mins=15, append_db=True):
    cache_db = load_db(database_path=db_path, debug=False)
    trends_db = cache_db['trends']
    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # compare db timestamp with now; older entries may lack timezone info
    try:
        db_timestamp = str_2_datetime(trends_cache['timestamp'],
                                      input_format=time_format_full_with_timezone)
    except ValueError:
        db_timestamp = str_2_datetime(trends_cache['timestamp'],
                                      input_format=time_format_full_no_timezone)
    db_timestamp = db_timestamp.astimezone(tz=pytz.utc)
    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last trends API call: {} (h:m:s)'.format(time_diff))
    print('time diff in seconds: {}'.format(time_diff.total_seconds()))
    print('time in db: {}'.format(db_timestamp))
    print('time in rq: {}'.format(rq_timestamp))

    if time_diff.total_seconds() < cache_duration_mins * 60:
        print('less than cache duration, returning cache')
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_trends_from_twitter_api(country=country,
                                                      exclude_hashtags=exclude_hashtags)

        output_list = json.loads(output_json)
        if append_db:
            output_list = trends_cache['content'] + output_list

        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_list
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(
                rq_timestamp, output_format=time_format_full_with_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_list
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(
                rq_timestamp, output_format=time_format_full_with_timezone)

        update_db(cache_db, database_path=db_path, debug=debug)
        return output_json

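# Hedged reference for the cache-db layout. The real schema lives wherever
# load_db/update_db are defined; the nesting below is only inferred from the
# key lookups in this file, so treat it as an assumption.
EXAMPLE_CACHE_DB = {
    "trends": {
        "include_hashtags": {"content": [], "timestamp": "", "initial_timestamp": ""},
        "exclude_hashtags": {"content": [], "timestamp": "", "initial_timestamp": ""},
    },
    "hashtags": {"content": [], "timestamp": ""},
}
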
def top_posts():
    try:
        arg = int(request.args.get('count'))
        if not arg:
            arg = 100
    except (TypeError, ValueError):
        # missing or non-numeric ?count= parameter
        arg = 100

    full_db = load_db(database_path=TOP_RETWEETS_DATABASE_PATH)

    # from trends content: send back only a portion of the db
    contents = full_db['top_posts']
    del full_db

    print('returning {} most recent items from db'.format(arg))
    output = {"results": contents[-arg:], "status": "ok"}
    return jsonify(output)

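# Hedged usage sketch: assumes the view above is registered at /top_posts on a
# local dev server (the route decorator is not shown in this file).
def _demo_query_top_posts():
    import requests  # sketch-only dependency

    resp = requests.get('http://localhost:5000/top_posts', params={'count': 20})
    data = resp.json()
    print(data['status'], 'items:', len(data['results']))
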
def hashtags_twitter_only():
    """
    get the list of hashtag labels currently in the trends cache
    ---
    responses:
      200:
        description: returns hashtag labels joined with <br /> as plain text
        schema:
          id: hashtagsGet
          properties:
            results:
              type: string
            status:
              type: number
              default: 200
    """
    print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
    print("time since last update: {:.2f} minutes".format((time.time() - update_start) / 60))

    full_db = load_db(database_path=DATABASE_PATH)
    direct_hashtags_from_trends = full_db['trends']['include_hashtags']['content']

    output = []
    for t in direct_hashtags_from_trends:
        output.append(t['label'])

    output_str = '<br />'.join(output)
    return textify(output_str)

def get_update_top_posts_from_twitter(min_retweets=10000, cache_duration_mins=15,
                                      debug=False, append_db=True):
    """
    updates the top posts db, but doesn't return it

    :param min_retweets:
    :param cache_duration_mins:
    :param debug:
    :param append_db:
    :return:
    """
    # load retweets db
    top_retweets_db = load_db(database_path=top_retweets_db_path, debug=False)
    top_posts_cache = top_retweets_db['top_posts']

    output_list = analyze_top_retweets(min_retweets=min_retweets, debug=debug)

    if append_db:
        # check for same post
        for o in output_list:
            # slice assignment ([:]) filters the list in place (saves RAM):
            # cuts away cached duplicates of incoming tweets
            top_posts_cache[:] = [d for d in top_posts_cache
                                  if d.get('url') != o['url']]
        # adds new tweets at the end
        output_list = top_posts_cache + output_list

    top_retweets_db['top_posts'] = output_list
    update_db(top_retweets_db, database_path=top_retweets_db_path, debug=debug)
    print('top posts db updated.')

    del top_retweets_db
    del top_posts_cache
    del output_list
    print('memory freed.')

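# Self-contained demo of the cache[:] dedup pattern used above: assigning to
# lst[:] rewrites the existing list object in place instead of rebinding the
# name, so every reference to the list sees the filtered result.
def _demo_slice_assignment_dedup():
    cache = [{'url': 'a'}, {'url': 'b'}]
    alias = cache
    incoming = {'url': 'a'}
    cache[:] = [d for d in cache if d.get('url') != incoming['url']]
    print(alias)  # [{'url': 'b'}] -- the alias observed the in-place update
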
def trends():
    """
    loads trends database
    ---
    responses:
      200:
        description: returns database
        schema:
          id: databaseGet
          properties:
            results:
              type: json
              default: {content: {}, timestamp: "", initial_timestamp: ""}
            status:
              type: string
              default: ok
    """
    full_db = load_db(database_path=DATABASE_PATH)
    db_init_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['initial_timestamp'],
                                       input_format=time_format_full_with_timezone)
    db_update_timestamp = str_2_datetime(full_db['trends']['include_hashtags']['timestamp'],
                                         input_format=time_format_full_with_timezone)

    print("time since app start: {:.2f} minutes".format((time.time() - start_time) / 60))
    print("time since database init: {}".format(datetime.datetime.now(tz=pytz.utc) - db_init_timestamp))
    print("time since last update: {:.2f} minutes".format(
        (datetime.datetime.now(tz=pytz.utc) - db_update_timestamp).total_seconds() / 60))

    print('\ndebug:')
    print('time now: {}'.format(datetime.datetime.now(tz=pytz.utc)))
    print('db init time: {}'.format(db_init_timestamp))
    print('diff: {}'.format(datetime.datetime.now(tz=pytz.utc) - db_init_timestamp))

    # send back only a portion of the db
    results = full_db['trends']['include_hashtags']
    del full_db

    contents = results['content']
    output_content = []
    for c in contents:
        output_content.append({
            "label": c['label'],
            "time": c['time'],
            "volume": c['volume']
        })

    output_results = {
        "content": output_content,
        "timestamp": results['timestamp'],
        "initial_timestamp": results['initial_timestamp']
    }
    trends_output = {"results": output_results, "status": 'ok'}
    return jsonify(trends_output)

def backup():
    backup_db = load_db(database_path=DATABASE_PATH + '.bak')
    return jsonify(backup_db)

def get_top_trends_from_twitter(country='Japan', exclude_hashtags=False, debug=False,
                                cache_duration_mins=15, append_db=True):
    """
    also updates the daily trends db, but doesn't return it

    for trends, the timestamp used is the time called

    :param country:
    :param exclude_hashtags:
    :param debug:
    :param cache_duration_mins:
    :param append_db:
    :return: trends content as a JSON string
    """
    # load main db
    cache_db = load_db(database_path=db_path, debug=False)
    trends_db = cache_db['trends']
    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # load trends + top retweets db
    trend_search_db = load_db(database_path=trends_db_path, debug=False)

    # MAIN DB ONLY: older entries may have been saved without timezone info
    try:
        db_timestamp = str_2_datetime(trends_cache['timestamp'],
                                      input_format=time_format_full_with_timezone)
    except ValueError:
        db_timestamp = str_2_datetime(trends_cache['timestamp'],
                                      input_format=time_format_full_no_timezone)
    db_timestamp = db_timestamp.astimezone(tz=pytz.utc)
    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last trends API call: {} (h:m:s)'.format(time_diff))
    print('time diff in seconds: {}'.format(time_diff.total_seconds()))
    print('time in db: {}'.format(db_timestamp))
    print('time in rq: {}'.format(rq_timestamp))

    if time_diff.total_seconds() < cache_duration_mins * 60:
        print('less than cache duration, returning cache')
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json, img_output_json = get_top_trends_from_twitter_api(
            country=country, exclude_hashtags=exclude_hashtags)

        output_list = json.loads(output_json)
        trend_search_list = json.loads(img_output_json)
        if append_db:
            output_list = trends_cache['content'] + output_list
            trend_search_list = trend_search_db['trends'] + trend_search_list

        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_list
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(
                rq_timestamp, output_format=time_format_full_with_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_list
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(
                rq_timestamp, output_format=time_format_full_with_timezone)
        trend_search_db['trends'] = trend_search_list

        update_db(cache_db, database_path=db_path, debug=debug)
        update_db(trend_search_db, database_path=trends_db_path, debug=debug)
        print('trends and image database updated.')

        del cache_db
        del trends_db
        del trends_cache
        del trend_search_db
        del trend_search_list
        del output_list
        del img_output_json
        print('memory freed.')

        # return the fresh trends JSON so both branches behave alike
        return output_json

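# Hedged usage sketch for the trends fetcher above, assuming the module-level
# db_path / trends_db_path caches already exist on disk.
def _demo_refresh_trends():
    trends_json = get_top_trends_from_twitter(country='Japan',
                                              exclude_hashtags=False,
                                              cache_duration_mins=15)
    trends = json.loads(trends_json)
    print('got {} trend entries'.format(len(trends)))
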