def get_top_hashtags_from_twitter(country='Japan', debug=False, cache_duration_mins=15, append_db=True):
    cache_db = load_db(database_path=db_path, debug=False)
    hashtags_cache = cache_db['hashtags']

    # compare the cached timestamp with now
    db_timestamp = str_2_datetime(hashtags_cache['timestamp'], input_format=time_format_full_with_timezone)
    db_timestamp = db_timestamp.astimezone(tz=pytz.utc)
    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last hashtags API call: {}'.format(time_diff))

    # total_seconds() avoids the 24h wrap-around of timedelta.seconds
    if time_diff.total_seconds() < cache_duration_mins * 60:
        # cache is still fresh: serve from the DB
        output_json = json.dumps(hashtags_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_hashtags_from_twitter_api(country=country, debug=debug)

        # update the cache; note that the returned JSON contains only the fresh API items,
        # while the appended history is what gets stored
        output_list = json.loads(output_json)
        if append_db:
            output_list = hashtags_cache['content'] + output_list

        cache_db['hashtags']['content'] = output_list
        cache_db['hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        update_db(cache_db, database_path=db_path, debug=debug)
        return output_json
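# The caching above goes through load_db() / update_db(), which are defined elsewhere in this
# repo. A minimal sketch of what they presumably do, assuming a plain JSON file seeded from the
# DATABASE_STRUCTURE template defined further down (the helper names come from the call sites;
# the bodies here are an assumption, not the repo's actual implementation):
import json
import os


def load_db(database_path, debug=False):
    # sketch: read the JSON cache file; seed it from the template if it does not exist yet
    if not os.path.exists(database_path):
        update_db(DATABASE_STRUCTURE, database_path=database_path, debug=debug)
    with open(database_path, 'r', encoding='utf-8') as f:
        db = json.load(f)
    if debug:
        print('loaded db from {}'.format(database_path))
    return db


def update_db(db, database_path, debug=False):
    # sketch: overwrite the JSON cache file with the updated dict
    with open(database_path, 'w', encoding='utf-8') as f:
        json.dump(db, f, ensure_ascii=False, indent=2)
    if debug:
        print('wrote db to {}'.format(database_path))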
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=False, debug=False, cache_duration_mins=15, append_db=True):
    cache_db = load_db(database_path=db_path, debug=False)
    trends_db = cache_db['trends']
    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # compare the cached timestamp with now (older cache files may store timestamps without a timezone)
    try:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_with_timezone)
    except ValueError:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
    db_timestamp = db_timestamp.astimezone(tz=pytz.utc)
    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last trends API call: {} (h:m:s)'.format(time_diff))
    print('time diff in seconds: {}'.format(time_diff.total_seconds()))
    print('time in db: {}'.format(db_timestamp))
    print('time in rq: {}'.format(rq_timestamp))

    # total_seconds() avoids the 24h wrap-around of timedelta.seconds
    if time_diff.total_seconds() < cache_duration_mins * 60:
        print('less than cache duration, returning cache')
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)

        # update the cache
        output_list = json.loads(output_json)
        if append_db:
            output_list = trends_cache['content'] + output_list

        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_list
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_list
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        update_db(cache_db, database_path=db_path, debug=debug)
        return output_json
def get_top_trends_from_twitter_api(country='Japan', exclude_hashtags=True):
    """
    What is this useful for? Participation. From the Twitter API docs:

        How can I participate in a trend?
        Simply post a Tweet including the exact word or phrase as it appears in the trends list
        (with the hashtag, if you see one). Due to the large number of people Tweeting about these
        specific trends, you may not always be able to find your particular Tweet in search,
        but your followers will always see your Tweets.

    The Twitter Ads API also has a keyword insights endpoint:
    https://developer.twitter.com/en/docs/ads/audiences/api-reference/keyword-insights.html#

    :param country:
    :param exclude_hashtags:
    :return:
    """
    # the WOEID lookup requires yweather (a library), because Yahoo itself has stopped supporting WOEIDs
    woeid_client = yweather.Client()
    woeid = woeid_client.fetch_woeid(location=country)

    if exclude_hashtags:
        trends = api.GetTrendsWoeid(woeid, exclude='hashtags')
    else:
        trends = api.GetTrendsWoeid(woeid, exclude=None)

    output = []
    for trend in trends:
        trend = trend.AsDict()

        # get volumes; tweet_volume can be missing or None
        try:
            tw_volume = [int(trend['tweet_volume'])]
        except (KeyError, TypeError, ValueError):
            tw_volume = [0]

        # the trend timestamp is UTC; attach the timezone explicitly before re-serializing
        timestamp_str = trend['timestamp']
        timestamp_dt = str_2_datetime(timestamp_str, input_format=time_format_twitter_trends).replace(tzinfo=pytz.utc)
        # timestamp_local = timestamp_dt.astimezone(tz=pytz.utc)
        timestamp_utc_str = datetime_2_str(timestamp_dt, output_format=time_format_full_with_timezone)

        output.append({
            "label": trend['name'],
            "volume": tw_volume,
            "time": timestamp_utc_str,
            "query": trend['query'],
            "url": trend['url']
        })

    output_json = json.dumps(output, ensure_ascii=False)
    return output_json
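# Timestamp handling throughout this module goes through str_2_datetime() / datetime_2_str(),
# which are defined elsewhere in the repo. A minimal sketch of what they presumably wrap
# (thin strptime/strftime helpers; the bodies are an assumption based on the call sites):
import datetime


def str_2_datetime(time_str, input_format):
    # parse a timestamp string with the given strptime format
    return datetime.datetime.strptime(time_str, input_format)


def datetime_2_str(time_dt, output_format):
    # render a datetime with the given strftime format
    return time_dt.strftime(output_format)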
start_time = time.time()
update_start = time.time()

time_format_full_no_timezone = '%Y-%m-%d %H:%M:%S'
time_format_full_with_timezone = '%Y-%m-%d %H:%M:%S%z'

jp_timezone = pytz.timezone('Asia/Tokyo')

DATABASE_PATH = './db/daily_database.json'
TRENDS_DATABASE_PATH = './db/daily_trend_search_database.json'
TOP_RETWEETS_DATABASE_PATH = './db/daily_top_rt_database.json'

DATABASE_STRUCTURE = {
    "trends": {
        "include_hashtags": {
            "timestamp": '1999-01-01 00:00:00+0000',
            "initial_timestamp": datetime_2_str(datetime.datetime.now(tz=pytz.utc),
                                                output_format=time_format_full_with_timezone),
            "content": []
        },
        "exclude_hashtags": {
            "timestamp": '1999-01-01 00:00:00+0000',
            "initial_timestamp": datetime_2_str(datetime.datetime.now(tz=pytz.utc),
                                                output_format=time_format_full_with_timezone),
            "content": []
        }
    },
    "hashtags": {
        "timestamp": '1999-01-01 00:00:00+0000',
        "initial_timestamp": datetime_2_str(datetime.datetime.now(tz=pytz.utc),
                                            output_format=time_format_full_with_timezone),
        "content": []
    }
}
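# The cache functions above read and write through lowercase db_path / trends_db_path, while the
# constants here are uppercase. Presumably the module binds one to the other somewhere; a minimal
# assumed aliasing (names inferred from the call sites, not confirmed by this snippet):
db_path = DATABASE_PATH
trends_db_path = TRENDS_DATABASE_PATH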
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=False, debug=False, cache_duration_mins=15, append_db=True):
    """
    Also updates the daily trend-search DB, but does not return it.
    For trends, the timestamp used is the time of the call.

    :param country:
    :param exclude_hashtags:
    :param debug:
    :param cache_duration_mins:
    :param append_db:
    :return:
    """
    # load main db
    cache_db = load_db(database_path=db_path, debug=False)
    trends_db = cache_db['trends']
    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # load trends + top retweets db
    trend_search_db = load_db(database_path=trends_db_path, debug=False)

    # MAIN DB ONLY: older cache files may store timestamps without a timezone
    try:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_with_timezone)
    except ValueError:
        db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
    db_timestamp = db_timestamp.astimezone(tz=pytz.utc)
    rq_timestamp = datetime.datetime.now(tz=pytz.utc)

    time_diff = rq_timestamp - db_timestamp
    print('time since last trends API call: {} (h:m:s)'.format(time_diff))
    print('time diff in seconds: {}'.format(time_diff.total_seconds()))
    print('time in db: {}'.format(db_timestamp))
    print('time in rq: {}'.format(rq_timestamp))

    # total_seconds() avoids the 24h wrap-around of timedelta.seconds
    if time_diff.total_seconds() < cache_duration_mins * 60:
        print('less than cache duration, returning cache')
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json, img_output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)

        # update both caches
        output_list = json.loads(output_json)
        trend_search_list = json.loads(img_output_json)
        if append_db:
            output_list = trends_cache['content'] + output_list
            trend_search_list = trend_search_db['trends'] + trend_search_list

        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_list
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_list
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_with_timezone)

        trend_search_db['trends'] = trend_search_list

        update_db(cache_db, database_path=db_path, debug=debug)
        update_db(trend_search_db, database_path=trends_db_path, debug=debug)
        print('trends and image database updated.')

        # NOTE: on this refresh path the function only updates the DBs and returns None
        del cache_db
        del trends_db
        del trends_cache
        del trend_search_db
        del trend_search_list
        del output_list
        del output_json
        del img_output_json
        print('memory freed.')
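# Hypothetical caller for the trends cache above. Note that in this version the refresh path
# updates the DBs and returns None, so only the cache-hit path hands back JSON directly.
if __name__ == '__main__':
    trends_json = get_top_trends_from_twitter(country='Japan', exclude_hashtags=True,
                                              cache_duration_mins=15)
    if trends_json is not None:
        for trend in json.loads(trends_json)[:10]:
            print(trend['label'], trend['volume'])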
def process_tweets(tweets_response, keep_all=False, debug=False):
    """
    By default, processing discards tweets with no retweets or likes.
    keep_all=True keeps all tweets, whether they have retweets or not.

    :param tweets_response:
    :param keep_all:
    :param debug:
    :return:
    """
    tweets = tweets_response
    # print(json.dumps(tweets, indent=4, ensure_ascii=False))

    output_tweets = []
    for tweet in tweets:
        # loop through every tweet
        output_tweet = {}
        output_tweet['likes'] = 0
        for k, v in tweet.items():
            if k == "favorite_count" or k == "retweeted_status":
                # for retweets, the like count lives on the original (retweeted) status
                if k == "favorite_count" and v:
                    output_tweet['likes'] = v
                elif k == "retweeted_status" and v:
                    try:
                        output_tweet['likes'] = v['favorite_count']
                    except (KeyError, TypeError):
                        print('favorites not found')
                        print(v)
            elif k == "media" and v:
                # turn the media dicts into a list of image urls
                output_tweet[k] = []
                for m in v:
                    output_tweet[k].append(m['media_url_https'])
            elif k == "id" and v:
                # make a url from the id and discard the id itself
                output_tweet['url'] = "https://twitter.com/anyuser/status/" + str(v)
            elif k == "retweet_count":
                if v:
                    if debug:
                        print(' picking this: ', k, v)
                    output_tweet[k] = v
                else:
                    if debug:
                        print(' skipping this: ', k, v)  # not keeping those with 0 RT
                    output_tweet[k] = 0
            elif k == "created_at":
                tweet_creation_time = str_2_datetime(v, input_format=time_format_twitter_created_at)
                tweet_checked_time = datetime.datetime.now(tz=pytz.utc)
                output_tweet['timestamp'] = {
                    "created": datetime_2_str(tweet_creation_time, output_format=time_format_full_with_timezone),
                    "last_checked": datetime_2_str(tweet_checked_time, output_format=time_format_full_with_timezone)
                }
            else:
                # keep k:v as-is
                if debug:
                    print('keeping this: ', k, repr(v))
                output_tweet[k] = v

        print('num of likes: ', output_tweet['likes'])
        output_tweets.append(output_tweet)

    output = []
    if not keep_all:
        for o in output_tweets:
            # retweet_count may be absent if the source dict never had that key
            if o['likes'] > 0 and o.get('retweet_count', 0) > 0:
                output.append(o)
    else:
        output = output_tweets

    return output
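# Hypothetical usage of process_tweets(). The dicts mimic the python-twitter AsDict() fields the
# function inspects (favorite_count, retweeted_status, retweet_count, id, created_at, ...);
# time_format_twitter_created_at, used inside process_tweets(), is assumed here to be Twitter's
# standard created_at format.
time_format_twitter_created_at = '%a %b %d %H:%M:%S %z %Y'

sample_tweets = [
    {
        "id": 1234567890123456789,
        "favorite_count": 12,
        "retweet_count": 3,
        "created_at": "Mon Jan 01 00:00:00 +0000 2024",
        "text": "example tweet with engagement"
    },
    {
        "id": 987654321,
        "favorite_count": 0,
        "retweet_count": 0,
        "created_at": "Mon Jan 01 00:00:00 +0000 2024",
        "text": "no engagement, dropped unless keep_all=True"
    },
]

kept = process_tweets(sample_tweets, keep_all=False)
print(len(kept))  # -> 1: the zero-engagement tweet is filtered out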
resources=r'*', supports_credentials=True)

start_time = time.time()
update_start = time.time()

time_format_full_no_timezone = '%Y-%m-%d %H:%M:%S'
time_format_full_with_timezone = '%Y-%m-%d %H:%M:%S%z'

jp_timezone = pytz.timezone('Asia/Tokyo')

DATABASE_PATH = './db/daily_database.json'

DATABASE_STRUCTURE = {
    "trends": {
        "include_hashtags": {
            "timestamp": '1999-01-01 00:00:00',
            "initial_timestamp": datetime_2_str(datetime.datetime.now(),
                                                output_format=time_format_full_with_timezone),
            "content": []
        },
        "exclude_hashtags": {
            "timestamp": '1999-01-01 00:00:00',
            "initial_timestamp": datetime_2_str(datetime.datetime.now(),
                                                output_format=time_format_full_with_timezone),
            "content": []
        }
    },
    "hashtags": {
        "timestamp": '1999-01-01 00:00:00',
        "initial_timestamp": datetime_2_str(datetime.datetime.now(),
                                            output_format=time_format_full_with_timezone),
        "content": []
    }
}
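# The dangling "resources=r'*', supports_credentials=True)" fragment at the top of this block
# looks like the tail of a flask_cors CORS(...) call. A minimal sketch of the assumed app setup
# (the Flask app object and imports are assumptions inferred from that fragment):
from flask import Flask
from flask_cors import CORS

app = Flask(__name__)
# allow cross-origin requests on all routes, with credentials, as the fragment suggests
CORS(app, resources=r'*', supports_credentials=True)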