def translate_text(text='', target='ja', debug=False):
    """Translate text via the Google Translate API, backed by a local JSON cache."""
    from os import path
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db

    db_dir = "/db"
    if target == 'ja':
        db_filename = "translation-to-ja-cache.json"
    elif target == 'en':
        db_filename = "translation-to-en-cache.json"
    else:
        raise ValueError('no translation cache defined. define one before proceeding.')

    db_filepath = get_filepath(path.join(db_dir, db_filename))
    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)

    try:
        # return the cached translation if this text has been seen before
        output = db_keyword_pair[text]
        if debug:
            print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print('calling google translate to translate (will only happen once per word)')
        response = translate_text_api(text=text, target=target, debug=debug)
        output = response['translatedText']

        # persist the new translation so subsequent calls hit the cache
        db_keyword_pair[text] = output
        update_db(db_keyword_pair, database_path=db_filepath)
        return output
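# load_db and update_db (imported above from utils.db_utils) are not shown in
# this section. A minimal sketch of what they are assumed to do, a flat JSON
# file used as a key-value cache, is given below; the real utils.db_utils
# implementation may differ.
def _load_db_sketch(database_path, debug=False):
    """Hypothetical stand-in for utils.db_utils.load_db: read a JSON cache file."""
    import json
    import os
    if not os.path.exists(database_path):
        return {}
    with open(database_path, encoding='utf-8') as f:
        return json.load(f)


def _update_db_sketch(db, database_path, debug=False):
    """Hypothetical stand-in for utils.db_utils.update_db: write the cache back out."""
    import json
    with open(database_path, 'w', encoding='utf-8') as f:
        json.dump(db, f, ensure_ascii=False, indent=2)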
def parse_entities(text='', debug=False):
    """
    entity level parsing
    :param text:
    :param debug:
    :return:
    """
    from os import path
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db

    db_dir = "/db"
    db_filename = "entity-cache.json"
    db_filepath = get_filepath(path.join(db_dir, db_filename))
    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)

    try:
        # return the cached entity analysis if this text has been seen before
        output = db_keyword_pair[text]
        if debug:
            print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print('calling google API to analyze entities (will only happen once per text)')
        # the entity response is assumed to be JSON-serialisable for caching
        output = analyze_entities_api(text)

        # persist the analysis so subsequent calls hit the cache
        db_keyword_pair[text] = output
        update_db(db_keyword_pair, database_path=db_filepath)
        return output
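# get_filepath (imported from utils.baseutils above) is also not shown here.
# A hypothetical sketch, assuming it resolves a path like "/db/<file>.json"
# relative to the project root rather than the filesystem root:
def _get_filepath_sketch(relative_path):
    """Hypothetical stand-in for utils.baseutils.get_filepath."""
    import os
    project_root = os.path.dirname(os.path.abspath(__file__))
    return os.path.join(project_root, relative_path.lstrip('/'))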
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=True, debug=False, cache_duration_mins=15):
    cache_db = load_db(database_path=cache_db_filepath, debug=False)
    trends_db = cache_db['trends']

    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # compare the cached timestamp against now
    db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
    rq_timestamp = datetime.datetime.now()
    time_diff = rq_timestamp - db_timestamp

    if time_diff.total_seconds() < cache_duration_mins * 60:
        # cache is still fresh: serve the stored trends
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        # cache expired: refresh from the Twitter API
        output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)

        # update the cache with the fresh content and timestamp
        output_dict = json.loads(output_json)
        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_dict
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_no_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_dict
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_no_timezone)

        update_db(cache_db, database_path=cache_db_filepath, debug=debug)
        return output_json
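# str_2_datetime, datetime_2_str and time_format_full_no_timezone are defined
# elsewhere in the project. A minimal sketch of the assumed behaviour; the
# exact format string below is a guess, not taken from the source:
_TIME_FORMAT_FULL_NO_TIMEZONE_SKETCH = '%Y-%m-%d %H:%M:%S'


def _str_2_datetime_sketch(time_str, input_format=_TIME_FORMAT_FULL_NO_TIMEZONE_SKETCH):
    """Hypothetical stand-in: parse a cache timestamp string into a datetime."""
    import datetime
    return datetime.datetime.strptime(time_str, input_format)


def _datetime_2_str_sketch(dt, output_format=_TIME_FORMAT_FULL_NO_TIMEZONE_SKETCH):
    """Hypothetical stand-in: serialise a datetime for storage in the cache."""
    return dt.strftime(output_format)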
def get_top_hashtags_from_twitter(country='Japan', debug=False, cache_duration_mins=15):
    cache_db = load_db(database_path=cache_db_filepath, debug=False)
    hashtags_cache = cache_db['hashtags']

    # compare the cached timestamp against now
    db_timestamp = str_2_datetime(hashtags_cache['timestamp'], input_format=time_format_full_no_timezone)
    rq_timestamp = datetime.datetime.now()
    time_diff = rq_timestamp - db_timestamp
    if debug:
        print('time diff: ', time_diff)

    if time_diff.total_seconds() < cache_duration_mins * 60:
        # cache is still fresh: serve the stored hashtags
        output_json = json.dumps(hashtags_cache['content'], ensure_ascii=False)
        return output_json
    else:
        # cache expired: refresh from the Twitter API
        output_json = get_top_hashtags_from_twitter_api(country=country, debug=debug)

        # update the cache with the fresh content and timestamp
        output_dict = json.loads(output_json)
        cache_db['hashtags']['content'] = output_dict
        cache_db['hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_no_timezone)

        update_db(cache_db, database_path=cache_db_filepath, debug=debug)
        return output_json
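# Shape of the twitter cache file read by the two functions above, inferred
# from the keys they access ('trends' -> 'exclude_hashtags'/'include_hashtags',
# and 'hashtags', each holding 'content' and 'timestamp'). The timestamp values
# are placeholders in the assumed format, not values from the source.
_TWITTER_CACHE_SKELETON = {
    'trends': {
        'exclude_hashtags': {'content': [], 'timestamp': '1970-01-01 00:00:00'},
        'include_hashtags': {'content': [], 'timestamp': '1970-01-01 00:00:00'},
    },
    'hashtags': {'content': [], 'timestamp': '1970-01-01 00:00:00'},
}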
def wrapper(*args, **kwargs):
    # before: look up the cached value for this key
    db_path = get_filepath(db_filepath)
    cache_db = load_db(database_path=db_path, debug=debug)
    try:
        cached_output = cache_db[cache_key]
        if debug:
            print('local keyword pair found!')
        return cached_output
    except KeyError:
        print('running function to cache: {}'.format(db_path))
        # ---------------------------------------------
        output_to_cache = function_in(*args, **kwargs)
        # ---------------------------------------------

        # after: persist the freshly computed value
        cache_db[cache_key] = output_to_cache
        update_db(cache_db, database_path=db_path)
        return output_to_cache
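# wrapper() above closes over db_filepath, cache_key, debug and function_in, so
# it is presumably the inner function of a caching decorator. A hypothetical
# sketch of the enclosing factory that would bind those names (the name
# file_cache_sketch and its parameters are assumptions, not from the source):
def file_cache_sketch(db_filepath, cache_key, debug=False):
    """Hypothetical decorator factory binding the names wrapper() relies on."""
    import functools

    def decorator(function_in):
        @functools.wraps(function_in)
        def wrapper(*args, **kwargs):
            db_path = get_filepath(db_filepath)
            cache_db = load_db(database_path=db_path, debug=debug)
            try:
                # serve the cached result when the key is already present
                return cache_db[cache_key]
            except KeyError:
                output_to_cache = function_in(*args, **kwargs)
                cache_db[cache_key] = output_to_cache
                update_db(cache_db, database_path=db_path)
                return output_to_cache
        return wrapper
    return decorator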
def analyze_sentiment(text='', debug=False):
    global sentiment_db
    db_entities_cache = sentiment_db
    db_filepath = sentiment_db_filepath

    try:
        # return the cached score if this text was analysed before
        output = db_entities_cache[text]
        if debug:
            print('text previously analysed. returning cache')
        return output
    except KeyError:
        if debug:
            print('calling google API to analyze sentiment')
        response = analyze_sentiment_api(text=text)
        output = response.score
        print('sentiment {} -> {}'.format(text, output))

        # persist the score, then reload the module-level 'sentiment_db' cache
        db_entities_cache[text] = output
        update_db(db_entities_cache, database_path=db_filepath)
        sentiment_db = load_db(database_path=sentiment_db_filepath, debug=False)
        return output
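# analyze_sentiment_api is not shown in this section; analyze_sentiment() above
# only requires it to return an object with a .score attribute, which matches
# the document-level sentiment of the Google Cloud Natural Language API. A
# hedged sketch, assuming the google-cloud-language client library (the exact
# call style varies across library versions):
def _analyze_sentiment_api_sketch(text=''):
    """Hypothetical stand-in returning an object with a .score attribute."""
    from google.cloud import language_v1
    client = language_v1.LanguageServiceClient()
    document = language_v1.Document(content=text, type_=language_v1.Document.Type.PLAIN_TEXT)
    response = client.analyze_sentiment(request={'document': document})
    return response.document_sentiment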