Code example #1
def translate_text(text='', target='ja', debug=False):
    from os import path  # needed for path.join below; missing in the original snippet
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db
    db_dir = "/db"

    if target == 'ja':
        db_filename = "translation-to-ja-cache.json"
    elif target == 'en':
        db_filename = "translation-to-en-cache.json"
    else:
        # ValueError fits a bad argument; SystemError (as in the original)
        # is reserved for interpreter-internal failures
        raise ValueError(
            'no translation cache defined for target {!r}. define one before proceeding.'.format(target))

    db_filepath = get_filepath(path.join(db_dir, db_filename))

    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)
    try:
        output = db_keyword_pair[text]
        if debug: print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print(
                'calling google translate to translate (will only happen once per word)'
            )
        response = translate_text_api(text=text, target=target, debug=debug)
        output = response['translatedText']
        db_keyword_pair[text] = output
        update_db(db_keyword_pair, database_path=db_filepath)
        return output
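For reference, a minimal usage sketch, assuming the module-level translate_text_api wrapper for the Google Translate API is already configured (the snippet does not show its setup):

# Hypothetical calls: the first hits the API and writes the cache,
# the second is served from the local JSON cache.
print(translate_text('hello world', target='ja'))  # API call + cache write
print(translate_text('hello world', target='ja'))  # cache hit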
Code example #2
def parse_entities(text='', debug=False):
    """
    entity level parsing
    :param text:
    :param debug:
    :return:
    """
    from os import path  # needed for path.join below; missing in the original snippet
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db
    db_dir = "/db"
    db_filename = "entity-cache.json"

    db_filepath = get_filepath(path.join(db_dir, db_filename))

    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)
    try:
        output = db_keyword_pair[text]
        if debug: print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print(
                'calling google API to analyze entities (will only happen once per text)'
            )
        # The original snippet stopped here at a debugging print/raise,
        # followed by dead code that referenced an undefined `target`;
        # caching the raw API response is an assumption about the intent.
        response = analyze_entities_api(text)
        output = response
        db_keyword_pair[text] = output
        update_db(db_keyword_pair, database_path=db_filepath)
        return output
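Every example here leans on load_db and update_db from utils.db_utils, which the excerpts never show. A minimal sketch of what they plausibly look like, assuming the cache is a flat JSON object on disk; the signatures match the calls above, the bodies are an assumption:

import json
from os import path

def load_db(database_path='', debug=False):
    # assumed implementation: read the whole JSON cache into a dict,
    # starting empty if the file does not exist yet
    if not path.exists(database_path):
        return {}
    with open(database_path, encoding='utf-8') as f:
        db = json.load(f)
    if debug:
        print('loaded {} cached entries'.format(len(db)))
    return db

def update_db(db, database_path='', debug=False):
    # assumed implementation: rewrite the whole JSON cache
    # (single-process use, no locking, matching the usage above)
    with open(database_path, 'w', encoding='utf-8') as f:
        json.dump(db, f, ensure_ascii=False, indent=2)
    if debug:
        print('wrote {} entries to {}'.format(len(db), database_path))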
Code example #3
File: twitter_api.py  Project: panzerstadt/aodh-2018
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=True, debug=False, cache_duration_mins=15):
    cache_db = load_db(database_path=cache_db_filepath, debug=False)
    trends_db = cache_db['trends']
    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # compare db and now
    db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
    rq_timestamp = datetime.datetime.now()

    time_diff = rq_timestamp - db_timestamp
    # total_seconds() rather than .seconds: .seconds ignores whole days,
    # so a day-old cache would otherwise look fresh
    if time_diff.total_seconds() < cache_duration_mins * 60:
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)
        # update
        output_dict = json.loads(output_json)
        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_dict
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_no_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_dict
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_no_timezone)

        update_db(cache_db, database_path=cache_db_filepath, debug=debug)
        return output_json
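The nested layout of the cache file can be read straight off the key accesses in examples #3 and #4; spelled out as a Python literal (the timestamp format is whatever time_format_full_no_timezone expands to, which the excerpts do not show):

# shape of cache_db, reconstructed from the key accesses above
cache_db = {
    "trends": {
        "exclude_hashtags": {"content": [], "timestamp": "..."},
        "include_hashtags": {"content": [], "timestamp": "..."},
    },
    "hashtags": {"content": [], "timestamp": "..."},  # used in example #4
}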
Code example #4
def get_top_hashtags_from_twitter(country='Japan',
                                  debug=False,
                                  cache_duration_mins=15):
    cache_db = load_db(database_path=cache_db_filepath, debug=False)
    hashtags_cache = cache_db['hashtags']

    # compare db and now
    db_timestamp = str_2_datetime(hashtags_cache['timestamp'],
                                  input_format=time_format_full_no_timezone)
    rq_timestamp = datetime.datetime.now()

    time_diff = rq_timestamp - db_timestamp
    print('time diff: ', time_diff)
    # total_seconds() rather than .seconds, for the same day-rollover reason as in example #3
    if time_diff.total_seconds() < cache_duration_mins * 60:
        # cache is still fresh: serve from the DB
        output_json = json.dumps(hashtags_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_hashtags_from_twitter_api(country=country,
                                                        debug=debug)
        # update
        output_dict = json.loads(output_json)
        cache_db['hashtags']['content'] = output_dict
        cache_db['hashtags']['timestamp'] = datetime_2_str(
            rq_timestamp, output_format=time_format_full_no_timezone)

        update_db(cache_db, database_path=cache_db_filepath, debug=debug)
        return output_json
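The freshness check is the one subtle line: timedelta.seconds only carries the sub-day seconds component (0-86399) and silently drops whole days, which is why the snippets above use total_seconds(). A quick illustration:

import datetime

stale = datetime.timedelta(days=1, seconds=30)  # cache is a day old
print(stale.seconds)          # 30 -> would wrongly pass a 15-minute check
print(stale.total_seconds())  # 86430.0 -> correctly flagged as stale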
Code example #5
File: cache_utils.py  Project: panzerstadt/aodh-2018
        def wrapper(*args, **kwargs):
            # before
            db_path = get_filepath(db_filepath)

            cache_db = load_db(database_path=db_path, debug=debug)
            try:
                cached_output = cache_db[cache_key]
                if debug: print('local keyword pair found!')
                return cached_output
            except KeyError:  # the bare except in the original would also swallow real errors
                print('running function to cache: {}'.format(db_path))
                # ---------------------------------------------
                output_to_cache = function_in(*args, **kwargs)
                # ---------------------------------------------
                # after
                cache_db[cache_key] = output_to_cache
                update_db(cache_db, database_path=db_path)  # use the resolved path, matching load_db above
                return output_to_cache
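Example #5 is only the inner wrapper: function_in, db_filepath, cache_key, and debug must come from enclosing scopes the excerpt omits. A plausible reconstruction of the full decorator factory, where every name other than those four is an assumption:

from functools import wraps
from utils.baseutils import get_filepath
from utils.db_utils import load_db, update_db

def cache_to_db(db_filepath, cache_key, debug=False):  # hypothetical factory name
    def decorator(function_in):
        @wraps(function_in)
        def wrapper(*args, **kwargs):
            db_path = get_filepath(db_filepath)
            cache_db = load_db(database_path=db_path, debug=debug)
            try:
                return cache_db[cache_key]
            except KeyError:
                output_to_cache = function_in(*args, **kwargs)
                cache_db[cache_key] = output_to_cache
                update_db(cache_db, database_path=db_path)
                return output_to_cache
        return wrapper
    return decorator

# hypothetical usage:
# @cache_to_db('/db/trends-cache.json', cache_key='top_trends')
# def fetch_top_trends():
#     ...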
Code example #6
def analyze_sentiment(text='', debug=False):
    global sentiment_db

    db_sentiment_cache = sentiment_db
    db_filepath = sentiment_db_filepath

    try:
        output = db_sentiment_cache[text]
        if debug: print('sentiment previously analysed. returning cache')
        return output
    except KeyError:
        if debug: print('calling google API to analyze sentiment')
        response = analyze_sentiment_api(text=text)
        output = response.score
        print('sentiment {} -> {}'.format(text, output))
        db_sentiment_cache[text] = output
        update_db(db_sentiment_cache, database_path=db_filepath)

        # reload the module-level 'sentiment_db' after the update
        sentiment_db = load_db(database_path=sentiment_db_filepath,
                               debug=False)
        return output
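Unlike the other examples, #6 keeps the cache in a module-level dict and rebinds it after each write, which is what the global statement is for. The setup the function implies, sketched with an assumed cache path:

from utils.baseutils import get_filepath
from utils.db_utils import load_db

# module-level cache implied by analyze_sentiment; the filename is an assumption
sentiment_db_filepath = get_filepath('/db/sentiment-cache.json')
sentiment_db = load_db(database_path=sentiment_db_filepath, debug=False)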