def translate_text(text='', target='ja', debug=False):
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db
    db_dir = "/db"

    if target == 'ja':
        db_filename = "translation-to-ja-cache.json"
    elif target == 'en':
        db_filename = "translation-to-en-cache.json"
    else:
        raise ValueError(
            'no translation cache defined for this target language. define one before proceeding.')

    db_filepath = get_filepath(path.join(db_dir, db_filename))

    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)
    try:
        output = db_keyword_pair[text]
        if debug: print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print(
                'calling google translate to translate (will only happen once per word)'
            )
        response = translate_text_api(text=text, target=target, debug=debug)
        output = response['translatedText']
        db_keyword_pair[text] = output
        update_db(db_keyword_pair, database_path=db_filepath)
        return output
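# Illustrative usage of the cache-backed translator above (the phrase is
# hypothetical): the first call misses the local JSON cache and hits the
# Google Translate API; repeat calls for the same text return the cached
# value without making a request.
#
#   translated = translate_text(text='good morning', target='ja', debug=True)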
def get_top_trends_from_twitter(country='Japan', exclude_hashtags=True, debug=False, cache_duration_mins=15):
    cache_db = load_db(database_path=cache_db_filepath, debug=False)
    trends_db = cache_db['trends']
    if exclude_hashtags:
        trends_cache = trends_db['exclude_hashtags']
    else:
        trends_cache = trends_db['include_hashtags']

    # compare db and now
    db_timestamp = str_2_datetime(trends_cache['timestamp'], input_format=time_format_full_no_timezone)
    rq_timestamp = datetime.datetime.now()

    time_diff = rq_timestamp - db_timestamp
    if time_diff.total_seconds() < cache_duration_mins*60:
        output_json = json.dumps(trends_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_trends_from_twitter_api(country=country, exclude_hashtags=exclude_hashtags)
        # update
        output_dict = json.loads(output_json)
        if exclude_hashtags:
            cache_db['trends']['exclude_hashtags']['content'] = output_dict
            cache_db['trends']['exclude_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_no_timezone)
        else:
            cache_db['trends']['include_hashtags']['content'] = output_dict
            cache_db['trends']['include_hashtags']['timestamp'] = datetime_2_str(rq_timestamp, output_format=time_format_full_no_timezone)

        update_db(cache_db, database_path=cache_db_filepath, debug=debug)
        return output_json
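# Note on the cache-age check above: timedelta.seconds only holds the seconds
# component within a day (0-86399) and ignores .days, so an entry older than
# 24 hours could still look fresh; total_seconds() measures the full elapsed
# time, e.g.:
#
#   datetime.timedelta(days=1, seconds=30).seconds          # 30
#   datetime.timedelta(days=1, seconds=30).total_seconds()  # 86430.0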
def parse_entities(text='', debug=False):
    """
    entity level parsing
    :param text:
    :param debug:
    :return:
    """
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db
    db_dir = "/db"
    db_filename = "entity-cache.json"

    db_filepath = get_filepath(path.join(db_dir, db_filename))

    db_keyword_pair = load_db(database_path=db_filepath, debug=debug)
    try:
        output = db_keyword_pair[text]
        if debug: print('local keyword pair found!')
        return output
    except KeyError:
        if debug:
            print(
                'calling google natural language API to analyze entities (will only happen once per text)'
            )
        output = analyze_entities_api(text)
        if debug:
            print(output)
        db_keyword_pair[text] = output
        update_db(db_keyword_pair, database_path=db_filepath)
        return output
def get_top_hashtags_from_twitter(country='Japan',
                                  debug=False,
                                  cache_duration_mins=15):
    cache_db = load_db(database_path=cache_db_filepath, debug=False)
    hashtags_cache = cache_db['hashtags']

    # compare db and now
    db_timestamp = str_2_datetime(hashtags_cache['timestamp'],
                                  input_format=time_format_full_no_timezone)
    rq_timestamp = datetime.datetime.now()

    time_diff = rq_timestamp - db_timestamp
    print('time diff: ', time_diff)
    if time_diff.total_seconds() < cache_duration_mins * 60:
        # DB
        output_json = json.dumps(hashtags_cache['content'], ensure_ascii=False)
        return output_json
    else:
        output_json = get_top_hashtags_from_twitter_api(country=country,
                                                        debug=debug)
        # update
        output_dict = json.loads(output_json)
        cache_db['hashtags']['content'] = output_dict
        cache_db['hashtags']['timestamp'] = datetime_2_str(
            rq_timestamp, output_format=time_format_full_no_timezone)

        update_db(cache_db, database_path=cache_db_filepath, debug=debug)
        return output_json
def get_posts_from_hashtags_list(hashtags_list_in):
    cache_db = load_db(database_path=cache_db_filepath, debug=False)
    hashtags_db = cache_db['hashtags']

    post_dict = {}
    for hashtag in hashtags_list_in:
        print('searching hashtag: {}'.format(hashtag))
        response = api.GetSearch(term=hashtag + ' -filter:retweets', include_entities=True, return_json=True)
        post_dict[hashtag] = response

    return post_dict
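# Illustrative call (the hashtags are hypothetical; `api` is the module-level
# python-twitter Api instance assumed by the function above):
#
#   posts_by_hashtag = get_posts_from_hashtags_list(['#大阪', '#東京'])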
        def wrapper(*args, **kwargs):
            # before
            db_path = get_filepath(db_filepath)

            cache_db = load_db(database_path=db_path, debug=debug)
            try:
                cached_output = cache_db[cache_key]
                if debug: print('local keyword pair found!')
                return cached_output
            except KeyError:
                print('running function to cache: {}'.format(db_path))
                # ---------------------------------------------
                output_to_cache = function_in(*args, **kwargs)
                # ---------------------------------------------
                # after
                cache_db[cache_key] = output_to_cache
                update_db(cache_db, database_path=db_path)
                return output_to_cache
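# `wrapper` above is the inner function of a caching decorator whose enclosing
# factory is not part of this snippet. Below is a minimal sketch of that
# pattern; the factory name `cache_to_db_sketch` and its argument order are
# assumptions for illustration, not taken from the original source.
def cache_to_db_sketch(db_filepath, cache_key, debug=False):
    from utils.baseutils import get_filepath
    from utils.db_utils import load_db, update_db

    def decorator(function_in):
        def wrapper(*args, **kwargs):
            # resolve the cache file and try to serve the stored value
            db_path = get_filepath(db_filepath)
            cache_db = load_db(database_path=db_path, debug=debug)
            try:
                return cache_db[cache_key]
            except KeyError:
                # cache miss: run the wrapped function and persist its output
                output_to_cache = function_in(*args, **kwargs)
                cache_db[cache_key] = output_to_cache
                update_db(cache_db, database_path=db_path)
                return output_to_cache
        return wrapper
    return decorator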
def analyze_sentiment(text='', debug=False):
    global sentiment_db

    db_entities_cache = sentiment_db
    db_filepath = sentiment_db_filepath

    try:
        output = db_entities_cache[text]
        if debug: print('sentiment previously analysed. returning cache')
        return output
    except KeyError:
        if debug: print('calling google API to analyze sentiment')
        response = analyze_sentiment_api(text=text)
        output = response.score
        print('sentiment {} -> {}'.format(text, output))
        db_entities_cache[text] = output
        update_db(db_entities_cache, database_path=db_filepath)

        # reload the variable 'sentiment_db' if updated
        sentiment_db = load_db(database_path=sentiment_db_filepath,
                               debug=False)
        return output
from utils.baseutils import get_filepath
from utils.db_utils import load_db, update_db
import os.path as path

# analyzed sentiment cache
try:
    db_dir = "db"
    sentiment_db_filename = "sentiment-cache.json"
    sentiment_db_filepath = get_filepath(
        path.join(db_dir, sentiment_db_filename))
except Exception:
    # fall back when not running from inside the aodh-backend directory
    db_dir = "aodh-backend/db"
    sentiment_db_filename = "sentiment-cache.json"
    sentiment_db_filepath = get_filepath(
        path.join(db_dir, sentiment_db_filename))

sentiment_db = load_db(database_path=sentiment_db_filepath, debug=False)

from hidden.hidden import GoogleAPI
from google.cloud import translate
import six

API_KEY = GoogleAPI().api_key


def translate_text_api(text='', target='ja', debug=False):
    """Translates text into the target language.

    Target must be an ISO 639-1 language code.
    See https://g.co/cloud/translate/v2/translate-reference#supported_languages
    """
    translate_client = translate.Client()

    if isinstance(text, six.binary_type):
        text = text.decode('utf-8')