Beispiel #1
0
def topic_word_counts(user_mc_key, topics_id, **kwargs):
    """Return sampled topic word counts annotated with word2vec 2D positions.

    The snapshot, timespan, focus and query filters are read from
    ``request.args``; ``sample_size`` and ``num_words`` default to the
    module-level constants. Any keyword argument passed in overrides the
    corresponding value before the cached word-count lookup is made.

    When at least one word comes back, every entry is updated in place with
    ``google_w2v_x``/``google_w2v_y`` (Google model) and ``w2v_x``/``w2v_y``
    (topic model) coordinates. Results are paired with entries by index, so
    this assumes each word2vec helper returns one item per word, in the same
    order as ``words`` -- TODO confirm against the helpers.

    :param user_mc_key: forwarded unchanged to ``cached_topic_word_counts``
    :param topics_id: topic to fetch word counts (and the topic model) for
    :param kwargs: overrides for any of the merged lookup arguments
    :return: the word-count entries, annotated in place
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    request_defaults = {
        'snapshots_id': snapshots_id,
        'timespans_id': timespans_id,
        'foci_id': foci_id,
        'q': q,
        'sample_size': WORD_COUNT_SAMPLE_SIZE,
        'num_words': WORD_COUNT_UI_NUM_WORDS
    }
    # passed in args override anything pulled from the request.args
    merged_args = dict(request_defaults, **kwargs)
    word_data = cached_topic_word_counts(user_mc_key, topics_id, **merged_args)
    words = [entry['term'] for entry in word_data]
    if not words:
        # nothing to position, so skip both word2vec lookups
        return word_data

    def attach_positions(positions, x_key, y_key):
        # Index-based on purpose: result i belongs to word_data entry i.
        for idx in range(len(positions)):
            point = positions[idx]
            word_data[idx][x_key] = point['x']
            word_data[idx][y_key] = point['y']

    # add in word2vec model position data, Google model first
    attach_positions(base_apicache.word2vec_google_2d(words),
                     'google_w2v_x', 'google_w2v_y')
    # NOTE(review): this uses the snapshots_id taken from request.args, not a
    # value overridden through kwargs -- confirm that is intended.
    attach_positions(_word2vec_topic_2d_results(topics_id, snapshots_id, words),
                     'w2v_x', 'w2v_y')
    return word_data
Beispiel #2
0
def query_wordcount(q,
                    fq,
                    ngram_size=1,
                    num_words=WORD_COUNT_UI_NUM_WORDS,
                    sample_size=WORD_COUNT_SAMPLE_SIZE):
    """Return word counts for a query, annotated with Google word2vec positions.

    The counts come from ``base_apicache.word_count``. When any terms are
    returned, each entry is updated in place with ``google_w2v_x`` and
    ``google_w2v_y``. Positions are paired with entries by index, so this
    assumes the word2vec call returns one item per term, in the same order
    -- TODO confirm against ``base_apicache.word2vec_google_2d``.

    :param q: query, forwarded to ``base_apicache.word_count``
    :param fq: filter query, forwarded to ``base_apicache.word_count``
    :param ngram_size: forwarded to ``base_apicache.word_count``
    :param num_words: forwarded to ``base_apicache.word_count``
    :param sample_size: forwarded to ``base_apicache.word_count``
    :return: the word-count entries, annotated in place
    """
    word_data = base_apicache.word_count(
        q,
        fq,
        ngram_size=ngram_size,
        num_words=num_words,
        sample_size=sample_size,
    )
    terms = [entry['term'] for entry in word_data]
    if not terms:
        # no terms means there is nothing to look up in the word2vec model
        return word_data

    # add in word2vec model position data, matching results to entries by index
    positions = base_apicache.word2vec_google_2d(terms)
    for idx in range(len(positions)):
        point = positions[idx]
        word_data[idx]['google_w2v_x'] = point['x']
        word_data[idx]['google_w2v_y'] = point['y']
    return word_data
Beispiel #3
0
def _cached_word_count(user_mc_key,
                       q,
                       fq,
                       num_words,
                       sample_size=WORD_COUNT_SAMPLE_SIZE):
    """Return word counts for a query, annotated with Google word2vec positions.

    Uses the shared ``mc`` client when ``user_mc_key`` is None, otherwise the
    client returned by ``user_admin_mediacloud_client()``. Each returned entry
    is updated in place with ``google_w2v_x`` and ``google_w2v_y``. Adding the
    positions is best-effort: a ``KeyError`` while copying them is logged and
    the entries are returned with whatever was set before the failure.

    :param user_mc_key: only checked against None to pick the API client
    :param q: query, forwarded to ``wordCount``
    :param fq: filter query, forwarded to ``wordCount``
    :param num_words: forwarded to ``wordCount``
    :param sample_size: forwarded to ``wordCount``
    :return: the word-count entries, annotated in place where possible
    """
    api_client = mc if user_mc_key is None else user_admin_mediacloud_client()
    word_data = api_client.wordCount(q,
                                     fq,
                                     num_words=num_words,
                                     sample_size=sample_size)
    words = [w['term'] for w in word_data]
    if not words:
        # No terms to position: skip the word2vec call entirely.
        return word_data
    word2vec_data = base_apicache.word2vec_google_2d(words)
    try:
        # Index-based access is kept deliberately: a mapping-shaped response
        # (e.g. an error payload) raises KeyError here and is handled below.
        for i in range(len(word2vec_data)):
            word_data[i]['google_w2v_x'] = word2vec_data[i]['x']
            word_data[i]['google_w2v_y'] = word2vec_data[i]['y']
    except KeyError as e:
        logger.warning("Didn't get valid data back from word2vec call")
        logger.exception(e)
    return word_data