Example #1
def topic_words(topics_id):
    sample_size = request.args.get('sample_size', WORD_COUNT_SAMPLE_SIZE)

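    # public topics can be viewed without logging in, so the shared tool API key is used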
    if access_public_topic(topics_id):
        results = apicache.topic_word_counts(TOOL_API_KEY, topics_id, sample_size=sample_size,
                                             snapshots_id=None, timespans_id=None, foci_id=None, q=None)
    elif is_user_logged_in():
        # grab the top words, respecting all the filters
        results = apicache.topic_word_counts(user_mediacloud_key(), topics_id, sample_size=sample_size)
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    totals = []  # important so that these get reset on the client when they aren't requested
    logger.debug(request.args)
    if is_user_logged_in() and request.args.get('withTotals') == "true":
        # return along with the results for the overall timespan, to facilitate comparison
        snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
        overall_timespan = _find_overall_timespan(topics_id, snapshots_id)
        totals = apicache.topic_word_counts(user_mediacloud_key(), topics_id, sample_size=sample_size,
                                            timespans_id=overall_timespan['timespans_id'], foci_id=None, q=None)

    response = {
        'list': results[:WORD_COUNT_UI_NUM_WORDS],
        'totals': totals[:WORD_COUNT_UI_NUM_WORDS],
        'sample_size': str(sample_size)
    }
    return jsonify(response)
Example #2
def topic_words(topics_id):

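    # public topics can be viewed without logging in, so the shared tool API key is used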
    if access_public_topic(topics_id):
        results = topic_word_counts(TOOL_API_KEY,
                                    topics_id,
                                    snapshots_id=None,
                                    timespans_id=None,
                                    foci_id=None,
                                    q=None)
    elif is_user_logged_in():
        results = topic_word_counts(user_mediacloud_key(), topics_id)[:200]
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    totals = []  # important so that these get reset on the client when they aren't requested
    logger.debug(request.args)
    if is_user_logged_in() and request.args.get('withTotals') == "true":
        # also return word counts for the whole topic, so the client can compare against them
        totals = topic_word_counts(user_mediacloud_key(),
                                   topics_id,
                                   timespans_id=None,
                                   q=None)
    response = {'list': results, 'totals': totals}
    return jsonify(response)
Example #3
def topic_compare_subtopic_top_words(topics_id):
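    # compare the top words across every subtopic (focus) in one focal set, within the selected timespan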
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    selected_focal_sets_id = request.args['focal_sets_id']
    word_count = int(request.args['word_count']) if 'word_count' in request.args else 20  # args arrive as strings
    # first we need to figure out which timespan they are working on
    selected_snapshot_timespans = apicache.cached_topic_timespan_list(user_mediacloud_key(), topics_id, snapshots_id=snapshots_id)
    selected_timespan = None
    for t in selected_snapshot_timespans:
        if t['timespans_id'] == int(timespans_id):
            selected_timespan = t
    focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id, snapshots_id, selected_focal_sets_id)
    timespans = apicache.matching_timespans_in_foci(topics_id, selected_timespan, focal_set['foci'])
    for idx, timespan in enumerate(timespans):
        data = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                          timespans_id=timespan['timespans_id'])
        focal_set['foci'][idx]['top_words'] = data
    # stitch together the counts to download now
    data = []
    headers = [f['name'] for f in focal_set['foci']]
    for idx in range(word_count):
        row = {f['name']: "{} ({})".format(f['top_words'][idx]['term'], f['top_words'][idx]['count'])
               for f in focal_set['foci']}
        data.append(row)
    return csv.stream_response(data, headers,
                               'topic-{}-subtopic-{}-{}-top-words-comparison'.format(
                                   topics_id, focal_set['name'], selected_focal_sets_id))
Example #4
def topic_w2v_timespan_embeddings(topics_id):
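    # place the topic's top words in 2-d word2vec space, overall and for each timespan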
    args = {
        'snapshots_id': request.args.get('snapshotId'),
        'foci_id': request.args.get('focusId'),
        'q': request.args.get('q'),
    }

    # Retrieve embeddings for overall topic
    overall_word_counts = topic_word_counts(user_mediacloud_key(),
                                            topics_id,
                                            num_words=50,
                                            **args)
    overall_words = [x['term'] for x in overall_word_counts]
    overall_embeddings = {
        x['term']: (x['google_w2v_x'], x['google_w2v_y'])
        for x in overall_word_counts
    }

    # Retrieve top words for each timespan
    timespans = cached_topic_timespan_list(user_mediacloud_key(), topics_id,
                                           args['snapshots_id'],
                                           args['foci_id'])

    # Retrieve embeddings for each timespan
    func = partial(grab_timespan_embeddings, user_mediacloud_key(), topics_id,
                   args, overall_words, overall_embeddings)
    with Pool(processes=WORD2VEC_TIMESPAN_POOL_PROCESSES) as p:  # ensure worker processes are cleaned up
        ts_embeddings = p.map(func, timespans)

    return jsonify({'list': ts_embeddings})
Example #5
def topic_compare_subtopic_top_words(topics_id):
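    # compare the top words across every subtopic (focus) in one focal set, within the selected timespan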
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    selected_focal_sets_id = request.args['focal_sets_id']
    word_count = int(request.args['word_count']) if 'word_count' in request.args else 20  # args arrive as strings
    # first we need to figure out which timespan they are working on
    selected_snapshot_timespans = apicache.cached_topic_timespan_list(user_mediacloud_key(), topics_id,
                                                                      snapshots_id=snapshots_id)
    selected_timespan = None
    for t in selected_snapshot_timespans:
        if t['timespans_id'] == int(timespans_id):
            selected_timespan = t
    try:
        focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id, snapshots_id, selected_focal_sets_id)
    except ValueError:
        return json_error_response('Invalid Focal Set Id')
    timespans = apicache.matching_timespans_in_foci(topics_id, selected_timespan, focal_set['foci'])
    for idx, timespan in enumerate(timespans):
        data = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                          timespans_id=timespan['timespans_id'])
        focal_set['foci'][idx]['top_words'] = data
    # stitch together the counts to download now
    data = []
    headers = [f['name'] for f in focal_set['foci']]
    for idx in range(word_count):
        row = {f['name']: "{} ({})".format(f['top_words'][idx]['term'], f['top_words'][idx]['count'])
               for f in focal_set['foci']}
        data.append(row)
    return csv.stream_response(data, headers,
                               'topic-{}-subtopic-{}-{}-top-words-comparison'.format(
                                   topics_id, focal_set['name'], selected_focal_sets_id))
Example #6
def topic_w2v_timespan_embeddings(topics_id):
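    # place the topic's top words in 2-d word2vec space, overall and for each timespan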
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    # Retrieve embeddings for overall topic
    overall_word_counts = apicache.topic_word_counts(user_mediacloud_key(),
                                                     topics_id,
                                                     num_words=50,
                                                     snapshots_id=snapshots_id,
                                                     timespans_id=None,
                                                     foci_id=foci_id,
                                                     q=q)
    overall_words = [x['term'] for x in overall_word_counts]
    overall_embeddings = {
        x['term']: (x['google_w2v_x'], x['google_w2v_y'])
        for x in overall_word_counts
    }

    # Retrieve top words for each timespan
    timespans = apicache.cached_topic_timespan_list(user_mediacloud_key(),
                                                    topics_id, snapshots_id,
                                                    foci_id)

    # Retrieve embeddings for each timespan
    jobs = [{
        'api_key': user_mediacloud_key(),
        'topics_id': topics_id,
        'snapshots_id': snapshots_id,
        'foci_id': foci_id,
        'overall_words': overall_words,
        'overall_embeddings': overall_embeddings,
        'q': q,
        'timespan': t,
    } for t in timespans]
    embeddings_by_timespan = _get_all_timespan_embeddings(jobs)
    return jsonify({'list': embeddings_by_timespan})
Example #7
def topic_words(topics_id):
    sample_size = request.args.get('sample_size', WORD_COUNT_SAMPLE_SIZE)

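    # public topics can be viewed without logging in, so the shared tool API key is used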
    if access_public_topic(topics_id):
        results = apicache.topic_word_counts(TOOL_API_KEY,
                                             topics_id,
                                             sample_size=sample_size,
                                             snapshots_id=None,
                                             timespans_id=None,
                                             foci_id=None,
                                             q=None)
    elif is_user_logged_in():
        # grab the top words, respecting all the filters
        results = apicache.topic_word_counts(user_mediacloud_key(),
                                             topics_id,
                                             sample_size=sample_size)
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    totals = []  # important so that these get reset on the client when they aren't requested
    logger.debug(request.args)
    if is_user_logged_in() and request.args.get('withTotals') == "true":
        # return along with the results for the overall timespan, to facilitate comparison
        snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
        overall_timespan = _find_overall_timespan(topics_id, snapshots_id)
        totals = apicache.topic_word_counts(
            user_mediacloud_key(),
            topics_id,
            sample_size=sample_size,
            timespans_id=overall_timespan['timespans_id'],
            foci_id=None,
            q=None)

    response = {
        'list': results[:WORD_COUNT_UI_NUM_WORDS],
        'totals': totals[:WORD_COUNT_UI_NUM_WORDS],
        'sample_size': str(sample_size)
    }
    return jsonify(response)
Example #8
def topic_w2v_timespan_embeddings(topics_id):
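    # place the topic's top words in 2-d word2vec space, overall and for each timespan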
    args = {
        'snapshots_id': request.args.get('snapshotId'),
        'foci_id': request.args.get('focusId'),
        'q': request.args.get('q'),
    }

    # Retrieve embeddings for overall topic
    overall_word_counts = topic_word_counts(user_mediacloud_key(), topics_id, num_words=50, **args)
    overall_words = [x['term'] for x in overall_word_counts]
    overall_embeddings = {x['term']: (x['google_w2v_x'], x['google_w2v_y']) for x in overall_word_counts}

    # Retrieve top words for each timespan
    timespans = cached_topic_timespan_list(user_mediacloud_key(), topics_id, args['snapshots_id'], args['foci_id'])

    # Retrieve embeddings for each timespan
    func = partial(grab_timespan_embeddings, user_mediacloud_key(), topics_id, args, overall_words, overall_embeddings)
    with Pool(processes=WORD2VEC_TIMESPAN_POOL_PROCESSES) as p:  # ensure worker processes are cleaned up
        ts_embeddings = p.map(func, timespans)

    return jsonify({'list': ts_embeddings})
Example #9
def media_words(topics_id, media_id):
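    # top 100 words for one medium, merged with the user's current query filters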
    query = apicache.add_to_user_query('media_id:'+media_id)
    word_list = apicache.topic_word_counts(user_mediacloud_key(), topics_id, q=query)[:100]
    return jsonify(word_list)
Example #10
def story_words(topics_id, stories_id):
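    # top 100 words used within a single story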
    word_list = topic_word_counts(user_mediacloud_key(), topics_id, q='stories_id:'+stories_id)[:100]
    return jsonify(word_list)
Example #11
def topic_words_csv(topics_id):
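    # stream the sampled word counts as a CSV download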
    response = topic_word_counts(user_mediacloud_key(), topics_id)
    props = ['term', 'stem', 'count']
    return csv.stream_response(response, props, 'sampled-words')
Example #12
def media_words_csv(topics_id, media_id):
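    # stream the word counts for one medium as a CSV download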
    query = add_to_user_query('media_id:'+media_id)
    word_list = topic_word_counts(user_mediacloud_key(), topics_id, q=query)
    props = ['term', 'stem', 'count']
    return csv.stream_response(word_list, props, 'media-'+str(media_id)+'-words')
Example #13
def topic_word(topics_id, word):
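    # look up the count entry for a single word in the topic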
    response = topic_word_counts(user_mediacloud_key(), topics_id, q=word)[:1]
    logger.debug(response)
    return jsonify(response)
Example #14
def topic_word_associated_words_csv(topics_id, word):
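    # stream the words that co-occur with the given word as a CSV download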
    response = topic_word_counts(user_mediacloud_key(), topics_id, q=word)
    props = ['term', 'stem', 'count']
    return csv.stream_response(response, props,
                               'word-' + word + '-sampled-words')
Example #15
def topic_word_associated_words(topics_id, word):
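    # top 100 words from stories that match the given word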
    response = topic_word_counts(user_mediacloud_key(), topics_id,
                                 q=word)[:100]
    return jsonify(response)
Example #16
def topic_word_associated_words(topics_id, word):
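    # top 100 co-occurring words, merged with the user's current query filters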
    query = apicache.add_to_user_query(word)
    response = apicache.topic_word_counts(user_mediacloud_key(), topics_id, q=query)[:100]
    return jsonify(response)
Example #17
def media_words(topics_id, media_id):
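    # top 100 words for one medium, merged with the user's current query filters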
    query = apicache.add_to_user_query('media_id:' + media_id)
    word_list = apicache.topic_word_counts(user_mediacloud_key(),
                                           topics_id,
                                           q=query)[:100]
    return jsonify(word_list)
Example #18
def story_words_csv(topics_id, stories_id):
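    # stream the words for one story as a CSV download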
    word_list = topic_word_counts(user_mediacloud_key(), topics_id, q='stories_id:'+stories_id)
    props = ['term', 'stem', 'count']
    return csv.stream_response(word_list, props, 'story-'+str(stories_id)+'-words')
Example #19
def topic_word(topics_id, word):
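    # look up the count entry for a single word in the topic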
    response = apicache.topic_word_counts(user_mediacloud_key(), topics_id, q=word)[:1]
    return jsonify(response)
Example #20
def story_words(topics_id, stories_id):
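    # top 100 words used within a single story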
    word_list = apicache.topic_word_counts(user_mediacloud_key(), topics_id, q='stories_id:'+stories_id)[:100]
    return jsonify(word_list)
Example #21
def topic_word(topics_id, word):
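    # look up the count entry for a single word in the topic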
    response = apicache.topic_word_counts(user_mediacloud_key(),
                                          topics_id,
                                          q=word)[:1]
    return jsonify(response)