def topic_compare_subtopic_top_words(topics_id):
    """Stream a CSV comparing the top words of each focus in a focal set.

    Reads the snapshot/timespan filters, the required ``focal_sets_id`` and the
    optional ``word_count`` (default 20) from ``request.args``. Each CSV column
    is one focus of the focal set; row ``n`` holds the n-th word of every focus
    formatted as ``term (count)``. A cell is left empty when a focus has fewer
    than ``n + 1`` words available.

    :param topics_id: id of the topic the focal set belongs to
    :return: the ``csv.stream_response`` result, or a JSON error response when
        the word count, the timespan or the focal set cannot be resolved
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    selected_focal_sets_id = request.args['focal_sets_id']
    # values read from request.args are presumably strings (query string), and
    # range() below needs an int, so coerce explicitly
    try:
        word_count = int(request.args['word_count']) if 'word_count' in request.args else 20
    except (TypeError, ValueError):
        return json_error_response('Invalid word count')
    # first we need to figure out which timespan they are working on
    selected_snapshot_timespans = apicache.cached_topic_timespan_list(user_mediacloud_key(), topics_id,
                                                                      snapshots_id=snapshots_id)
    selected_timespan = None
    for t in selected_snapshot_timespans:
        if t['timespans_id'] == int(timespans_id):
            selected_timespan = t
    if selected_timespan is None:
        # nothing to match the per-focus timespans against
        return json_error_response("Couldn't find the timespan you specified")
    try:
        focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id, snapshots_id, selected_focal_sets_id)
    except ValueError:
        return json_error_response('Invalid Focal Set Id')
    foci = focal_set['foci']
    timespans = apicache.matching_timespans_in_foci(topics_id, selected_timespan, foci)
    # timespans are paired with foci by position, as the original index loop did
    for focus, timespan in zip(foci, timespans):
        focus['top_words'] = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                                        timespans_id=timespan['timespans_id'])
    # stitch together the counts to download now
    data = []
    headers = [f['name'] for f in foci]
    for idx in range(word_count):
        row = {}
        for f in foci:
            # a focus may have fewer words than requested (or none at all)
            words = f.get('top_words') or []
            if idx < len(words):
                row[f['name']] = "{} ({})".format(words[idx]['term'], words[idx]['count'])
            else:
                row[f['name']] = ''
        data.append(row)
    return csv.stream_response(data, headers,
                               'topic-{}-subtopic-{}-{}-top-words-comparison'.format(
                                   topics_id, focal_set['name'], selected_focal_sets_id))
Exemple #2
0
def topic_compare_subtopic_top_words(topics_id):
    """Stream a CSV comparing the top words of each focus in a focal set.

    Reads the snapshot/timespan filters, the required ``focal_sets_id`` and the
    optional ``word_count`` (default 20) from ``request.args``. Each CSV column
    is one focus of the focal set; row ``n`` holds the n-th word of every focus
    formatted as ``term (count)``. A cell is left empty when a focus has fewer
    than ``n + 1`` words available.

    :param topics_id: id of the topic the focal set belongs to
    :return: the ``csv.stream_response`` result, or a JSON error response when
        the word count, the timespan or the focal set cannot be resolved
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    selected_focal_sets_id = request.args['focal_sets_id']
    # values read from request.args are presumably strings (query string), and
    # range() below needs an int, so coerce explicitly
    try:
        word_count = int(request.args['word_count']) if 'word_count' in request.args else 20
    except (TypeError, ValueError):
        return json_error_response('Invalid word count')
    # first we need to figure out which timespan they are working on
    selected_snapshot_timespans = apicache.cached_topic_timespan_list(user_mediacloud_key(), topics_id, snapshots_id=snapshots_id)
    selected_timespan = None
    for t in selected_snapshot_timespans:
        if t['timespans_id'] == int(timespans_id):
            selected_timespan = t
    if selected_timespan is None:
        # nothing to match the per-focus timespans against
        return json_error_response("Couldn't find the timespan you specified")
    focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id, snapshots_id, selected_focal_sets_id)
    if focal_set is None:
        # avoid subscripting None below when the lookup found nothing
        return json_error_response('Invalid Focal Set Id')
    foci = focal_set['foci']
    timespans = apicache.matching_timespans_in_foci(topics_id, selected_timespan, foci)
    # timespans are paired with foci by position, as the original index loop did
    for focus, timespan in zip(foci, timespans):
        focus['top_words'] = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                                        timespans_id=timespan['timespans_id'])
    # stitch together the counts to download now
    data = []
    headers = [f['name'] for f in foci]
    for idx in range(word_count):
        row = {}
        for f in foci:
            # a focus may have fewer words than requested (or none at all)
            words = f.get('top_words') or []
            if idx < len(words):
                row[f['name']] = u"{} ({})".format(words[idx]['term'], words[idx]['count'])
            else:
                row[f['name']] = u''
        data.append(row)
    return csv.stream_response(data, headers,
                               'topic-{}-subtopic-{}-{}-top-words-comparison'.format(
                                   topics_id, focal_set['name'], selected_focal_sets_id))
Exemple #3
0
def topic_focal_set_split_stories_compare(topics_id, focal_sets_id):
    """Return a focal set as JSON with split story counts attached to each focus.

    The base timespan comes from ``base_snapshot_timespan``; the timespan
    matching it is looked up for every focus, and the split story counts for
    that timespan are stored under the focus's ``split_story_counts`` key.

    :param topics_id: id of the topic the focal set belongs to
    :param focal_sets_id: id of the focal set whose foci are compared
    :return: ``jsonify(focal_set)``, or a JSON error response carrying the
        message of a ``ValueError`` raised while resolving the base timespan
        or the focal set
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    # need the timespan info, to find the appropriate timespan with each focus
    try:
        base_timespan = base_snapshot_timespan(topics_id)
        focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id, snapshots_id, focal_sets_id)
    except ValueError as e:
        # str(e) works on both Python 2 and 3; e.message no longer exists on Python 3
        return json_error_response(str(e))
    # collect the story split counts for each foci
    foci = focal_set['foci']
    timespans = apicache.matching_timespans_in_foci(topics_id, base_timespan, foci)
    # timespans are paired with foci by position, as the original index loop did
    for focus, timespan in zip(foci, timespans):
        focus['split_story_counts'] = apicache.topic_split_story_counts(
            user_mediacloud_key(), topics_id, snapshots_id=snapshots_id,
            timespans_id=timespan['timespans_id'])
    return jsonify(focal_set)
def topic_focal_set_split_stories_compare(topics_id, focal_sets_id):
    """Return a focal set as JSON with split story counts attached to each focus.

    The base (overall) timespan is resolved first: when a focus filter is
    active it is the snapshot timespan that ``apicache.is_timespans_match``
    pairs with the currently selected timespan; otherwise it is the snapshot
    timespan whose id equals the ``timespans_id`` filter. The matching
    timespan of every focus is then looked up and its split story counts are
    stored under that focus's ``split_story_counts`` key.

    :param topics_id: id of the topic the focal set belongs to
    :param focal_sets_id: id of the focal set whose foci are compared
    :return: ``jsonify(focal_set)``, or a JSON error response when the base
        timespan or the focal set cannot be found
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    # need the timespan info, to find the appropriate timespan with each focus
    base_snapshot_timespans = apicache.cached_topic_timespan_list(
        user_mediacloud_key(), topics_id, snapshots_id=snapshots_id)
    # initialise up front so the "not found" guard below is reachable on both
    # branches (previously the focus branch could leave the name unbound)
    base_timespan = None
    # if they have a focus selected, we need to find the appropriate overall timespan
    if foci_id is not None:
        timespan = apicache.topic_timespan(topics_id, snapshots_id, foci_id,
                                           timespans_id)
        for t in base_snapshot_timespans:
            if apicache.is_timespans_match(timespan, t):
                base_timespan = t
    else:
        for t in base_snapshot_timespans:
            if t['timespans_id'] == int(timespans_id):
                base_timespan = t
                logger.info('base timespan = %s', timespans_id)
    if base_timespan is None:
        return json_error_response("Couldn't find the timespan you specified")
    # iterate through to find the one of interest
    focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id,
                                         snapshots_id, focal_sets_id)
    if focal_set is None:
        return json_error_response('Invalid Focal Set Id')
    # collect the story split counts for each foci
    foci = focal_set['foci']
    timespans = apicache.matching_timespans_in_foci(topics_id, base_timespan,
                                                    foci)
    # timespans are paired with foci by position, as the original index loop did
    for focus, focus_timespan in zip(foci, timespans):
        focus['split_story_counts'] = apicache.topic_split_story_counts(
            user_mediacloud_key(),
            topics_id,
            snapshots_id=snapshots_id,
            timespans_id=focus_timespan['timespans_id'])
    return jsonify(focal_set)