def topic_words(topics_id):
    '''
    Return a JSON response holding the word counts for a topic.

    Public topics are queried with TOOL_API_KEY and every filter explicitly set to None.
    Logged-in users are queried with their own key and the result is cut to the first
    200 entries. Anyone else gets an error payload.

    When a logged-in user passes withTotals=true, 'totals' carries a second word count
    requested with timespans_id and q set to None; otherwise it is an empty list.
    '''
    if access_public_topic(topics_id):
        public_filters = dict(snapshots_id=None, timespans_id=None, foci_id=None, q=None)
        results = topic_word_counts(TOOL_API_KEY, topics_id, **public_filters)
    elif is_user_logged_in():
        user_words = topic_word_counts(user_mediacloud_key(), topics_id)
        results = user_words[:200]
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    logger.info(request.args)
    # always send a list so the client resets its totals when they aren't requested
    totals = []
    if is_user_logged_in() and request.args.get('withTotals') == "true":
        # data to compare the filtered results against
        totals = topic_word_counts(user_mediacloud_key(), topics_id, timespans_id=None, q=None)
    return jsonify({'list': results, 'totals': totals})
def _topic_snapshot_list(topic):
    '''
    Build the snapshot listing for a topic dict.

    Returns a dict with the snapshots sorted by 'snapshot_date' ('list'), the snapshot
    generation job states ('jobStatus') and the note of the last snapshot
    ('latestVersion', 1 when there is none). Returns an empty dict when the topic is not
    public and nobody is logged in. As a side effect 'seed_query_story_count' is stored
    on the passed-in topic.
    '''
    topics_id = topic['topics_id']
    if access_public_topic(topics_id):
        local_mc, api_key = mc, TOOL_API_KEY
    elif is_user_logged_in():
        local_mc, api_key = user_mediacloud_client(), user_mediacloud_key()
    else:
        return {}  # prob something smarter we can do here

    snapshots = sorted(local_mc.topicSnapshotList(topics_id), key=itemgetter('snapshots_id'))
    # snapshots without a note get one derived from their position in id order
    for position, snapshot in enumerate(snapshots):
        if snapshot['note'] in [None, '']:
            snapshot['note'] = position + ARRAY_BASE_ONE
    # seed_query story count
    topic['seed_query_story_count'] = _topic_seed_story_count(topic)
    # add foci_count for display, then order by date
    snapshots = _add_snapshot_foci_count(api_key, topics_id, snapshots)
    snapshots = sorted(snapshots, key=lambda d: d['snapshot_date'])
    # need to know if a snapshot job is running; this always goes through the shared `mc` client
    job_states = mc.topicSnapshotGenerateStatus(topics_id)['job_states']
    latest = snapshots[-1] if snapshots else None
    return {
        'list': snapshots,
        'jobStatus': job_states,
        'latestVersion': latest['note'] if latest else 1,
    }
def topic_words(topics_id):
    '''
    Return a JSON response with the sampled word counts for a topic.

    The sample size comes from the 'sample_size' request argument, falling back to
    WORD_COUNT_SAMPLE_SIZE. Public topics are queried with TOOL_API_KEY and all filters
    set to None; logged-in users are queried with their own key. Anyone else gets an
    error payload. Both 'list' and 'totals' are cut to WORD_COUNT_UI_NUM_WORDS entries.
    '''
    sample_size = request.args.get('sample_size', WORD_COUNT_SAMPLE_SIZE)

    if access_public_topic(topics_id):
        results = apicache.topic_word_counts(TOOL_API_KEY, topics_id, sample_size=sample_size,
                                             snapshots_id=None, timespans_id=None, foci_id=None, q=None)
    elif is_user_logged_in():
        # grab the top words, respecting all the filters
        results = apicache.topic_word_counts(user_mediacloud_key(), topics_id, sample_size=sample_size)
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    logger.debug(request.args)
    # important so that these get reset on the client when they aren't requested
    totals = []
    if is_user_logged_in() and request.args.get('withTotals') == "true":
        # return along with the results for the overall timespan, to facilitate comparison
        snapshots_id, _, _, _ = filters_from_args(request.args)
        overall_timespan = _find_overall_timespan(topics_id, snapshots_id)
        totals = apicache.topic_word_counts(user_mediacloud_key(), topics_id, sample_size=sample_size,
                                            timespans_id=overall_timespan['timespans_id'], foci_id=None, q=None)

    return jsonify({
        'list': results[:WORD_COUNT_UI_NUM_WORDS],
        'totals': totals[:WORD_COUNT_UI_NUM_WORDS],
        'sample_size': str(sample_size)
    })
def _topic_summary(topics_id):
    '''
    Fetch a topic and decorate it with snapshot and spider job information.

    Returns the topic dict with 'snapshots' (list plus generation job states) and
    'spiderJobs' added, and the favorite flag applied when a user is logged in.
    Returns a JSON error response instead when the topic is not public and nobody
    is logged in.
    '''
    if access_public_topic(topics_id):
        local_mc = mc
    elif is_user_logged_in():
        local_mc = user_admin_mediacloud_client()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
    topic = local_mc.topic(topics_id)
    # add in snapshot and latest snapshot job status
    topic['snapshots'] = {
        'list': local_mc.topicSnapshotList(topics_id),
        'jobStatus': mc.topicSnapshotGenerateStatus(topics_id)['job_states']    # need to know if one is running
    }
    # add in spider job status
    topic['spiderJobs'] = local_mc.topicSpiderStatus(topics_id)['job_states']
    if is_user_logged_in():
        _add_user_favorite_flag_to_topics([topic])
    # TODO: add story counts (seedStories / spideredStories / total). An earlier draft was
    # parked here inside a string literal and referenced an undefined `seedTotal`.
    return topic
Exemple #5
0
def map_files(topics_id):
    '''
    Report the rendering status of the map files for a topic as JSON.

    Only the first entry of MAP_TYPES is handled. Its status is 'rendered' when the
    .gexf file exists in DATA_DIR, 'generating' when only the .lock file exists, and
    'starting' otherwise, in which case generation is kicked off. Callers that are
    neither logged in nor looking at a public topic get an error payload.
    '''
    if not (access_public_topic(topics_id) or is_user_logged_in()):
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    _, timespans_id, _, _ = filters_from_args(request.args)
    map_type = MAP_TYPES[0]  # no linkMaps yet
    prefix = _get_file_prefix(map_type, topics_id, timespans_id)
    rendered_path = os.path.join(DATA_DIR, prefix + ".gexf")
    lockfile_path = os.path.join(DATA_DIR, prefix + ".lock")

    if os.path.isfile(rendered_path):
        status = 'rendered'
    elif os.path.isfile(lockfile_path):
        status = 'generating'
    else:
        status = 'starting'
        _start_generating_map_file(map_type, topics_id, timespans_id)

    files = {
        'wordMap': 'unsupported',
        'linkMap': 'not rendered'
    }
    files[map_type] = status
    return jsonify(files)
def map_files(topics_id):
    '''
    Report the rendering status of the map files for a topic as JSON.

    Only the first entry of MAP_TYPES is handled. Its status is 'rendered' when the
    .gexf file exists in DATA_DIR and 'generating' otherwise; generation is started
    first when no .lock file exists yet. Callers that are neither logged in nor
    looking at a public topic get an error payload.
    '''
    allowed = access_public_topic(topics_id) or is_user_logged_in()
    if not allowed:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    _, timespans_id, _, _ = filters_from_args(request.args)
    map_type = MAP_TYPES[0]  # no linkMaps yet
    prefix = _get_file_prefix(map_type, topics_id, timespans_id)
    lock_path = os.path.join(DATA_DIR, prefix + ".lock")
    rendered_path = os.path.join(DATA_DIR, prefix + ".gexf")

    if os.path.isfile(rendered_path):
        status = 'rendered'
    else:
        # a missing lock file means nobody is rendering yet, so start now
        if not os.path.isfile(lock_path):
            _start_generating_map_file(map_type, topics_id, timespans_id)
        status = 'generating'

    files = {'wordMap': 'unsupported', 'linkMap': 'not rendered'}
    files[map_type] = status
    return jsonify(files)
def topic_tag_coverage(topics_id, tags_id):
    '''
    Useful for seeing how many stories in the topic are tagged with a specific tag

    :param topics_id: id of the topic to count stories in
    :param tags_id: a single tag id, or a list of tag ids (joined as "(id1 id2 ...)")
    :return: {'counts': {'count': <tagged stories>, 'total': <all stories>}}, or None
             when the topic is not public and nobody is logged in
    '''
    if isinstance(tags_id, list):  # doesn't respect duck-typing, but quick fix
        tags_id_str = "({})".format(" ".join(str(tid) for tid in tags_id))
    else:
        tags_id_str = str(tags_id)
    # respect any query filter the user has set
    query_with_tag = add_to_user_query(
        "tags_id_stories:{}".format(tags_id_str))
    # resolve the key once so both counts use the same credentials
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return None
    # now get the counts
    total = topic_story_count(api_key, topics_id)
    tagged = topic_story_count(api_key, topics_id,
                               q=query_with_tag)  # force a count with just the query
    return {'counts': {'count': tagged['count'], 'total': total['count']}}
Exemple #8
0
def topic_stories(topics_id):
    '''
    Return the story list for a topic as JSON.

    Public topics are queried with TOOL_API_KEY and all filters set to None;
    logged-in users are queried with their own key. Anyone else gets an error payload.
    '''
    if access_public_topic(topics_id):
        public_filters = dict(snapshots_id=None, timespans_id=None, foci_id=None, q=None)
        return jsonify(topic_story_list(TOOL_API_KEY, topics_id, **public_filters))
    if is_user_logged_in():
        return jsonify(topic_story_list(user_mediacloud_key(), topics_id))
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
def topic_split_story_count(topics_id):
    '''
    Return the split story counts for a topic as JSON, under the 'results' key.

    Public topics are queried with TOOL_API_KEY and all filters set to None;
    logged-in users are queried with their own key. Anyone else gets an error payload.
    '''
    public = access_public_topic(topics_id)
    if not public and not is_user_logged_in():
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    if public:
        split_counts = apicache.topic_split_story_counts(
            TOOL_API_KEY, topics_id,
            snapshots_id=None, timespans_id=None, foci_id=None, q=None)
    else:
        split_counts = apicache.topic_split_story_counts(user_mediacloud_key(), topics_id)
    return jsonify({'results': split_counts})
def topic_split_story_count(topics_id):
    '''
    Return the split story counts for a topic as JSON, under the 'results' key.

    Public topics are queried with TOOL_API_KEY and all filters set to None;
    logged-in users are queried with their own key. Anyone else gets an error payload.
    '''
    if access_public_topic(topics_id):
        public_filters = dict(snapshots_id=None, timespans_id=None, foci_id=None, q=None)
        return jsonify({
            'results': apicache.topic_split_story_counts(TOOL_API_KEY, topics_id, **public_filters)
        })
    if is_user_logged_in():
        return jsonify({
            'results': apicache.topic_split_story_counts(user_mediacloud_key(), topics_id)
        })
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
Exemple #11
0
def topic_media(topics_id):
    '''
    Return the media list for a topic as JSON.

    Public topics are queried with TOOL_API_KEY and every filter, sort and paging
    argument set to None; logged-in users are queried with their own key.
    Anyone else gets an error payload.
    '''
    if access_public_topic(topics_id):
        public_args = dict(snapshots_id=None, timespans_id=None, foci_id=None,
                           sort=None, limit=None, link_id=None)
        return jsonify(topic_media_list(TOOL_API_KEY, topics_id, **public_args))
    if is_user_logged_in():
        return jsonify(topic_media_list(user_mediacloud_key(), topics_id))
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
Exemple #12
0
def story_counts(topics_id):
    '''
    Return two story counts for a topic as JSON: 'total' (requested with timespans_id
    and q set to None) and 'count' (requested with the helper's default arguments).

    TOOL_API_KEY is used for public topics, the user's own key for logged-in users;
    anyone else gets an error payload.
    '''
    public = access_public_topic(topics_id)
    if not public and not is_user_logged_in():
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    api_key = TOOL_API_KEY if public else user_mediacloud_key()
    overall = topic_story_count(api_key, topics_id, timespans_id=None, q=None)
    # force a count with just the query
    matching = topic_story_count(api_key, topics_id)
    counts = {'count': matching['count'], 'total': overall['count']}
    return jsonify({'counts': counts})
Exemple #13
0
def topic_media(topics_id):
    '''
    Return the media list for a topic as JSON.

    Public topics are queried with TOOL_API_KEY and every filter, sort and paging
    argument set to None; logged-in users are queried with their own key.
    Anyone else gets an error payload.
    '''
    public = access_public_topic(topics_id)
    if not public and not is_user_logged_in():
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    if public:
        media = apicache.topic_media_list(
            TOOL_API_KEY, topics_id,
            snapshots_id=None, timespans_id=None, foci_id=None,
            sort=None, limit=None, link_id=None)
    else:
        media = apicache.topic_media_list(user_mediacloud_key(), topics_id)
    return jsonify(media)
Exemple #14
0
def _public_safe_topic_story_count(topics_id, q):
    '''
    Return, as JSON, how many stories in a topic match a query next to the overall count.

    :param topics_id: id of the topic to count stories in
    :param q: extra query, merged into the user's query via add_to_user_query
    :return: JSON {'counts': {'count': <matching>, 'total': <overall>}}, or a JSON error
             payload when the topic is not public and nobody is logged in
    '''
    # resolve the key once so both counts use the same credentials
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
    total = topic_story_count(api_key, topics_id, q=add_to_user_query(None))
    # force a count with just the query
    matching = topic_story_count(api_key, topics_id, q=add_to_user_query(q))
    return jsonify({'counts': {'count': matching['count'], 'total': total['count']}})
def story_counts(topics_id):
    '''
    Return two story counts for a topic as JSON: 'total' (requested with every filter
    set to None) and 'count' (requested with the helper's default arguments).

    TOOL_API_KEY is used for public topics, the user's own key for logged-in users;
    anyone else gets an error payload.
    '''
    public = access_public_topic(topics_id)
    if not public and not is_user_logged_in():
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    api_key = TOOL_API_KEY if public else user_mediacloud_key()
    no_filters = dict(timespans_id=None, snapshots_id=None, q=None, foci_id=None)
    overall = apicache.topic_story_count(api_key, topics_id, **no_filters)
    matching = apicache.topic_story_count(api_key, topics_id)
    counts = {'count': matching['count'], 'total': overall['count']}
    return jsonify({'counts': counts})
def _public_safe_topic_story_count(topics_id, q):
    '''
    Return, as JSON, how many stories in a topic match a query next to the overall count.

    :param topics_id: id of the topic to count stories in
    :param q: extra query, merged into the user's query via apicache.add_to_user_query
    :return: JSON {'counts': {'count': <matching>, 'total': <overall>}}, or a JSON error
             payload when the topic is not public and nobody is logged in
    '''
    # resolve the key once so both counts use the same credentials
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
    total = apicache.topic_story_count(api_key, topics_id, q=apicache.add_to_user_query(None))
    # force a count with just the query
    matching = apicache.topic_story_count(api_key, topics_id, q=apicache.add_to_user_query(q))
    return jsonify({'counts': {'count': matching['count'], 'total': total['count']}})
def _topic_summary(topics_id):
    '''
    Fetch a topic and attach its snapshot listing.

    Returns the topic dict with 'snapshots' filled in by _topic_snapshot_list and the
    favorite flag applied when a user is logged in. Returns a JSON error response
    instead when the topic is not public and nobody is logged in.
    '''
    public = access_public_topic(topics_id)
    if not public and not is_user_logged_in():
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    client = mc if public else user_mediacloud_client()
    topic = client.topic(topics_id)
    # add in snapshot list (with version numbers, by date)
    topic['snapshots'] = _topic_snapshot_list(topic)
    if is_user_logged_in():
        add_user_favorite_flag_to_topics([topic])
    return topic
def topic_sentence_count(topics_id):
    '''
    Return the sentence counts for a topic as JSON.

    Public topics are queried with TOOL_API_KEY and all filters set to None;
    logged-in users are queried with their own key. Anyone else gets an error payload.
    '''
    if access_public_topic(topics_id):
        public_filters = dict(snapshots_id=None, timespans_id=None, foci_id=None, q=None)
        return jsonify(topic_sentence_counts(TOOL_API_KEY, topics_id, **public_filters))
    if is_user_logged_in():
        return jsonify(topic_sentence_counts(user_mediacloud_key(), topics_id))
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
def _topic_summary(topics_id):
    '''
    Fetch a topic and attach its snapshots plus the snapshot job states.

    Returns the topic dict with 'snapshots' ('list' and 'jobStatus') added and the
    favorite flag applied when a user is logged in. Returns a JSON error response
    instead when the topic is not public and nobody is logged in.
    '''
    public = access_public_topic(topics_id)
    if not public and not is_user_logged_in():
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    client = mc if public else user_admin_mediacloud_client()
    topic = client.topic(topics_id)
    # add in snapshot and latest snapshot job status
    snapshot_list = client.topicSnapshotList(topics_id)
    # need to know if one is running; always asked through the shared `mc` client
    job_states = mc.topicSnapshotGenerateStatus(topics_id)['job_states']
    topic['snapshots'] = {'list': snapshot_list, 'jobStatus': job_states}
    if is_user_logged_in():
        _add_user_favorite_flag_to_topics([topic])
    return topic
def story(topics_id, stories_id):
    '''
    Return the JSON details of one story within a topic.

    Starts from the first entry of the topic story list for stories_id, copies over any
    fields that only the regular story record has, and attaches a 'geoname' entry to each
    of the story's tags in tag_util.GEO_TAG_SET. Callers that are neither logged in nor
    looking at a public topic get an error payload.
    '''
    if access_public_topic(topics_id):
        local_mc, api_key = mc, TOOL_API_KEY
    elif is_user_logged_in():
        local_mc, api_key = user_mediacloud_client(), user_mediacloud_key()
    else:
        return jsonify({'status':'Error', 'message': 'Invalid attempt'})

    story_topic_info = topic_story_list(api_key, topics_id, stories_id=stories_id)['stories'][0]
    # TODO: 'facebook_collection_date' is not populated; the paging loop over
    # topicStoryListFacebookData that used to set it was disabled.

    story_info = local_mc.story(stories_id)  # add in other fields from regular call
    for key, value in story_info.items():
        story_topic_info.setdefault(key, value)
    for tag in story_info['story_tags']:
        if tag['tag_sets_id'] != tag_util.GEO_TAG_SET:
            continue
        # NOTE(review): skips a 9-character prefix of the tag (presumably "geonames_") - confirm
        geonames_id = int(tag['tag'][9:])
        try:
            tag['geoname'] = _cached_geoname(geonames_id)
        except Exception as e:
            # query to CLIFF failed :-( handle it gracefully
            logger.exception(e)
            tag['geoname'] = {}
    return jsonify(story_topic_info)
def _topic_seed_story_count(topic):
    '''
    Count the stories matching a topic's seed query within its date range.

    The query is built from the topic's 'solr_seed_query', 'media' and 'media_tags';
    the date filter from 'start_date' and 'end_date'. TOOL_API_KEY is used for public
    topics, the user's own key otherwise. Returns the count, or None when the API
    raises an MCException.
    '''
    try:
        api_key = TOOL_API_KEY if access_public_topic(topic['topics_id']) else user_mediacloud_key()
        seed_query = concatenate_query_for_solr(
            solr_seed_query=topic['solr_seed_query'],
            media_ids=[m['media_id'] for m in topic['media']],
            tags_ids=[t['tags_id'] for t in topic['media_tags']])
        date_filter = concatenate_solr_dates(start_date=topic['start_date'],
                                             end_date=topic['end_date'])
        return shared_apicache.story_count(api_key, q=seed_query, fq=date_filter)['count']
    except mediacloud.error.MCException:
        # the query syntax is wrong (perhaps pre-story-level search
        return None
Exemple #22
0
def topic_words(topics_id):
    '''
    Return a JSON response with the sampled word counts for a topic.

    The sample size comes from the 'sample_size' request argument, falling back to
    WORD_COUNT_SAMPLE_SIZE. Public topics are queried with TOOL_API_KEY and all filters
    set to None; logged-in users are queried with their own key. Anyone else gets an
    error payload. Both 'list' and 'totals' are cut to WORD_COUNT_UI_NUM_WORDS entries.
    '''
    sample_size = request.args.get('sample_size', WORD_COUNT_SAMPLE_SIZE)

    if access_public_topic(topics_id):
        public_filters = dict(snapshots_id=None, timespans_id=None, foci_id=None, q=None)
        results = apicache.topic_word_counts(TOOL_API_KEY, topics_id,
                                             sample_size=sample_size, **public_filters)
    elif is_user_logged_in():
        # grab the top words, respecting all the filters
        results = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                             sample_size=sample_size)
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    logger.debug(request.args)
    # important so that these get reset on the client when they aren't requested
    totals = []
    wants_totals = is_user_logged_in() and request.args.get('withTotals') == "true"
    if wants_totals:
        # return along with the results for the overall timespan, to facilitate comparison
        snapshots_id, _, _, _ = filters_from_args(request.args)
        overall_timespan = _find_overall_timespan(topics_id, snapshots_id)
        totals = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                            sample_size=sample_size,
                                            timespans_id=overall_timespan['timespans_id'],
                                            foci_id=None, q=None)

    return jsonify({
        'list': results[:WORD_COUNT_UI_NUM_WORDS],
        'totals': totals[:WORD_COUNT_UI_NUM_WORDS],
        'sample_size': str(sample_size)
    })
def topic_tag_coverage(topics_id, tags_id):
    '''
    Useful for seeing how many stories in the topic are tagged with a specific tag

    :param topics_id: id of the topic to count stories in
    :param tags_id: a single tag id, or a list of tag ids (joined as "(id1 id2 ...)")
    :return: {'counts': {'count': <tagged stories>, 'total': <all stories>}}, or None
             when the topic is not public and nobody is logged in
    '''
    if isinstance(tags_id, list):   # doesn't respect duck-typing, but quick fix
        tags_id_str = "({})".format(" ".join(str(tid) for tid in tags_id))
    else:
        tags_id_str = str(tags_id)
    # respect any query filter the user has set
    query_with_tag = add_to_user_query("tags_id_stories:{}".format(tags_id_str))
    # resolve the key once so both counts use the same credentials
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return None
    # now get the counts
    total = topic_story_count(api_key, topics_id)
    tagged = topic_story_count(api_key, topics_id, q=query_with_tag)  # force a count with just the query
    return {'counts': {'count': tagged['count'], 'total': total['count']}}
def topic_tag_coverage(topics_id, tags_id):
    '''
    Useful for seeing how many stories in the topic are tagged with a specific tag

    :param topics_id: id of the topic to count stories in
    :param tags_id: a single tag id, or a list of tag ids (joined as "(id1 id2 ...)")
    :return: {'counts': {'count': <tagged stories>, 'total': <all stories>}}, or None
             when the topic is not public and nobody is logged in
    '''
    # a list of ids becomes a parenthesised group; formatting the list object directly
    # would put its "[1, 2]" repr into the query
    if isinstance(tags_id, list):
        tags_id_str = "({})".format(" ".join(str(tid) for tid in tags_id))
    else:
        tags_id_str = str(tags_id)
    # respect any query filter the user has set
    query_with_tag = add_to_user_query("tags_id_stories:{}".format(tags_id_str))
    # resolve the key once so both counts use the same credentials
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return None
    # now get the counts
    total = topic_story_count(api_key, topics_id)
    tagged = topic_story_count(api_key, topics_id,
                               q=query_with_tag)  # force a count with just the query
    return {'counts': {'count': tagged['count'], 'total': total['count']}}