def topic_words(topics_id):
    """Return a JSON response holding the topic's word list and optional totals.

    The word list comes from ``topic_word_counts``:

    * when ``access_public_topic(topics_id)`` is truthy, it is called with
      ``TOOL_API_KEY`` and every filter keyword passed explicitly as None;
    * otherwise, for a logged-in user, it is called with that user's key and
      only the first 200 entries are kept;
    * anyone else receives an 'Invalid attempt' error payload.

    ``totals`` is filled only when a logged-in user sends ``withTotals=true``
    in the query string; otherwise it is an empty list.
    """
    if access_public_topic(topics_id):
        word_list = topic_word_counts(TOOL_API_KEY, topics_id, snapshots_id=None,
                                      timespans_id=None, foci_id=None, q=None)
    elif is_user_logged_in():
        word_list = topic_word_counts(user_mediacloud_key(), topics_id)[:200]
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    logger.info(request.args)
    wants_totals = (is_user_logged_in()
                    and 'withTotals' in request.args
                    and request.args['withTotals'] == "true")
    if wants_totals:
        # second call with timespan and query cleared, so the client has
        # something to compare the word list against
        overall = topic_word_counts(user_mediacloud_key(), topics_id,
                                    timespans_id=None, q=None)
    else:
        # always send a list so the client resets totals it did not ask for
        overall = []
    return jsonify({'list': word_list, 'totals': overall})
def _topic_snapshot_list(topic):
    """Build the snapshot summary dict for ``topic``.

    Side effect: stores ``_topic_seed_story_count(topic)`` on the passed-in
    ``topic`` under ``'seed_query_story_count'``.

    Returns an empty dict when the caller has neither public access to the
    topic nor a logged-in session. Otherwise returns a dict with:

    * ``'list'``          - the snapshots, sorted by ``'snapshot_date'``
    * ``'jobStatus'``     - ``job_states`` from ``mc.topicSnapshotGenerateStatus``
    * ``'latestVersion'`` - the ``'note'`` of the last snapshot, or 1 if none
    """
    topics_id = topic['topics_id']
    if access_public_topic(topics_id):
        client, api_key = mc, TOOL_API_KEY
    elif is_user_logged_in():
        client, api_key = user_mediacloud_client(), user_mediacloud_key()
    else:
        return {}  # prob something smarter we can do here

    by_id = sorted(client.topicSnapshotList(topics_id), key=itemgetter('snapshots_id'))
    # snapshots without a note get a one-based version number from their
    # position in id order
    for position, snapshot in enumerate(by_id):
        if snapshot['note'] in [None, '']:
            snapshot['note'] = position + ARRAY_BASE_ONE

    # seed_query story count
    topic['seed_query_story_count'] = _topic_seed_story_count(topic)

    # add foci_count for display, then put the list into date order
    with_foci = _add_snapshot_foci_count(api_key, topics_id, by_id)
    by_date = sorted(with_foci, key=itemgetter('snapshot_date'))

    # need to know if a snapshot job is running; note this always goes through
    # the module-level `mc` client, not the per-user one
    job_states = mc.topicSnapshotGenerateStatus(topics_id)['job_states']

    latest = by_date[-1] if by_date else None
    return {
        'list': by_date,
        'jobStatus': job_states,
        'latestVersion': latest['note'] if latest else 1,
    }
def topic_words(topics_id):
    """Return a JSON response with sampled word counts for a topic.

    ``sample_size`` is read from the query string when present (so it is a
    string in that case), otherwise ``WORD_COUNT_SAMPLE_SIZE`` is used.

    Response keys:

    * ``'list'``        - first ``WORD_COUNT_UI_NUM_WORDS`` word results
    * ``'totals'``      - first ``WORD_COUNT_UI_NUM_WORDS`` results for the
      overall timespan; empty unless a logged-in user sent ``withTotals=true``
    * ``'sample_size'`` - the sample size used, as a string

    Callers with neither public access nor a login get an 'Invalid attempt'
    error payload instead.
    """
    if 'sample_size' in request.args:
        sample_size = request.args['sample_size']
    else:
        sample_size = WORD_COUNT_SAMPLE_SIZE

    if access_public_topic(topics_id):
        word_list = apicache.topic_word_counts(TOOL_API_KEY, topics_id, sample_size=sample_size,
                                               snapshots_id=None, timespans_id=None,
                                               foci_id=None, q=None)
    elif is_user_logged_in():
        # grab the top words, respecting all the filters
        word_list = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                               sample_size=sample_size)
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    logger.debug(request.args)
    # default to an empty list so the client resets totals it did not request
    overall_words = []
    if is_user_logged_in() and 'withTotals' in request.args and request.args['withTotals'] == "true":
        # also fetch the counts for the overall timespan, to facilitate comparison
        snapshots_id, _timespans_id, _foci_id, _q = filters_from_args(request.args)
        overall_timespan = _find_overall_timespan(topics_id, snapshots_id)
        overall_words = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                                   sample_size=sample_size,
                                                   timespans_id=overall_timespan['timespans_id'],
                                                   foci_id=None, q=None)

    return jsonify({
        'list': word_list[:WORD_COUNT_UI_NUM_WORDS],
        'totals': overall_words[:WORD_COUNT_UI_NUM_WORDS],
        'sample_size': str(sample_size),
    })
def _topic_summary(topics_id):
    """Fetch a topic and decorate it with snapshot and spider job information.

    Uses the module-level ``mc`` client when ``access_public_topic`` allows
    it, else ``user_admin_mediacloud_client()`` for a logged-in user. Other
    callers get a ``jsonify``-ed 'Invalid attempt' payload (note: a response
    object, not a topic dict).

    Added keys: ``'snapshots'`` (``'list'`` and ``'jobStatus'``) and
    ``'spiderJobs'``. For logged-in users the topic is also passed through
    ``_add_user_favorite_flag_to_topics``.
    """
    if access_public_topic(topics_id):
        client = mc
    elif is_user_logged_in():
        client = user_admin_mediacloud_client()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    summary = client.topic(topics_id)

    # add in snapshot and latest snapshot job status
    snapshot_list = client.topicSnapshotList(topics_id)
    # need to know if a snapshot job is running (always asked via `mc`)
    snapshot_jobs = mc.topicSnapshotGenerateStatus(topics_id)['job_states']
    summary['snapshots'] = {'list': snapshot_list, 'jobStatus': snapshot_jobs}

    # add in spider job status
    summary['spiderJobs'] = client.topicSpiderStatus(topics_id)['job_states']

    if is_user_logged_in():
        _add_user_favorite_flag_to_topics([summary])

    # NOTE: a disabled block used to sit here (kept inert inside a bare string
    # literal). It computed seed / spidered / total story counts with
    # topic_story_count and stored them as 'seedStories', 'spideredStories'
    # and 'totaltories'. It never executed, so nothing is computed here.
    return summary
def map_files(topics_id):
    """Report the render status of the topic's map file as JSON.

    The status for ``MAP_TYPES[0]`` is one of:

    * ``'rendered'``   - ``<prefix>.gexf`` exists under ``DATA_DIR``
    * ``'generating'`` - no rendered file, but ``<prefix>.lock`` exists
    * ``'starting'``   - neither exists; generation is kicked off now

    Callers with neither public access nor a login get an 'Invalid attempt'
    error payload.
    """
    if not (access_public_topic(topics_id) or is_user_logged_in()):
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    files = {
        'wordMap': 'unsupported',
        'linkMap': 'not rendered',
    }
    _snapshots_id, timespans_id, _foci_id, _q = filters_from_args(request.args)
    map_type = MAP_TYPES[0]  # no linkMaps yet
    file_prefix = _get_file_prefix(map_type, topics_id, timespans_id)
    rendered_path = os.path.join(DATA_DIR, file_prefix + ".gexf")
    lock_path = os.path.join(DATA_DIR, file_prefix + ".lock")

    if os.path.isfile(rendered_path):
        status = 'rendered'
    elif os.path.isfile(lock_path):
        # a lock file is treated as "a generation job is already in progress"
        status = 'generating'
    else:
        status = 'starting'
        _start_generating_map_file(map_type, topics_id, timespans_id)

    files[map_type] = status
    return jsonify(files)
def map_files(topics_id):
    """Report the render status of the topic's map file as JSON.

    The status for ``MAP_TYPES[0]`` is ``'rendered'`` when ``<prefix>.gexf``
    exists under ``DATA_DIR``; otherwise it is ``'generating'``, and a
    generation job is started first if no ``<prefix>.lock`` file is present.

    Callers with neither public access nor a login get an 'Invalid attempt'
    error payload.
    """
    if not (access_public_topic(topics_id) or is_user_logged_in()):
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    files = {'wordMap': 'unsupported', 'linkMap': 'not rendered'}
    _snapshots_id, timespans_id, _foci_id, _q = filters_from_args(request.args)
    map_type = MAP_TYPES[0]  # no linkMaps yet
    file_prefix = _get_file_prefix(map_type, topics_id, timespans_id)
    rendered_path = os.path.join(DATA_DIR, file_prefix + ".gexf")
    lock_path = os.path.join(DATA_DIR, file_prefix + ".lock")

    if os.path.isfile(rendered_path):
        status = 'rendered'
    else:
        # no lock file means nobody is generating yet, so start a job; either
        # way the caller is told the file is being generated
        if not os.path.isfile(lock_path):
            _start_generating_map_file(map_type, topics_id, timespans_id)
        status = 'generating'

    files[map_type] = status
    return jsonify(files)
def topic_tag_coverage(topics_id, tags_id):
    """Count how many stories in the topic carry a specific tag (or tags).

    ``tags_id`` may be a single id or a list of ids; a list is rendered as a
    parenthesised, space-separated group inside the ``tags_id_stories:``
    clause. The clause is combined with the user's query through
    ``add_to_user_query``.

    Returns ``{'counts': {'count': <tagged>, 'total': <all>}}``, or None when
    the caller has neither public access to the topic nor a login.
    """
    if isinstance(tags_id, list):  # doesn't respect duck-typing, but quick fix
        tags_clause = "({})".format(" ".join(str(one_id) for one_id in tags_id))
    else:
        tags_clause = str(tags_id)
    # respect any query filter the user has set
    tagged_query = add_to_user_query("tags_id_stories:{}".format(tags_clause))

    # pick the API key for the counts
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return None

    overall = topic_story_count(api_key, topics_id)
    # force a count with just the query
    tagged = topic_story_count(api_key, topics_id, q=tagged_query)
    return {'counts': {'count': tagged['count'], 'total': overall['count']}}
def topic_stories(topics_id):
    """Return the topic's story list as a JSON response.

    With public access, ``topic_story_list`` is called with ``TOOL_API_KEY``
    and every filter keyword passed explicitly as None; a logged-in user's
    call uses their own key and leaves the filters to the helper's defaults.
    Anyone else gets an 'Invalid attempt' error payload.
    """
    if access_public_topic(topics_id):
        story_page = topic_story_list(TOOL_API_KEY, topics_id, snapshots_id=None,
                                      timespans_id=None, foci_id=None, q=None)
        return jsonify(story_page)
    if is_user_logged_in():
        story_page = topic_story_list(user_mediacloud_key(), topics_id)
        return jsonify(story_page)
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
def topic_split_story_count(topics_id):
    """Return ``apicache.topic_split_story_counts`` for the topic as JSON.

    The payload is ``{'results': <counts>}``. Public access uses
    ``TOOL_API_KEY`` with all filter keywords passed as None; a logged-in
    user's call uses their own key. Anyone else gets an 'Invalid attempt'
    error payload.
    """
    if access_public_topic(topics_id):
        split_counts = apicache.topic_split_story_counts(
            TOOL_API_KEY, topics_id,
            snapshots_id=None, timespans_id=None, foci_id=None, q=None)
        return jsonify({'results': split_counts})
    if is_user_logged_in():
        split_counts = apicache.topic_split_story_counts(user_mediacloud_key(), topics_id)
        return jsonify({'results': split_counts})
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
def topic_split_story_count(topics_id):
    """Respond with the topic's split story counts wrapped under ``'results'``.

    Access rules: public topics are queried with ``TOOL_API_KEY`` and all
    filter keywords set to None; logged-in users are queried with their own
    key; every other caller receives an 'Invalid attempt' error payload.
    """
    public_access = access_public_topic(topics_id)
    if not public_access and not is_user_logged_in():
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    if public_access:
        split_counts = apicache.topic_split_story_counts(
            TOOL_API_KEY, topics_id,
            snapshots_id=None, timespans_id=None, foci_id=None, q=None)
    else:
        split_counts = apicache.topic_split_story_counts(user_mediacloud_key(), topics_id)
    return jsonify({'results': split_counts})
def topic_media(topics_id):
    """Return the topic's media list (from ``topic_media_list``) as JSON.

    Public access uses ``TOOL_API_KEY`` with the filter, sort, limit and
    paging keywords all passed explicitly as None; a logged-in user's call
    uses their own key. Anyone else gets an 'Invalid attempt' error payload.
    """
    if access_public_topic(topics_id):
        sources = topic_media_list(TOOL_API_KEY, topics_id, snapshots_id=None,
                                   timespans_id=None, foci_id=None,
                                   sort=None, limit=None, link_id=None)
        return jsonify(sources)
    if is_user_logged_in():
        sources = topic_media_list(user_mediacloud_key(), topics_id)
        return jsonify(sources)
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
def story_counts(topics_id):
    """Return filtered and overall story counts for the topic as JSON.

    Payload shape: ``{'counts': {'count': <filtered>, 'total': <overall>}}``.
    The overall count is requested with ``timespans_id`` and ``q`` forced to
    None; the filtered count leaves them to ``topic_story_count``'s defaults.
    Callers with neither public access nor a login get an 'Invalid attempt'
    error payload.
    """
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    overall = topic_story_count(api_key, topics_id, timespans_id=None, q=None)
    # force a count with just the query
    matching = topic_story_count(api_key, topics_id)
    counts = {'count': matching['count'], 'total': overall['count']}
    return jsonify({'counts': counts})
def topic_media(topics_id):
    """Return the topic's media list (from ``apicache.topic_media_list``) as JSON.

    Public access uses ``TOOL_API_KEY`` with the filter, sort, limit and
    paging keywords all passed explicitly as None; a logged-in user's call
    uses their own key. Anyone else gets an 'Invalid attempt' error payload.
    """
    if access_public_topic(topics_id):
        sources = apicache.topic_media_list(TOOL_API_KEY, topics_id, snapshots_id=None,
                                            timespans_id=None, foci_id=None,
                                            sort=None, limit=None, link_id=None)
        return jsonify(sources)
    if is_user_logged_in():
        sources = apicache.topic_media_list(user_mediacloud_key(), topics_id)
        return jsonify(sources)
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
def _public_safe_topic_story_count(topics_id, q):
    """Return matching-vs-total story counts for ``q`` as a JSON response.

    Both counts go through ``add_to_user_query``: the total with None, the
    matching count with ``q``. Payload shape:
    ``{'counts': {'count': <matching>, 'total': <total>}}``.

    Uses ``TOOL_API_KEY`` when the topic is publicly accessible, the logged-in
    user's key otherwise; other callers get an 'Invalid attempt' payload.
    """
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    overall = topic_story_count(api_key, topics_id, q=add_to_user_query(None))
    # force a count with just the query
    hits = topic_story_count(api_key, topics_id, q=add_to_user_query(q))
    return jsonify({'counts': {'count': hits['count'], 'total': overall['count']}})
def story_counts(topics_id):
    """Return filtered and overall story counts for the topic as JSON.

    Payload shape: ``{'counts': {'count': <filtered>, 'total': <overall>}}``.
    The overall count is requested with timespan, snapshot, query and focus
    all forced to None; the filtered count leaves them to
    ``apicache.topic_story_count``'s defaults. Callers with neither public
    access nor a login get an 'Invalid attempt' error payload.
    """
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    overall = apicache.topic_story_count(api_key, topics_id, timespans_id=None,
                                         snapshots_id=None, q=None, foci_id=None)
    matching = apicache.topic_story_count(api_key, topics_id)
    counts = {'count': matching['count'], 'total': overall['count']}
    return jsonify({'counts': counts})
def _public_safe_topic_story_count(topics_id, q):
    """Return matching-vs-total story counts for ``q`` as a JSON response.

    Both counts go through ``apicache.add_to_user_query``: the total with
    None, the matching count with ``q``. Payload shape:
    ``{'counts': {'count': <matching>, 'total': <total>}}``.

    Uses ``TOOL_API_KEY`` when the topic is publicly accessible, the logged-in
    user's key otherwise; other callers get an 'Invalid attempt' payload.
    """
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    # force a count with just the query
    overall = apicache.topic_story_count(api_key, topics_id,
                                         q=apicache.add_to_user_query(None))
    hits = apicache.topic_story_count(api_key, topics_id,
                                      q=apicache.add_to_user_query(q))
    return jsonify({'counts': {'count': hits['count'], 'total': overall['count']}})
def _topic_summary(topics_id):
    """Fetch a topic and attach its snapshot summary.

    Uses the module-level ``mc`` client when ``access_public_topic`` allows
    it, else ``user_mediacloud_client()`` for a logged-in user. Other callers
    get a ``jsonify``-ed 'Invalid attempt' payload (note: a response object,
    not a topic dict).

    The topic gains a ``'snapshots'`` key from ``_topic_snapshot_list`` and,
    for logged-in users, is passed through ``add_user_favorite_flag_to_topics``.
    """
    if access_public_topic(topics_id):
        client = mc
    elif is_user_logged_in():
        client = user_mediacloud_client()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    summary = client.topic(topics_id)
    # add in snapshot list (with version numbers, by date)
    summary['snapshots'] = _topic_snapshot_list(summary)
    if is_user_logged_in():
        add_user_favorite_flag_to_topics([summary])
    return summary
def topic_sentence_count(topics_id):
    """Return ``topic_sentence_counts`` for the topic as a JSON response.

    Public access uses ``TOOL_API_KEY`` with every filter keyword passed
    explicitly as None; a logged-in user's call uses their own key. Anyone
    else gets an 'Invalid attempt' error payload.
    """
    if access_public_topic(topics_id):
        sentence_counts = topic_sentence_counts(TOOL_API_KEY, topics_id, snapshots_id=None,
                                                timespans_id=None, foci_id=None, q=None)
        return jsonify(sentence_counts)
    if is_user_logged_in():
        sentence_counts = topic_sentence_counts(user_mediacloud_key(), topics_id)
        return jsonify(sentence_counts)
    return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
def _topic_summary(topics_id):
    """Fetch a topic and attach its snapshot list and snapshot job status.

    Uses the module-level ``mc`` client when ``access_public_topic`` allows
    it, else ``user_admin_mediacloud_client()`` for a logged-in user. Other
    callers get a ``jsonify``-ed 'Invalid attempt' payload (note: a response
    object, not a topic dict).

    The topic gains a ``'snapshots'`` key with ``'list'`` and ``'jobStatus'``
    and, for logged-in users, is passed through
    ``_add_user_favorite_flag_to_topics``.
    """
    if access_public_topic(topics_id):
        client = mc
    elif is_user_logged_in():
        client = user_admin_mediacloud_client()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    summary = client.topic(topics_id)

    # add in snapshot and latest snapshot job status
    snapshot_list = client.topicSnapshotList(topics_id)
    # need to know if a snapshot job is running (always asked via `mc`)
    snapshot_jobs = mc.topicSnapshotGenerateStatus(topics_id)['job_states']
    summary['snapshots'] = {'list': snapshot_list, 'jobStatus': snapshot_jobs}

    if is_user_logged_in():
        _add_user_favorite_flag_to_topics([summary])
    return summary
def story(topics_id, stories_id):
    """Return one story's topic-level info merged with its general story info.

    The topic-level record is the first entry of ``topic_story_list(...)
    ['stories']`` for ``stories_id``. Fields of ``local client .story()`` that
    the topic-level record lacks are copied into it. Each tag in the story's
    ``'story_tags'`` that belongs to ``tag_util.GEO_TAG_SET`` gets a
    ``'geoname'`` entry from ``_cached_geoname`` (an empty dict if that lookup
    raises).

    Callers with neither public access nor a login get an 'Invalid attempt'
    error payload.
    """
    if access_public_topic(topics_id):
        client = mc
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        client = user_mediacloud_client()
        api_key = user_mediacloud_key()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    topic_story = topic_story_list(api_key, topics_id, stories_id=stories_id)['stories'][0]

    # NOTE: a disabled block used to sit in the logged-in branch (kept inert
    # inside a bare string literal). It paged through
    # topicStoryListFacebookData and copied 'facebook_api_collect_date' into
    # 'facebook_collection_date' for this story. It never executed.

    # add in other fields from regular call
    full_story = client.story(stories_id)
    for field, value in full_story.items():
        if field not in topic_story:
            topic_story[field] = value

    for tag in full_story['story_tags']:
        if tag['tag_sets_id'] != tag_util.GEO_TAG_SET:
            continue
        # drops the first 9 characters of the tag text before parsing the id
        # (presumably a "geonames_" prefix - TODO confirm)
        geonames_id = int(tag['tag'][9:])
        try:
            tag['geoname'] = _cached_geoname(geonames_id)
        except Exception as err:
            # query to CLIFF failed :-( handle it gracefully
            logger.exception(err)
            tag['geoname'] = {}

    return jsonify(topic_story)
def _topic_seed_story_count(topic):
    """Return the story count for the topic's seed query, or None on MCException.

    The query is built by ``concatenate_query_for_solr`` from the topic's
    ``'solr_seed_query'``, the ``media_id`` of each entry in ``'media'`` and
    the ``tags_id`` of each entry in ``'media_tags'``; the date filter comes
    from ``concatenate_solr_dates`` over ``'start_date'`` / ``'end_date'``.

    ``TOOL_API_KEY`` is used when ``access_public_topic`` allows it; otherwise
    the user's key is used (no login check is made here).
    """
    try:
        if access_public_topic(topic['topics_id']):
            api_key = TOOL_API_KEY
        else:
            api_key = user_mediacloud_key()
        seed_query = concatenate_query_for_solr(
            solr_seed_query=topic['solr_seed_query'],
            media_ids=[source['media_id'] for source in topic['media']],
            tags_ids=[tag['tags_id'] for tag in topic['media_tags']])
        date_filter = concatenate_solr_dates(start_date=topic['start_date'],
                                             end_date=topic['end_date'])
        return shared_apicache.story_count(api_key, q=seed_query, fq=date_filter)['count']
    except mediacloud.error.MCException:
        # the original author's note: the query syntax is wrong (perhaps a
        # pre-story-level search seed)
        return None
def topic_words(topics_id):
    """Return a JSON response with sampled word counts for a topic.

    ``sample_size`` defaults to ``WORD_COUNT_SAMPLE_SIZE`` and is overridden
    by the ``sample_size`` query-string value when one is sent (a string in
    that case).

    Response keys:

    * ``'list'``        - first ``WORD_COUNT_UI_NUM_WORDS`` word results
    * ``'totals'``      - first ``WORD_COUNT_UI_NUM_WORDS`` results for the
      overall timespan; empty unless a logged-in user sent ``withTotals=true``
    * ``'sample_size'`` - the sample size used, as a string

    Callers with neither public access nor a login get an 'Invalid attempt'
    error payload instead.
    """
    sample_size = WORD_COUNT_SAMPLE_SIZE
    if 'sample_size' in request.args:
        sample_size = request.args['sample_size']

    if access_public_topic(topics_id):
        top_words = apicache.topic_word_counts(
            TOOL_API_KEY, topics_id, sample_size=sample_size,
            snapshots_id=None, timespans_id=None, foci_id=None, q=None)
    elif is_user_logged_in():
        # grab the top words, respecting all the filters
        top_words = apicache.topic_word_counts(
            user_mediacloud_key(), topics_id, sample_size=sample_size)
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})

    logger.debug(request.args)
    totals_requested = (is_user_logged_in()
                        and 'withTotals' in request.args
                        and request.args['withTotals'] == "true")
    if totals_requested:
        # also fetch the counts for the overall timespan, to facilitate comparison
        snapshots_id, _timespans_id, _foci_id, _q = filters_from_args(request.args)
        overall_timespan = _find_overall_timespan(topics_id, snapshots_id)
        timespan_words = apicache.topic_word_counts(
            user_mediacloud_key(), topics_id, sample_size=sample_size,
            timespans_id=overall_timespan['timespans_id'], foci_id=None, q=None)
    else:
        # important so that totals get reset on the client when not requested
        timespan_words = []

    return jsonify({
        'list': top_words[:WORD_COUNT_UI_NUM_WORDS],
        'totals': timespan_words[:WORD_COUNT_UI_NUM_WORDS],
        'sample_size': str(sample_size),
    })
def topic_tag_coverage(topics_id, tags_id):
    """Count how many stories in the topic carry a specific tag (or tags).

    ``tags_id`` may be a single id or a list of ids; a list is rendered as a
    parenthesised, space-separated group inside the ``tags_id_stories:``
    clause. The clause is combined with the user's query through
    ``add_to_user_query``.

    Returns ``{'counts': {'count': <tagged>, 'total': <all>}}``, or None when
    the caller has neither public access to the topic nor a login.
    """
    # doesn't respect duck-typing, but quick fix
    if not isinstance(tags_id, list):
        tags_clause = str(tags_id)
    else:
        tags_clause = "({})".format(" ".join(map(str, tags_id)))
    # respect any query filter the user has set
    tagged_query = add_to_user_query("tags_id_stories:{}".format(tags_clause))

    # now get the counts, with whichever key the caller is entitled to
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return None

    overall = topic_story_count(api_key, topics_id)
    # force a count with just the query
    tagged = topic_story_count(api_key, topics_id, q=tagged_query)
    return {'counts': {'count': tagged['count'], 'total': overall['count']}}
def topic_tag_coverage(topics_id, tags_id):
    """Count how many stories in the topic carry a specific tag.

    ``tags_id`` is formatted directly into a ``tags_id_stories:`` clause,
    which is combined with the user's query through ``add_to_user_query``.

    Returns ``{'counts': {'count': <tagged>, 'total': <all>}}``, or None when
    the caller has neither public access to the topic nor a login.
    """
    # the request filters are parsed here but none of the values are used below
    _snapshots_id, _timespans_id, _foci_id, _q = filters_from_args(request.args)
    # respect any query filter the user has set
    tagged_query = add_to_user_query("tags_id_stories:{}".format(tags_id))

    # now get the counts, with whichever key the caller is entitled to
    if access_public_topic(topics_id):
        api_key = TOOL_API_KEY
    elif is_user_logged_in():
        api_key = user_mediacloud_key()
    else:
        return None

    overall = topic_story_count(api_key, topics_id)
    # force a count with just the query
    tagged = topic_story_count(api_key, topics_id, q=tagged_query)
    return {'counts': {'count': tagged['count'], 'total': overall['count']}}