def _public_safe_topic_story_count(topics_id, q):
    if access_public_topic(topics_id):
        total = topic_story_count(TOOL_API_KEY, topics_id, q=add_to_user_query(None))
        matching = topic_story_count(TOOL_API_KEY, topics_id, q=add_to_user_query(q))  # force a count with just the query
    elif is_user_logged_in():
        total = topic_story_count(user_mediacloud_key(), topics_id, q=add_to_user_query(None))
        matching = topic_story_count(user_mediacloud_key(), topics_id, q=add_to_user_query(q))  # force a count with just the query
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
    return jsonify({'counts': {'count': matching['count'], 'total': total['count']}})
def _public_safe_topic_story_count(topics_id, q):
    if access_public_topic(topics_id):
        total = apicache.topic_story_count(TOOL_API_KEY, topics_id, q=apicache.add_to_user_query(None))  # force a count with just the query
        matching = apicache.topic_story_count(TOOL_API_KEY, topics_id, q=apicache.add_to_user_query(q))
    elif is_user_logged_in():
        total = apicache.topic_story_count(user_mediacloud_key(), topics_id, q=apicache.add_to_user_query(None))  # force a count with just the query
        matching = apicache.topic_story_count(user_mediacloud_key(), topics_id, q=apicache.add_to_user_query(q))
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
    return jsonify({'counts': {'count': matching['count'], 'total': total['count']}})
def _public_safe_topic_story_count(topics_id, q):
    total = apicache.topic_story_count(user_mediacloud_key(), topics_id, q=apicache.add_to_user_query(None))  # force a count with just the query
    matching = apicache.topic_story_count(user_mediacloud_key(), topics_id, q=apicache.add_to_user_query(q))
    return jsonify({'counts': {'count': matching['count'], 'total': total['count']}})
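For context, a minimal sketch of how a helper like _public_safe_topic_story_count might be exposed as a Flask endpoint; the route path, the bare @app.route registration, and the endpoint function name are assumptions for illustration, not the project's actual wiring.

# Hypothetical route wiring, assuming a Flask `app` object is in scope;
# the real project may register this view differently.
@app.route('/api/topics/<topics_id>/stories/count', methods=['GET'])
def topic_story_count_endpoint(topics_id):
    q = request.args.get('q')  # optional user-supplied filter clause
    return _public_safe_topic_story_count(topics_id, q)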
def story_words_csv(topics_id, stories_id):
    query = add_to_user_query('stories_id:'+stories_id)
    ngram_size = request.args['ngram_size'] if 'ngram_size' in request.args else 1  # default to word count
    word_counts = topic_ngram_counts(user_mediacloud_key(), topics_id, ngram_size, q=query,
                                     num_words=WORD_COUNT_DOWNLOAD_NUM_WORDS)
    return csv.stream_response(word_counts, WORD_COUNT_DOWNLOAD_COLUMNS,
                               'topic-{}-story-{}-sampled-ngrams-{}-word'.format(topics_id, stories_id, ngram_size))
def _matching_ratio(topics_id, query_clause):
    total = apicache.topic_story_count(user_mediacloud_key(), topics_id)
    sub_query_clause = None
    if query_clause:
        sub_query_clause = apicache.add_to_user_query(query_clause)
    matching = apicache.topic_story_count(user_mediacloud_key(), topics_id, q=sub_query_clause)
    return {'count': matching['count'], 'total': total['count']}
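A caller would typically turn the count/total pair into a ratio or percentage; a small sketch (not part of the original module, and the helper name is invented) that guards against an empty topic:

# Illustrative only: convert _matching_ratio()'s result into a percentage,
# guarding against division by zero when the topic has no stories.
def _as_percentage(ratio):
    if ratio['total'] == 0:
        return 0.0
    return 100.0 * ratio['count'] / ratio['total']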
def media_words_csv(topics_id, media_id):
    query = apicache.add_to_user_query('media_id:'+media_id)
    ngram_size = request.args['ngram_size'] if 'ngram_size' in request.args else 1  # default to word count
    word_counts = apicache.topic_ngram_counts(user_mediacloud_key(), topics_id, ngram_size=ngram_size, q=query,
                                              num_words=WORD_COUNT_DOWNLOAD_NUM_WORDS,
                                              sample_size=WORD_COUNT_DOWNLOAD_SAMPLE_SIZE)
    return csv.stream_response(word_counts, apicache.WORD_COUNT_DOWNLOAD_COLUMNS,
                               'topic-{}-media-{}-sampled-ngrams-{}-word'.format(topics_id, media_id, ngram_size))
def topic_words_csv(topics_id):
    query = apicache.add_to_user_query(None)
    sample_size = request.args['sample_size'] if 'sample_size' in request.args else WORD_COUNT_SAMPLE_SIZE
    ngram_size = request.args['ngram_size'] if 'ngram_size' in request.args else 1  # default to word count
    word_counts = apicache.topic_ngram_counts(user_mediacloud_key(), topics_id, ngram_size=ngram_size, q=query,
                                              num_words=WORD_COUNT_DOWNLOAD_NUM_WORDS, sample_size=sample_size)
    return csv.stream_response(word_counts, apicache.WORD_COUNT_DOWNLOAD_COLUMNS,
                               'topic-{}-sampled-ngrams-{}-word'.format(topics_id, ngram_size))
def topic_word_usage_sample(topics_id, word):
    # gotta respect the manual query if there is one
    q = apicache.add_to_user_query(word)
    # need to use tool API key here because non-admin users can't pull sentences
    results = apicache.topic_sentence_sample(TOOL_API_KEY, topics_id, sample_size=1000, q=q)
    # only pull the 5 words before and after so we aren't leaking full content to users
    fragments = [_sentence_fragment_around(word, s['sentence']) for s in results if s['sentence'] is not None]
    fragments = [f for f in fragments if f is not None]
    return jsonify({'fragments': fragments})
def topic_word_usage_sample(topics_id, word):
    # gotta respect the manual query if there is one
    q = apicache.add_to_user_query(word)
    # need to use tool API key here because non-admin users can't pull sentences
    results = apicache.topic_sentence_sample(TOOL_API_KEY, topics_id, sample_size=1000, q=q)
    # TODO: only pull the 5 words before and after so we aren't leaking full content to users
    fragments = [_sentence_fragment_around(word, s['sentence']) for s in results if s['sentence'] is not None]
    fragments = [f for f in fragments if f is not None]
    return jsonify({'fragments': fragments})
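The _sentence_fragment_around helper is not shown in this listing; based on the comment above (keep roughly five words on either side of the matched word so full sentences are not leaked), a plausible sketch under those assumptions looks like this.

# Rough sketch only: the real _sentence_fragment_around lives elsewhere.
# Returns a short window of tokens around the first match, or None if the
# word does not appear in the sentence as a standalone token.
def _sentence_fragment_around(word, sentence, window=5):
    tokens = sentence.split()
    lowered = [t.lower() for t in tokens]
    try:
        idx = lowered.index(word.lower())
    except ValueError:
        return None
    start = max(0, idx - window)
    end = min(len(tokens), idx + window + 1)
    return ' '.join(tokens[start:end])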
def topic_word_associated_words_csv(topics_id, word):
    query = apicache.add_to_user_query(word)
    ngram_size = request.args['ngram_size'] if 'ngram_size' in request.args else 1  # default to word count
    word_counts = apicache.topic_ngram_counts(user_mediacloud_key(), topics_id, ngram_size=ngram_size, q=query)
    return csv.stream_response(word_counts, apicache.WORD_COUNT_DOWNLOAD_COLUMNS,
                               'topic-{}-{}-sampled-ngrams-{}-word'.format(topics_id, word, ngram_size))
def media_words(topics_id, media_id):
    query = apicache.add_to_user_query('media_id:' + media_id)
    word_list = apicache.topic_word_counts(user_mediacloud_key(), topics_id, q=query)[:100]
    return jsonify(word_list)
def topic_word_associated_words(topics_id, word):
    query = apicache.add_to_user_query(word)
    response = apicache.topic_word_counts(user_mediacloud_key(), topics_id, q=query)[:100]
    return jsonify(response)
def media_words_csv(topics_id, media_id):
    query = add_to_user_query('media_id:'+media_id)
    word_list = topic_word_counts(user_mediacloud_key(), topics_id, q=query)
    props = ['term', 'stem', 'count']
    return csv.stream_response(word_list, props, 'media-'+str(media_id)+'-words')
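Every handler above funnels its clause through add_to_user_query, which is defined elsewhere. Judging only from how it is called here (None means "just the user's own query", otherwise the clause is combined with any manual 'q' the user passed in the request), a plausible sketch under those assumptions is:

# Sketch only: the real add_to_user_query is imported from another module.
# Combines a server-side clause with the optional user-supplied 'q' parameter.
def add_to_user_query(query_clause):
    q_from_request = request.args.get('q')
    if (q_from_request is None) or (len(q_from_request) == 0):
        return query_clause
    if (query_clause is None) or (len(query_clause) == 0):
        return q_from_request
    return '({}) AND ({})'.format(q_from_request, query_clause)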