def normalized_and_story_count(q, fq, open_q):
    # total stories matching the query, plus the total matching the open
    # "everything" query, so the caller can normalize one against the other
    results = {}
    mc_api_key = base_cache.api_key()
    results['total'] = _cached_total_story_count(mc_api_key, q, fq)['count']
    results['normalized_total'] = _cached_total_story_count(mc_api_key, open_q, fq)['count']
    return results

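# Illustrative sketch only, not part of the module API: shows how the two
# totals returned above are typically combined into an attention ratio. The
# query and filter strings are hypothetical placeholders.
def _example_attention_ratio():
    counts = normalized_and_story_count('"climate change"', 'tags_id_media:1', '*')
    if counts['normalized_total'] == 0:
        return 0
    return float(counts['total']) / float(counts['normalized_total'])
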
def tag_set_coverage(total_q, subset_q, fq):
    api_key = base_cache.api_key()
    coverage = {
        'totals': _cached_total_story_count(api_key, total_q, fq)['count'],
        'counts': _cached_total_story_count(api_key, subset_q, fq)['count'],
    }
    # guard against division by zero; use `== 0`, not `is 0`, for a value comparison
    coverage['coverage_percentage'] = 0 if coverage['totals'] == 0 \
        else float(coverage['counts']) / float(coverage['totals'])
    return coverage

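# Illustrative sketch only: tag_set_coverage is typically used to report what
# fraction of stories matching a query have also been tagged from some tag set.
# The Solr field and query composition below are assumptions for the example.
def _example_tag_coverage(q, fq, tags_id):
    subset_q = '({}) AND tags_id_stories:{}'.format(q, tags_id)
    return tag_set_coverage(q, subset_q, fq)['coverage_percentage']
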
def story_list_page(q, fq, last_processed_stories_id=None, stories_per_page=1000,
                    sort=mc.SORT_PROCESSED_STORIES_ID):
    return _cached_story_list_page(base_cache.api_key(), q, fq,
                                   last_processed_stories_id, stories_per_page, sort)

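# Illustrative sketch only, not part of the module API: shows the intended
# paging pattern for story_list_page, where each page's highest
# processed_stories_id is fed back in to fetch the next page. The
# 'processed_stories_id' field name follows the Media Cloud story schema;
# treat it as an assumption here.
def _example_iter_all_stories(q, fq, page_size=1000):
    last_id = None
    while True:
        page = story_list_page(q, fq, last_processed_stories_id=last_id,
                               stories_per_page=page_size)
        for story in page:
            yield story
        if len(page) < page_size:  # short page means we've reached the end
            break
        last_id = page[-1]['processed_stories_id']
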
def _most_used_tags(q, fq, tag_sets_id):
    # top tags used in stories matching the query (pass in None for no limit)
    api_key = base_cache.api_key()
    tags = _cached_most_used_tags(api_key, q, fq, tag_sets_id, TAG_COUNT_SAMPLE_SIZE)
    # filter out bogus NYT theme tags; build a new list rather than calling
    # remove() while iterating, which skips elements
    return [t for t in tags if not is_bad_theme(t['tags_id'])]

def normalized_and_story_split_count(q, open_q, start_date, end_date):
    results = {}
    fq = dates_as_filter_query(start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))
    mc_api_key = base_cache.api_key()
    # per-day counts of stories matching the query, padded with zero-count
    # entries for any days the API skipped
    matching = cached_story_split_count(mc_api_key, q, fq)
    matching = add_missing_dates_to_split_story_counts(matching['counts'], start_date, end_date)
    # per-day counts for the open "everything" query, used for normalization
    total = cached_story_split_count(mc_api_key, open_q, fq)
    total = add_missing_dates_to_split_story_counts(total['counts'], start_date, end_date)
    results['counts'] = combined_split_and_normalized_counts(matching, total)
    results['total'] = sum([day['count'] for day in matching])
    results['normalized_total'] = sum([day['count'] for day in total])
    return results

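# Illustrative sketch only: shows how the normalized split counts above are
# typically consumed to chart attention over time. The query strings and date
# range are hypothetical placeholders.
def _example_attention_over_time():
    import datetime as dt
    results = normalized_and_story_split_count(
        '"climate change"', '*',
        dt.date(2020, 1, 1), dt.date(2020, 12, 31))
    # each entry in results['counts'] should pair the matching and overall
    # counts for one day (per the combining helper's name)
    return results['counts']
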
def tag_set(tag_sets_id):
    return _cached_tag_set(base_cache.api_key(), tag_sets_id)

def word_count(q, fq, ngram_size, num_words, sample_size):
    api_key = base_cache.api_key()
    return _cached_word_count(api_key, q, fq, ngram_size, num_words, sample_size)

def story_count(q, fq):
    api_key = base_cache.api_key()
    return _cached_total_story_count(api_key, q, fq)