def update_metadata_for_sources(source_list):
    """Queue metadata tags (e.g. publication country) for a list of sources.

    For each metadata dimension in VALID_METADATA_IDS ({key: tag_sets_id}
    one-entry dicts), find the tag in that tag set whose name matches the
    source's metadata value and queue a MediaTag ADD action.  All queued
    tags are then submitted in chunks of 50 via _tag_media_job so the
    updates happen quickly in parallel.

    :param source_list: iterable of source dicts; each must have 'media_id'
        and may carry metadata values keyed by the VALID_METADATA_IDS keys.
    """
    tags = []
    for metadata in VALID_METADATA_IDS:
        tag_sets_id = list(metadata.values())[0]
        metadata_key = list(metadata.keys())[0]
        tag_codes = tags_in_tag_set(TOOL_API_KEY, tag_sets_id)
        for source in source_list:
            if metadata_key not in source:
                continue
            metadata_tag_name = source[metadata_key]
            if metadata_tag_name in ('', None):
                continue
            # hack until we have a better match check
            if metadata_key == METADATA_PUB_COUNTRY_NAME:  # template pub_###
                wanted_tag = 'pub_' + metadata_tag_name
            else:
                wanted_tag = metadata_tag_name
            matching = [t for t in tag_codes if t['tag'] == wanted_tag]
            # NOTE: original also tested `matching not in ['', None]`, but a
            # non-empty list can never equal '' or None — truthiness suffices
            if matching:
                metadata_tag_id = matching[0]['tags_id']
                logger.debug('found metadata to add %s', metadata_tag_id)
                tags.append(MediaTag(source['media_id'],
                                     tags_id=metadata_tag_id,
                                     action=TAG_ACTION_ADD))
    # now do all the tags in parallel batches so it happens quickly
    if tags:
        chunks = [tags[x:x + 50] for x in range(0, len(tags), 50)]  # do 50 tags in each request
        _tag_media_job.map(chunks)
def update_metadata_for_sources(source_list):
    """Apply metadata tags to sources, pushing updates through a worker pool.

    For each metadata dimension in VALID_METADATA_IDS, match the source's
    metadata value to a tag in the corresponding tag set and queue a
    MediaTag ADD.  Queued tags are sent in chunks of 50, in parallel via
    a multiprocessing Pool (or serially if use_pool is flipped off).

    Bug fixes vs. original: `m.values()[0]` / `m.keys()[0]` are not
    subscriptable in Python 3 (dict views), and `xrange` no longer exists —
    both raised at runtime under Python 3.

    :param source_list: iterable of source dicts with 'media_id' plus
        optional metadata values keyed by the VALID_METADATA_IDS keys.
    """
    tags = []
    for m in VALID_METADATA_IDS:
        # dict views are not indexable in Python 3; wrap in list() first
        mid = list(m.values())[0]
        mkey = list(m.keys())[0]
        tag_codes = tags_in_tag_set(TOOL_API_KEY, mid)
        for source in source_list:
            if mkey in source:
                metadata_tag_name = source[mkey]
                if metadata_tag_name not in ['', None]:
                    # hack until we have a better match check
                    matching = []
                    if mkey == METADATA_PUB_COUNTRY_NAME:  # template pub_###
                        matching = [t for t in tag_codes
                                    if t['tag'] == 'pub_' + metadata_tag_name]
                    else:
                        matching = [t for t in tag_codes
                                    if t['tag'] == metadata_tag_name]
                    if matching and matching not in ['', None]:
                        metadata_tag_id = matching[0]['tags_id']
                        logger.debug('found metadata to add %s', metadata_tag_id)
                        tags.append(MediaTag(source['media_id'],
                                             tags_id=metadata_tag_id,
                                             action=TAG_ACTION_ADD))
    # now do all the tags in parallel batches so it happens quickly
    if len(tags) > 0:
        # xrange is Python 2 only; range is lazy in Python 3 anyway
        chunks = [tags[x:x + 50] for x in range(0, len(tags), 50)]  # do 50 tags in each request
        use_pool = True
        if use_pool:
            pool = Pool(processes=MEDIA_METADATA_UPDATE_POOL_SIZE)  # process updates in parallel with worker function
            pool.map(_tag_media_worker, chunks)  # blocks until they are all done
            pool.terminate()  # extra safe garbage collection
        else:
            [_tag_media_worker(job) for job in chunks]
def media_type_story_counts(topics_id):
    """Return per-media-type story counts (and percentages) for a topic.

    Counts the topic's stories tagged with each media-type tag and reports
    each tag's share of the topic's total story count (0 when the topic
    has no stories, to avoid dividing by zero).
    """
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    # grab the total stories
    total_stories = topic_story_count(user_mediacloud_key(), topics_id)['count']

    def count_for(tag):
        # one filtered count per media-type tag, matched on media tags
        clause = "tags_id_media:{}".format(tag['tags_id'])
        tagged = topic_story_count(user_mediacloud_key(), topics_id, q=clause)['count']
        share = float(tagged) / float(total_stories) if total_stories > 0 else 0  # protect against div by zero
        return {
            'label': tag['label'],
            'tags_id': tag['tags_id'],
            'count': tagged,
            'pct': share,
        }

    tag_story_counts = [count_for(t) for t in media_type_tags]
    return jsonify({'story_counts': tag_story_counts})
def update_metadata_for_sources(source_list):
    """Queue metadata tag additions for sources and apply them in batches.

    Walks every metadata dimension in VALID_METADATA_IDS, matches each
    source's metadata value against the tags in that dimension's tag set,
    and collects MediaTag ADD actions.  The collected tags are then applied
    in chunks of 50 — serially here (use_pool is False), with the pooled
    path kept for reference.
    """
    pending_tags = []
    for metadata_entry in VALID_METADATA_IDS:
        tag_sets_id = list(metadata_entry.values())[0]
        source_key = list(metadata_entry.keys())[0]
        available_tags = tags_in_tag_set(TOOL_API_KEY, tag_sets_id)
        for src in source_list:
            if source_key not in src:
                continue
            value = src[source_key]
            if value in ['', None]:
                continue
            # hack until we have a better match check
            if source_key == METADATA_PUB_COUNTRY_NAME:
                # template pub_###
                hits = [t for t in available_tags if t['tag'] == 'pub_' + value]
            else:
                hits = [t for t in available_tags if t['tag'] == value]
            if hits and hits not in ['', None]:
                tags_id = hits[0]['tags_id']
                logger.debug('found metadata to add %s', tags_id)
                pending_tags.append(
                    MediaTag(src['media_id'], tags_id=tags_id, action=TAG_ACTION_ADD))
    # now do all the tags in parallel batches so it happens quickly
    if len(pending_tags) > 0:
        # do 50 tags in each request
        chunks = [pending_tags[i:i + 50] for i in range(0, len(pending_tags), 50)]
        use_pool = False
        if use_pool:
            # process updates in parallel with worker function
            pool = Pool(processes=MEDIA_METADATA_UPDATE_POOL_SIZE)
            pool.map(_tag_media_worker, chunks)  # blocks until they are all done
            pool.terminate()  # extra safe garbage collection
        else:
            for job in chunks:
                _tag_media_worker(job)
def media_type_coverage(topics_id):
    """Report how many of a topic's stories come from media-type-tagged media.

    Returns a JSON payload with the count of stories from any media tagged
    with a media-type tag, alongside the topic's total story count.
    """
    type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    # grab the total stories
    total_stories = topic_story_count(user_mediacloud_key(), topics_id)['count']
    # count the stories in any media in tagged as media_type
    id_list = [str(t['tags_id']) for t in type_tags]
    query_clause = "tags_id_media:({})".format(" ".join(id_list))
    tagged_story_count = topic_story_count(
        user_mediacloud_key(), topics_id, q=query_clause)['count']
    payload = {'count': tagged_story_count, 'total': total_stories}
    return jsonify({'counts': payload})
def media_type_coverage(topics_id):
    """Return coverage of media-type tagging across a topic's stories.

    Produces {'counts': {'count': tagged, 'total': all}} where `tagged`
    counts stories whose media carry any media-type tag.
    """
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    # grab the total stories
    total_stories = topic_story_count(user_mediacloud_key(), topics_id)['count']
    # count the stories in any media in tagged as media_type
    tags_ids = " ".join(str(tag['tags_id']) for tag in media_type_tags)
    tagged_story_count = topic_story_count(
        user_mediacloud_key(),
        topics_id,
        q="tags_id_media:({})".format(tags_ids),
    )['count']
    return jsonify({
        'counts': {
            'count': tagged_story_count,
            'total': total_stories,
        },
    })
def media_type_story_counts(topics_id):
    """Return per-media-type story counts and percentages for a topic.

    Counts the topic's stories tagged with each media-type tag and reports
    each tag's share of the topic's total story count.

    Bug fix vs. original: the pct computation divided by total_stories
    unconditionally, raising ZeroDivisionError for a topic with no stories;
    now guarded (consistent with the sibling implementation of this
    endpoint elsewhere in the file).
    """
    tag_story_counts = []
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    # grab the total stories
    total_stories = topic_story_count(user_mediacloud_key(), topics_id)['count']
    # make a count for each tag based on media_id
    for tag in media_type_tags:
        query_clause = "tags_id_media:{}".format(tag['tags_id'])
        tagged_story_count = topic_story_count(
            user_mediacloud_key(), topics_id, q=query_clause)['count']
        tag_story_counts.append({
            'label': tag['label'],
            'tags_id': tag['tags_id'],
            'count': tagged_story_count,
            # protect against div by zero on empty topics
            'pct': float(tagged_story_count) / float(total_stories) if total_stories > 0 else 0,
        })
    return jsonify({'story_counts': tag_story_counts})
def create_media_type_focal_set(topics_id):
    """Create a boolean-query focal set with one focus per media type.

    Reads focalSetName / focalSetDescription from the request form, creates
    the focal set definition on the topic, then creates one focus definition
    per media-type tag, each scoped by a tags_id_media query.

    Returns {'success': True} on success, or a JSON error response if the
    focal set definition could not be created.
    """
    user_mc = user_mediacloud_client()
    # grab the focalSetName and focalSetDescription and then make one
    name = request.form['focalSetName']
    description = request.form['focalSetDescription']
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    new_focal_set = user_mc.topicFocalSetDefinitionCreate(
        topics_id, name, description, FOCAL_TECHNIQUE_BOOLEAN_QUERY)
    if 'focal_set_definitions_id' not in new_focal_set:
        return json_error_response('Unable to create the subtopic set')
    focal_set_def_id = new_focal_set['focal_set_definitions_id']
    # now make the foci in it - one for each media type
    focus_def_results = []
    for tag in media_type_tags:
        created = user_mc.topicFocusDefinitionCreate(
            topics_id,
            name=tag['label'],
            description="Stories from {} sources".format(tag['label']),
            query="tags_id_media:{}".format(tag['tags_id']),
            focal_set_definitions_id=focal_set_def_id,
        )
        focus_def_results.append(created)
    return {'success': True}
def create_media_type_focal_set(topics_id):
    """Build a media-type focal set: one subtopic (focus) per media-type tag.

    Pulls the set name/description from the submitted form, creates the
    focal set definition with the boolean-query technique, and adds a focus
    definition for every media-type tag.  Errors out with a JSON error
    response if the focal set itself cannot be created.
    """
    user_mc = user_mediacloud_client()
    # grab the focalSetName and focalSetDescription and then make one
    focal_set_name = request.form['focalSetName']
    focal_set_description = request.form['focalSetDescription']
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    focal_technique = FOCAL_TECHNIQUE_BOOLEAN_QUERY
    new_focal_set = user_mc.topicFocalSetDefinitionCreate(
        topics_id, focal_set_name, focal_set_description, focal_technique)
    if 'focal_set_definitions_id' not in new_focal_set:
        return json_error_response('Unable to create the subtopic set')
    # now make the foci in it - one for each media type
    focus_def_results = [
        user_mc.topicFocusDefinitionCreate(
            topics_id,
            **{
                'name': tag['label'],
                'description': "Stories from {} sources".format(tag['label']),
                'query': "tags_id_media:{}".format(tag['tags_id']),
                'focal_set_definitions_id': new_focal_set['focal_set_definitions_id'],
            }
        )
        for tag in media_type_tags
    ]
    return {'success': True}
def _cached_media_tags(tag_sets_id):
    """Fetch all tags in a tag set, each annotated with its media query.

    Adds a 'query' key ("tags_id_media:<id>") to every tag dict so callers
    can use the tags directly in story-count queries.
    """
    tag_list = tags_in_tag_set(TOOL_API_KEY, tag_sets_id)
    for entry in tag_list:
        entry['query'] = "tags_id_media:{}".format(entry['tags_id'])
    return tag_list
def get_media_types():
    """Return the list of media-type tags as a JSON response."""
    tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    return jsonify({'list': tags})