def testTagMedia(self):
    """Tag a media source, confirm the tag shows up, then remove it again.

    Uses media source 4451 and the tag named 'test_tag1' in the tag set named
    after TEST_USER_EMAIL.
    """
    media_id = 4451  # ESPN.com
    expected_tags_id = '9172171'  # tags_id of test_tag1 in the test user's tag set
    tag_set_name = TEST_USER_EMAIL

    def apply_action(action):
        # send a single tag change and make sure the API reports success
        reply = self._mc.tagMedia(
            [MediaTag(media_id, tag_set_name, 'test_tag1', action)])
        self.assertTrue('success' in reply)
        self.assertEqual(reply['success'], 1)

    def tags_in_test_set():
        # tags_id of every tag on the media source that is in the test tag set
        media = self._mc.media(media_id)
        found = []
        for tag in media['media_source_tags']:
            if tag['tag_set'] == tag_set_name:
                found.append(tag['tags_id'])
        return found

    # add a tag, and check it
    apply_action(TAG_ACTION_ADD)
    self.assertTrue(int(expected_tags_id) in tags_in_test_set())

    # and remove it; nothing from the test tag set should be left afterwards
    apply_action(TAG_ACTION_REMOVE)
    self.assertEqual(0, len(tags_in_test_set()))
def remove_sources_from_collection(collection_id):
    """Remove the sources named in the ``sources[]`` form field from a collection.

    ``sources[]`` is read from ``request.form`` as a comma-separated string of
    media ids. Blank segments (an empty field, a trailing comma) are ignored
    rather than being passed to ``int()``, which would raise ``ValueError``.

    Args:
        collection_id: tags_id of the collection to remove the sources from.

    Returns:
        A JSON response with whatever ``tagMedia`` returned, or ``{}`` when no
        tag changes were sent.
    """
    raw_source_ids = request.form['sources[]']
    source_ids_to_remove = [
        int(s) for s in raw_source_ids.split(',') if s.strip()
    ]
    user_mc = user_admin_mediacloud_client()
    # get the sources in the collection first, then remove and add as needed
    # NOTE(review): other callers pass an API key as the first argument to
    # media_with_tag - confirm which signature is current.
    existing_source_ids = [
        int(m['media_id']) for m in media_with_tag(collection_id)
    ]
    source_ids_to_remain = list(
        set(existing_source_ids) - set(source_ids_to_remove))

    media_to_remove = [
        MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE)
        for sid in source_ids_to_remove
    ]
    # the sources that stay are re-sent as ADD actions alongside the removals
    # NOTE(review): the original author was unsure whether this re-add is needed
    media_to_remain = [
        MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD)
        for sid in source_ids_to_remain
    ]
    current_media = media_to_remove + media_to_remain

    results = {}
    if current_media:
        results = user_mc.tagMedia(current_media)

    apicache.invalidate_collection_source_representation_cache(
        user_mediacloud_key(), collection_id)
    return jsonify(results)
def collection_update(collection_id):
    """Update a collection's properties and sync its sources with the form.

    Reads ``name``, ``description`` and ``sources[]`` (comma-separated media
    ids, may be empty) from ``request.form``, plus the optional ``static``,
    ``showOnStories`` and ``showOnMedia`` flags, which count as set only when
    they equal the string ``'true'``.

    Args:
        collection_id: tags_id of the collection to update.

    Returns:
        A JSON response built from the ``'tag'`` entry of the ``updateTag``
        result.
    """
    user_mc = user_admin_mediacloud_client()
    label = '{}'.format(request.form['name'])
    description = request.form['description']
    static = request.form.get('static')
    show_on_stories = request.form.get('showOnStories')
    show_on_media = request.form.get('showOnMedia')
    formatted_name = format_name_from_label(label)

    source_ids = []
    if len(request.form['sources[]']) > 0:
        source_ids = [int(sid) for sid in request.form['sources[]'].split(',')]

    # first update the collection
    updated_collection = user_mc.updateTag(
        collection_id, formatted_name, label, description,
        is_static=(static == 'true'),
        show_on_stories=(show_on_stories == 'true'),
        show_on_media=(show_on_media == 'true'))

    # get the sources in the collection first, then remove and add as needed
    existing_source_ids = {
        int(m['media_id'])
        for m in media_with_tag(user_mediacloud_key(), collection_id)
    }
    source_ids_to_remove = list(existing_source_ids - set(source_ids))
    # membership is tested against the set, so this stays linear in the
    # number of submitted ids while keeping their submitted order
    source_ids_to_add = [
        sid for sid in source_ids if sid not in existing_source_ids
    ]

    # then tag / untag the sources so the collection matches the form
    tags_to_add = [
        MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD)
        for sid in source_ids_to_add
    ]
    tags_to_remove = [
        MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE)
        for sid in source_ids_to_remove
    ]
    tags = tags_to_add + tags_to_remove
    if tags:  # don't make an API call when nothing changed
        user_mc.tagMedia(tags)
    return jsonify(updated_collection['tag'])
def collection_create():
    """Create a collection tag from the posted form and tag its sources.

    ``name``, ``description`` and ``sources[]`` (comma-separated, may be empty)
    are required form fields. ``static``, ``showOnStories`` and ``showOnMedia``
    are optional and count as set only when they equal the string ``'true'``.

    Returns:
        A JSON response built from the ``'tag'`` entry of the ``createTag``
        result.
    """
    form = request.form
    user_mc = user_admin_mediacloud_client()  # createTag requires an admin client
    label = '{}'.format(form['name'])
    description = form['description']

    def flag_is_true(field):
        # a missing field yields None, which never equals 'true'
        return form.get(field) == 'true'

    raw_sources = form['sources[]']
    source_ids = raw_sources.split(',') if len(raw_sources) > 0 else []
    formatted_name = format_name_from_label(label)

    # first create the collection
    new_collection = user_mc.createTag(
        TAG_SETS_ID_COLLECTIONS,
        formatted_name,
        label,
        description,
        is_static=flag_is_true('static'),
        show_on_stories=flag_is_true('showOnStories'),
        show_on_media=flag_is_true('showOnMedia'))
    created_tag = new_collection['tag']

    # then tag every submitted source with the new collection's id
    tags = []
    for sid in source_ids:
        tags.append(
            MediaTag(sid, tags_id=created_tag['tags_id'], action=TAG_ACTION_ADD))
    if tags:
        user_mc.tagMedia(tags)
    return jsonify(created_tag)
def update_metadata_for_sources(source_list):
    """Tag each source with the metadata tags named in its own fields.

    For every entry in VALID_METADATA_IDS the first key is used as the field
    to look for on a source and the first value as the tag set to search. A
    source whose field holds a non-empty tag name gets an ADD MediaTag for the
    first tag in that set with a matching name. The collected tags are handed
    to ``_tag_media_job.map`` in batches of 50.

    Args:
        source_list: iterable of source mappings; each must have a
            ``'media_id'`` entry if any of its metadata fields match.
    """
    pending = []
    for metadata_def in VALID_METADATA_IDS:
        mkey, mid = list(metadata_def.items())[0]
        tag_codes = tags_in_tag_set(TOOL_API_KEY, mid)
        for source in source_list:
            if mkey not in source:
                continue
            wanted_name = source[mkey]
            if wanted_name in ['', None]:
                continue
            # hack until we have a better match check
            if mkey == METADATA_PUB_COUNTRY_NAME:
                # publication country tags follow the template pub_###
                candidates = [
                    t for t in tag_codes if t['tag'] == 'pub_' + wanted_name
                ]
            else:
                candidates = [t for t in tag_codes if t['tag'] == wanted_name]
            if not candidates:
                continue
            metadata_tag_id = candidates[0]['tags_id']
            logger.debug('found metadata to add %s', metadata_tag_id)
            pending.append(
                MediaTag(source['media_id'],
                         tags_id=metadata_tag_id,
                         action=TAG_ACTION_ADD))

    if not pending:
        return
    # now do all the tags in batches of 50 per request so it happens quickly
    batch_size = 50
    chunks = [
        pending[start:start + batch_size]
        for start in range(0, len(pending), batch_size)
    ]
    _tag_media_job.map(chunks)
def update_metadata_for_sources(source_list):
    """Tag each source with the metadata tags named in its own fields.

    For every entry in VALID_METADATA_IDS the first key is used as the field
    to look for on a source and the first value as the tag set to search. A
    source whose field holds a non-empty tag name gets an ADD MediaTag for the
    first tag in that set with a matching name. The collected tags are split
    into batches of 50 and processed by ``_tag_media_worker`` in a worker pool
    of MEDIA_METADATA_UPDATE_POOL_SIZE processes.

    Args:
        source_list: iterable of source mappings; each must have a
            ``'media_id'`` entry if any of its metadata fields match.
    """
    tags = []
    for m in VALID_METADATA_IDS:
        # dict views are not indexable on Python 3, so materialise the items
        mkey, mid = list(m.items())[0]
        tag_codes = tags_in_tag_set(TOOL_API_KEY, mid)
        for source in source_list:
            if mkey not in source:
                continue
            metadata_tag_name = source[mkey]
            if metadata_tag_name in ['', None]:
                continue
            # hack until we have a better match check
            if mkey == METADATA_PUB_COUNTRY_NAME:
                # publication country tags follow the template pub_###
                matching = [
                    t for t in tag_codes
                    if t['tag'] == 'pub_' + metadata_tag_name
                ]
            else:
                matching = [
                    t for t in tag_codes if t['tag'] == metadata_tag_name
                ]
            if matching:
                metadata_tag_id = matching[0]['tags_id']
                logger.debug('found metadata to add %s', metadata_tag_id)
                tags.append(
                    MediaTag(source['media_id'],
                             tags_id=metadata_tag_id,
                             action=TAG_ACTION_ADD))

    # now do all the tags in parallel batches so it happens quickly
    if tags:
        # do 50 tags in each request
        chunks = [tags[x:x + 50] for x in range(0, len(tags), 50)]
        pool = Pool(processes=MEDIA_METADATA_UPDATE_POOL_SIZE)
        try:
            # blocks until every batch has been processed
            pool.map(_tag_media_worker, chunks)
        finally:
            # always release the worker processes, even if a batch raised
            pool.terminate()
def source_create():
    """Create a media source from the posted form and apply its metadata tags.

    Reads ``name`` and ``url`` (required) plus the optional ``editor_notes``,
    ``public_notes`` and ``monitored`` fields from ``request.form``, and
    creates the source with the tag ids returned by
    ``tag_ids_from_collections_param()``. Unless the create call reports an
    error, each metadata value present in the form is then added as a tag with
    ``clear_others=True``. A source whose status is ``'new'`` also gets a feed
    scrape started.

    Returns:
        A JSON response built from the first entry returned by ``mediaCreate``.
    """
    user_mc = user_admin_mediacloud_client()
    name = request.form['name']
    url = request.form['url']
    editor_notes = request.form.get('editor_notes')  # this is optional
    public_notes = request.form.get('public_notes')
    monitored = request.form.get('monitored')
    # parse out any tag to add (ie. collections and metadata)
    tag_ids_to_add = tag_ids_from_collections_param()
    valid_metadata = [
        {'form_key': 'publicationCountry',
         'tag_sets_id': TAG_SETS_ID_PUBLICATION_COUNTRY},
        {'form_key': 'publicationState',
         'tag_sets_id': TAG_SETS_ID_PUBLICATION_STATE},
        # was misspelled 'primaryLanguageg', so the language was never applied
        {'form_key': 'primaryLanguage',
         'tag_sets_id': TAG_SETS_ID_PRIMARY_LANGUAGE},
        {'form_key': 'countryOfFocus',
         'tag_sets_id': TAG_SETS_ID_COUNTRY_OF_FOCUS},
        {'form_key': 'mediaType',
         'tag_sets_id': TAG_SETS_ID_MEDIA_TYPE},
    ]
    source_to_create = {
        'name': name,
        'url': url,
        'editor_notes': editor_notes,
        'public_notes': public_notes,
        'is_monitored': monitored,
        'tags_ids': tag_ids_to_add,
    }
    # need just the first entry, since we only create one
    result = user_mc.mediaCreate([source_to_create])[0]
    if result['status'] != "error":
        # if it worked, update any metadata, because we need to remove the
        # other tags in each set
        for metadata_item in valid_metadata:
            # this is optional
            metadata_tag_id = request.form.get(metadata_item['form_key'])
            # skip the same "no value" placeholders that source_update treats
            # as empty, instead of sending them to the API as a tag id
            if metadata_tag_id in [None, '', 'null', 'undefined']:
                continue
            # make sure to clear any other values set in this metadata tag set
            user_mc.tagMedia(
                tags=[
                    MediaTag(result['media_id'],
                             tags_id=metadata_tag_id,
                             action=TAG_ACTION_ADD)
                ],
                clear_others=True)
        if result['status'] == 'new':
            # if it is a really new source, kick off a scraping job to find
            # any RSS feeds
            user_mc.feedsScrape(result['media_id'])
    return jsonify(result)
def source_update(media_id):
    """Update a media source's basic info, collections and metadata tags.

    Reads ``name`` and ``url`` (required) plus the optional ``editor_notes``,
    ``public_notes`` and ``monitored`` fields from ``request.form`` and sends
    them to ``mediaUpdate``. Collection tags are then synced with the ids from
    ``tag_ids_from_collections_param()``. Finally each metadata tag set is
    made to match its form field: an empty field removes the existing tag(s)
    in that set, and a value is added with ``clear_others=True`` unless it is
    already the only tag in that set.

    Args:
        media_id: id of the media source to update.

    Returns:
        A JSON response with the result of the ``mediaUpdate`` call only;
        errors in the later tagging calls are not reflected in it.
    """
    user_mc = user_admin_mediacloud_client()
    # update the basic info
    name = request.form['name']
    url = request.form['url']
    editor_notes = request.form.get('editor_notes')  # this is optional
    public_notes = request.form.get('public_notes')  # this is optional
    monitored = request.form.get('monitored')
    result = user_mc.mediaUpdate(
        media_id, {
            'url': url,
            'name': name,
            'editor_notes': editor_notes,
            'is_monitored': monitored,
            'public_notes': public_notes,
        })

    # now we need to update the collections separately, because they are tags
    # on the media source
    source = user_mc.media(media_id)
    existing_collection_ids = [
        t['tags_id'] for t in source['media_source_tags']
        if t['tag_sets_id'] in VALID_COLLECTION_TAG_SETS_IDS
    ]
    tag_ids_to_add = tag_ids_from_collections_param()
    tag_ids_to_remove = list(
        set(existing_collection_ids) - set(tag_ids_to_add))
    tags_to_add = [
        MediaTag(media_id, tags_id=cid, action=TAG_ACTION_ADD)
        for cid in tag_ids_to_add if cid not in existing_collection_ids
    ]
    tags_to_remove = [
        MediaTag(media_id, tags_id=cid, action=TAG_ACTION_REMOVE)
        for cid in tag_ids_to_remove
    ]
    tags = tags_to_add + tags_to_remove
    if tags:  # don't make extraneous calls
        user_mc.tagMedia(tags=tags)

    # now update the metadata too
    valid_metadata = [
        {'form_key': 'publicationCountry',
         'tag_sets_id': TAG_SETS_ID_PUBLICATION_COUNTRY},
        {'form_key': 'publicationState',
         'tag_sets_id': TAG_SETS_ID_PUBLICATION_STATE},
        {'form_key': 'primaryLanguage',
         'tag_sets_id': TAG_SETS_ID_PRIMARY_LANGUAGE},
        {'form_key': 'countryOfFocus',
         'tag_sets_id': TAG_SETS_ID_COUNTRY_OF_FOCUS},
        {'form_key': 'mediaType',
         'tag_sets_id': TAG_SETS_ID_MEDIA_TYPE},
    ]
    # the metadata tags already on the source do not change per form field,
    # so collect them once instead of on every loop iteration
    existing_metadata_tags = [
        t for t in source['media_source_tags']
        if is_metadata_tag_set(t['tag_sets_id'])
    ]
    for metadata_item in valid_metadata:
        # this is optional
        metadata_tag_id = request.form.get(metadata_item['form_key'])
        tags_in_this_set = [
            t for t in existing_metadata_tags
            if t['tag_sets_id'] == metadata_item['tag_sets_id']
        ]
        # form field check
        if metadata_tag_id in [None, '', 'null', 'undefined']:
            # we want to remove it if there was one there
            for stale_tag in tags_in_this_set:
                tag = MediaTag(media_id,
                               tags_id=stale_tag['tags_id'],
                               action=TAG_ACTION_REMOVE)
                user_mc.tagMedia([tag])
            continue
        # The form value is a string while tags_id on the source may be an
        # int, so compare as strings. Skip the call only when the requested
        # tag is already the sole tag in this set.
        current_ids = [str(t['tags_id']) for t in tags_in_this_set]
        if current_ids != [str(metadata_tag_id)]:
            # need to add it and clear out the other
            tag = MediaTag(media_id,
                           tags_id=metadata_tag_id,
                           action=TAG_ACTION_ADD)
            user_mc.tagMedia([tag], clear_others=True)
    # result the success of the media update call - would be better to catch
    # errors in any of these calls...
    return jsonify(result)