def api_collection_search(search_str):
    """
    Search collections in the valid collection tag sets and return the top matches as JSON.

    Users without ROLE_MEDIA_EDIT only get public results. At most MAX_COLLECTIONS
    results are returned; each one is passed through add_user_favorite_flag_to_collections
    before being serialized.

    :param search_str: the text handed to collection_search
    :return: JSON response of the form {'list': [...]}
    """
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    results = collection_search(search_str, public_only, VALID_COLLECTION_TAG_SETS_IDS)
    # a slice never overruns a short list, so the cap is applied directly with
    # MAX_COLLECTIONS instead of going through a separately hard-coded threshold
    trimmed = results[:MAX_COLLECTIONS]
    add_user_favorite_flag_to_collections(trimmed)
    return jsonify({'list': trimmed})
def collection_search(search_str):
    """
    Search collections set-by-set and return the matches as one flat JSON list.

    Each group returned by _matching_collections_by_set is capped at MAX_COLLECTIONS
    entries before the groups are concatenated. Users without ROLE_MEDIA_EDIT only
    get public results.

    :param search_str: the text handed to _matching_collections_by_set
    :return: JSON response of the form {'list': [...]}
    """
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    matches_by_set = _matching_collections_by_set(search_str, public_only)
    # cap every group, then flatten the groups in their original order
    collections = [
        collection
        for group in matches_by_set
        for collection in group[:MAX_COLLECTIONS]
    ]
    add_user_favorite_flag_to_collections(collections)
    return jsonify({'list': collections})
def api_mediapicker_collection_search():
    """
    Search collections for the media picker and return up to MAX_COLLECTIONS matches as JSON.

    Reads two query arguments from the request: `media_keyword` (the search text) and
    `which_set` (comma-separated tag set ids). Users without ROLE_MEDIA_EDIT only get
    public results. How long each phase took is written to the debug log.

    :return: JSON response of the form {'list': [...]}
    """
    t0 = time.time()
    # both switches are hard-wired here, so the source-count branch below is currently skipped
    use_pool = None
    add_source_counts = False
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    search_str = request.args['media_keyword']
    tag_sets_id_list = request.args['which_set'].split(',')
    t1 = time.time()
    results = collection_search(search_str, public_only, tag_sets_id_list)
    t2 = time.time()
    trimmed_collections = results[:MAX_COLLECTIONS]
    set_of_queried_collections = []
    if add_source_counts:
        if len(trimmed_collections) > 0:
            if use_pool:
                pool = Pool(processes=STORY_COUNT_POOL_SIZE)
                try:
                    set_of_queried_collections = pool.map(collection_details_worker, trimmed_collections)
                finally:
                    # release the pool even when a worker raised
                    pool.close()
            else:
                set_of_queried_collections = [collection_details_worker(c) for c in trimmed_collections]
    else:
        # skip adding in the source count details all together
        set_of_queried_collections = trimmed_collections
    t3 = time.time()
    # NOTE(review): the sort is gated on use_pool rather than add_source_counts, so it
    # never runs with the current settings - confirm which flag was intended
    if use_pool is not None:
        set_of_queried_collections = sorted(set_of_queried_collections, key=itemgetter('media_count'),
                                            reverse=True)
    t4 = time.time()
    logger.debug("total: {}".format(t4 - t0))
    logger.debug(" load: {}".format(t1-t0))
    logger.debug(" search: {}".format(t2 - t1))
    logger.debug(" media_count: {}".format(t3 - t2))
    logger.debug(" sort: {}".format(t4 - t3))
    return jsonify({'list': set_of_queried_collections})
def source_stats(media_id):
    """
    Collect summary statistics about one media source and return them as JSON.

    The response holds the story count, two health fields (`num_stories_90` and
    `start_date`, None when absent from the health data), the number of collections
    visible to the current user, and the geography / NYT-theme tag coverage.

    :param media_id: id of the media source to report on
    :return: JSON response with the keys story_count, num_stories_90, start_date,
             collection_count, geoPct and nytPct
    """
    username = user_name()
    media_query = "(media_id:{})".format(media_id)
    results = {}

    # story count
    results['story_count'] = apicache.source_story_count(media_query)

    # health: copy over the fields we report, defaulting to None when missing
    media_health = _cached_media_source_health(username, media_id)
    for health_key in ('num_stories_90', 'start_date'):
        results[health_key] = media_health.get(health_key)

    # collections: only valid tag sets count, and hidden ones only count for media editors
    info = _media_source_details(media_id)
    can_see_private = user_has_auth_role(ROLE_MEDIA_EDIT)
    results['collection_count'] = sum(
        1 for tag in info['media_source_tags']
        if tag['tag_sets_id'] in VALID_COLLECTION_TAG_SETS_IDS
        and (tag['show_on_media'] == 1 or can_see_private)
    )

    # geography tags
    results['geoPct'] = apicache.tag_coverage_pct(media_query, TAG_SET_GEOCODER_VERSION)
    # nyt theme
    results['nytPct'] = apicache.tag_coverage_pct(media_query, TAG_SET_NYT_LABELS_VERSION)
    return jsonify(results)
def api_collection_search(search_str):
    """
    Search collections in the valid collection tag sets and return the top matches as JSON.

    Users without ROLE_MEDIA_EDIT only get public results. The result list is capped at
    MAX_COLLECTIONS entries, and every returned collection is passed through
    add_user_favorite_flag_to_collections before serialization.

    :param search_str: the text handed to collection_search
    :return: JSON response of the form {'list': [...]}
    """
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    results = collection_search(search_str, public_only, VALID_COLLECTION_TAG_SETS_IDS)
    # slicing is safe on lists shorter than the cap, so no length check is needed and
    # the limit always comes from MAX_COLLECTIONS rather than a hard-coded number
    trimmed = results[:MAX_COLLECTIONS]
    add_user_favorite_flag_to_collections(trimmed)
    return jsonify({'list': trimmed})
def api_explorer_collection_set(tag_sets_id):
    """
    Return the collections in a tag set (public only, or public and private, depending
    on the user's role) as JSON. Not cached because this can change, and load time
    isn't terrible.

    :param tag_sets_id: the tag set to query for collections
    :return: JSON response built from the result of _tag_set_with_collections
    """
    include_private = user_has_auth_role(ROLE_MEDIA_EDIT)
    tag_set_info = _tag_set_with_collections(tag_sets_id, include_private)
    # NOTE: favorite flags are not added to these collections (that call is disabled)
    return jsonify(tag_set_info)
def api_media_source_details(media_id):
    """
    Return the details of one media source as JSON and record the view in analytics.

    The source details are extended with a `health` entry and a `scrape_status` entry
    (None unless the user has ROLE_MEDIA_EDIT), and favorite flags are added to the
    source and to its `media_source_tags`.

    :param media_id: id of the media source to look up
    :return: JSON response with the augmented source details
    """
    source_health = _cached_media_source_health(user_mediacloud_key(), media_id)
    info = _media_source_details(media_id)
    info['health'] = source_health
    admin_client = user_admin_mediacloud_client()
    # need to know if scrape is running, but only media editors get to see it
    info['scrape_status'] = (
        admin_client.feedsScrapeStatus(media_id) if user_has_auth_role(ROLE_MEDIA_EDIT) else None
    )
    add_user_favorite_flag_to_sources([info])
    add_user_favorite_flag_to_collections(info['media_source_tags'])
    analytics_db.increment_count(analytics_db.TYPE_MEDIA, media_id, analytics_db.ACTION_SOURCE_MGR_VIEW)
    return jsonify(info)
def api_media_source_details(media_id):
    """
    Return the details of one media source as JSON.

    The source details are extended with a `health` entry and a `scrape_status` entry
    (None unless the user has ROLE_MEDIA_EDIT), and favorite flags are added to the
    source and to its `media_source_tags`.

    :param media_id: id of the media source to look up
    :return: JSON response with the augmented source details
    """
    source_health = _cached_media_source_health(user_mediacloud_key(), media_id)
    info = _media_source_details(media_id)
    info['health'] = source_health
    admin_client = user_admin_mediacloud_client()
    scrape_status = None
    if user_has_auth_role(ROLE_MEDIA_EDIT):
        # need to know if scrape is running
        scrape_status = admin_client.feedsScrapeStatus(media_id)
    info['scrape_status'] = scrape_status
    add_user_favorite_flag_to_sources([info])
    add_user_favorite_flag_to_collections(info['media_source_tags'])
    return jsonify(info)
def api_collection_sources(collection_id):
    """
    Return the media sources in a collection as JSON.

    Favorite flags are added to every source. When the request carries `details=true`
    and the user has ROLE_MEDIA_EDIT, the sources are additionally run through
    fetch_collection_source_feed_info.

    :param collection_id: the tags_id of the collection to list
    :return: JSON response of the form {'tags_id': ..., 'sources': [...]}
    """
    wants_details = request.args.get('details') == 'true'
    sources = media_with_tag(user_mediacloud_key(), collection_id)
    add_user_favorite_flag_to_sources(sources)
    # the role is only checked when details were actually asked for
    if wants_details and user_has_auth_role(ROLE_MEDIA_EDIT):
        sources = fetch_collection_source_feed_info(sources)
    return jsonify({'tags_id': collection_id, 'sources': sources})
def api_explorer_collection_set(tag_sets_id):
    """
    Return the collections in a tag set (public only, or public and private, depending
    on the user's role) as JSON. Not cached because this can change, and load time
    isn't terrible.

    Logged-in users with ROLE_MEDIA_EDIT get the result of
    _tag_set_with_private_collections; everyone else gets the result of
    _tag_set_with_public_collections.

    :param tag_sets_id: the tag set to query for collections
    :return: JSON response built from the tag set info
    """
    # plain truthiness rather than an identity check against True, so a truthy
    # non-bool answer from the role check is not silently treated as "no access"
    if is_user_logged_in() and user_has_auth_role(ROLE_MEDIA_EDIT):
        info = _tag_set_with_private_collections(tag_sets_id)
    else:
        info = _tag_set_with_public_collections(tag_sets_id)
    return jsonify(info)
def api_explorer_collection_set(tag_sets_id):
    """
    Return the collections in a tag set (public only, or public and private, depending
    on the user's role) as JSON. Not cached because this can change, and load time
    isn't terrible.

    Logged-in users with ROLE_MEDIA_EDIT get the result of
    _tag_set_with_private_collections; everyone else gets the result of
    _tag_set_with_public_collections.

    :param tag_sets_id: the tag set to query for collections
    :return: JSON response built from the tag set info
    """
    # both branches assign info, so no placeholder value is needed up front; the role
    # check is used for its truthiness instead of being compared against True
    if is_user_logged_in() and user_has_auth_role(ROLE_MEDIA_EDIT):
        info = _tag_set_with_private_collections(tag_sets_id)
    else:
        info = _tag_set_with_public_collections(tag_sets_id)
    return jsonify(info)
def api_mediapicker_source_search():
    """
    Search media sources for the media picker and return them, with details, as JSON.

    Reads the `mediaKeyword` query argument. Each group returned by
    _matching_sources_by_set is capped at MAX_SOURCES entries, the groups are flattened,
    and every source is mapped through source_details_worker in a process pool. Users
    without ROLE_MEDIA_EDIT only get public results.

    :return: JSON response of the form {'list': [...]}
    """
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    search_str = request.args['mediaKeyword']
    results = _matching_sources_by_set(search_str, public_only)
    # cap every group, then flatten the groups in their original order
    flat_list_of_sources = [item for group in results for item in group[:MAX_SOURCES]]
    set_of_queried_sources = []
    if flat_list_of_sources:
        pool = Pool(processes=STORY_COUNT_POOL_SIZE)
        try:
            set_of_queried_sources = pool.map(source_details_worker, flat_list_of_sources)
        finally:
            # always shut the workers down, even when one of the lookups raised
            pool.terminate()
    return jsonify({'list': set_of_queried_sources})
def api_collection_set(tag_sets_id):
    """
    Return a list of all the (public only or public and private, depending on user role)
    collections in a tag set. Not cached because this can change, and load time isn't
    terrible.

    The `tags` entry of the tag set info is replaced by a `collections` entry holding
    the same items, flagged with user favorites and sorted by label and then tag.

    :param tag_sets_id: the tag set to query for collections
    :return: JSON response with the tag set info and its sorted `collections`
    """
    if user_has_auth_role(ROLE_MEDIA_EDIT):
        info = apicache.tag_set_with_private_collections(user_mediacloud_key(), tag_sets_id)
    else:
        info = apicache.tag_set_with_public_collections(user_mediacloud_key(), tag_sets_id)
    add_user_favorite_flag_to_collections(info['tags'])
    # rename to make more sense here; a missing (None) label or tag sorts as '' so that
    # comparing it with real strings cannot raise a TypeError
    info['collections'] = sorted(info['tags'], key=lambda t: (t['label'] or '', t['tag'] or ''))
    del info['tags']
    return jsonify(info)
def api_collection_set(tag_sets_id):
    """
    Return a list of all the (public only or public and private, depending on user role)
    collections in a tag set. Not cached because this can change, and load time isn't
    terrible.

    Every collection gets a `sort_key` entry (its label, or its tag when the label is
    empty) and the `tags` entry of the tag set info is replaced by a `collections`
    entry sorted on that key.

    :param tag_sets_id: the tag set to query for collections
    :return: JSON response with the tag set info and its sorted `collections`
    """
    api_key = user_mediacloud_key
    if user_has_auth_role(ROLE_MEDIA_EDIT):
        info = apicache.tag_set_with_private_collections(api_key(), tag_sets_id)
    else:
        info = apicache.tag_set_with_public_collections(api_key(), tag_sets_id)
    add_user_favorite_flag_to_collections(info['tags'])
    # fall back to the tag whenever there is no usable label to sort on
    for collection in info['tags']:
        collection['sort_key'] = collection['label'] or collection['tag']
    # rename to make more sense here
    ordered = list(info['tags'])
    ordered.sort(key=itemgetter('sort_key'))
    info['collections'] = ordered
    del info['tags']
    return jsonify(info)
def _tag_coverage_ratio(tag_counts, total_story_count):
    """
    Return the first tag count as a fraction of the total story count.

    :param tag_counts: list of dicts with a 'count' entry, as returned by storyTagCount
    :param total_story_count: the number of stories to divide by
    :return: the ratio as a float, or 0 when there are no tag counts or no stories
    """
    if len(tag_counts) == 0:
        return 0
    total = float(total_story_count)
    if total == 0:
        # tag counts without any stories would otherwise divide by zero
        return 0
    return float(tag_counts[0]['count']) / total


def source_stats(media_id):
    """
    Collect summary statistics about one media source and return them as JSON.

    The response holds the story count, two health fields (`num_stories_90` and
    `start_date`, None when absent from the health data), the number of collections
    visible to the current user, and the share of stories tagged by the geocoder and
    NYT-labels tag sets.

    :param media_id: id of the media source to report on
    :return: JSON response with the keys story_count, num_stories_90, start_date,
             collection_count, geoPct and nytPct
    """
    username = user_name()
    user_mc = user_admin_mediacloud_client()
    results = {}
    # story count
    media_query = "(media_id:{})".format(media_id)
    source_specific_story_count = cached_source_story_count(username, media_query)
    results['story_count'] = source_specific_story_count
    # health
    media_health = _cached_media_source_health(username, media_id)
    results['num_stories_90'] = media_health.get('num_stories_90')
    results['start_date'] = media_health.get('start_date')
    # collections: only valid tag sets count, and hidden ones only count for media editors
    info = _media_source_details(media_id)
    user_can_see_private_collections = user_has_auth_role(ROLE_MEDIA_EDIT)
    visible_collections = [
        c for c in info['media_source_tags']
        if c['tag_sets_id'] in VALID_COLLECTION_TAG_SETS_IDS
        and (c['show_on_media'] == 1 or user_can_see_private_collections)
    ]
    results['collection_count'] = len(visible_collections)
    # geography tags
    geo_tag_counts = user_mc.storyTagCount(solr_query=media_query, tag_sets_id=TAG_SET_GEOCODER_VERSION)
    results['geoPct'] = _tag_coverage_ratio(geo_tag_counts, source_specific_story_count)
    # nyt theme
    nyt_tag_counts = user_mc.storyTagCount(solr_query=media_query, tag_sets_id=TAG_SET_NYT_LABELS_VERSION)
    results['nytPct'] = _tag_coverage_ratio(nyt_tag_counts, source_specific_story_count)
    return jsonify(results)
def api_collection_sources(collection_id):
    """
    Return the media sources in a collection as JSON.

    Favorite flags are added to every source. When the request carries `details=true`
    and the user has ROLE_MEDIA_EDIT, each source also gets `latest_scrape_job` and
    `active_feed_count` entries, looked up per media id in a process pool.

    :param collection_id: the tags_id of the collection to list
    :return: JSON response of the form {'tags_id': ..., 'sources': [...]}
    """
    add_in_details = request.args.get('details') == 'true'
    results = {'tags_id': collection_id}
    media_in_collection = media_with_tag(user_mediacloud_key(), collection_id)
    add_user_favorite_flag_to_sources(media_in_collection)
    if add_in_details and user_has_auth_role(ROLE_MEDIA_EDIT):
        # for editing users, add in last scrape and active feed count (if requested)
        media_ids = [m['media_id'] for m in media_in_collection]
        pool = Pool(processes=FEED_SCRAPE_JOB_POOL_SIZE)
        try:
            job_results = pool.map(_media_list_edit_worker, media_ids)  # blocks until they are all done
        finally:
            # always shut the workers down, even when one of the jobs raised
            pool.terminate()
        job_by_media_id = {j['media_id']: j for j in job_results}
        for m in media_in_collection:
            job = job_by_media_id[m['media_id']]
            m['latest_scrape_job'] = job['latest_scrape_job']
            m['active_feed_count'] = job['active_feed_count']
    results['sources'] = media_in_collection
    return jsonify(results)
def source_stats(media_id):
    """
    Collect summary statistics about one media source and return them as JSON.

    The response holds the story count, two health fields (`num_stories_90` and
    `start_date`, None when absent from the health data), the number of collections
    visible to the current user, and the geography / NYT-theme tag coverage.

    :param media_id: id of the media source to report on
    :return: JSON response with the keys story_count, num_stories_90, start_date,
             collection_count, geoPct and nytPct
    """
    username = user_name()
    # NOTE(review): the client is never used below; the call is kept as-is because its
    # side effects are not visible from here - confirm whether it can be dropped
    user_mc = user_admin_mediacloud_client()
    media_query = "(media_id:{})".format(media_id)

    # story count
    story_count = apicache.source_story_count(user_mediacloud_key(), media_query)

    # health
    media_health = _cached_media_source_health(username, media_id)
    num_stories_90 = media_health.get('num_stories_90')
    start_date = media_health.get('start_date')

    # collections: only valid tag sets count, and hidden ones only count for media editors
    info = _media_source_details(media_id)
    can_see_private = user_has_auth_role(ROLE_MEDIA_EDIT)
    collection_count = 0
    for tag in info['media_source_tags']:
        if tag['tag_sets_id'] not in VALID_COLLECTION_TAG_SETS_IDS:
            continue
        if tag['show_on_media'] == 1 or can_see_private:
            collection_count += 1

    # geography tags
    geo_pct = apicache.tag_coverage_pct(user_mediacloud_key(), media_query, TAG_SET_GEOCODER_VERSION)
    # nyt theme
    nyt_pct = apicache.tag_coverage_pct(user_mediacloud_key(), media_query, TAG_SET_NYT_LABELS_VERSION)

    return jsonify({
        'story_count': story_count,
        'num_stories_90': num_stories_90,
        'start_date': start_date,
        'collection_count': collection_count,
        'geoPct': geo_pct,
        'nytPct': nyt_pct,
    })
def download_sources_csv(all_media, file_prefix):
    """
    Download a list of media sources as CSV, with the columns chosen by user role.

    Users with ROLE_MEDIA_EDIT get SOURCES_TEMPLATE_PROPS_EDIT; everyone else gets
    SOURCES_TEMPLATE_PROPS_VIEW (no editor_notes).

    :param all_media: the media sources handed on to download_media_csv
    :param file_prefix: the file prefix handed on to download_media_csv
    :return: whatever download_media_csv returns
    """
    template_props = (
        SOURCES_TEMPLATE_PROPS_EDIT
        if user_has_auth_role(ROLE_MEDIA_EDIT)
        else SOURCES_TEMPLATE_PROPS_VIEW
    )
    return download_media_csv(all_media, file_prefix, template_props)