Beispiel #1
0
def api_collection_search(search_str):
    """
    Search the valid collection tag sets and return at most MAX_COLLECTIONS matches as JSON.
    :param search_str: the text to search collections for
    :return: JSON response shaped like {'list': [...]}
    """
    # public_only is True for any user lacking ROLE_MEDIA_EDIT
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    results = collection_search(search_str, public_only, VALID_COLLECTION_TAG_SETS_IDS)
    # a slice copes with short lists on its own; the old check compared against a hard-coded 20
    # that could disagree with MAX_COLLECTIONS and let an over-long list through
    trimmed = results[:MAX_COLLECTIONS]
    add_user_favorite_flag_to_collections(trimmed)
    return jsonify({'list': trimmed})
def collection_search(search_str):
    """
    Search collections set-by-set and return the combined matches as JSON.
    Each group returned by _matching_collections_by_set is cut to MAX_COLLECTIONS entries
    before the groups are merged into a single list.
    :param search_str: the text to search collections for
    :return: JSON response shaped like {'list': [...]}
    """
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    matches_by_set = _matching_collections_by_set(search_str, public_only)
    capped_groups = [group[:MAX_COLLECTIONS] for group in matches_by_set]
    collections = []
    for group in capped_groups:
        collections.extend(group)
    add_user_favorite_flag_to_collections(collections)
    return jsonify({'list': collections})
Beispiel #3
0
def api_mediapicker_collection_search():
    """
    Search collections for the media picker and return up to MAX_COLLECTIONS of them as JSON.
    Reads 'media_keyword' and the comma-separated 'which_set' from the request args.
    With the switches as hard-wired below (add_source_counts False, use_pool None) the
    detail-fetching and sorting steps are skipped and the trimmed search results are returned as-is.
    :return: JSON response shaped like {'list': [...]}
    """
    t_start = time.time()
    # hard-wired switches controlling the optional detail lookup and sort below
    use_pool = None
    add_source_counts = False
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    keyword = request.args['media_keyword']
    tag_sets_ids = request.args['which_set'].split(',')
    t_loaded = time.time()
    matches = collection_search(keyword, public_only, tag_sets_ids)
    t_searched = time.time()
    top_matches = matches[:MAX_COLLECTIONS]
    if not add_source_counts:
        # skip adding in the source count details all together
        collections = top_matches
    elif len(top_matches) == 0:
        collections = []
    elif use_pool:
        pool = Pool(processes=STORY_COUNT_POOL_SIZE)
        collections = pool.map(collection_details_worker, top_matches)
        pool.close()
    else:
        collections = [collection_details_worker(match) for match in top_matches]
    t_counted = time.time()
    if use_pool is not None:
        collections = sorted(collections, key=itemgetter('media_count'), reverse=True)
    t_sorted = time.time()
    timings = (
        ("total: {}", t_sorted - t_start),
        ("  load: {}", t_loaded - t_start),
        ("  search: {}", t_searched - t_loaded),
        ("  media_count: {}", t_counted - t_searched),
        ("  sort: {}", t_sorted - t_counted),
    )
    for template, elapsed in timings:
        logger.debug(template.format(elapsed))
    return jsonify({'list': collections})
def api_mediapicker_collection_search():
    """
    Media-picker collection search: look up collections matching the 'media_keyword' request arg
    inside the tag sets listed (comma-separated) in the 'which_set' request arg.
    The result is cut to MAX_COLLECTIONS entries. Because add_source_counts is fixed to False and
    use_pool to None in this function, no per-collection details are fetched and no sort happens.
    :return: JSON response shaped like {'list': [...]}
    """
    started = time.time()
    use_pool = None
    add_source_counts = False
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    search_str = request.args['media_keyword']
    tag_sets_id_list = request.args['which_set'].split(',')
    args_read = time.time()
    found = collection_search(search_str, public_only, tag_sets_id_list)
    search_done = time.time()
    candidates = found[:MAX_COLLECTIONS]
    detailed = []
    if add_source_counts and len(candidates) > 0:
        if use_pool:
            worker_pool = Pool(processes=STORY_COUNT_POOL_SIZE)
            detailed = worker_pool.map(collection_details_worker, candidates)
            worker_pool.close()
        else:
            detailed = [collection_details_worker(candidate) for candidate in candidates]
    elif not add_source_counts:
        # no source counts requested, so hand back the trimmed search results untouched
        detailed = candidates
    counts_done = time.time()
    if use_pool is not None:
        detailed = sorted(detailed, key=itemgetter('media_count'), reverse=True)
    finished = time.time()
    # timing breakdown of each phase, for debugging
    logger.debug("total: {}".format(finished - started))
    logger.debug("  load: {}".format(args_read - started))
    logger.debug("  search: {}".format(search_done - args_read))
    logger.debug("  media_count: {}".format(counts_done - search_done))
    logger.debug("  sort: {}".format(finished - counts_done))
    return jsonify({'list': detailed})
Beispiel #5
0
def source_stats(media_id):
    """
    Gather summary statistics for one media source and return them as JSON.
    The response carries: story_count, num_stories_90, start_date, collection_count,
    geoPct and nytPct.
    :param media_id: the id of the media source to report on
    :return: JSON response holding the stats listed above
    """
    username = user_name()
    media_query = "(media_id:{})".format(media_id)
    stats = {'story_count': apicache.source_story_count(media_query)}
    # health: copy over the two fields of interest, defaulting to None when absent
    health = _cached_media_source_health(username, media_id)
    for field in ('num_stories_90', 'start_date'):
        stats[field] = health[field] if field in health else None
    # count the tags that live in a valid collection tag set and that this user may see
    details = _media_source_details(media_id)
    can_see_private = user_has_auth_role(ROLE_MEDIA_EDIT)
    collection_count = 0
    for tag in details['media_source_tags']:
        if tag['tag_sets_id'] not in VALID_COLLECTION_TAG_SETS_IDS:
            continue
        if (tag['show_on_media'] == 1) or can_see_private:
            collection_count += 1
    stats['collection_count'] = collection_count
    # geography tags
    stats['geoPct'] = apicache.tag_coverage_pct(media_query, TAG_SET_GEOCODER_VERSION)
    # nyt theme
    stats['nytPct'] = apicache.tag_coverage_pct(media_query, TAG_SET_NYT_LABELS_VERSION)
    return jsonify(stats)
Beispiel #6
0
def api_collection_search(search_str):
    """
    Search across VALID_COLLECTION_TAG_SETS_IDS and return no more than MAX_COLLECTIONS
    matching collections as JSON.
    :param search_str: the text to search collections for
    :return: JSON response shaped like {'list': [...]}
    """
    # public_only is True unless the user holds ROLE_MEDIA_EDIT
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    results = collection_search(search_str, public_only, VALID_COLLECTION_TAG_SETS_IDS)
    # slice directly: the previous length check used a literal 20 rather than MAX_COLLECTIONS,
    # so the cap was only honoured when the two values happened to match
    trimmed = results[:MAX_COLLECTIONS]
    add_user_favorite_flag_to_collections(trimmed)
    return jsonify({'list': trimmed})
Beispiel #7
0
def api_explorer_collection_set(tag_sets_id):
    """
    Look up one tag set along with its collections and return it as JSON.
    Whether the user holds ROLE_MEDIA_EDIT is forwarded to _tag_set_with_collections
    (presumably to decide if private collections are included - confirm against that helper).
    :param tag_sets_id: the tag set to query for collections
    :return: JSON response built from whatever _tag_set_with_collections returns
    """
    has_edit_role = user_has_auth_role(ROLE_MEDIA_EDIT)
    tag_set = _tag_set_with_collections(tag_sets_id, has_edit_role)
    return jsonify(tag_set)
Beispiel #8
0
def api_media_source_details(media_id):
    """
    Assemble the detail record for one media source and return it as JSON.
    Adds 'health' and 'scrape_status' to the source details, flags user favorites on the source
    and on its 'media_source_tags', and records a source-manager view in analytics_db.
    :param media_id: the id of the media source to describe
    :return: JSON response holding the enriched source details
    """
    api_key = user_mediacloud_key()
    source_health = _cached_media_source_health(api_key, media_id)
    details = _media_source_details(media_id)
    details['health'] = source_health
    admin_client = user_admin_mediacloud_client()
    # only users with the edit role need to know if a scrape is running
    is_editor = user_has_auth_role(ROLE_MEDIA_EDIT)
    details['scrape_status'] = admin_client.feedsScrapeStatus(media_id) if is_editor else None
    add_user_favorite_flag_to_sources([details])
    add_user_favorite_flag_to_collections(details['media_source_tags'])
    analytics_db.increment_count(analytics_db.TYPE_MEDIA, media_id, analytics_db.ACTION_SOURCE_MGR_VIEW)
    return jsonify(details)
def api_media_source_details(media_id):
    """
    Return the details of one media source as JSON, enriched with its 'health' record,
    a 'scrape_status' (None unless the user holds ROLE_MEDIA_EDIT) and user-favorite flags
    on both the source and its 'media_source_tags'.
    :param media_id: the id of the media source to describe
    :return: JSON response holding the enriched source details
    """
    source_health = _cached_media_source_health(user_mediacloud_key(), media_id)
    source = _media_source_details(media_id)
    source['health'] = source_health
    mc_client = user_admin_mediacloud_client()
    scrape_status = None
    if user_has_auth_role(ROLE_MEDIA_EDIT):
        # need to know if scrape is running
        scrape_status = mc_client.feedsScrapeStatus(media_id)
    source['scrape_status'] = scrape_status
    add_user_favorite_flag_to_sources([source])
    add_user_favorite_flag_to_collections(source['media_source_tags'])
    return jsonify(source)
Beispiel #10
0
def api_collection_sources(collection_id):
    """
    List the media sources tagged with a collection and return them as JSON.
    When the request carries details=true and the user holds ROLE_MEDIA_EDIT, the sources are
    additionally passed through fetch_collection_source_feed_info.
    :param collection_id: the tags_id of the collection
    :return: JSON response shaped like {'tags_id': ..., 'sources': [...]}
    """
    wants_details = ('details' in request.args) and (request.args['details'] == 'true')
    sources = media_with_tag(user_mediacloud_key(), collection_id)
    add_user_favorite_flag_to_sources(sources)
    if wants_details and user_has_auth_role(ROLE_MEDIA_EDIT):
        sources = fetch_collection_source_feed_info(sources)
    return jsonify({'tags_id': collection_id, 'sources': sources})
def api_explorer_collection_set(tag_sets_id):
    """
    Return a tag set and its collections as JSON. Logged-in users for whom the ROLE_MEDIA_EDIT
    check comes back exactly True get the result of _tag_set_with_private_collections; everyone
    else gets the result of _tag_set_with_public_collections.
    Not cached because this can change, and load time isn't terrible.
    :param tag_sets_id: the tag set to query for collections
    :return: JSON response built from the chosen loader's result
    """
    sees_private = is_user_logged_in() and user_has_auth_role(ROLE_MEDIA_EDIT) is True
    load_tag_set = _tag_set_with_private_collections if sees_private else _tag_set_with_public_collections
    return jsonify(load_tag_set(tag_sets_id))
Beispiel #12
0
def api_explorer_collection_set(tag_sets_id):
    """
    Return a tag set and its collections as JSON (public only, or public and private,
    depending on user role). Not cached because this can change, and load time isn't terrible.
    :param tag_sets_id: the tag set to query for collections
    :return: JSON response built from the private- or public-collections loader result
    """
    # rely on truthiness like the other role checks in this file, instead of comparing `== True`
    if is_user_logged_in() and user_has_auth_role(ROLE_MEDIA_EDIT):
        info = _tag_set_with_private_collections(tag_sets_id)
    else:
        info = _tag_set_with_public_collections(tag_sets_id)
    return jsonify(info)
def api_mediapicker_source_search():
    """
    Search media sources for the media picker and return their details as JSON.
    Reads 'mediaKeyword' from the request args, keeps at most MAX_SOURCES matches from each
    group returned by _matching_sources_by_set, and runs source_details_worker over the
    combined list in a process pool.
    :return: JSON response shaped like {'list': [...]}
    """
    public_only = not user_has_auth_role(ROLE_MEDIA_EDIT)
    search_str = request.args['mediaKeyword']
    results = _matching_sources_by_set(search_str, public_only)
    flat_list_of_sources = [source for matches in results for source in matches[:MAX_SOURCES]]
    set_of_queried_sources = []
    if flat_list_of_sources:
        pool = Pool(processes=STORY_COUNT_POOL_SIZE)
        try:
            # map blocks until every worker has finished
            set_of_queried_sources = pool.map(source_details_worker, flat_list_of_sources)
        finally:
            # always tear the workers down, even if a worker raised, so processes don't leak
            pool.terminate()
    return jsonify({'list': set_of_queried_sources})
def api_collection_set(tag_sets_id):
    """
    Return a tag set and its collections as JSON. Users holding ROLE_MEDIA_EDIT get the result of
    apicache.tag_set_with_private_collections, everyone else the public variant.
    The 'tags' entry is flagged with user favorites, sorted by ('label', 'tag') and
    exposed under the key 'collections' instead.
    :param tag_sets_id: the tag set to query for collections
    :return: JSON response of the tag set info with a sorted 'collections' list
    """
    if user_has_auth_role(ROLE_MEDIA_EDIT):
        fetch_tag_set = apicache.tag_set_with_private_collections
    else:
        fetch_tag_set = apicache.tag_set_with_public_collections
    info = fetch_tag_set(user_mediacloud_key(), tag_sets_id)

    tags = info['tags']
    add_user_favorite_flag_to_collections(tags)
    # publish the tags under a name that makes more sense to callers
    info['collections'] = sorted(tags, key=itemgetter('label', 'tag'))
    del info['tags']
    return jsonify(info)
def api_collection_set(tag_sets_id):
    """
    Return a tag set and its collections as JSON. Users holding ROLE_MEDIA_EDIT get the result of
    apicache.tag_set_with_private_collections, everyone else the public variant.
    Every tag gains a 'sort_key' (its label, or its tag when the label is empty); the tags are
    sorted on that key and exposed under 'collections' in place of 'tags'.
    :param tag_sets_id: the tag set to query for collections
    :return: JSON response of the tag set info with a sorted 'collections' list
    """
    api_loader = (apicache.tag_set_with_private_collections
                  if user_has_auth_role(ROLE_MEDIA_EDIT)
                  else apicache.tag_set_with_public_collections)
    info = api_loader(user_mediacloud_key(), tag_sets_id)

    collections = info['tags']
    add_user_favorite_flag_to_collections(collections)
    for collection in collections:
        # fall back to the raw tag when there is no label to sort on
        collection['sort_key'] = collection['label'] or collection['tag']
    # rename to make more sense here
    info['collections'] = sorted(collections, key=itemgetter('sort_key'))
    del info['tags']
    return jsonify(info)
def source_stats(media_id):
    """
    Gather summary statistics for one media source and return them as JSON.
    The response carries: story_count, num_stories_90, start_date, collection_count,
    geoPct and nytPct (the latter two are ratios of the first storyTagCount entry's count
    to the source's story count, or 0 when that cannot be computed).
    :param media_id: the id of the media source to report on
    :return: JSON response holding the stats listed above
    """
    username = user_name()
    user_mc = user_admin_mediacloud_client()
    results = {}
    # story count
    media_query = "(media_id:{})".format(media_id)
    source_specific_story_count = cached_source_story_count(username, media_query)
    results['story_count'] = source_specific_story_count

    def tagged_ratio(tag_sets_id):
        # share of this source's stories given by the first tag count in the tag set
        tag_counts = user_mc.storyTagCount(solr_query=media_query, tag_sets_id=tag_sets_id)
        if len(tag_counts) == 0:
            return 0
        tagged = float(tag_counts[0]['count'])
        total = float(source_specific_story_count)
        # a source without stories would otherwise raise ZeroDivisionError
        if total == 0:
            return 0
        return tagged / total

    # health
    media_health = _cached_media_source_health(username, media_id)
    results['num_stories_90'] = media_health['num_stories_90'] if 'num_stories_90' in media_health else None
    results['start_date'] = media_health['start_date'] if 'start_date' in media_health else None
    info = _media_source_details(media_id)
    user_can_see_private_collections = user_has_auth_role(ROLE_MEDIA_EDIT)
    visible_collections = [
        c for c in info['media_source_tags']
        if ((c['tag_sets_id'] in VALID_COLLECTION_TAG_SETS_IDS) and
            ((c['show_on_media'] == 1) or user_can_see_private_collections))
    ]
    results['collection_count'] = len(visible_collections)
    # geography tags
    results['geoPct'] = tagged_ratio(TAG_SET_GEOCODER_VERSION)
    # nyt theme
    results['nytPct'] = tagged_ratio(TAG_SET_NYT_LABELS_VERSION)
    return jsonify(results)
def api_collection_sources(collection_id):
    """
    List the media sources tagged with a collection and return them as JSON.
    When the request carries details=true and the user holds ROLE_MEDIA_EDIT, each source also
    gets 'latest_scrape_job' and 'active_feed_count', looked up in a process pool.
    :param collection_id: the tags_id of the collection
    :return: JSON response shaped like {'tags_id': ..., 'sources': [...]}
    """
    add_in_details = ('details' in request.args) and (request.args['details'] == 'true')
    results = {'tags_id': collection_id}
    media_in_collection = media_with_tag(user_mediacloud_key(), collection_id)
    add_user_favorite_flag_to_sources(media_in_collection)
    if add_in_details and user_has_auth_role(ROLE_MEDIA_EDIT):
        # for editing users, add in last scrape and active feed count (if requested)
        jobs = [m['media_id'] for m in media_in_collection]
        pool = Pool(processes=FEED_SCRAPE_JOB_POOL_SIZE)
        try:
            job_results = pool.map(_media_list_edit_worker, jobs)  # blocks until they are all done
        finally:
            # release the worker processes even when a worker raised
            pool.terminate()
        job_by_media_id = {j['media_id']: j for j in job_results}
        for m in media_in_collection:
            job = job_by_media_id[m['media_id']]
            m['latest_scrape_job'] = job['latest_scrape_job']
            m['active_feed_count'] = job['active_feed_count']
    results['sources'] = media_in_collection
    return jsonify(results)
Beispiel #18
0
def source_stats(media_id):
    """
    Gather summary statistics for one media source and return them as JSON.
    The response carries: story_count, num_stories_90, start_date, collection_count,
    geoPct and nytPct.
    :param media_id: the id of the media source to report on
    :return: JSON response holding the stats listed above
    """
    username = user_name()
    # NOTE(review): this client is never used below; the call is kept so behaviour is unchanged
    admin_client = user_admin_mediacloud_client()
    media_query = "(media_id:{})".format(media_id)
    stats = {'story_count': apicache.source_story_count(user_mediacloud_key(), media_query)}
    # health: copy over the two fields of interest, defaulting to None when absent
    health = _cached_media_source_health(username, media_id)
    for field in ('num_stories_90', 'start_date'):
        stats[field] = health[field] if field in health else None
    # count the tags that sit in a valid collection tag set and that this user may see
    details = _media_source_details(media_id)
    can_see_private = user_has_auth_role(ROLE_MEDIA_EDIT)
    stats['collection_count'] = sum(
        1 for tag in details['media_source_tags']
        if (tag['tag_sets_id'] in VALID_COLLECTION_TAG_SETS_IDS)
        and ((tag['show_on_media'] == 1) or can_see_private)
    )
    # tag coverage: geography tags first, then nyt themes
    coverage_fields = (
        ('geoPct', TAG_SET_GEOCODER_VERSION),
        ('nytPct', TAG_SET_NYT_LABELS_VERSION),
    )
    for field, tag_set_id in coverage_fields:
        stats[field] = apicache.tag_coverage_pct(user_mediacloud_key(), media_query, tag_set_id)
    return jsonify(stats)
def download_sources_csv(all_media, file_prefix):
    """
    Build the sources CSV download, choosing the column template by user role.
    :param all_media: the media sources to export (passed straight to download_media_csv)
    :param file_prefix: filename prefix (passed straight to download_media_csv)
    :return: whatever download_media_csv returns
    """
    # the view template has no editor_notes
    template_props = (SOURCES_TEMPLATE_PROPS_EDIT
                      if user_has_auth_role(ROLE_MEDIA_EDIT)
                      else SOURCES_TEMPLATE_PROPS_VIEW)
    return download_media_csv(all_media, file_prefix, template_props)