def concatenate_solr_dates(start_date, end_date):
    """Build a Solr publish-date filter clause from 'YYYY-MM-DD' start/end strings."""
    client = user_admin_mediacloud_client()
    fmt = '%Y-%m-%d'
    return client.publish_date_query(
        datetime.datetime.strptime(start_date, fmt).date(),
        datetime.datetime.strptime(end_date, fmt).date())
def _mc_client(admin=False):
    """Return the logged-in user's client (admin-level when requested), else the shared tool client."""
    if not is_user_logged_in():
        return mc
    return user_admin_mediacloud_client() if admin else user_mediacloud_client()
def _tag_set_with_collections(tag_sets_id, show_only_public_collections):
    """Fetch a tag set plus all of its collection tags, sorted by label.

    Pages through the tag list 100 at a time; only tags with show_on_media == 1
    are kept (that flag controls public visibility of a collection).
    """
    user_mc = user_admin_mediacloud_client()
    tag_set = user_mc.tagSet(tag_sets_id)
    # page through tags
    all_tags = []
    last_tags_id = 0
    while True:
        tags = user_mc.tagList(tag_sets_id=tag_set['tag_sets_id'], last_tags_id=last_tags_id,
                               rows=100, public_only=show_only_public_collections)
        if len(tags) == 0:
            break
        all_tags = all_tags + tags
        last_tags_id = tags[-1]['tags_id']
    # BUG FIX: was `t['show_on_media'] is 1` — identity comparison on an int is
    # implementation-dependent; use equality. Double-check show_on_media because
    # that controls public or not.
    collection_list = [t for t in all_tags if t['show_on_media'] == 1]
    collection_list = sorted(collection_list, key=itemgetter('label'))
    return {'name': tag_set['label'], 'description': tag_set['description'],
            'collections': collection_list}
def collection_source_sentence_historical_counts(collection_id):
    """Return per-source historical sentence counts for a collection over the requested date range."""
    # cleanup: removed an unused user_mc client handle — the helper below
    # does its own lookups and the client was never referenced here
    start_date_str = request.args['start']
    end_date_str = request.args['end']
    results = _collection_source_sentence_historical_counts(collection_id, start_date_str, end_date_str)
    return jsonify({'counts': results})
def _cached_source_split_sentence_count(user_mc_key, query, split_start, split_end):
    """Return sentence counts for *query* split into date buckets.

    user_mc_key stays in the signature so caching is keyed per user.
    """
    client = user_admin_mediacloud_client()
    return client.sentenceCount(query, split=True,
                                split_start_date=split_start,
                                split_end_date=split_end)
def api_explorer_word_count():
    """Run a word count for the query described by the submitted form."""
    client = user_admin_mediacloud_client()
    query = solr_query_from_request(request.form)
    # give them back new data, so they can update the client
    return jsonify(client.wordCount(solr_query=query))
def topic_focal_sets(user_mc_key, topics_id, snapshots_id):
    """List the focal sets for a topic snapshot.

    This needs user_mc_key in the function signature to make sure the
    caching is keyed correctly.
    """
    client = user_admin_mediacloud_client()
    return client.topicFocalSetList(topics_id, snapshots_id=snapshots_id)
def collection_update(collection_id):
    """Update a collection's metadata, reconcile its source membership, and bust the cache."""
    user_mc = user_admin_mediacloud_client()
    label = '{}'.format(request.form['name'])
    description = request.form['description']
    static = request.form['static'] if 'static' in request.form else None
    show_on_stories = request.form['showOnStories'] if 'showOnStories' in request.form else None
    show_on_media = request.form['showOnMedia'] if 'showOnMedia' in request.form else None
    formatted_name = format_name_from_label(label)
    requested_ids = []
    if len(request.form['sources[]']) > 0:
        requested_ids = [int(sid) for sid in request.form['sources[]'].split(',')]
    # first update the collection itself
    updated_collection = user_mc.updateTag(collection_id, formatted_name, label, description,
                                           is_static=(static == 'true'),
                                           show_on_stories=(show_on_stories == 'true'),
                                           show_on_media=(show_on_media == 'true'))
    # then reconcile membership: drop sources no longer listed, tag the new ones
    current_ids = [int(m['media_id']) for m in media_with_tag(user_mediacloud_key(), collection_id)]
    removals = list(set(current_ids) - set(requested_ids))
    additions = [sid for sid in requested_ids if sid not in current_ids]
    pending = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD) for sid in additions]
    pending += [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE) for sid in removals]
    if len(pending) > 0:
        user_mc.tagMedia(pending)
    apicache.invalidate_collection_source_representation_cache(user_mediacloud_key(), collection_id)
    return jsonify(updated_collection['tag'])
def collection_create():
    """Create a new collection tag and tag the requested sources into it."""
    user_mc = user_admin_mediacloud_client()  # has to be admin to call createTag
    label = '{}'.format(request.form['name'])
    description = request.form['description']
    static = request.form['static'] if 'static' in request.form else None
    show_on_stories = request.form['showOnStories'] if 'showOnStories' in request.form else None
    show_on_media = request.form['showOnMedia'] if 'showOnMedia' in request.form else None
    source_ids = request.form['sources[]'].split(',') if len(request.form['sources[]']) > 0 else []
    formatted_name = format_name_from_label(label)
    # first create the collection
    new_collection = user_mc.createTag(TAG_SETS_ID_COLLECTIONS, formatted_name, label, description,
                                       is_static=(static == 'true'),
                                       show_on_stories=(show_on_stories == 'true'),
                                       show_on_media=(show_on_media == 'true'))
    # then tag every requested source with the fresh collection id
    new_tags_id = new_collection['tag']['tags_id']
    pending = [MediaTag(sid, tags_id=new_tags_id, action=TAG_ACTION_ADD) for sid in source_ids]
    if len(pending) > 0:
        user_mc.tagMedia(pending)
    return jsonify(new_collection['tag'])
def mc_client(admin=False):
    """Pick the right MediaCloud client: the user's own (admin-level when asked) when logged in, otherwise the shared tool client."""
    if is_user_logged_in():
        return user_admin_mediacloud_client() if admin else user_mediacloud_client()
    return mc
def topic_filter_cascade_list():
    """Assemble the public, personal, and favorite topic lists for the topic picker."""
    public_topics = sorted_public_topic_list()
    user_topics = []
    favorited_topics = []
    results = {'link_ids': []}
    if is_user_logged_in():
        user_mc = user_admin_mediacloud_client()
        results = user_mc.topicList(link_id=request.args.get('linkId'))
        user_topics = results['topics']
        favorite_topic_ids = db.get_users_lists(user_name(), 'favoriteTopics')
        # flag favorites among both the public list and the user's own topics
        for topic in public_topics:
            topic['isFavorite'] = topic['topics_id'] in favorite_topic_ids
        for topic in user_topics:
            topic['isFavorite'] = topic['topics_id'] in favorite_topic_ids
        # favorites might not appear in either list above, so fetch them directly
        favorited_topics = [user_mc.topic(tid) for tid in favorite_topic_ids]
        for topic in favorited_topics:
            topic['isFavorite'] = True
    return jsonify({
        'topics': {'favorite': favorited_topics, 'personal': user_topics, 'public': public_topics},
        'link_ids': results['link_ids'],
    })
def cached_geotag_count(user_mc_key, query):
    """Sample sentences matching *query* and tally geo tags per country.

    Returns the filtered tag counts annotated with geonames id, alpha3 code,
    normalized count, and Highcharts iso-a2/value fields.
    user_mc_key is in the signature so caching is keyed per user.
    """
    user_mc = user_admin_mediacloud_client()
    res = user_mc.sentenceFieldCount('*', query, tag_sets_id=tag_utl.GEO_TAG_SET,
                                     sample_size=tag_utl.GEO_SAMPLE_SIZE)
    # keep only tags that map to a known country geonames id
    res = [r for r in res
           if int(r['tag'].split('_')[1]) in COUNTRY_GEONAMES_ID_TO_APLHA3.keys()]
    for r in res:
        geonamesId = int(r['tag'].split('_')[1])
        # cleanup: removed a dead re-check of geonamesId here — the list was
        # already filtered to known countries just above, so the check never fired
        r['geonamesId'] = geonamesId  # TODO: move this to JS?
        r['alpha3'] = COUNTRY_GEONAMES_ID_TO_APLHA3[geonamesId]
        # WTF: why is the API returning this as a string and not a number?
        r['count'] = (float(r['count']) / float(tag_utl.GEO_SAMPLE_SIZE))
        for hq in HIGHCHARTS_KEYS:
            if hq['properties']['iso-a3'] == r['alpha3']:
                r['iso-a2'] = hq['properties']['iso-a2']
                r['value'] = r['count']
    return res
def map_files_download_custom(topics_id):
    """Stream a custom GEXF link map for a topic, built from optional query args."""
    user_mc = user_admin_mediacloud_client()
    # BUG FIX: two options tested one arg name but read another ('snapshots_id'
    # vs 'snapshotId', 'foci_id' vs 'fociId'), raising KeyError whenever only
    # the tested spelling was present. Test the key that is actually read,
    # matching the camelCase convention used for 'timespanId'.
    optional_args = {
        'timespans_id': request.args['timespanId'] if 'timespanId' in request.args else None,
        'snapshots_id': request.args['snapshotId'] if 'snapshotId' in request.args else None,
        'foci_id': request.args['fociId'] if 'fociId' in request.args else None,
        'color_field': request.args['color_field'] if 'color_field' in request.args else 'media_type',
        'num_media': request.args['num_media'] if 'num_media' in request.args else 500,  # this is optional
        'include_weights': request.args['include_weights'] if 'include_weights' in request.args else 1,
        'num_links_per_medium': request.args['num_links_per_medium'] if 'num_links_per_medium' in request.args else None,
    }
    # NOTE(review): this KeyErrors if 'timespanId' is absent — confirm callers always send it
    filename = "link-map-" + topics_id + "-" + request.args['timespanId'] + "." + "gexf"
    result_stream = user_mc.topicMediaMap(topics_id, **optional_args)
    # cleanup: removed an unused per-cell generator comprehension (dead code);
    # the raw stream was already what got returned
    return flask.Response(result_stream,
                          mimetype="attachment/octet-stream",
                          headers={"Content-Disposition": "attachment;filename=" + filename})
def media_inlinks(topics_id, media_id):
    """List stories in the topic that link to the given media source."""
    # cleanup: removed an unused admin client handle — topic_story_list takes
    # the user's key directly and the client was never referenced
    sort = validated_sort(request.args.get('sort'))
    limit = request.args.get('limit')
    inlinks = topic_story_list(user_mediacloud_key(), topics_id,
                               link_to_media_id=media_id, sort=sort, limit=limit)
    return jsonify(inlinks)
def _topic_summary(topics_id):
    # Summarize one topic: base info plus snapshot and spider job status.
    # Public topics are readable via the shared tool client; otherwise login is required.
    if access_public_topic(topics_id):
        local_mc = mc
    elif is_user_logged_in():
        local_mc = user_admin_mediacloud_client()
    else:
        return jsonify({'status': 'Error', 'message': 'Invalid attempt'})
    topic = local_mc.topic(topics_id)
    # add in snapshot and latest snapshot job status
    # NOTE(review): jobStatus is fetched with the tool-level `mc` client even when
    # local_mc is the user's client — confirm that mix is intentional
    topic['snapshots'] = {
        'list': local_mc.topicSnapshotList(topics_id),
        'jobStatus': mc.topicSnapshotGenerateStatus(topics_id)['job_states']    # need to know if one is running
    }
    # add in spider job status
    topic['spiderJobs'] = local_mc.topicSpiderStatus(topics_id)['job_states']
    if is_user_logged_in():
        _add_user_favorite_flag_to_topics([topic])
    '''
    # add in story counts, overall seed and spidered
    feedTotal = topic_story_count(local_mc, topics_id)
    # with q - but not passed in for summary
    total = topic_story_count(local_mc, topics_id, timespans_id=None, q=None)
    # spidered count.. how?
    spidered = total - seedTotal
    topic['seedStories'] = seedTotal
    topic['spideredStories'] = spidered
    topic['totaltories'] = total
    '''
    return topic
def collection_update(collection_id):
    """Apply edits to a collection tag and sync which sources carry it."""
    user_mc = user_admin_mediacloud_client()
    label = '{}'.format(request.form['name'])
    description = request.form['description']
    static = request.form['static'] if 'static' in request.form else None
    show_on_stories = request.form['showOnStories'] if 'showOnStories' in request.form else None
    show_on_media = request.form['showOnMedia'] if 'showOnMedia' in request.form else None
    formatted_name = format_name_from_label(label)
    desired_ids = []
    if len(request.form['sources[]']) > 0:
        desired_ids = [int(s) for s in request.form['sources[]'].split(',')]
    # update the tag that backs this collection
    updated_collection = user_mc.updateTag(collection_id, formatted_name, label, description,
                                           is_static=(static == 'true'),
                                           show_on_stories=(show_on_stories == 'true'),
                                           show_on_media=(show_on_media == 'true'))
    # compare against current membership to decide which sources to tag/untag
    existing_ids = [int(m['media_id']) for m in media_with_tag(user_mediacloud_key(), collection_id)]
    ids_to_remove = list(set(existing_ids) - set(desired_ids))
    ids_to_add = [s for s in desired_ids if s not in existing_ids]
    changes = [MediaTag(s, tags_id=collection_id, action=TAG_ACTION_ADD) for s in ids_to_add]
    changes = changes + [MediaTag(s, tags_id=collection_id, action=TAG_ACTION_REMOVE) for s in ids_to_remove]
    if len(changes) > 0:
        user_mc.tagMedia(changes)
    return jsonify(updated_collection['tag'])
def remove_sources_from_collection(collection_id):
    """Drop the listed sources from a collection, re-asserting the tag on the remainder."""
    to_remove = [int(s) for s in request.form['sources[]'].split(',')]
    user_mc = user_admin_mediacloud_client()
    # figure out which of the collection's current sources should remain
    current_ids = [int(m['media_id']) for m in media_with_tag(collection_id)]
    to_keep = list(set(current_ids) - set(to_remove))
    # do I need to run similar or TAG_ACTION_REMOVE?
    pending = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE) for sid in to_remove]
    pending += [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD) for sid in to_keep]
    results = {}
    if len(pending) > 0:
        results = user_mc.tagMedia(pending)
    apicache.invalidate_collection_source_representation_cache(user_mediacloud_key(), collection_id)
    return jsonify(results)
def _topic_story_link_list_by_page_as_csv_row(user_key, topics_id, props, **kwargs):
    """Generator yielding CSV rows (unicode strings) for a topic's story links.

    Streams one header row, then pages through the link list; each page's
    source and ref story ids are resolved into full story rows via
    topicStoryList, one yielded line per story.
    """
    local_mc = user_admin_mediacloud_client(user_key)
    yield u','.join(props) + u'\n'  # first send the column names
    link_id = 0
    more_pages = True
    while more_pages:
        story_link_page = topic_story_link_list_by_page(user_key, topics_id, link_ids=link_id, **kwargs)
        # get all source and ref story link ids and fetch them with topicStoryList
        story_src_ids = [str(s['source_stories_id']) for s in story_link_page['links']]
        story_ref_ids = [str(s['ref_stories_id']) for s in story_link_page['links']]
        story_src_ids = story_src_ids + story_ref_ids
        # TODO there is a cached topic story list... but paging is different...
        storiesInfoList = local_mc.topicStoryList(topics_id, stories_id=story_src_ids)
        # advance to the next page if the server handed us a continuation id
        if 'next' in story_link_page['link_ids']:
            link_id = story_link_page['link_ids']['next']
        else:
            more_pages = False
        for s in storiesInfoList['stories']:
            cleaned_row = csv.dict2row(props, s)
            row_string = u','.join(cleaned_row) + u'\n'
            yield row_string
def topic_story_update(stories_id):
    """Update editable fields on a story, filling unspecified ones with defaults."""
    user_mc = user_admin_mediacloud_client()
    form = request.form
    # each field falls back to its default when absent from the form
    defaults = {
        'title': None,
        'description': '',
        'guid': 'guid',
        'url': 'url',
        'language': 'en',
        'publish_date': None,
        'confirm_date': False,
        'undateable': False,
    }
    optional_args = {field: (form[field] if field in form else fallback)
                     for field, fallback in defaults.items()}
    return jsonify(user_mc.storyUpdate(stories_id, **optional_args))
def topic_update_permission(topics_id):
    """Replace a topic's permission list with the one posted in the form.

    Emails present before but absent from the new list are set to 'none';
    everyone in the new list gets their stated permission.
    """
    user_mc = user_admin_mediacloud_client()
    new_permissions = json.loads(request.form["permissions"])
    current_permissions = user_mc.topicPermissionsList(topics_id)['permissions']
    # first remove any people that you need to
    new_emails = [p['email'] for p in new_permissions]
    current_emails = [p['email'] for p in current_permissions]
    for email in current_emails:
        if email not in new_emails:
            user_mc.topicPermissionsUpdate(topics_id, email, 'none')
    # now update the remaining permissions
    for permission in new_permissions:
        if permission['permission'] not in ['read', 'write', 'admin', 'none']:
            return json_error_response('Invalid permission value')
        try:
            user_mc.topicPermissionsUpdate(topics_id, permission['email'].strip(), permission['permission'])
        except MCException as e:
            # show a nice error if they type the email wrong
            # NOTE(review): `e.message` is a Python-2-ism — confirm MCException
            # defines .message on Python 3, otherwise this raises AttributeError
            if 'Unknown email' in e.message:
                return jsonify({'success': 0, 'results': e.message})
    return jsonify({'success': 1, 'results': user_mc.topicPermissionsList(topics_id)})
def api_system_user_update(user_id):
    """Update a user's account settings (behind an endpoint so the browser doesn't cache it)."""
    user_mc = user_admin_mediacloud_client()
    form = request.form
    valid_params = {
        'email': form['email'],
        'full_name': form['full_name'],
        'notes': form['notes'] if 'notes' in form else None,  # this is optional
        'roles': form['roles[]'].split(',') if 'roles[]' in form else None,
        'active': bool(form['active'] == 'true') if 'active' in form else False,
        'max_topic_stories': form['max_topic_stories'] if 'max_topic_stories' in form else None,
        'weekly_requests_limit': form['weekly_requests_limit'] if 'weekly_requests_limit' in form else None,
        'has_consented': bool(form['has_consented'] == 'true') if 'has_consented' in form else False,
    }
    return jsonify(user_mc.userUpdate(user_id, **valid_params))
def api_system_user_search():
    """Search users by string with optional paging link id."""
    user_mc = user_admin_mediacloud_client()
    # BUG FIX: trailing commas made both of these 1-tuples, so userList was
    # called with tuple-valued search/link_id arguments
    search = request.args.get('searchStr') if 'searchStr' in request.args else None
    link_id = request.args.get('linkId') if 'linkId' in request.args else None
    page = user_mc.userList(search=search, link_id=link_id)
    return jsonify(page)
def favorite_collections():
    """Return the logged-in user's favorited collections, flagged as favorites."""
    user_mc = user_admin_mediacloud_client()
    favorite_ids = db.get_users_lists(user_name(), 'favoriteCollections')
    collections = [user_mc.tag(tag_id) for tag_id in favorite_ids]
    for collection in collections:
        collection['isFavorite'] = True
    return jsonify({'list': collections})
def source_suggest():
    """Record a user's media-source suggestion and email them an acknowledgement."""
    user_mc = user_admin_mediacloud_client()
    url = request.form['url']
    feed_url = request.form['feedurl'] if 'feedurl' in request.form else None
    name = request.form['name'] if 'name' in request.form else None
    reason = request.form['reason'] if 'reason' in request.form else None
    tag_ids_to_add = tag_ids_from_collections_param()
    new_suggestion = user_mc.mediaSuggest(url=url, name=name, feed_url=feed_url,
                                          reason=reason, collections=tag_ids_to_add)
    # send an email confirmation
    email_title = "Thanks for Suggesting " + url
    template_args = dict(username=user_name(), name=name, url=url,
                         feed_url=feed_url, reason=reason)
    send_html_email(email_title,
                    [user_name(), '*****@*****.**'],
                    render_template("emails/source_suggestion_ack.txt", **template_args),
                    render_template("emails/source_suggestion_ack.html", **template_args))
    # and return that it worked
    return jsonify(new_suggestion)
def cached_recent_sentence_counts(user_mc_key, fq, start_date_str=None, end_date_str=None):
    """Helper to fetch sentence counts over the last year for an arbitrary query.

    Dates default to the past 365 days ending yesterday; fq is a list of extra
    Solr filter clauses and is NOT modified.
    """
    user_mc = user_admin_mediacloud_client()
    if start_date_str is None:
        last_n_days = 365
        start_date = datetime.date.today() - datetime.timedelta(last_n_days)
    else:
        start_date = datetime.datetime.strptime(start_date_str, '%Y-%m-%d')
    if end_date_str is None:
        end_date = datetime.date.today() - datetime.timedelta(1)  # yesterday
    else:
        end_date = datetime.datetime.strptime(end_date_str, '%Y-%m-%d')
    # BUG FIX: previously this appended to the caller's fq list in place — bad
    # for a cached helper, since the shared list grew a date clause on every
    # call. Build a local copy instead.
    solr_filter = list(fq) + [user_mc.publish_date_query(start_date, end_date)]
    sentences_over_time = user_mc.sentenceCount(
        '*', solr_filter=solr_filter, split=True,
        split_start_date=datetime.datetime.strftime(start_date, '%Y-%m-%d'),
        split_end_date=datetime.datetime.strftime(end_date, '%Y-%m-%d'))['split']
    return sentences_over_time
def favorite_sources():
    """Return the logged-in user's favorited media sources, flagged as favorites."""
    user_mc = user_admin_mediacloud_client()
    favorite_ids = db.get_users_lists(user_name(), 'favoriteSources')
    sources = [user_mc.media(media_id) for media_id in favorite_ids]
    for source in sources:
        source['isFavorite'] = True
    return jsonify({'list': sources})
def stream_story_list_csv(user_key, filename, topics_id, **kwargs):
    """Stream a topic's story list as a CSV attachment.

    Query args supply snapshot/timespan/focus/q/sort; when fb_data is set,
    Facebook collection dates are fetched and a column is added for them.
    """
    as_attachment = kwargs['as_attachment'] if 'as_attachment' in kwargs else True
    fb_data = kwargs['fb_data'] if 'fb_data' in kwargs else False
    all_stories = []
    params = kwargs.copy()
    merged_args = {
        'snapshots_id': request.args['snapshotId'],
        'timespans_id': request.args['timespanId'],
        'foci_id': request.args['focusId'] if 'foci_id' in request.args else None,
        'q': request.args['q'] if 'q' in request.args else None,
        'sort': request.args['sort'] if 'sort' in request.args else None,
    }
    params.update(merged_args)
    # NOTE(review): stripping 'as_attachment' appears deliberately commented out,
    # so it flows through to the row generator's kwargs — confirm that is intended
    # if 'as_attachment' in params: del params['as_attachment']
    if 'fb_data' in params:
        del params['fb_data']
    if 'q' in params:
        # BUG FIX: previously compared the literal string 'q' against the
        # placeholder list (never a member), so values like 'null'/'undefined'
        # leaked through as real queries. Compare the value itself.
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 100  # an arbitrary value to let us page through with big topics
    props = [
        'stories_id', 'publish_date', 'title', 'url', 'language', 'ap_syndicated',
        'themes', 'subtopics', 'inlink_count', 'facebook_share_count', 'outlink_count',
        'media_inlink_count', 'media_id', 'media_name', 'media_url',
        # 'media_pub_country', 'media_pub_state', 'media_language', 'media_about_country', 'media_media_type'
    ]
    if fb_data:
        all_fb_count = []
        more_fb_count = True
        link_id = 0
        local_mc = user_admin_mediacloud_client()
        while more_fb_count:
            fb_page = local_mc.topicStoryListFacebookData(topics_id, limit=100, link_id=link_id)
            all_fb_count = all_fb_count + fb_page['counts']
            if 'next' in fb_page['link_ids']:
                link_id = fb_page['link_ids']['next']
                more_fb_count = True
            else:
                more_fb_count = False
        # now iterate through each list and set up the fb collection date
        # NOTE(review): all_stories is always empty here, so this loop is a no-op —
        # looks like stories were meant to be accumulated earlier; confirm intent
        for s in all_stories:
            for fb_item in all_fb_count:
                if int(fb_item['stories_id']) == int(s['stories_id']):
                    s['facebook_collection_date'] = fb_item['facebook_api_collect_date']
        props.append('facebook_collection_date')
    timestamped_filename = csv.safe_filename(filename)
    headers = {"Content-Disposition": "attachment;filename=" + timestamped_filename}
    return Response(_topic_story_list_by_page_as_csv_row(user_key, topics_id, props, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)
def topic_update(topics_id):
    """Update a topic's settings, optionally swapping its seed sources/collections."""
    user_mc = user_admin_mediacloud_client()
    # top five cannot be empty fyi
    args = {
        'name': request.form['name'] if 'name' in request.form else None,
        'description': request.form['description'] if 'description' in request.form else None,
        'solr_seed_query': request.form['solr_seed_query'] if 'solr_seed_query' in request.form else None,
        'start_date': request.form['start_date'] if 'start_date' in request.form else None,
        'end_date': request.form['end_date'] if 'end_date' in request.form else None,
        'is_public': request.form['is_public'] if 'is_public' in request.form else None,
        'is_logogram': request.form['is_logogram'] if 'is_logogram' in request.form else None,
        'ch_monitor_id': request.form['ch_monitor_id']
                         if 'ch_monitor_id' in request.form
                         and request.form['ch_monitor_id'] != 'null'
                         and len(request.form['ch_monitor_id']) > 0 else None,
        'max_iterations': request.form['max_iterations'] if 'max_iterations' in request.form else None,
        'max_stories': request.form['max_stories'] if 'max_stories' in request.form else None,
        'twitter_topics_id': request.form['twitter_topics_id'] if 'twitter_topics_id' in request.form else None
    }
    # parse out any sources and collections to add
    media_ids_to_add = ids_from_comma_separated_str(request.form['sources[]'] if 'sources[]' in request.form else '')
    tag_ids_to_add = ids_from_comma_separated_str(request.form['collections[]'] if 'collections[]' in request.form else '')
    # hack to support twitter-only topics
    # BUG FIX: was `len(...) is 0` — identity comparison against an int literal
    # is implementation-dependent; use equality
    if (len(media_ids_to_add) == 0) and (len(tag_ids_to_add) == 0):
        media_ids_to_add = None
        tag_ids_to_add = None
    result = user_mc.topicUpdate(topics_id, media_ids=media_ids_to_add, media_tags_ids=tag_ids_to_add, **args)
    return topic_summary(result['topics'][0]['topics_id'])  # give them back new data, so they can update the client
def source_create():
    """Create a media source from the posted form, apply any metadata tags, and
    kick off a feed scrape when the source is brand new."""
    user_mc = user_admin_mediacloud_client()
    name = request.form['name']
    url = request.form['url']
    editor_notes = request.form['editor_notes'] if 'editor_notes' in request.form else None  # this is optional
    public_notes = request.form['public_notes'] if 'public_notes' in request.form else None
    monitored = request.form['monitored'] if 'monitored' in request.form else None
    # parse out any tag to add (ie. collections and metadata)
    tag_ids_to_add = tag_ids_from_collections_param()
    # map each metadata form field to the tag set it belongs to
    # NOTE(review): 'primaryLanguageg' looks like a typo for 'primaryLanguage' —
    # confirm against the form key the client actually posts before changing it
    valid_metadata = [
        {'form_key': 'publicationCountry', 'tag_sets_id': TAG_SETS_ID_PUBLICATION_COUNTRY},
        {'form_key': 'publicationState', 'tag_sets_id': TAG_SETS_ID_PUBLICATION_STATE},
        {'form_key': 'primaryLanguageg', 'tag_sets_id': TAG_SETS_ID_PRIMARY_LANGUAGE},
        {'form_key': 'countryOfFocus', 'tag_sets_id': TAG_SETS_ID_COUNTRY_OF_FOCUS},
        {'form_key': 'mediaType', 'tag_sets_id': TAG_SETS_ID_MEDIA_TYPE}
    ]
    source_to_create = {
        'name': name,
        'url': url,
        'editor_notes': editor_notes,
        'public_notes': public_notes,
        'is_monitored': monitored,
        'tags_ids': tag_ids_to_add
    }
    result = user_mc.mediaCreate([source_to_create])[0]  # need just the first entry, since we only create one
    if result['status'] != "error":
        # if it worked, update any metadata, because we need to remove the other tags in each set
        for metadata_item in valid_metadata:
            metadata_tag_id = request.form[metadata_item['form_key']] if metadata_item['form_key'] in request.form else None  # this is optional
            if metadata_tag_id:
                user_mc.tagMedia(
                    tags=[MediaTag(result['media_id'], tags_id=metadata_tag_id, action=TAG_ACTION_ADD)],
                    clear_others=True)  # make sure to clear any other values set in this metadata tag set
                tag_ids_to_add.append(metadata_tag_id)
    if result['status'] == 'new':
        # if it is a really new source, kick off a scraping job to find any RSS feeds
        user_mc.feedsScrape(result['media_id'])
    return jsonify(result)
def _update_source_worker(source_info):
    """Worker: push one source's non-metadata fields to mediaUpdate."""
    user_mc = user_admin_mediacloud_client()
    media_id = source_info['media_id']
    # logger.debug("Updating media {}".format(media_id))
    payload = {key: value for key, value in source_info.items()
               if key != 'media_id' and key not in SOURCE_LIST_CSV_METADATA_PROPS}
    return user_mc.mediaUpdate(media_id, payload)
def media(topics_id, media_id):
    """Merge topic-level media info with the source's base record."""
    user_mc = user_admin_mediacloud_client()
    combined = topic_media_list(user_mediacloud_key(), topics_id, media_id=media_id)['media'][0]
    base_info = user_mc.media(media_id)
    # fill in any base fields the topic-level record lacks
    for key in base_info.keys():
        if key not in combined.keys():
            combined[key] = base_info[key]
    return jsonify(combined)
def _cached_media_source_health(user_mc_key, media_id):
    """Fetch media health info; logs and returns None if the call fails.

    user_mc_key is in the signature so caching is keyed per user.
    """
    user_mc = user_admin_mediacloud_client()
    try:
        return user_mc.mediaHealth(media_id)
    except Exception as e:
        logger.exception(e)
        return None
def cached_geotag_count(query):
    """Tally geo story tags for *query* over the last month, as counts and percentages."""
    user_mc = user_admin_mediacloud_client()
    res = user_mc.storyTagCount(query, [QUERY_LAST_MONTH, QUERY_ENGLISH_LANGUAGE],
                                tag_sets_id=tag_utl.GEO_TAG_SET)
    full_count = apicache.timeperiod_story_count(query, QUERY_LAST_MONTH)['count']
    for entry in res:
        entry['pct'] = float(entry['count']) / float(full_count)
        entry['value'] = float(entry['count'])
    return res
def story_tags_csv(stories_id):
    """Stream every tag and tag set attached to a story as a CSV download."""
    # in the download include all entity types
    admin_mc = user_admin_mediacloud_client()
    if stories_id in [None, 'NaN']:
        return jsonify({'error': 'bad value'})
    # Note - this call doesn't pull cliff places
    story = admin_mc.story(stories_id, text=True)
    props = ['tags_id', 'tag', 'tag_sets_id', 'tag_set']
    return csv.stream_response(story['story_tags'], props,
                               'story-' + str(stories_id) + '-all-tags-and-tag-sets')
def favorite_topics():
    """Return the user's favorite topics, each with snapshot/timespan detail info."""
    user_mc = user_admin_mediacloud_client()
    favorite_ids = db.get_users_lists(user_name(), 'favoriteTopics')
    topics = [user_mc.topic(topic_id) for topic_id in favorite_ids]
    for topic in topics:
        topic['isFavorite'] = True
        topic['detailInfo'] = get_topic_info_per_snapshot_timespan(topic['topics_id'])
    return jsonify({'topics': topics})
def media(topics_id, media_id):
    """Combine topic-specific media data with any missing base media fields."""
    user_mc = user_admin_mediacloud_client()
    merged = apicache.topic_media_list(user_mediacloud_key(), topics_id, media_id=media_id)['media'][0]
    source_record = user_mc.media(media_id)
    # copy over only the base fields the topic record doesn't already carry
    missing_keys = [k for k in list(source_record.keys()) if k not in list(merged.keys())]
    for k in missing_keys:
        merged[k] = source_record[k]
    return jsonify(merged)
def topic_admin_list():
    """List topics with snapshot info attached.

    Using the user's own client keeps this safe for non-admins;
    for admins this returns ALL topics.
    """
    user_mc = user_admin_mediacloud_client()
    topics = user_mc.topicList(limit=500)['topics']
    # we also want snapshot info
    return jsonify(_add_snapshots_info_to_topics(topics))
def api_explorer_sources_by_ids():
    """Look up full media records for the comma-separated source ids in the query string."""
    user_mc = user_admin_mediacloud_client()
    requested_ids = request.args['sources[]'].split(',')
    source_list = []
    for media_id in requested_ids:
        record = user_mc.media(media_id)
        record['id'] = int(media_id)
        source_list.append(record)
    return jsonify({"results": source_list})
def feed_update(feed_id):
    """Update a feed's name and url, plus its optional type and active flag."""
    user_mc = user_admin_mediacloud_client()
    form = request.form
    result = user_mc.feedUpdate(feeds_id=feed_id,
                                name=form['name'],
                                url=form['url'],
                                feed_type=form['type'] if 'type' in form else None,  # this is optional
                                active=form['active'] if 'active' in form else None)  # this is optional
    return jsonify(result)
def _update_source_worker(source_info):
    """Parallel-update worker: send one source's editable fields to mediaUpdate."""
    client = user_admin_mediacloud_client()
    media_id = source_info['media_id']
    logger.debug("Updating media {}".format(media_id))
    # strip the id and the CSV-only metadata columns before sending
    editable = {k: v for k, v in list(source_info.items())
                if k != 'media_id' and k not in SOURCE_LIST_CSV_METADATA_PROPS}
    return client.mediaUpdate(media_id, editable)
def api_topics_preview_word_count():
    """Preview word counts for a topic seed query before the topic is created."""
    user_mc = user_admin_mediacloud_client()
    # BUG FIX: the conditional previously wrapped the whole
    # concatenate_query_for_solr() call — so request.form['collections[]'] was
    # evaluated even when absent (KeyError) — and a trailing comma made
    # solr_query a 1-tuple. Guard each optional argument individually instead.
    solr_query = concatenate_query_for_solr(
        solr_seed_query=request.form['q'],
        media_ids=ids_from_comma_separated_str(request.form['sources[]'])
                  if 'sources[]' in request.form else None,
        tags_ids=ids_from_comma_separated_str(request.form['collections[]'])
                 if 'collections[]' in request.form else None)
    fq = concatenate_solr_dates(start_date=request.form['start_date'],
                                end_date=request.form['end_date'])
    word_count_result = user_mc.wordCount(solr_query=solr_query, solr_filter=fq)
    return jsonify(word_count_result)  # give them back new data, so they can update the client
def story_info(stories_id):
    """Return a story's details (with text, via the admin client, when requested) plus its media source."""
    user_mc = user_mediacloud_client()
    admin_mc = user_admin_mediacloud_client()
    if stories_id in [None, 'NaN']:
        return jsonify({'error': 'bad value'})
    wants_text = 'text' in request.args and request.args['text'] == 'true'
    story = admin_mc.story(stories_id, text=True) if wants_text else user_mc.story(stories_id)
    story["media"] = user_mc.media(story["media_id"])
    return jsonify({'info': story})
def topic_search():
    """Search topics by name (up to 50 matches)."""
    search_str = request.args['searchStr']
    # 'mode' is still read so callers that send it keep working, but the old
    # 'full' vs 'list' branch was dead code: both branches returned the full
    # topic dicts (the trimmed projection was commented out)
    mode = request.args['mode'] if 'mode' in request.args else 'list'
    user_mc = user_admin_mediacloud_client()
    results = user_mc.topicList(name=search_str, limit=50)
    matching_topics = results['topics']
    return jsonify({'topics': matching_topics})
def source_suggestion_update(suggestion_id):
    """Approve or reject a pending media-source suggestion.

    On approval, creates the media source (with editor notes recording who/why),
    then marks the suggestion; on rejection just marks it. In both cases the
    suggester is emailed the outcome. Returns an error response if source
    creation failed (status is downgraded to "pending" in that case).
    """
    user_mc = user_admin_mediacloud_client()
    suggestion = _media_suggestion(user_mc, suggestion_id)
    if suggestion is None:
        return json_error_response("Unknown suggestion id {}".format(suggestion_id))
    status = request.form['status']
    reason = request.form['reason']
    results = None
    email_note = ""
    if status == "approved":
        # if approved, we have to create it
        flattend_tags = [t['tags_id'] for t in suggestion['tags_ids']]
        # NOTE(review): this format string has 6 placeholders but 7 arguments,
        # and "approved by {} on because {}" reads like it is missing a date
        # placeholder — str.format silently ignores extras; verify the intended
        # wording before changing it
        media_source_to_create = {
            'url': suggestion['url'],
            'name': suggestion['name'],
            'feeds': [suggestion['feed_url']],
            'tags_ids': flattend_tags,
            'editor_notes': 'Suggested approved by {} on because {}. Suggested by {} on {} because {}'
                            '(id #{}).'.format(user_name(), datetime.now().strftime("%I:%M%p on %B %d, %Y"),
                                               reason, suggestion['email'], suggestion['date_submitted'],
                                               suggestion['reason'], suggestion['media_suggestions_id'])
        }
        creation_results = user_mc.mediaCreate([media_source_to_create])[0]
        if creation_results['status'] == 'error':
            status = "pending"  # so the email update looks good.
            email_note = creation_results['error']+". "
        else:
            email_note = "This source is "+str(creation_results['status'])+". "
            results = user_mc.mediaSuggestionsMark(suggestion_id, status, reason,
                                                   creation_results['media_id'])
    else:
        # if rejected just mark it as such
        results = user_mc.mediaSuggestionsMark(suggestion_id, status, reason)
    # send an email to the person that suggested it
    url = suggestion['url']
    email_title = "Source Suggestion {}: {}".format(status, url)
    content_title = "We {} {}".format(status, url)
    content_body = "Thanks for the suggestion. \n{}{}".format(email_note, reason)
    action_text = "Login to Media Cloud"
    action_url = "https://sources.mediacloud.org/#/login"
    # send an email confirmation (plain-text and html bodies from templates)
    send_html_email(email_title,
                    [user_name(), '*****@*****.**'],
                    render_template("emails/generic.txt",
                                    content_title=content_title, content_body=content_body,
                                    action_text=action_text, action_url=action_url),
                    render_template("emails/generic.html",
                                    email_title=email_title, content_title=content_title,
                                    content_body=content_body, action_text=action_text,
                                    action_url=action_url)
                    )
    # and return that it worked or not; "pending" here means creation failed above
    if status == "pending":
        return json_error_response(email_note)
    return jsonify(results)
def _cached_sentence_list(mc_api_key, q, fq, rows, include_stories=True):
    # need to get an admin client with the tool key so they have sentence read permissions
    tool_mc = user_admin_mediacloud_client(mc_api_key)
    sentences = tool_mc.sentenceList(q, fq)[:rows]
    if include_stories:
        story_ids = [str(s['stories_id']) for s in sentences]
        if story_ids:
            # fastest way to fetch the stories: one query over the whole id list
            id_query = "stories_id:({})".format(" ".join(story_ids))
            stories = user_mediacloud_client().storyList(id_query)
            stories_by_id = {s['stories_id']: s for s in stories}  # quick lookup table
            for s in sentences:
                s['story'] = stories_by_id[s['stories_id']]
    return sentences
def api_media_source_details(media_id):
    """Assemble a source's details: health, scrape status (editors only), favorite flags."""
    health = _cached_media_source_health(user_mediacloud_key(), media_id)
    info = _media_source_details(media_id)
    info['health'] = health
    # scrape status matters for editors so they know whether a scrape is running
    if user_has_auth_role(ROLE_MEDIA_EDIT):
        info['scrape_status'] = user_admin_mediacloud_client().feedsScrapeStatus(media_id)
    else:
        info['scrape_status'] = None
    add_user_favorite_flag_to_sources([info])
    add_user_favorite_flag_to_collections(info['media_source_tags'])
    # record the page view for analytics
    analytics_db.increment_count(analytics_db.TYPE_MEDIA, media_id,
                                 analytics_db.ACTION_SOURCE_MGR_VIEW)
    return jsonify(info)
def api_topics_preview_split_story_count():
    """Return story counts over time (plus a total) for a proposed topic's seed query.

    Reads 'q', optional 'sources[]'/'collections[]' comma-separated id lists,
    and the 'start_date'/'end_date' range from the POSTed form.
    """
    user_mc = user_admin_mediacloud_client()
    # FIX: a misplaced ')' previously closed concatenate_query_for_solr before
    # the collections[] conditional, turning `fq = ...` into a syntax error;
    # the conditional now lives inside the call where it belongs
    solr_query = concatenate_query_for_solr(
        solr_seed_query=request.form['q'],
        media_ids=ids_from_comma_separated_str(request.form['sources[]'])
        if 'sources[]' in request.form else None,
        tags_ids=ids_from_comma_separated_str(request.form['collections[]'])
        if 'collections[]' in request.form else None)
    fq = concatenate_solr_dates(start_date=request.form['start_date'],
                                end_date=request.form['end_date'])
    results = user_mc.storyCount(solr_query=solr_query, solr_filter=fq, split=True)
    # sum the split buckets so the client can show one overall number
    results['total_story_count'] = sum(c['count'] for c in results['counts'])
    return jsonify({'results': results})
def api_system_user_update(user_id):
    """Apply admin edits to a system user account."""
    # needed to put this behind an endpoint so browser doesn't cache it
    user_mc = user_admin_mediacloud_client()
    form = request.form
    valid_params = {
        'email': form['email'],
        'full_name': form['full_name'],
        'notes': form.get('notes'),  # optional
        'roles': form['roles[]'].split(',') if 'roles[]' in form else None,
        'active': form.get('active') == 'true',  # defaults to False when absent
        'max_topic_stories': form.get('max_topic_stories'),
        'weekly_requests_limit': form.get('weekly_requests_limit'),
    }
    return jsonify(user_mc.userUpdate(user_id, **valid_params))
def topic_story_update(stories_id):
    """Update editable fields on a story; optional form fields fall back to defaults."""
    user_mc = user_admin_mediacloud_client()
    form = request.form
    optional_args = {
        'title': form.get('title'),
        'description': form.get('description', ''),
        'guid': form.get('guid', 'guid'),
        'url': form.get('url', 'url'),
        'language': form.get('language', 'en'),
        'publish_date': form.get('publish_date'),
        # 'custom_date': form.get('custom_date', False),
        # FIX: the guard previously tested `'active' in request.form`, so the
        # undateable flag was silently dropped whenever 'active' wasn't posted
        'undateable': form.get('undateable') == 'true',
    }
    stories = user_mc.storyUpdate(stories_id, **optional_args)
    return jsonify(stories)
def source_update(media_id):
    """Update a media source's basic info, collection memberships, and metadata tags.

    Three sequential phases against the API: (1) mediaUpdate for name/url/notes,
    (2) tagMedia to add/remove collection tags, (3) tagMedia per metadata tag set.
    Returns the result of phase (1) only; later failures are not surfaced.
    """
    user_mc = user_admin_mediacloud_client()
    # update the basic info
    name = request.form['name']
    url = request.form['url']
    editor_notes = request.form['editor_notes'] if 'editor_notes' in request.form else None  # this is optional
    public_notes = request.form['public_notes'] if 'public_notes' in request.form else None  # this is optional
    monitored = request.form['monitored'] if 'monitored' in request.form else None
    result = user_mc.mediaUpdate(media_id, {'url': url, 'name': name, 'editor_notes': editor_notes,
                                            'is_monitored': monitored, 'public_notes': public_notes})
    # now we need to update the collections separately, because they are tags on the media source
    source = user_mc.media(media_id)
    existing_tag_ids = [t['tags_id'] for t in source['media_source_tags']
                        if (t['tag_sets_id'] in VALID_COLLECTION_TAG_SETS_IDS)]
    tag_ids_to_add = tag_ids_from_collections_param()
    # anything currently tagged but not requested gets removed
    tag_ids_to_remove = list(set(existing_tag_ids) - set(tag_ids_to_add))
    tags_to_add = [MediaTag(media_id, tags_id=cid, action=TAG_ACTION_ADD)
                   for cid in tag_ids_to_add if cid not in existing_tag_ids]
    tags_to_remove = [MediaTag(media_id, tags_id=cid, action=TAG_ACTION_REMOVE)
                      for cid in tag_ids_to_remove]
    tags = tags_to_add + tags_to_remove
    if len(tags) > 0:  # don't make extraneous calls
        user_mc.tagMedia(tags=tags)
    # now update the metadata too: one form key per metadata tag set
    valid_metadata = [
        {'form_key': 'publicationCountry', 'tag_sets_id': TAG_SETS_ID_PUBLICATION_COUNTRY},
        {'form_key': 'publicationState', 'tag_sets_id': TAG_SETS_ID_PUBLICATION_STATE},
        {'form_key': 'primaryLanguage', 'tag_sets_id': TAG_SETS_ID_PRIMARY_LANGUAGE},
        {'form_key': 'countryOfFocus', 'tag_sets_id': TAG_SETS_ID_COUNTRY_OF_FOCUS},
        {'form_key': 'mediaType', 'tag_sets_id': TAG_SETS_ID_MEDIA_TYPE}
    ]
    for metadata_item in valid_metadata:
        metadata_tag_id = request.form[metadata_item['form_key']] if metadata_item['form_key'] in request.form else None  # this is optional
        # NOTE(review): unlike the collections pass above, this is a list of tag
        # *dicts*, not tags_id values — so the `not in existing_tag_ids` test
        # below always succeeds (id vs dict); clear_others=True likely masks
        # the redundant re-add, but confirm intent before relying on it
        existing_tag_ids = [t for t in source['media_source_tags'] if is_metadata_tag_set(t['tag_sets_id'])]
        # form field check
        if metadata_tag_id in [None, '', 'null', 'undefined']:
            # we want to remove it if there was one there
            if len(existing_tag_ids) > 0:
                for remove_if_empty in existing_tag_ids:
                    if metadata_item['tag_sets_id'] == remove_if_empty['tag_sets_id']:
                        tag = MediaTag(media_id, tags_id=remove_if_empty['tags_id'],
                                       action=TAG_ACTION_REMOVE)
                        user_mc.tagMedia([tag])
        elif metadata_tag_id not in existing_tag_ids:
            # need to add it and clear out the other
            tag = MediaTag(media_id, tags_id=metadata_tag_id, action=TAG_ACTION_ADD)
            user_mc.tagMedia([tag], clear_others=True)
    # result the success of the media update call - would be better to catch errors in any of these calls...
    return jsonify(result)
def _cached_topic_sentence_sample(user_mc_key, topics_id, sample_size=1000, **kwargs):
    """Internal helper - don't call this; call topic_sentence_sample instead.

    This needs user_mc_key in the function signature to make sure the caching is
    keyed correctly. It includes topics_id in the method signature to make sure
    caching works reasonably.
    """
    # tool key -> shared tool client; otherwise an admin client (important for sentence access)
    local_mc = mc if user_mc_key == TOOL_API_KEY else user_admin_mediacloud_client()
    timespan_filter = "timespans_id:{}".format(kwargs['timespans_id'])
    return local_mc.sentenceList(kwargs['q'], timespan_filter,
                                 rows=sample_size, sort=local_mc.SORT_RANDOM)
def api_explorer_collections_by_ids():
    """Look up collection (tag) details for the ids in the 'collections[]' query arg."""
    client_mc = user_admin_mediacloud_client()
    raw_ids = request.args['collections[]']
    try:
        if int(raw_ids) == -1:
            # sentinel value meaning the special "all media" pseudo-collection
            return jsonify([{'id': ALL_MEDIA, 'label': "All Media", 'tag_sets_id': ALL_MEDIA}])
        collection_ids = raw_ids.split(',')
    except ValueError:  # ie. request.args['collections[]'] is not an int
        collection_ids = raw_ids.split(',')
    collection_list = []
    for tags_id in collection_ids:
        details = client_mc.tag(tags_id)
        details['id'] = int(tags_id)
        collection_list.append(details)
    return jsonify({"results": collection_list})
def map_files_download_custom(topics_id):
    """Stream a custom GEXF media link-map for a topic as a file download."""
    user_mc = user_admin_mediacloud_client()
    args = request.args
    optional_args = {
        'timespans_id': args.get('timespanId'),
        # FIX: these guards previously checked the snake_case keys
        # ('snapshots_id'/'foci_id') while reading the camelCase ones
        # ('snapshotId'/'fociId'), so the values were always dropped — or
        # raised a KeyError if only the snake_case key was present
        'snapshots_id': args.get('snapshotId'),
        'foci_id': args.get('fociId'),
        'color_field': args.get('color_field', 'media_type'),
        'num_media': args.get('num_media', 500),  # this is optional
        'include_weights': args.get('include_weights', 1),
        'num_links_per_medium': args.get('num_links_per_medium'),
    }
    filename = "link-map-" + topics_id + "-" + args['timespanId'] + "." + "gexf"
    result_stream = user_mc.topicMediaMap(topics_id, **optional_args)
    return flask.Response(result_stream, mimetype="attachment/octet-stream",
                          headers={"Content-Disposition": "attachment;filename=" + filename})
def cached_geotag_count(user_mc_key, query):
    """Count stories per country geotag, annotated with alpha3/alpha2 codes and pct of total."""
    user_mc = user_admin_mediacloud_client()
    tagged = user_mc.storyTagCount(query, [QUERY_LAST_MONTH, QUERY_ENGLISH_LANGUAGE],
                                   tag_sets_id=tag_utl.GEO_TAG_SET)
    full_count = apicache.timeperiod_story_count(user_mc, query, QUERY_LAST_MONTH)['count']
    known_geonames = set(COUNTRY_GEONAMES_ID_TO_APLHA3.keys())
    # only include countries we can map to an alpha3 code
    res = [r for r in tagged if int(r['tag'].split('_')[1]) in known_geonames]
    for r in res:
        geonames_id = int(r['tag'].split('_')[1])
        r['geonamesId'] = geonames_id
        r['alpha3'] = COUNTRY_GEONAMES_ID_TO_APLHA3[geonames_id]
        r['pct'] = float(r['count']) / float(full_count)
        r['value'] = float(r['count'])
        # attach the two-letter code the highcharts world map needs
        for hq in HIGHCHARTS_KEYS:
            if hq['properties']['iso-a3'] == r['alpha3']:
                r['iso-a2'] = hq['properties']['iso-a2']
    return res
def remove_sources_from_collection(collection_id):
    """Remove the posted 'sources[]' ids from a collection, re-tagging the rest to keep them."""
    user_mc = user_admin_mediacloud_client()
    source_ids_to_remove = [int(s) for s in request.form['sources[]'].split(',')]
    # get the sources in the collection first, then remove and add as needed
    existing_source_ids = [int(m['media_id'])
                           for m in media_with_tag(user_mediacloud_key(), collection_id)]
    source_ids_to_remain = list(set(existing_source_ids) - set(source_ids_to_remove))
    media_to_remove = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE)
                       for sid in source_ids_to_remove]
    media_to_remain = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD)
                       for sid in source_ids_to_remain]
    current_media = media_to_remove + media_to_remain
    # FIX: 'results' was previously unbound when there was nothing to tag,
    # which would raise a NameError at the jsonify call below
    results = []
    if len(current_media) > 0:
        results = user_mc.tagMedia(current_media)
        apicache.invalidate_collection_source_representation_cache(user_mediacloud_key(),
                                                                   collection_id)
    return jsonify(results)
def _media_list_edit_worker(media_id):
    """Gather per-source edit info: latest scrape job, active feed count, last-year story count."""
    user_mc = user_admin_mediacloud_client()
    # latest scrape job, if any have been run
    scrape_jobs = user_mc.feedsScrapeStatus(media_id)
    job_states = scrape_jobs['job_states']
    latest_scrape_job = job_states[0] if len(job_states) > 0 else None
    # count just the active syndicated feeds
    feeds = source_feed_list(media_id)
    active_feed_count = sum(1 for f in feeds if f['active'] and f['type'] == 'syndicated')
    query = "media_id:{}".format(media_id)
    story_count = apicache.timeperiod_story_count(user_mc, query, QUERY_LAST_YEAR)['count']
    return {
        'media_id': media_id,
        'latest_scrape_job': latest_scrape_job,
        'active_feed_count': active_feed_count,
        'num_stories_last_year': story_count,
    }
def get_topic_info_per_snapshot_timespan(topic_id):
    """Find the latest completed, searchable snapshot for a topic and its 'overall' timespan.

    Returns {'snapshot': ..., 'timespan': ...}; both are empty dicts when the
    topic has no completed searchable snapshot yet.
    """
    local_mc = user_admin_mediacloud_client() if is_user_logged_in() else mc
    snapshot_list = local_mc.topicSnapshotList(topic_id)
    most_recent_running_snapshot = {}
    for snp in snapshot_list:
        if snp['searchable'] == 1 and snp['state'] == "completed":
            most_recent_running_snapshot = snp
    overall_timespan = {}
    # FIX: guard against topics with no completed snapshot, which previously
    # raised a KeyError looking up 'snapshots_id' on the empty dict
    if 'snapshots_id' in most_recent_running_snapshot:
        timespans = cached_topic_timespan_list(user_mediacloud_key(), topic_id,
                                               most_recent_running_snapshot['snapshots_id'])
        for ts in timespans:
            if ts['period'] == "overall":
                overall_timespan = ts
    return {'snapshot': most_recent_running_snapshot, 'timespan': overall_timespan}
def source_suggest():
    """Record a user's suggestion for a new media source and email them an acknowledgement."""
    user_mc = user_admin_mediacloud_client()
    form = request.form
    url = form['url']
    feed_url = form.get('feedurl')   # optional
    name = form.get('name')          # optional
    reason = form.get('reason')      # optional
    tag_ids_to_add = tag_ids_from_collections_param()
    new_suggestion = user_mc.mediaSuggest(url=url, name=name, feed_url=feed_url,
                                          reason=reason, tags_ids=tag_ids_to_add)
    # send an email confirmation (text + html bodies share the same args)
    email_title = "Thanks for Suggesting " + url
    ack_args = dict(username=user_name(), name=name, url=url,
                    feed_url=feed_url, reason=reason)
    send_html_email(email_title,
                    [user_name(), '*****@*****.**'],
                    render_template("emails/source_suggestion_ack.txt", **ack_args),
                    render_template("emails/source_suggestion_ack.html", **ack_args)
                    )
    # and return that it worked
    return jsonify(new_suggestion)
def topic_update_permission(topics_id):
    """Sync a topic's permissions to the posted set: revoke missing emails, then apply updates."""
    user_mc = user_admin_mediacloud_client()
    new_permissions = json.loads(request.form["permissions"])
    current_permissions = user_mc.topicPermissionsList(topics_id)['permissions']
    # first remove any people that you need to
    wanted_emails = {p['email'] for p in new_permissions}
    for existing in current_permissions:
        if existing['email'] not in wanted_emails:
            user_mc.topicPermissionsUpdate(topics_id, existing['email'], 'none')
    # now update the remaining permissions
    for permission in new_permissions:
        if permission['permission'] not in ['read', 'write', 'admin', 'none']:
            return json_error_response('Invalid permission value')
        try:
            user_mc.topicPermissionsUpdate(topics_id, permission['email'],
                                           permission['permission'])
        except MCException as e:
            # show a nice error if they type the email wrong
            if 'Unknown email' in e.message:
                return jsonify({'success': 0, 'results': e.message})
    return jsonify({'success': 1, 'results': user_mc.topicPermissionsList(topics_id)})