def add_retweet_partisanship_to_topic(topics_id, focal_set_name, focal_set_description):
    """Add a boolean-query subtopic set to a topic, one focus per 2016
    retweet-partisanship quintile tag.

    Returns {'success': True}, or a JSON error response if the set or any
    focus could not be created.
    """
    user_mc = user_mediacloud_client()
    new_focal_set = user_mc.topicFocalSetDefinitionCreate(
        topics_id, focal_set_name, focal_set_description, FOCAL_TECHNIQUE_BOOLEAN_QUERY)
    if 'focal_set_definitions_id' not in new_focal_set:
        return json_error_response('Unable to create the subtopic set')
    # now make the foci in it - one for each partisanship quintile
    for tag in _cached_media_tags(TAG_SETS_ID_RETWEET_PARTISANSHIP_2016):
        focus_name = tag['label']
        focus_description = "Media sources that were retweeted more often during the 2016 US election " \
                            "season by people on the {}".format(tag['label'])
        # create a new boolean query subtopic based on the tag sets
        new_focus = user_mc.topicFocusDefinitionCreate(
            topics_id, name=focus_name, description=focus_description, query=tag['query'],
            focal_set_definitions_id=new_focal_set['focal_set_definitions_id'])
        if (len(new_focus) == 0) or ('focus_definitions_id' not in new_focus[0]):
            return json_error_response('Unable to create the {} subtopic'.format(focus_name))
    return {'success': True}
def topic_create():
    """Create a new topic from posted form fields and return its summary.

    Optional form fields (missing or literal 'null' means "unset"):
    max_iterations, max_stories (the latter defaults to the user's limit).
    Errors are logged and returned as JSON error responses.
    """
    user_mc = user_mediacloud_client()
    form = request.form
    name = form['name']
    description = form['description']
    solr_seed_query = form['solr_seed_query']
    start_date = form['start_date']
    end_date = form['end_date']
    if 'max_iterations' in form and form['max_iterations'] != 'null':
        max_iterations = form['max_iterations']
    else:
        max_iterations = None
    if 'max_stories' in form and form['max_stories'] != 'null':
        max_stories = form['max_stories']
    else:
        max_stories = flask_login.current_user.profile['limits']['max_topic_stories']
    try:
        topic_result = user_mc.topicCreate(
            name=name, description=description, solr_seed_query=solr_seed_query,
            start_date=start_date, end_date=end_date,
            media_tags_ids=[COLLECTION_US_TOP_ONLINE],  # HACK: can't save without one of these in place (for now)
            max_iterations=max_iterations,
            max_stories=max_stories,
        )['topics'][0]
        topics_id = topic_result['topics_id']
        logger.info("Created new topic \"{}\" as {}".format(name, topics_id))
        # client will either make a empty snapshot, or a spidering one
        return topic_summary(topics_id)
    except mediacloud.error.MCException as e:
        logging.error("Topic creation failed {}".format(name))
        logging.exception(e)
        return json_error_response(e.message, e.status_code)
    except Exception as e:
        logging.error("Topic creation failed {}".format(name))
        logging.exception(e)
        return json_error_response(str(e), 500)
def topic_create():
    """Create a new topic from posted form fields, seeding it with any posted
    sources/collections, and add the retweet-partisanship subtopic set when a
    US-centric collection is included.

    Returns the new topic's summary, or a JSON error response on failure.
    """
    user_mc = user_mediacloud_client()
    name = request.form['name']
    description = request.form['description']
    solr_seed_query = request.form['solr_seed_query']
    start_date = request.form['start_date']
    end_date = request.form['end_date']
    # optional fields: missing or literal 'null' means "unset"
    optional_args = {
        'is_public': request.form['is_public'] if 'is_public' in request.form else None,
        'is_logogram': request.form['is_logogram'] if 'is_logogram' in request.form else None,
        'ch_monitor_id': request.form['ch_monitor_id'] if len(request.form['ch_monitor_id']) > 0 and request.form['ch_monitor_id'] != 'null' else None,
        'max_iterations': request.form['max_iterations'] if 'max_iterations' in request.form else None,
        'max_stories': request.form['max_stories'] if 'max_stories' in request.form and request.form['max_stories'] != 'null' else flask_login.current_user.profile['max_topic_stories'],
    }
    # parse out any sources and collections to add
    media_ids_to_add = ids_from_comma_separated_str(request.form['sources[]'])
    tag_ids_to_add = ids_from_comma_separated_str(request.form['collections[]'])
    try:
        topic_result = user_mc.topicCreate(name=name, description=description,
                                           solr_seed_query=solr_seed_query,
                                           start_date=start_date, end_date=end_date,
                                           media_ids=media_ids_to_add,
                                           media_tags_ids=tag_ids_to_add,
                                           **optional_args)['topics'][0]
        topics_id = topic_result['topics_id']
        logger.info("Created new topic \"{}\" as {}".format(name, topics_id))
        # if this includes any of the US-centric collections, add the retweet partisanship subtopic by default
        if set(tag_ids_to_add).intersection(US_COLLECTIONS):
            add_retweet_partisanship_to_topic(
                topic_result['topics_id'],
                'Retweet Partisanship',
                'Subtopics driven by our analysis of Twitter followers of Trump and Clinton during the 2016 election season. Each media soure is scored based on the ratio of retweets of their stories in those two groups.')
        # client will either make a empty snapshot, or a spidering one
        return topic_summary(topics_id)
    # BUG FIX: the MCException handler must come before the generic Exception
    # handler - the original order made it unreachable, so MC errors hit the
    # generic branch and lost their message/status code.
    except mediacloud.error.MCException as e:
        logging.error("Topic creation failed {}".format(name))
        logging.exception(e)
        return json_error_response(e.message, e.status_code)
    except Exception as e:
        logging.error("Topic creation failed {}".format(name))
        logging.exception(e)
        return json_error_response(str(e), 500)
def upload_file():
    """Handle an uploaded CSV of media sources: save it, parse it, then
    create/update each source, optionally emailing a summary of the batch.

    Returns a JSON payload with per-source results, or an error payload when
    the file is invalid or there are more than 300 sources to create.
    """
    time_start = time.time()
    # grab and verify the file
    if 'file' not in request.files:
        return json_error_response('No file part')
    uploaded_file = request.files['file']
    if uploaded_file.filename == '':
        return json_error_response('No selected file')
    if not (uploaded_file and allowed_file(uploaded_file.filename)):
        return json_error_response('Invalid file')
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(uploaded_file.filename))
    # have to save b/c otherwise we can't locate the file path (security restriction)... can delete afterwards
    uploaded_file.save(filepath)
    time_file_saved = time.time()
    # parse all the source data out of the file
    sources_to_update, sources_to_create = _parse_sources_from_csv_upload(filepath)
    all_results = []
    all_errors = []
    if len(sources_to_create) > 300:
        return jsonify({'status': 'Error', 'message': 'Too many sources to upload. The limit is 300.'})
    audit = []
    if len(sources_to_create) > 0:
        audit_results, successful, errors = _create_or_update_sources(sources_to_create, True)
        all_results += successful
        audit += audit_results
        all_errors += errors
    if len(sources_to_update) > 0:
        audit_results, successful, errors = _create_or_update_sources(sources_to_update, False)
        all_results += successful
        audit += audit_results
        all_errors += errors
    if settings.has_option('smtp', 'enabled'):
        mail_enabled = settings.get('smtp', 'enabled')
        # BUG FIX: was `mail_enabled is '1'` - identity comparison against a
        # string literal is implementation-dependent; use equality.
        if mail_enabled == '1':
            _email_batch_source_update_results(audit)
    for media in all_results:
        if 'media_id' in media:
            media['media_id'] = int(media['media_id'])  # make sure they are ints so no-dupes logic works on front end
    time_end = time.time()
    logger.debug("upload_file: {}".format(time_end - time_start))
    logger.debug(" save file: {}".format(time_file_saved - time_start))
    logger.debug(" processing: {}".format(time_end - time_file_saved))
    return jsonify({'results': all_results})
def topic_focal_set_sentences_compare(topics_id, focal_sets_id):
    """Attach sentence counts to every focus in a focal set, using the
    timespan that matches the currently-selected filters, and return the set.
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    all_focal_sets = topic_focal_sets(user_mediacloud_key(), topics_id, snapshots_id)
    # need the timespan info, to find the appropriate timespan with each focus
    base_snapshot_timespans = cached_topic_timespan_list(user_mediacloud_key(), topics_id,
                                                         snapshots_id=snapshots_id)
    # if they have a focus selected, we need to find the appropriate overall timespan
    # BUG FIX: initialize base_timespan before both branches - previously, if a
    # focus was selected but no timespan matched, the `is None` check below
    # raised UnboundLocalError instead of returning a clean error.
    base_timespan = None
    if foci_id is not None:
        timespan = topic_timespan(topics_id, snapshots_id, foci_id, timespans_id)
        for t in base_snapshot_timespans:
            if timespans_match(timespan, t):
                base_timespan = t
    else:
        for t in base_snapshot_timespans:
            if t['timespans_id'] == int(timespans_id):
                base_timespan = t
                logger.info('base timespan = %s', timespans_id)
    if base_timespan is None:
        return json_error_response("Couldn't find the timespan you specified")
    # iterate through to find the one of interest
    focal_set = None
    for fs in all_focal_sets:
        if int(fs['focal_sets_id']) == int(focal_sets_id):
            focal_set = fs
    if focal_set is None:
        return json_error_response('Invalid Focal Set Id')
    # collect the sentence counts for each foci
    for focus in focal_set['foci']:
        # find the matching timespan within this focus
        snapshot_timespans = cached_topic_timespan_list(user_mediacloud_key(), topics_id,
                                                        snapshots_id=snapshots_id,
                                                        foci_id=focus['foci_id'])
        timespan = None
        for t in snapshot_timespans:
            if timespans_match(t, base_timespan):
                timespan = t
                logger.info('matching in focus %s, timespan = %s', focus['foci_id'], t['timespans_id'])
        if timespan is None:
            # BUG FIX: focus is a dict, so `focus.name` raised AttributeError;
            # use the 'name' key instead.
            return json_error_response('Couldn\'t find a matching timespan in the ' + focus['name'] + ' focus')
        data = topic_sentence_counts(user_mediacloud_key(), topics_id,
                                     snapshots_id=snapshots_id,
                                     timespans_id=timespan['timespans_id'],
                                     foci_id=focus['foci_id'])
        focus['sentence_counts'] = data
    return jsonify(focal_set)
def source_suggestion_update(suggestion_id):
    """Approve or reject a media-source suggestion, creating the source when
    approved, then email the suggester with the outcome.
    """
    user_mc = user_admin_mediacloud_client()
    suggestion = _media_suggestion(user_mc, suggestion_id)
    if suggestion is None:
        return json_error_response("Unknown suggestion id {}".format(suggestion_id))
    status = request.form['status']
    reason = request.form['reason']
    results = None
    email_note = ""
    if status == "approved":
        # if approved, we have to create it
        flattend_tags = [t['tags_id'] for t in suggestion['tags_ids']]
        media_source_to_create = {
            'url': suggestion['url'],
            'name': suggestion['name'],
            'feeds': [suggestion['feed_url']],
            'tags_ids': flattend_tags,
            # BUG FIX: the template had 6 placeholders for 7 arguments
            # ("...by {} on because {}..."), so every value after the first
            # filled the wrong slot and the suggestion id was dropped; add the
            # missing "{}" after the first "on".
            'editor_notes': 'Suggested approved by {} on {} because {}. Suggested by {} on {} because {}'
                            '(id #{}).'.format(user_name(),
                                               datetime.now().strftime("%I:%M%p on %B %d, %Y"),
                                               reason,
                                               suggestion['email'],
                                               suggestion['date_submitted'],
                                               suggestion['reason'],
                                               suggestion['media_suggestions_id'])
        }
        creation_results = user_mc.mediaCreate([media_source_to_create])[0]
        if creation_results['status'] == 'error':
            status = "pending"  # so the email update looks good.
            email_note = creation_results['error'] + ". "
        else:
            email_note = "This source is " + str(creation_results['status']) + ". "
            results = user_mc.mediaSuggestionsMark(suggestion_id, status, reason,
                                                   creation_results['media_id'])
    else:
        # if rejected just mark it as such
        results = user_mc.mediaSuggestionsMark(suggestion_id, status, reason)
    # send an email to the person that suggested it
    url = suggestion['url']
    email_title = "Source Suggestion {}: {}".format(status, url)
    content_title = "We {} {}".format(status, url)
    content_body = "Thanks for the suggestion. {}{}".format(email_note, reason)
    action_text = "Login to Media Cloud"
    action_url = "https://sources.mediacloud.org/#/login"
    # send an email confirmation
    send_html_email(email_title,
                    [user_name(), '*****@*****.**'],
                    render_template("emails/generic.txt",
                                    content_title=content_title, content_body=content_body,
                                    action_text=action_text, action_url=action_url),
                    render_template("emails/generic.html",
                                    email_title=email_title, content_title=content_title,
                                    content_body=content_body, action_text=action_text,
                                    action_url=action_url))
    # and return that it worked or not
    if status == "pending":
        return json_error_response(email_note)
    return jsonify(results)
def upload_file():
    """Handle an uploaded CSV of media sources: save it, parse it, then
    create/update each source and (when SMTP is configured) email a summary.

    Returns a JSON payload with per-source results and a status, or an error
    payload for invalid files, unparseable rows, or oversized batches.
    """
    time_start = time.time()
    # grab and verify the file
    if 'file' not in request.files:
        return json_error_response('No file part')
    uploaded_file = request.files['file']
    if uploaded_file.filename == '':
        return json_error_response('No selected file')
    if not (uploaded_file and allowed_file(uploaded_file.filename)):
        return json_error_response('Invalid file')
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], secure_filename(uploaded_file.filename))
    # have to save b/c otherwise we can't locate the file path (security restriction)... can delete afterwards
    uploaded_file.save(filepath)
    time_file_saved = time.time()
    # parse all the source data out of the file
    try:
        sources_to_update, sources_to_create = _parse_sources_from_csv_upload(filepath)
    except Exception as e:
        logger.error("Couldn't process a CSV row: " + str(e))
        return jsonify({'status': 'Error', 'message': str(e)})
    all_results = []
    all_errors = []
    if len(sources_to_create) > 300:
        return jsonify({'status': 'Error', 'message': 'Too many sources to upload. The limit is 300.'})
    audit = []
    if len(sources_to_create) > 0:
        audit_results, successful, errors = _create_or_update_sources(sources_to_create, True)
        all_results += successful
        audit += audit_results
        all_errors += errors
    if len(sources_to_update) > 0:
        audit_results, successful, errors = _create_or_update_sources(sources_to_update, False)
        all_results += successful
        audit += audit_results
        all_errors += errors
    try:
        mail_enabled = config.get('SMTP_ENABLED')
        if mail_enabled == u'1':
            _email_batch_source_update_results(audit)
    except ConfigException:
        logger.debug("Skipping collection file upload confirmation email")
    for media in all_results:
        if 'media_id' in media:
            media['media_id'] = int(media['media_id'])  # make sure they are ints so no-dupes logic works on front end
    time_end = time.time()
    logger.debug("upload_file: {}".format(time_end - time_start))
    logger.debug(" save file: {}".format(time_file_saved - time_start))
    logger.debug(" processing: {}".format(time_end - time_file_saved))
    return jsonify({'results': all_results, 'status': "Success"})
def source_suggestion_update(suggestion_id):
    """Approve or reject a media-source suggestion, creating the source when
    approved, then email the suggester with the outcome.
    """
    user_mc = user_admin_mediacloud_client()
    suggestion = _media_suggestion(user_mc, suggestion_id)
    if suggestion is None:
        return json_error_response("Unknown suggestion id {}".format(suggestion_id))
    status = request.form['status']
    reason = request.form['reason']
    results = None
    email_note = ""
    if status == "approved":
        # if approved, we have to create it
        flattend_tags = [t['tags_id'] for t in suggestion['tags_ids']]
        media_source_to_create = {
            'url': suggestion['url'],
            'name': suggestion['name'],
            'feeds': [suggestion['feed_url']],
            'tags_ids': flattend_tags,
            # BUG FIX: the template had 6 placeholders for 7 arguments
            # ("...by {} on because {}..."), so every value after the first
            # filled the wrong slot and the suggestion id was dropped; add the
            # missing "{}" after the first "on".
            'editor_notes': 'Suggested approved by {} on {} because {}. Suggested by {} on {} because {} (id #{}).'.format(
                user_name(),
                datetime.now().strftime("%I:%M%p on %B %d, %Y"),
                reason,
                suggestion['email'],
                suggestion['date_submitted'],
                suggestion['reason'],
                suggestion['media_suggestions_id'])
        }
        creation_results = user_mc.mediaCreate([media_source_to_create])[0]
        if creation_results['status'] == 'error':
            status = "pending"  # so the email update looks good.
            email_note = creation_results['error'] + ". "
        else:
            email_note = "This source is " + str(creation_results['status']) + ". "
            results = user_mc.mediaSuggestionsMark(suggestion_id, status, reason,
                                                   creation_results['media_id'])
    else:
        # if rejected just mark it as such
        results = user_mc.mediaSuggestionsMark(suggestion_id, status, reason)
    # send an email to the person that suggested it
    url = suggestion['url']
    email_title = "Source Suggestion {}: {}".format(status, url)
    content_title = "We {} {}".format(status, url)
    content_body = "Thanks for the suggestion. {}{}".format(email_note, reason)
    action_text = "Login to Media Cloud"
    action_url = "https://sources.mediacloud.org/#/login"
    # send an email confirmation
    send_html_email(email_title,
                    [user_name(), '*****@*****.**'],
                    render_template("emails/generic.txt",
                                    content_title=content_title, content_body=content_body,
                                    action_text=action_text, action_url=action_url),
                    render_template("emails/generic.html",
                                    email_title=email_title, content_title=content_title,
                                    content_body=content_body, action_text=action_text,
                                    action_url=action_url))
    # and return that it worked or not
    if status == "pending":
        return json_error_response(email_note)
    return jsonify(results)
def login_with_cookie():
    """Restore a login session from the cached (cookie-backed) user, if any.

    Returns the user's properties as JSON, or a 401 error for anonymous users.
    """
    cached_user = flask_login.current_user
    if cached_user.is_anonymous:
        # no user session
        logger.debug(" login failed (%s)", cached_user.is_anonymous)
        return json_error_response("Login failed", 401)
    session_user = _create_user_session(cached_user)
    return jsonify(session_user.get_properties())
def create_nyt_theme_focal_set(topics_id):
    """Create a boolean-query subtopic set on a topic, with one focus per
    posted NYT theme tag.

    Returns {'success': True}, or a JSON error response if the set could not
    be created.
    """
    user_mc = user_mediacloud_client()
    # grab the focalSetName and focalSetDescription and then make one
    focal_set_name = request.form['focalSetName']
    focal_set_description = request.form['focalSetDescription']
    theme_data = json.loads(request.form['data[]'])
    focal_technique = FOCAL_TECHNIQUE_BOOLEAN_QUERY  # is this right?
    new_focal_set = user_mc.topicFocalSetDefinitionCreate(topics_id, focal_set_name,
                                                          focal_set_description, focal_technique)
    if 'focal_set_definitions_id' not in new_focal_set:
        return json_error_response('Unable to create the subtopic set')
    # now make the foci in it - one for each theme
    # FIX: reuse the client created above rather than constructing a brand-new
    # API client on every loop iteration (the original re-called
    # user_mediacloud_client() inside the loop for no benefit).
    for tag in theme_data:
        params = {
            'name': tag['label'],
            'description': "Stories about {}".format(tag['label']),
            'query': "tags_id_stories:{}".format(tag['tags_id']),
            'focal_set_definitions_id': new_focal_set['focal_set_definitions_id'],
        }
        user_mc.topicFocusDefinitionCreate(topics_id, **params)
    return {'success': True}
def topic_update_permission(topics_id):
    """Replace a topic's permission list with the posted set.

    Users absent from the new list are removed, then each posted permission is
    applied. Returns the refreshed permission list, or an error payload.
    """
    user_mc = user_admin_mediacloud_client()
    new_permissions = json.loads(request.form["permissions"])
    # FIX: validate every requested permission up front - previously an invalid
    # value was only rejected mid-loop, after removals and some updates had
    # already been applied, leaving permissions half-changed.
    for permission in new_permissions:
        if permission['permission'] not in ['read', 'write', 'admin', 'none']:
            return json_error_response('Invalid permission value')
    current_permissions = user_mc.topicPermissionsList(topics_id)['permissions']
    # first remove any people that you need to
    new_emails = [p['email'] for p in new_permissions]
    current_emails = [p['email'] for p in current_permissions]
    for email in current_emails:
        if email not in new_emails:
            user_mc.topicPermissionsUpdate(topics_id, email, 'none')
    # now update the remaining permissions
    for permission in new_permissions:
        try:
            user_mc.topicPermissionsUpdate(topics_id, permission['email'].strip(),
                                           permission['permission'])
        except MCException as e:
            # show a nice error if they type the email wrong
            if 'Unknown email' in e.message:
                return jsonify({'success': 0, 'results': e.message})
    return jsonify({'success': 1, 'results': user_mc.topicPermissionsList(topics_id)})
def topic_compare_subtopic_top_words(topics_id):
    """Stream a CSV comparing top words across all foci in a focal set, within
    the timespan matching the currently-selected filters.
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    selected_focal_sets_id = request.args['focal_sets_id']
    # BUG FIX: query-string values are strings; `range(0, word_count)` raised a
    # TypeError whenever a word_count parameter was supplied - coerce to int.
    word_count = int(request.args['word_count']) if 'word_count' in request.args else 20
    # first we need to figure out which timespan they are working on
    selected_snapshot_timespans = apicache.cached_topic_timespan_list(
        user_mediacloud_key(), topics_id, snapshots_id=snapshots_id)
    selected_timespan = None
    for t in selected_snapshot_timespans:
        if t['timespans_id'] == int(timespans_id):
            selected_timespan = t
    try:
        focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id,
                                             snapshots_id, selected_focal_sets_id)
    except ValueError:
        return json_error_response('Invalid Focal Set Id')
    timespans = apicache.matching_timespans_in_foci(topics_id, selected_timespan, focal_set['foci'])
    for idx in range(0, len(timespans)):
        data = apicache.topic_word_counts(user_mediacloud_key(), topics_id,
                                          timespans_id=timespans[idx]['timespans_id'])
        focal_set['foci'][idx]['top_words'] = data
    # stitch together the counts to download now
    data = []
    headers = [f['name'] for f in focal_set['foci']]
    for idx in range(0, word_count):
        row = {f['name']: "{} ({})".format(f['top_words'][idx]['term'], f['top_words'][idx]['count'])
               for f in focal_set['foci']}
        data.append(row)
    return csv.stream_response(data, headers, 'topic-{}-subtopic-{}-{}-top-words-comparison'.format(
        topics_id, focal_set['name'], selected_focal_sets_id))
def change_password():
    """Change the current user's password via the back-end API.

    Translates known MC error messages into friendly JSON error responses.
    """
    user_mc = user_mediacloud_client()
    try:
        results = user_mc.authChangePassword(request.form['old_password'],
                                             request.form['new_password'])
    except MCException as e:
        logger.exception(e)
        if 'Unable to change password' in e.message:
            if 'Old password is incorrect' in e.message or 'Unable to log in with old password' in e.message:
                return json_error_response('Unable to change password - old password is incorrect')
            if 'not found or password is incorrect' in e.message:
                return json_error_response('Unable to change password - user not found or password is incorrect')
        # BUG FIX: previously an MCException whose message did not contain
        # 'Unable to change password' fell through and returned an empty `{}`
        # success payload; report all unrecognized errors as errors instead.
        return json_error_response('Unable to change password - see log for more details')
    return jsonify(results)
def api_user_delete():
    """Delete the current user's account from the front-end database and the
    back-end system, after verifying a typed email confirmation.
    """
    email = request.form['email']
    user = flask_login.current_user
    if email == user.name:  # double-check confirmation they typed in
        # delete them from the front-end system database
        user_db.delete_user(user.name)
        # delete them from the back-end system
        results = mc.userDelete(user.profile['auth_users_id'])  # need to do this with the tool's admin account
        try:
            # BUG FIX: was `results['success'] is 1` - identity comparison with
            # an int literal is implementation-dependent; use equality.
            if ('success' in results) and (results['success'] == 1):
                return logout()
            else:
                return json_error_response("We failed to delete your account, sorry!", 400)
        except MCException as mce:
            logger.exception(mce)
            return json_error_response("We failed to delete your account, sorry!", 400)
    else:
        return json_error_response("Your email confirmation didn't match.", 400)
def change_password():
    """Change the current user's password via the back-end API.

    Returns the API result as JSON, or a JSON error response on failure.
    """
    user_mc = user_mediacloud_client()
    try:
        results = user_mc.authChangePassword(request.form['old_password'],
                                             request.form['new_password'])
    except MCException as e:
        logger.exception(e)
        if 'Unable to change password: Old password is incorrect' in e.message:
            return json_error_response('Unable to change password: Old password is incorrect')
        # BUG FIX: previously any other MCException fell through to
        # `return jsonify(results)` with `results` unbound, raising an
        # UnboundLocalError; return a generic error instead.
        return json_error_response('Unable to change password - see log for more details')
    return jsonify(results)
def platform_generic_upload_csv(topics_id):
    """
    Handle an uploaded CSV file by saving it into a temp dir and returning the
    temp dir to the client. That filename will then be relayed back to the
    server to support preview operations.
    :param topics_id:
    :return:
    """
    if 'file' not in request.files:
        return json_error_response('No file uploaded')
    uploaded_file = request.files['file']
    if uploaded_file.filename == '':
        return json_error_response('No file found in uploads')
    if not (uploaded_file and allowed_file(uploaded_file.filename)):
        return json_error_response('Invalid file')
    # prefix with the topic id and a timestamp so filenames can't collide
    timestamp = dt.datetime.now().strftime("%Y%m%d%H%M%S")
    safe_name = secure_filename(uploaded_file.filename)
    filename = "{}-{}-{}".format(topics_id, timestamp, safe_name)
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    # have to save b/c otherwise we can't locate the file path (security restriction)... can delete afterwards
    uploaded_file.save(filepath)
    return jsonify({'status': 'Success', 'filename': filename})
def login_with_password():
    """Authenticate with the posted email/password and start a session.

    Returns the user's properties as JSON, or a 401 error on bad credentials.
    """
    username = request.form["email"]
    logger.debug("login request from %s", username)
    password = request.form["password"]
    # try to log them in
    results = mc.authLogin(username, password)
    if 'error' in results:
        return json_error_response(results['error'], 401)
    user = auth.create_and_cache_user(results['profile'])
    logger.debug(" succeeded - got a key (user.is_anonymous=%s)", user.is_anonymous)
    auth.login_user(user)
    return jsonify(user.get_properties())
def topic_create():
    """Create a new topic from posted form fields, kick off a spider on it,
    and return the new topic with the spider job state attached.
    """
    user_mc = user_admin_mediacloud_client()
    name = request.form['name']
    description = request.form['description']
    solr_seed_query = request.form['solr_seed_query']
    start_date = request.form['start_date']
    end_date = request.form['end_date']
    # optional fields: missing or literal 'null' means "unset"
    optional_args = {
        'is_public': request.form['is_public'] if 'is_public' in request.form else None,
        'is_logogram': request.form['is_logogram'] if 'is_logogram' in request.form else None,
        'ch_monitor_id': request.form['ch_monitor_id'] if len(request.form['ch_monitor_id']) > 0 and request.form['ch_monitor_id'] != 'null' else None,
        'max_iterations': request.form['max_iterations'] if 'max_iterations' in request.form else None,
        'max_stories': request.form['max_stories'] if 'max_stories' in request.form and request.form['max_stories'] != 'null' else None,
    }
    # parse out any sources and collections to add
    media_ids_to_add = ids_from_comma_separated_str(request.form['sources[]'])
    tag_ids_to_add = ids_from_comma_separated_str(request.form['collections[]'])
    try:
        topic_result = user_mc.topicCreate(name=name, description=description,
                                           solr_seed_query=solr_seed_query,
                                           start_date=start_date, end_date=end_date,
                                           media_ids=media_ids_to_add,
                                           media_tags_ids=tag_ids_to_add,
                                           **optional_args)['topics'][0]
        topic_id = topic_result['topics_id']
        logger.info("Created new topic \"{}\" as {}".format(name, topic_id))
        spider_job = user_mc.topicSpider(topic_id)  # kick off a spider, which will also generate a snapshot
        logger.info(" spider result = {}".format(json.dumps(spider_job)))
        results = user_mc.topic(topic_id)
        results['spider_job_state'] = spider_job
        return jsonify(results)  # give them back new data, so they can update the client
    except Exception as e:
        logging.error("Topic creation failed {}".format(name))
        logging.exception(e)
        # BUG FIX: only MCException carries .message/.status_code; a generic
        # exception previously raised AttributeError inside this handler,
        # masking the real error - fall back to str(e)/500 for other types.
        return json_error_response(getattr(e, 'message', str(e)), getattr(e, 'status_code', 500))
def merged_user_profile(user_results):
    """Merge an authLogin-style result's profile with the back-end user record
    and return a cached user object.

    Returns a JSON error response instead if the result carries an 'error' key.
    """
    if not isinstance(user_results, dict):
        user_results = user_results.get_properties()
    # BUG FIX: check for an error before dereferencing the profile and calling
    # the API - previously the 'error' check ran last, after the code had
    # already read user_results['profile'] and hit the userList endpoint.
    if 'error' in user_results:
        return json_error_response(user_results['error'], 401)
    user_email = user_results['profile']['email']
    user_results["user"] = mc.userList(search=user_email)['users'][0]
    merged_user_info = user_results['profile'].copy()  # start with the profile's keys and values
    merged_user_info.update(user_results["user"])
    user = auth.create_and_cache_user(merged_user_info)
    return user
def topic_update_permission(topics_id):
    """Set one user's permission on a topic from the posted email/permission pair.

    Returns {'success': 1} with the update result, or {'success': 0} with the
    back-end error message when the update fails.
    """
    email = request.form["email"]
    permission = request.form["permission"]
    if permission not in ['read', 'write', 'admin', 'none']:
        return json_error_response('Invalid permission value')
    user_mc = user_admin_mediacloud_client()
    try:
        results = user_mc.topicPermissionsUpdate(topics_id, email, permission)
    except MCException as e:
        # show a nice error if they type the email wrong
        # BUG FIX: previously only 'Unknown email' errors returned here; any
        # other MCException fell through to the success return below with
        # `results` unbound, raising UnboundLocalError.
        return jsonify({'success': 0, 'results': e.message})
    return jsonify({'success': 1, 'results': results})
def topic_focal_set_split_stories_compare(topics_id, focal_sets_id):
    """Attach story-split counts to every focus in a focal set, using the
    timespan that matches the currently-selected filters, and return the set.
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    # need the timespan info, to find the appropriate timespan with each focus
    base_snapshot_timespans = apicache.cached_topic_timespan_list(
        user_mediacloud_key(), topics_id, snapshots_id=snapshots_id)
    # if they have a focus selected, we need to find the appropriate overall timespan
    # BUG FIX: initialize base_timespan before both branches - previously, if a
    # focus was selected but no timespan matched, the `is None` check below
    # raised UnboundLocalError instead of returning a clean error.
    base_timespan = None
    if foci_id is not None:
        timespan = apicache.topic_timespan(topics_id, snapshots_id, foci_id, timespans_id)
        for t in base_snapshot_timespans:
            if apicache.is_timespans_match(timespan, t):
                base_timespan = t
    else:
        for t in base_snapshot_timespans:
            if t['timespans_id'] == int(timespans_id):
                base_timespan = t
                logger.info('base timespan = %s', timespans_id)
    if base_timespan is None:
        return json_error_response("Couldn't find the timespan you specified")
    # iterate through to find the one of interest
    focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id, snapshots_id, focal_sets_id)
    if focal_set is None:
        return json_error_response('Invalid Focal Set Id')
    # collect the story split counts for each foci
    timespans = apicache.matching_timespans_in_foci(topics_id, base_timespan, focal_set['foci'])
    for idx in range(0, len(timespans)):
        data = apicache.topic_split_story_counts(user_mediacloud_key(), topics_id,
                                                 snapshots_id=snapshots_id,
                                                 timespans_id=timespans[idx]['timespans_id'])
        focal_set['foci'][idx]['split_story_counts'] = data
    return jsonify(focal_set)
def topic_focal_set_split_stories_compare(topics_id, focal_sets_id):
    """Attach story-split counts to every focus in a focal set for the base
    snapshot timespan, and return the set.
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    # need the timespan info, to find the appropriate timespan with each focus
    try:
        base_timespan = base_snapshot_timespan(topics_id)
        focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id,
                                             snapshots_id, focal_sets_id)
    except ValueError as e:
        # BUG FIX: Python 3 exceptions have no .message attribute, so the
        # original `e.message` raised AttributeError here - use str(e).
        return json_error_response(str(e))
    # collect the story split counts for each foci
    timespans = apicache.matching_timespans_in_foci(topics_id, base_timespan, focal_set['foci'])
    for idx in range(0, len(timespans)):
        data = apicache.topic_split_story_counts(user_mediacloud_key(), topics_id,
                                                 snapshots_id=snapshots_id,
                                                 timespans_id=timespans[idx]['timespans_id'])
        focal_set['foci'][idx]['split_story_counts'] = data
    return jsonify(focal_set)
def _create_user_session(user_results):
    """Build and return a session user object from an authLogin-style result.

    Returns a JSON error response instead if the result carries an 'error' key.
    """
    if not isinstance(user_results, dict):
        user_results = user_results.get_properties()
    # BUG FIX: check for an error before touching the profile - previously the
    # 'error' check ran after the has_consented line, which would raise if an
    # error payload lacks a 'profile' key (TODO confirm error payload shape).
    if 'error' in user_results:
        return json_error_response(user_results['error'], 401)
    # HACK: the API used to return this as true/false, but now returns it as 1 or 0, so we change it to
    # boolean here so we don't have to change front-end JS logic
    user_results['profile']['has_consented'] = (user_results['profile']['has_consented'] == 1) or \
                                               (user_results['profile']['has_consented'] is True)
    merged_user_info = user_results['profile'].copy()  # start with the profile's keys and values
    user = auth.create_user(merged_user_info)
    return user
def _add_story_counts_to_foci(topics_id, focal_sets):
    """Attach a 'story_count' to each focus in each focal set, counted within
    the timespan matching the current filters, and return the sets as JSON.
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    # need the timespan info, to find the appropriate timespan with each focus
    try:
        base_timespan = base_snapshot_timespan(topics_id)
    except ValueError as e:
        # BUG FIX: Python 3 exceptions have no .message attribute, so the
        # original `e.message` raised AttributeError here - use str(e).
        return json_error_response(str(e))
    # now find the story count in each foci in this
    for fs in focal_sets:
        timespans = apicache.matching_timespans_in_foci(topics_id, base_timespan, fs['foci'])
        for idx in range(0, len(timespans)):
            timespan = timespans[idx]
            focus = fs['foci'][idx]
            foci_story_count = apicache.topic_story_count(user_mediacloud_key(), topics_id,
                                                          snapshots_id=snapshots_id,
                                                          timespans_id=timespan['timespans_id'],
                                                          q=q,
                                                          foci_id=focus['foci_id'])['count']
            focus['story_count'] = foci_story_count
    return jsonify(focal_sets)
def topic_update_permission(topics_id):
    """Replace a topic's permission list with the posted set.

    Users absent from the new list are removed, then each posted permission is
    applied. Returns the refreshed permission list, or an error payload.
    """
    user_mc = user_admin_mediacloud_client()
    new_permissions = json.loads(request.form["permissions"])
    # FIX: validate every requested permission up front - previously an invalid
    # value was only rejected mid-loop, after removals and some updates had
    # already been applied, leaving permissions half-changed.
    for permission in new_permissions:
        if permission['permission'] not in ['read', 'write', 'admin', 'none']:
            return json_error_response('Invalid permission value')
    current_permissions = user_mc.topicPermissionsList(topics_id)['permissions']
    # first remove any people that you need to
    new_emails = [p['email'] for p in new_permissions]
    current_emails = [p['email'] for p in current_permissions]
    for email in current_emails:
        if email not in new_emails:
            user_mc.topicPermissionsUpdate(topics_id, email, 'none')
    # now update the remaining permissions
    for permission in new_permissions:
        try:
            user_mc.topicPermissionsUpdate(topics_id, permission['email'],
                                           permission['permission'])
        except MCException as e:
            # show a nice error if they type the email wrong
            if 'Unknown email' in e.message:
                return jsonify({'success': 0, 'results': e.message})
    return jsonify({'success': 1, 'results': user_mc.topicPermissionsList(topics_id)})
def topic_create():
    """Create a new topic from posted form fields, optionally add the retweet
    partisanship focal set for US-centric collections, kick off a spider, and
    return the new topic with the spider job state attached.
    """
    user_mc = user_admin_mediacloud_client()
    name = request.form['name']
    description = request.form['description']
    solr_seed_query = request.form['solr_seed_query']
    start_date = request.form['start_date']
    end_date = request.form['end_date']
    # optional fields: missing or literal 'null' means "unset"
    optional_args = {
        'is_public': request.form['is_public'] if 'is_public' in request.form else None,
        'is_logogram': request.form['is_logogram'] if 'is_logogram' in request.form else None,
        'ch_monitor_id': request.form['ch_monitor_id'] if len(request.form['ch_monitor_id']) > 0 and request.form['ch_monitor_id'] != 'null' else None,
        'max_iterations': request.form['max_iterations'] if 'max_iterations' in request.form else None,
        'max_stories': request.form['max_stories'] if 'max_stories' in request.form and request.form['max_stories'] != 'null' else None,
    }
    # parse out any sources and collections to add
    media_ids_to_add = ids_from_comma_separated_str(request.form['sources[]'])
    tag_ids_to_add = ids_from_comma_separated_str(request.form['collections[]'])
    try:
        topic_result = user_mc.topicCreate(name=name, description=description,
                                           solr_seed_query=solr_seed_query,
                                           start_date=start_date, end_date=end_date,
                                           media_ids=media_ids_to_add,
                                           media_tags_ids=tag_ids_to_add,
                                           **optional_args)['topics'][0]
        topic_id = topic_result['topics_id']
        logger.info("Created new topic \"{}\" as {}".format(name, topic_id))
        if set(tag_ids_to_add).intersection(US_COLLECTIONS):
            create_retweet_partisanship_focal_set(topic_result['topics_id'])
        spider_job = user_mc.topicSpider(topic_id)  # kick off a spider, which will also generate a snapshot
        logger.info(" spider result = {}".format(json.dumps(spider_job)))
        results = user_mc.topic(topic_id)
        results['spider_job_state'] = spider_job
        return jsonify(results)  # give them back new data, so they can update the client
    except Exception as e:
        logging.error("Topic creation failed {}".format(name))
        logging.exception(e)
        # BUG FIX: only MCException carries .message/.status_code; a generic
        # exception previously raised AttributeError inside this handler,
        # masking the real error - fall back to str(e)/500 for other types.
        return json_error_response(getattr(e, 'message', str(e)), getattr(e, 'status_code', 500))
def topic_compare_subtopic_top_words(topics_id):
    """Stream a CSV comparing the top words across the foci of a focal set.

    Uses the standard snapshot/timespan/focus filters plus a required
    ``focal_sets_id`` query arg and an optional ``word_count`` (default 20).

    :param topics_id: id of the topic to compare subtopics within
    :return: a streamed CSV response, one column per focus, or an error
        payload if the focal set id is invalid
    """
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    selected_focal_sets_id = request.args['focal_sets_id']
    # BUGFIX: query args are strings; range() below needs an int
    word_count = int(request.args['word_count']) if 'word_count' in request.args else 20
    # first we need to figure out which timespan they are working on
    selected_snapshot_timespans = apicache.cached_topic_timespan_list(
        user_mediacloud_key(), topics_id, snapshots_id=snapshots_id)
    selected_timespan = None
    for t in selected_snapshot_timespans:
        if t['timespans_id'] == int(timespans_id):
            selected_timespan = t
    # NOTE(review): if no timespan matches, selected_timespan stays None and is
    # passed on to matching_timespans_in_foci — confirm that helper handles it
    try:
        focal_set = apicache.topic_focal_set(user_mediacloud_key(), topics_id, snapshots_id, selected_focal_sets_id)
    except ValueError:
        return json_error_response('Invalid Focal Set Id')
    timespans = apicache.matching_timespans_in_foci(topics_id, selected_timespan, focal_set['foci'])
    for idx in range(0, len(timespans)):
        data = apicache.topic_word_counts(
            user_mediacloud_key(), topics_id, timespans_id=timespans[idx]['timespans_id'])
        focal_set['foci'][idx]['top_words'] = data
    # stitch together the counts to download now
    data = []
    headers = [f['name'] for f in focal_set['foci']]
    for idx in range(0, word_count):
        row = {
            f['name']: "{} ({})".format(f['top_words'][idx]['term'], f['top_words'][idx]['count'])
            for f in focal_set['foci']
        }
        data.append(row)
    return csv.stream_response(
        data, headers, 'topic-{}-subtopic-{}-{}-top-words-comparison'.format(
            topics_id, focal_set['name'], selected_focal_sets_id))
def create_media_type_focal_set(topics_id):
    """Create a boolean-query focal set on a topic with one focus per media type.

    Reads ``focalSetName`` and ``focalSetDescription`` from the form, creates
    the focal set definition, then adds one focus definition for each tag in
    the media-type tag set.

    :param topics_id: id of the topic to add the focal set to
    :return: ``{'success': True}`` or an error payload if set creation fails
    """
    user_mc = user_mediacloud_client()
    # grab the focalSetName and focalSetDescription and then make one
    set_name = request.form['focalSetName']
    set_description = request.form['focalSetDescription']
    type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    technique = FOCAL_TECHNIQUE_BOOLEAN_QUERY
    created_set = user_mc.topicFocalSetDefinitionCreate(topics_id, set_name, set_description, technique)
    if 'focal_set_definitions_id' not in created_set:
        return json_error_response('Unable to create the subtopic set')
    set_def_id = created_set['focal_set_definitions_id']
    # now make the foci in it - one for each media type
    focus_def_results = [
        user_mc.topicFocusDefinitionCreate(
            topics_id,
            name=tag['label'],
            description="Stories from {} sources".format(tag['label']),
            query="tags_id_media:{}".format(tag['tags_id']),
            focal_set_definitions_id=set_def_id,
        )
        for tag in type_tags
    ]
    return {'success': True}
def create_top_countries_focal_set(topics_id):
    """Create a boolean-query focal set on a topic with one focus per country.

    Reads ``focalSetName`` and ``focalSetDescription`` from the form plus a
    JSON-encoded ``data[]`` list of country tags, creates the focal set
    definition, then adds one focus definition for each country tag.

    :param topics_id: id of the topic to add the focal set to
    :return: ``{'success': True}`` or an error payload if set creation fails
    """
    user_mc = user_mediacloud_client()
    # grab the focalSetName and focalSetDescription and then make one
    set_name = request.form['focalSetName']
    set_description = request.form['focalSetDescription']
    country_tags = json.loads(request.form['data[]'])
    technique = FOCAL_TECHNIQUE_BOOLEAN_QUERY
    created_set = user_mc.topicFocalSetDefinitionCreate(topics_id, set_name, set_description, technique)
    if 'focal_set_definitions_id' not in created_set:
        return json_error_response('Unable to create the subtopic set')
    set_def_id = created_set['focal_set_definitions_id']
    # now make the foci in it - one for each country
    focus_def_results = [
        user_mc.topicFocusDefinitionCreate(
            topics_id,
            name=tag['label'],
            description="Stories about {}".format(tag['label']),
            query="tags_id_stories:{}".format(tag['tags_id']),
            focal_set_definitions_id=set_def_id,
        )
        for tag in country_tags
    ]
    return {'success': True}
def create_media_type_focal_set(topics_id): user_mc = user_mediacloud_client() # grab the focalSetName and focalSetDescription and then make one focal_set_name = request.form['focalSetName'] focal_set_description = request.form['focalSetDescription'] media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE) focal_technique = FOCAL_TECHNIQUE_BOOLEAN_QUERY new_focal_set = user_mc.topicFocalSetDefinitionCreate( topics_id, focal_set_name, focal_set_description, focal_technique) if 'focal_set_definitions_id' not in new_focal_set: return json_error_response('Unable to create the subtopic set') # now make the foci in it - one for each media type focus_def_results = [] for tag in media_type_tags: params = { 'name': tag['label'], 'description': "Stories from {} sources".format(tag['label']), 'query': "tags_id_media:{}".format(tag['tags_id']), 'focal_set_definitions_id': new_focal_set['focal_set_definitions_id'], } result = user_mc.topicFocusDefinitionCreate(topics_id, **params) focus_def_results.append(result) return {'success': True}