def stream_media_link_list_csv(user_mc_key, filename, topics_id, **kwargs):
    """Stream a CSV of media-to-media links for a topic as a file attachment.

    Snapshot/timespan/focus filters are read from the Flask request args and
    merged over any keyword arguments supplied by the caller.
    """
    params = kwargs.copy()
    merged_args = {
        'snapshots_id': request.args['snapshotId'],
        'timespans_id': request.args['timespanId'],
        # fixed: tested 'foci_id' in request.args while reading 'focusId',
        # so the focus filter was never picked up from the query string
        'foci_id': request.args['focusId'] if 'focusId' in request.args else None,
    }
    params.update(merged_args)
    if 'q' in params:
        # fixed: compared the literal string 'q' (not its value) against the
        # placeholder list, so empty/placeholder queries were never nulled out
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 1000  # an arbitrary value to let us page through with big topics
    timestamped_filename = csv.safe_filename(filename)
    headers = {
        "Content-Disposition": "attachment;filename=" + timestamped_filename
    }
    columns = ['src_media_id', 'src_media_name', 'src_media_url',
               'ref_media_id', 'ref_media_name', 'ref_media_url']
    return Response(_topic_media_link_list_by_page_as_csv_row(user_mc_key, topics_id, columns, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)
def stream_story_link_list_csv(user_key, filename, topics_id, **kwargs):
    """Stream a CSV of story-to-story links for a topic as a file attachment."""
    params = kwargs.copy()
    merged_args = {
        'snapshots_id': request.args['snapshotId'],
        'timespans_id': request.args['timespanId'],
        # fixed: tested 'foci_id' in request.args while reading 'focusId',
        # so the focus filter was never picked up from the query string
        'foci_id': request.args['focusId'] if 'focusId' in request.args else None,
    }
    params.update(merged_args)
    if 'q' in params:
        # fixed: compared the literal string 'q' (not its value) against the
        # placeholder list, so empty/placeholder queries were never nulled out
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 100  # an arbitrary value to let us page through with big topics
    props = ['stories_id', 'publish_date', 'title', 'url', 'language', 'ap_syndicated',
             'inlink_count', 'outlink_count'
             # 'media_pub_country', 'media_pub_state', 'media_language', 'media_about_country', 'media_media_type'
             ]
    timestamped_filename = csv.safe_filename(filename)
    headers = {
        "Content-Disposition": "attachment;filename=" + timestamped_filename
    }
    # note: removed the unused `all_stories = []` local
    return Response(_topic_story_link_list_by_page_as_csv_row(user_key, topics_id, props, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)
def _stream_story_list_csv(filename, q, fq, stories_per_page=500, sort=MediaCloud.SORT_PROCESSED_STORIES_ID, page_limit=None):
    """Build a streaming CSV attachment Response for a paged story list.

    Media-metadata columns are appended only when the module-level
    INCLUDE_MEDIA_METADATA_IN_CSV flag is set.
    """
    columns = [
        'stories_id', 'publish_date', 'title', 'url', 'language',
        'ap_syndicated', 'themes', 'media_id', 'media_name', 'media_url',
    ]
    if INCLUDE_MEDIA_METADATA_IN_CSV:
        columns.extend(['media_pub_country', 'media_pub_state', 'media_language',
                        'media_about_country', 'media_media_type'])
    attachment_name = csv.safe_filename(filename)
    row_stream = _story_list_by_page_as_csv_row(user_mediacloud_key(), q, fq,
                                                stories_per_page, sort, page_limit, columns)
    return Response(row_stream,
                    mimetype='text/csv; charset=utf-8',
                    headers={"Content-Disposition": "attachment;filename=" + attachment_name})
def stream_story_list_csv(user_key, filename, topics_id, **kwargs):
    """Stream a CSV of a topic's stories as a file attachment.

    Optional kwargs:
      fb_data: when truthy, fetch Facebook collection dates for the topic and
               add a 'facebook_collection_date' column.
      as_attachment: accepted as a control flag; stripped before the API call.
    """
    fb_data = kwargs['fb_data'] if 'fb_data' in kwargs else False
    all_stories = []
    params = kwargs.copy()
    merged_args = {
        'snapshots_id': request.args['snapshotId'],
        'timespans_id': request.args['timespanId'],
        # fixed: tested 'foci_id' in request.args while reading 'focusId'
        'foci_id': request.args['focusId'] if 'focusId' in request.args else None,
        'q': request.args['q'] if 'q' in request.args else None,
        'sort': request.args['sort'] if 'sort' in request.args else None,
    }
    params.update(merged_args)
    # strip our own control flags so they are not passed through to the API
    # (fixed: this stripping had been commented out, leaking the flags downstream)
    if 'as_attachment' in params:
        del params['as_attachment']
    if 'fb_data' in params:
        del params['fb_data']
    if 'q' in params:
        # fixed: compared the literal string 'q' (not its value) against the
        # placeholder list, so empty/placeholder queries were never nulled out
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 100  # an arbitrary value to let us page through with big topics
    props = ['stories_id', 'publish_date', 'title', 'url', 'language', 'ap_syndicated',
             'themes', 'subtopics', 'inlink_count', 'facebook_share_count', 'outlink_count',
             'media_inlink_count', 'media_id', 'media_name', 'media_url',
             # 'media_pub_country', 'media_pub_state', 'media_language', 'media_about_country', 'media_media_type'
             ]
    if fb_data:
        all_fb_count = []
        more_fb_count = True
        link_id = 0
        local_mc = user_admin_mediacloud_client()
        # page through all the facebook count data for the topic
        while more_fb_count:
            fb_page = local_mc.topicStoryListFacebookData(topics_id, limit=100, link_id=link_id)
            all_fb_count = all_fb_count + fb_page['counts']
            if 'next' in fb_page['link_ids']:
                link_id = fb_page['link_ids']['next']
                more_fb_count = True
            else:
                more_fb_count = False
        # now iterate through each list and set up the fb collection date
        # NOTE(review): all_stories is never populated in this function, so this
        # loop is a no-op as written — confirm where the stories were meant to come from
        for s in all_stories:
            for fb_item in all_fb_count:
                if int(fb_item['stories_id']) == int(s['stories_id']):
                    s['facebook_collection_date'] = fb_item['facebook_api_collect_date']
        props.append('facebook_collection_date')
    timestamped_filename = csv.safe_filename(filename)
    headers = {
        "Content-Disposition": "attachment;filename=" + timestamped_filename
    }
    return Response(_topic_story_list_by_page_as_csv_row(user_key, topics_id, props, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)
def stream_media_list_csv(user_mc_key, topic, filename, **kwargs):
    """Stream a CSV of a topic's media list as a file attachment.

    Optional kwargs control extra columns:
      media_metadata: '1' to include media metadata columns.
      include_platform_url_shares: add focus-level post/channel/author counts.
      include_all_url_shares: add per-platform post/channel/author count columns,
        one set per platform seed query configured on the topic.
    """
    filename = topic['name'] + '-' + filename
    # we have to make a separate call to the media info if the user wants to include the media metadata
    include_media_metadata = ('media_metadata' in kwargs) and (kwargs['media_metadata'] == '1')
    # if the focusId is a URL Sharing subtopic, then we have platform-specific post/author/channel share counts
    include_platform_url_shares = kwargs['include_platform_url_shares'] if 'include_platform_url_shares' in kwargs else False
    # if this topic includes platforms, then we have URL sharing counts (post/author/channel) for each platform
    include_all_url_shares = kwargs['include_all_url_shares'] if 'include_all_url_shares' in kwargs else False
    params = kwargs.copy()
    snapshots_id, timespans_id, foci_id, q = filters_from_args(request.args)
    merged_args = {
        'timespans_id': timespans_id,
        'snapshots_id': snapshots_id,
        'foci_id': foci_id,
        'q': q,
        'sort': request.args.get('sort'),
    }
    params.update(merged_args)
    # do a check to see if the user has added in a real query or not
    if 'q' in params:
        # fixed: compared the literal string 'q' (not its value) against the
        # placeholder list, so empty/placeholder queries were never nulled out
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 1000  # an arbitrary value to let us page through with big topics (note, this is the page size)
    # set up the dict keys / column headers that the user cares about for this download
    # fixed: copy the shared constant so the += below doesn't mutate
    # TOPIC_MEDIA_CSV_PROPS for every later request
    props = list(TOPIC_MEDIA_CSV_PROPS)
    if include_platform_url_shares:
        props += ['post_count', 'channel_count', 'author_count']
    if include_all_url_shares:
        # if the user requested to download all the url sharing counts by platform, we need to grab the config
        # for that, which is held in the platform seed query objects
        topic_seed_queries = topic['topic_seed_queries']
        extra_columns = []
        for tsq in topic_seed_queries:
            prefix = platform_csv_column_header_prefix(tsq)
            extra_columns += [prefix + 'post_count', prefix + 'channel_count', prefix + 'author_count']
        props += extra_columns
        # fixed: this assignment ran unconditionally but topic_seed_queries is only
        # bound in this branch, raising NameError when include_all_url_shares was False
        params['topic_seed_queries'] = topic_seed_queries
    if include_media_metadata:
        props += ['media_pub_country', 'media_pub_state', 'media_language', 'media_about_country', 'media_media_type']
    timestamped_filename = csv.safe_filename(filename)
    headers = {
        "Content-Disposition": "attachment;filename=" + timestamped_filename
    }
    return Response(_stream_media_by_page(user_mc_key, topic['topics_id'], props, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)
def _stream_story_list_csv(filename, q, fq, stories_per_page=500, sort=MediaCloud.SORT_PROCESSED_STORIES_ID, page_limit=None):
    """Return a streaming CSV attachment Response for a paged story list,
    always including the media metadata columns."""
    columns = [
        'stories_id', 'publish_date', 'title', 'url', 'language', 'ap_syndicated',
        'themes', 'media_id', 'media_name', 'media_url',
        'media_pub_country', 'media_pub_state', 'media_language',
        'media_about_country', 'media_media_type',
    ]
    attachment_name = csv.safe_filename(filename)
    row_stream = _story_list_by_page_as_csv_row(q, fq, stories_per_page, sort, page_limit, columns)
    return Response(row_stream,
                    mimetype='text/csv; charset=utf-8',
                    headers={"Content-Disposition": "attachment;filename=" + attachment_name})
def stream_media_link_list_csv(user_mc_key, filename, topics_id, **kwargs):
    """Stream a CSV of media-to-media links for a topic as a file attachment."""
    params = kwargs.copy()
    merged_args = {
        'snapshots_id': request.args['snapshotId'],
        'timespans_id': request.args['timespanId'],
        # fixed: tested 'foci_id' in request.args while reading 'focusId',
        # so the focus filter was never picked up from the query string
        'foci_id': request.args['focusId'] if 'focusId' in request.args else None,
    }
    params.update(merged_args)
    if 'q' in params:
        # fixed: compared the literal string 'q' (not its value) against the
        # placeholder list, so empty/placeholder queries were never nulled out
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 1000  # an arbitrary value to let us page through with big topics
    timestamped_filename = csv.safe_filename(filename)
    headers = {
        "Content-Disposition": "attachment;filename=" + timestamped_filename
    }
    columns = ['src_media_id', 'src_media_name', 'src_media_url',
               'ref_media_id', 'ref_media_name', 'ref_media_url']
    return Response(_topic_media_link_list_by_page_as_csv_row(user_mc_key, topics_id, columns, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)
def stream_story_link_list_csv(user_key, filename, topics_id, **kwargs):
    """Stream a CSV of story-to-story links for a topic as a file attachment."""
    params = kwargs.copy()
    merged_args = {
        'snapshots_id': request.args['snapshotId'],
        'timespans_id': request.args['timespanId'],
        # fixed: tested 'foci_id' in request.args while reading 'focusId',
        # so the focus filter was never picked up from the query string
        'foci_id': request.args['focusId'] if 'focusId' in request.args else None,
    }
    params.update(merged_args)
    if 'q' in params:
        # fixed: compared the literal string 'q' (not its value) against the
        # placeholder list, so empty/placeholder queries were never nulled out
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 100  # an arbitrary value to let us page through with big topics
    props = ['stories_id', 'publish_date', 'title', 'url', 'language', 'ap_syndicated',
             'inlink_count', 'outlink_count'
             # 'media_pub_country', 'media_pub_state', 'media_language', 'media_about_country', 'media_media_type'
             ]
    timestamped_filename = csv.safe_filename(filename)
    headers = {
        "Content-Disposition": "attachment;filename=" + timestamped_filename
    }
    return Response(_topic_story_link_list_by_page_as_csv_row(user_key, topics_id, props, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)
def stream_story_list_csv(user_key, topic, **kwargs):
    """Stream a CSV of a topic's stories as a file attachment.

    Columns are chosen from the topic's configuration and optional boolean
    kwargs: media_metadata, story_tags, reddit_submissions, fb_data.
    """
    filename = topic['name'] + '-stories'
    has_twitter_data = (topic['ch_monitor_id'] is not None) and (topic['ch_monitor_id'] != 0)
    # as_attachment = kwargs['as_attachment'] if 'as_attachment' in kwargs else True
    include_media_metadata = ('media_metadata' in kwargs) and (kwargs['media_metadata'] is True)
    include_story_tags = ('story_tags' in kwargs) and (kwargs['story_tags'] is True)
    include_reddit_submissions = ('reddit_submissions' in kwargs) and (kwargs['reddit_submissions'] is True)
    include_fb_date = kwargs['fb_data'] if 'fb_data' in kwargs else False
    all_stories = []
    params = kwargs.copy()
    merged_args = {
        'snapshots_id': request.args['snapshotId'],
        'timespans_id': request.args['timespanId'],
        'foci_id': request.args['focusId'] if 'focusId' in request.args else None,
        'q': request.args['q'] if 'q' in request.args else None,
        'sort': request.args['sort'] if 'sort' in request.args else None,
    }
    params.update(merged_args)
    story_count = apicache.topic_story_count(user_mediacloud_key(), topic['topics_id'],
                                             snapshots_id=params['snapshots_id'],
                                             timespans_id=params['timespans_id'],
                                             foci_id=params['foci_id'], q=params['q'])
    logger.info("Total stories to download: {}".format(story_count['count']))
    # strip our own control flags so they are not passed through to the API
    if 'as_attachment' in params:
        del params['as_attachment']
    if 'fb_data' in params:
        del params['fb_data']
    if 'q' in params:
        # fixed: compared the literal string 'q' (not its value) against the
        # placeholder list, so empty/placeholder queries were never nulled out
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 1000  # an arbitrary value to let us page through with big topics
    # determine which props the user actually wants to download
    props = ['stories_id', 'publish_date', 'title', 'url', 'language', 'ap_syndicated',
             'inlink_count', 'facebook_share_count']
    if has_twitter_data:
        props.append('simple_tweet_count')
    if include_reddit_submissions:
        props.append('reddit_submissions')
    if include_fb_date:
        props.append('facebook_collection_date')
    if include_story_tags:
        props += ['themes', 'subtopics']
    props += ['outlink_count', 'media_inlink_count', 'media_id', 'media_name', 'media_url']
    if include_media_metadata:
        props += ['media_pub_country', 'media_pub_state', 'media_language', 'media_about_country', 'media_media_type']
    if include_fb_date:
        all_fb_count = []
        more_fb_count = True
        link_id = 0
        local_mc = user_admin_mediacloud_client()
        # page through all the facebook count data for the topic
        while more_fb_count:
            fb_page = local_mc.topicStoryListFacebookData(topic['topics_id'], limit=100, link_id=link_id)
            all_fb_count = all_fb_count + fb_page['counts']
            if 'next' in fb_page['link_ids']:
                link_id = fb_page['link_ids']['next']
                more_fb_count = True
            else:
                more_fb_count = False
        # now iterate through each list and set up the fb collection date
        # NOTE(review): all_stories is never populated in this function, so this
        # loop is a no-op as written — confirm where the stories were meant to come from
        for s in all_stories:
            for fb_item in all_fb_count:
                if int(fb_item['stories_id']) == int(s['stories_id']):
                    s['facebook_collection_date'] = fb_item['facebook_api_collect_date']
    timestamped_filename = csv.safe_filename(filename)
    headers = {
        "Content-Disposition": "attachment;filename=" + timestamped_filename
    }
    return Response(_topic_story_list_by_page_as_csv_row(user_key, topic['topics_id'], props, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)
def stream_story_list_csv(user_key, filename, topics_id, **kwargs):
    """Stream a CSV of a topic's stories as a file attachment, looking the
    topic up by id with the user's Media Cloud client.

    Optional kwargs:
      fb_data: when truthy, fetch Facebook collection dates and add a
               'facebook_collection_date' column.
    """
    user_mc = user_mediacloud_client(user_key)
    topic = user_mc.topic(topics_id)
    has_twitter_data = topic['ch_monitor_id'] is not None
    # as_attachment = kwargs['as_attachment'] if 'as_attachment' in kwargs else True
    include_fb_date = kwargs['fb_data'] if 'fb_data' in kwargs else False
    all_stories = []
    params = kwargs.copy()
    merged_args = {
        'snapshots_id': request.args['snapshotId'],
        'timespans_id': request.args['timespanId'],
        'foci_id': request.args['focusId'] if 'focusId' in request.args else None,
        'q': request.args['q'] if 'q' in request.args else None,
        'sort': request.args['sort'] if 'sort' in request.args else None,
    }
    params.update(merged_args)
    story_count = apicache.topic_story_count(user_mediacloud_key(), topics_id,
                                             snapshots_id=params['snapshots_id'],
                                             timespans_id=params['timespans_id'],
                                             foci_id=params['foci_id'], q=params['q'])
    # fixed: log the count itself rather than the whole response dict,
    # matching the sibling implementation above
    logger.info("Total stories to download: {}".format(story_count['count']))
    # strip our own control flags so they are not passed through to the API
    if 'as_attachment' in params:
        del params['as_attachment']
    if 'fb_data' in params:
        del params['fb_data']
    if 'q' in params:
        # fixed: compared the literal string 'q' (not its value) against the
        # placeholder list, so empty/placeholder queries were never nulled out
        params['q'] = params['q'] if params['q'] not in [None, '', 'null', 'undefined'] else None
    params['limit'] = 1000  # an arbitrary value to let us page through with big topics
    # determine which props the user actually wants to download
    props = ['stories_id', 'publish_date', 'title', 'url', 'language', 'ap_syndicated',
             'themes', 'subtopics', 'inlink_count', 'facebook_share_count',
             # removed media metadata here because it takes too long to query for it
             # 'media_pub_country', 'media_pub_state', 'media_language', 'media_about_country', 'media_media_type'
             ]
    if has_twitter_data:
        props.append('simple_tweet_count')
    if include_fb_date:
        props.append('facebook_collection_date')
    props += ['outlink_count', 'media_inlink_count', 'media_id', 'media_name', 'media_url']
    if include_fb_date:
        all_fb_count = []
        more_fb_count = True
        link_id = 0
        local_mc = user_admin_mediacloud_client()
        # page through all the facebook count data for the topic
        while more_fb_count:
            fb_page = local_mc.topicStoryListFacebookData(topics_id, limit=100, link_id=link_id)
            all_fb_count = all_fb_count + fb_page['counts']
            if 'next' in fb_page['link_ids']:
                link_id = fb_page['link_ids']['next']
                more_fb_count = True
            else:
                more_fb_count = False
        # now iterate through each list and set up the fb collection date
        # NOTE(review): all_stories is never populated in this function, so this
        # loop is a no-op as written — confirm where the stories were meant to come from
        for s in all_stories:
            for fb_item in all_fb_count:
                if int(fb_item['stories_id']) == int(s['stories_id']):
                    s['facebook_collection_date'] = fb_item['facebook_api_collect_date']
    timestamped_filename = csv.safe_filename(filename)
    headers = {
        "Content-Disposition": "attachment;filename=" + timestamped_filename
    }
    return Response(_topic_story_list_by_page_as_csv_row(user_key, topics_id, props, **params),
                    mimetype='text/csv; charset=utf-8', headers=headers)