def crowdtangle_lists(pool, token=None, format='csv_dict_row'):
    """Fetch the CrowdTangle lists attached to the given API token.

    Args:
        pool: urllib3 pool forwarded to `request_json`.
        token (str): CrowdTangle API token. Required.
        format (str): one of CROWDTANGLE_OUTPUT_FORMATS; 'csv_dict_row' and
            'csv_row' run items through `format_list`, anything else returns
            the raw payload items.

    Returns:
        list: the lists found under `result.lists` in the API response.

    Raises:
        CrowdTangleMissingTokenError: if `token` is None.
        TypeError: if `format` is not a known output format.
        CrowdTangleInvalidTokenError: on HTTP 401.
        CrowdTangleInvalidRequestError: on any other HTTP status >= 400.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    if format not in CROWDTANGLE_OUTPUT_FORMATS:
        # Fixed typo in the error message ('unkown' -> 'unknown')
        raise TypeError('minet.crowdtangle.lists: unknown `format`.')

    # Fetching
    api_url = URL_TEMPLATE % token

    err, response, data = request_json(api_url, pool=pool)

    if err is not None:
        raise err

    if response.status == 401:
        raise CrowdTangleInvalidTokenError

    if response.status >= 400:
        raise CrowdTangleInvalidRequestError(api_url)

    lists = nested_get(['result', 'lists'], data)

    # Renamed the ambiguous loop variable `l` (PEP 8 / E741)
    if format == 'csv_dict_row':
        return [format_list(item, as_dict=True) for item in lists]
    elif format == 'csv_row':
        return [format_list(item) for item in lists]

    return lists
def crowdtangle_post(pool, post_id, token=None, format='csv_dict_row'):
    """Fetch a single CrowdTangle post by id.

    Args:
        pool: urllib3 pool forwarded to `request_json`.
        post_id: id interpolated into URL_TEMPLATE alongside the token.
        token (str): CrowdTangle API token. Required.
        format (str): one of CROWDTANGLE_OUTPUT_FORMATS; 'csv_dict_row' and
            'csv_row' run the post through `format_post`, anything else
            returns the raw payload.

    Returns:
        The first post under `result.posts`, formatted per `format`, or
        None when the response contains no post.

    Raises:
        CrowdTangleMissingTokenError: if `token` is None.
        TypeError: if `format` is not a known output format.
        CrowdTangleInvalidTokenError: on HTTP 401.
        CrowdTangleInvalidRequestError: on any other HTTP status >= 400.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    if format not in CROWDTANGLE_OUTPUT_FORMATS:
        # Fixed typo in the error message ('unkown' -> 'unknown')
        raise TypeError('minet.crowdtangle.post: unknown `format`.')

    # Fetching
    api_url = URL_TEMPLATE % (post_id, token)

    err, response, data = request_json(api_url, pool=pool)

    if err is not None:
        raise err

    if response.status == 401:
        raise CrowdTangleInvalidTokenError

    if response.status >= 400:
        raise CrowdTangleInvalidRequestError(api_url)

    post = nested_get(['result', 'posts', 0], data)

    if post is None:
        return

    if format == 'csv_dict_row':
        return format_post(post, as_dict=True)
    elif format == 'csv_row':
        return format_post(post)

    return post
def crowdtangle_post(pool, post_id, token=None, raw=False):
    """Retrieve one CrowdTangle post by id.

    Raises CrowdTangleMissingTokenError without a token,
    CrowdTangleInvalidTokenError on HTTP 401 and
    CrowdTangleInvalidRequestError on other HTTP errors. Returns None when
    the response holds no post, the raw post when `raw` is True, and the
    output of `format_post` otherwise.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    # Build the endpoint url and perform the request
    url = URL_TEMPLATE % (post_id, token)
    err, response, data = request_json(url, pool=pool)

    if err is not None:
        raise err

    status = response.status

    if status == 401:
        raise CrowdTangleInvalidTokenError

    if status >= 400:
        raise CrowdTangleInvalidRequestError(url)

    item = getpath(data, ['result', 'posts', 0])

    if item is None:
        return

    return item if raw else format_post(item)
def mediacloud_topic_stories(pool, token, topic_id, link_id=None,
                             media_id=None, from_media_id=None, raw=False):
    """Yield the stories of a Mediacloud topic, following pagination.

    Pagination is driven by the link id returned by `get_next_link_id`:
    each page is fetched via `url_forge` + `request_json` and iteration
    stops when a page is empty or carries no next link id. Stories are
    passed through `format_topic_story` unless `raw` is True.
    """
    while True:
        page_url = url_forge(
            token,
            topic_id=topic_id,
            link_id=link_id,
            media_id=media_id,
            from_media_id=from_media_id,
        )

        err, _, data = request_json(page_url, pool=pool)

        if err:
            raise err

        stories = data.get('stories')

        # Stop on a missing or empty page
        if stories is None or len(stories) == 0:
            return

        next_link_id = get_next_link_id(data)

        for story in stories:
            yield story if raw else format_topic_story(story, next_link_id)

        if next_link_id is None:
            return

        # Move on to the next page
        link_id = next_link_id
def crowdtangle_lists(pool, token=None, raw=False):
    """Retrieve the lists attached to the given CrowdTangle token.

    Raises CrowdTangleMissingTokenError without a token,
    CrowdTangleInvalidTokenError on HTTP 401 and
    CrowdTangleInvalidRequestError on other HTTP errors. Returns the raw
    payload when `raw` is True, else each list formatted by `format_list`.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    # Build the endpoint url and perform the request
    url = URL_TEMPLATE % token
    err, response, data = request_json(url, pool=pool)

    if err is not None:
        raise err

    status = response.status

    if status == 401:
        raise CrowdTangleInvalidTokenError

    if status >= 400:
        raise CrowdTangleInvalidRequestError(url)

    items = getpath(data, ['result', 'lists'])

    if raw:
        return items

    return [format_list(item) for item in items]
def crowdtangle_summary(pool, link, token=None, start_date=None,
                        with_top_posts=False,
                        sort_by=CROWDTANGLE_SUMMARY_DEFAULT_SORT_TYPE,
                        format='csv_dict_row', platforms=None):
    """Fetch CrowdTangle share stats (and optionally top posts) for a link.

    Args:
        pool: urllib3 pool forwarded to `request_json`.
        link (str): url whose summary is requested.
        token (str): CrowdTangle API token. Required.
        start_date (str): required start date string.
        with_top_posts (bool): whether to also return the top posts.
        sort_by (str): one of CROWDTANGLE_SUMMARY_SORT_TYPES.
        format (str): one of CROWDTANGLE_OUTPUT_FORMATS.
        platforms: forwarded to `url_forge`.

    Returns:
        stats alone when `with_top_posts` is falsy, else a (stats, posts)
        tuple. Either element may be None when absent from the response.

    Raises:
        CrowdTangleMissingTokenError, TypeError,
        CrowdTangleInvalidTokenError, CrowdTangleInvalidRequestError.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    if format not in CROWDTANGLE_OUTPUT_FORMATS:
        # Fixed typo in the error message ('unkown' -> 'unknown')
        raise TypeError('minet.crowdtangle.summary: unknown `format`.')

    if not isinstance(start_date, str):
        raise TypeError('minet.crowdtangle.summary: expecting a `start_date` kwarg.')

    if sort_by not in CROWDTANGLE_SUMMARY_SORT_TYPES:
        raise TypeError('minet.crowdtangle.summary: unknown `sort_by`.')

    # Fetching
    api_url = url_forge(
        link,
        token,
        start_date,
        sort_by,
        platforms,
        with_top_posts
    )

    err, response, data = request_json(api_url, pool=pool)

    if err is not None:
        raise err

    if response.status == 401:
        raise CrowdTangleInvalidTokenError

    if response.status >= 400:
        raise CrowdTangleInvalidRequestError(api_url)

    stats = nested_get(['result', 'summary', 'facebook'], data)
    posts = nested_get(['result', 'posts'], data) if with_top_posts else None

    if stats is not None:
        if format == 'csv_dict_row':
            stats = format_summary(stats, as_dict=True)
        elif format == 'csv_row':
            stats = format_summary(stats)

    if not with_top_posts:
        return stats

    # `posts` may legitimately be None when the API returns none
    if posts is not None:
        if format == 'csv_dict_row':
            posts = [format_post(post, as_dict=True) for post in posts]
        elif format == 'csv_row':
            posts = [format_post(post) for post in posts]

    return stats, posts
def crowdtangle_summary(pool, link, token=None, start_date=None,
                        with_top_posts=False,
                        sort_by=CROWDTANGLE_SUMMARY_DEFAULT_SORT_TYPE,
                        raw=False, platforms=None):
    """Fetch CrowdTangle share stats (and optionally top posts) for a link.

    Args:
        pool: urllib3 pool forwarded to `request_json`.
        link (str): url whose summary is requested.
        token (str): CrowdTangle API token. Required.
        start_date (str): required start date string.
        with_top_posts (bool): whether to also return the top posts.
        sort_by (str): one of CROWDTANGLE_SUMMARY_SORT_TYPES.
        raw (bool): when True, skip formatting of stats and posts.
        platforms: forwarded to `url_forge`.

    Returns:
        stats alone when `with_top_posts` is falsy, else a (stats, posts)
        tuple. Either element may be None when absent from the response.

    Raises:
        CrowdTangleMissingTokenError, TypeError,
        CrowdTangleInvalidTokenError, CrowdTangleInvalidRequestError.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    if not isinstance(start_date, str):
        raise TypeError(
            'minet.crowdtangle.summary: expecting a `start_date` kwarg.')

    if sort_by not in CROWDTANGLE_SUMMARY_SORT_TYPES:
        raise TypeError('minet.crowdtangle.summary: unknown `sort_by`.')

    # Fetching
    api_url = url_forge(link, token, start_date, sort_by, platforms,
                        with_top_posts)

    err, response, data = request_json(api_url, pool=pool)

    if err is not None:
        raise err

    if response.status == 401:
        raise CrowdTangleInvalidTokenError

    if response.status >= 400:
        raise CrowdTangleInvalidRequestError(api_url)

    stats = getpath(data, ['result', 'summary', 'facebook'])
    posts = getpath(data, ['result', 'posts']) if with_top_posts else None

    if stats is not None:
        if not raw:
            stats = format_summary(stats)

    if not with_top_posts:
        return stats

    # FIX: guard against a missing `result.posts` key — the format variant
    # of this function guards `posts is not None` before iterating, while
    # this one would raise TypeError iterating None.
    if posts is not None and not raw:
        posts = [format_post(post, link=link) for post in posts]

    return stats, posts
def request_json(self, url):
    """Perform a JSON GET through the instance pool, spoofing the user
    agent and sending the stored cookie.

    Raises the underlying request error if any, and
    InstagramPublicAPIInvalidResponseError on HTTP status >= 400.
    Returns the decoded JSON payload.
    """
    headers = {'Cookie': self.cookie}

    error, response, payload = request_json(
        url,
        pool=self.pool,
        spoof_ua=True,
        headers=headers
    )

    if error:
        raise error

    if response.status >= 400:
        raise InstagramPublicAPIInvalidResponseError

    return payload
def make_simple_call(pool, token, route, formatter, format='csv_dict_row',
                     arg=None, query=None, single=False):
    """Call a simple Mediacloud API route and format its items.

    Builds `MEDIACLOUD_API_BASE_URL + route` (plus optional `/arg` path
    segment and extra `query` parameters), authenticating with `key=token`.
    Each returned item goes through `formatter` according to `format`
    ('csv_dict_row' / 'csv_row') or is kept raw. Returns the first item
    when `single` is True, else the whole list.

    Raises the underlying request error, or MediacloudServerError on
    HTTP status >= 500.
    """
    url = '%s%s' % (MEDIACLOUD_API_BASE_URL, route)

    if arg is not None:
        url = '%s/%s' % (url, str(arg))

    url = '%s?key=%s' % (url, token)

    if query is not None:
        pairs = ['%s=%s' % (str(k), str(v)) for k, v in query.items()]
        url = '%s&%s' % (url, '&'.join(pairs))

    err, response, data = request_json(url, pool=pool)

    if err:
        raise err

    if response.status >= 500:
        raise MediacloudServerError(server_error=data.get('error'))

    # Hoist the format dispatch out of the per-item loop
    if format == 'csv_dict_row':
        transform = lambda item: formatter(item, as_dict=True)
    elif format == 'csv_row':
        transform = formatter
    else:
        transform = lambda item: item

    results = [transform(item) for item in data]

    return results[0] if single else results
def generator():
    """Yield Mediacloud stories page by page.

    Closure over the enclosing scope's request parameters. When `count`
    is truthy, yields the single count value and stops; otherwise yields
    each story (formatted per `format`) and paginates via
    `get_last_processed_stories_id` until the API yields no further id.
    """
    last_processed_stories_id = None

    while True:
        page_url = url_forge(
            token,
            query,
            collections=collections,
            medias=medias,
            publish_day=publish_day,
            publish_month=publish_month,
            publish_year=publish_year,
            count=count,
            last_processed_stories_id=last_processed_stories_id
        )

        err, response, data = request_json(page_url, pool=pool)

        if err:
            raise err

        if response.status >= 500:
            raise MediacloudServerError(server_error=data.get('error'))

        # Count-only mode: emit the number and stop
        if count:
            yield data['count']
            return

        for story in data:
            if format == 'csv_dict_row':
                item = format_story(story, as_dict=True)
            elif format == 'csv_row':
                item = format_story(story)
            else:
                item = story
            yield item

        last_processed_stories_id = get_last_processed_stories_id(data)

        if last_processed_stories_id is None:
            return
def mediacloud_topic_stories(pool, token, topic_id, link_id=None,
                             media_id=None, from_media_id=None,
                             format='csv_dict_row'):
    """Yield the stories of a Mediacloud topic, following pagination.

    Each page is fetched via `url_forge` + `request_json`; iteration
    stops when a page is empty or `get_next_link_id` returns None.
    Stories are formatted per `format` ('csv_dict_row' / 'csv_row')
    or yielded raw.
    """
    while True:
        page_url = url_forge(
            token,
            topic_id=topic_id,
            link_id=link_id,
            media_id=media_id,
            from_media_id=from_media_id,
        )

        err, _, data = request_json(page_url, pool=pool)

        if err:
            raise err

        stories = data.get('stories')

        # Stop on a missing or empty page
        if stories is None or len(stories) == 0:
            return

        next_link_id = get_next_link_id(data)

        for story in stories:
            if format == 'csv_dict_row':
                yield format_topic_story(story, next_link_id, as_dict=True)
            elif format == 'csv_row':
                yield format_topic_story(story, next_link_id)
            else:
                yield story

        if next_link_id is None:
            return

        # Move on to the next page
        link_id = next_link_id
def request_json(self, url):
    """Perform a JSON GET against the YouTube API, retrying after quota
    exhaustion.

    On HTTP 403 (quota), sleeps until shortly after midnight Pacific time
    (notifying `self.before_sleep` if callable) and retries the same url.
    Raises YouTubeInvalidAPIKeyError when the payload reports an invalid
    key, YouTubeInvalidAPICall on other HTTP errors, and re-raises any
    transport error. Returns the decoded JSON payload.
    """
    # Iterative retry loop (equivalent to the recursive retry-on-403)
    while True:
        err, response, data = request_json(url, pool=self.pool)

        if err:
            raise err

        if response.status != 403:
            break

        sleep_time = seconds_to_midnight_pacific_time() + 10

        if callable(self.before_sleep):
            self.before_sleep(sleep_time)

        time.sleep(sleep_time)

    if response.status >= 400:
        if data is not None and 'API key not valid' in nested_get(['error', 'message'], data, ''):
            raise YouTubeInvalidAPIKeyError

        raise YouTubeInvalidAPICall(url, response.status, data)

    return data
def request_json(self, url, headers=None):
    """Thin wrapper around the module-level `request_json`, binding the
    instance pool and spoofing the user agent.

    Returns the (err, response, data) triple unchanged.
    """
    result = request_json(
        url,
        pool=self.pool,
        spoof_ua=True,
        headers=headers
    )
    return result