Example #1
0
def crowdtangle_lists(pool, token=None, format='csv_dict_row'):
    """
    Fetch the lists exposed by the CrowdTangle API for the given token.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        token (str, optional): CrowdTangle API token, interpolated into
            `URL_TEMPLATE`. Despite the `None` default it is mandatory.
        format (str, optional): Must belong to `CROWDTANGLE_OUTPUT_FORMATS`.
            'csv_dict_row' formats every list with
            `format_list(..., as_dict=True)`, 'csv_row' with
            `format_list(...)`; any other accepted value returns the payload
            untouched. Defaults to 'csv_dict_row'.

    Returns:
        list: The formatted lists for the two csv formats (empty when the
        payload holds nothing under `result.lists`). For other formats, the
        value found under `result.lists`, returned as-is.

    Raises:
        CrowdTangleMissingTokenError: If `token` is None.
        TypeError: If `format` is not in `CROWDTANGLE_OUTPUT_FORMATS`.
        CrowdTangleInvalidTokenError: If the API answers with a 401 status.
        CrowdTangleInvalidRequestError: If the API answers with any other
            status >= 400.
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    if format not in CROWDTANGLE_OUTPUT_FORMATS:
        raise TypeError('minet.crowdtangle.lists: unkown `format`.')

    # Fetching
    api_url = URL_TEMPLATE % token

    err, response, data = request_json(api_url, pool=pool)

    if err is not None:
        raise err

    # 401 is checked first so that a bad token gets its dedicated error
    # rather than the generic invalid-request one.
    if response.status == 401:
        raise CrowdTangleInvalidTokenError

    if response.status >= 400:
        raise CrowdTangleInvalidRequestError(api_url)

    lists = nested_get(['result', 'lists'], data)

    # `lists or []`: a payload without `result.lists` must not crash the
    # formatting comprehensions with "'NoneType' object is not iterable".
    if format == 'csv_dict_row':
        return [format_list(item, as_dict=True) for item in lists or []]

    if format == 'csv_row':
        return [format_list(item) for item in lists or []]

    return lists
Example #2
0
def crowdtangle_post(pool, post_id, token=None, format='csv_dict_row'):
    """
    Fetch a single post from the CrowdTangle API.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        post_id: Identifier of the post, interpolated into `URL_TEMPLATE`.
        token (str, optional): CrowdTangle API token. Despite the `None`
            default it is mandatory.
        format (str, optional): Must belong to `CROWDTANGLE_OUTPUT_FORMATS`.
            'csv_dict_row' returns `format_post(post, as_dict=True)`,
            'csv_row' returns `format_post(post)`; any other accepted value
            returns the post untouched. Defaults to 'csv_dict_row'.

    Returns:
        The post in the requested format, or None when nothing is found
        under `result.posts[0]` in the payload.

    Raises:
        CrowdTangleMissingTokenError: If `token` is None.
        TypeError: If `format` is not in `CROWDTANGLE_OUTPUT_FORMATS`.
        CrowdTangleInvalidTokenError: If the API answers with a 401 status.
        CrowdTangleInvalidRequestError: If the API answers with any other
            status >= 400.
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    if format not in CROWDTANGLE_OUTPUT_FORMATS:
        raise TypeError('minet.crowdtangle.post: unkown `format`.')

    # Fetching
    endpoint = URL_TEMPLATE % (post_id, token)
    error, http_response, payload = request_json(endpoint, pool=pool)

    if error is not None:
        raise error

    status = http_response.status

    # A rejected token gets its own error before the generic 4xx/5xx one.
    if status == 401:
        raise CrowdTangleInvalidTokenError

    if status >= 400:
        raise CrowdTangleInvalidRequestError(endpoint)

    found = nested_get(['result', 'posts', 0], payload)

    if found is None:
        return None

    if format == 'csv_dict_row':
        return format_post(found, as_dict=True)

    if format == 'csv_row':
        return format_post(found)

    return found
Example #3
0
def crowdtangle_post(pool, post_id, token=None, raw=False):
    """
    Fetch a single post from the CrowdTangle API.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        post_id: Identifier of the post, interpolated into `URL_TEMPLATE`.
        token (str, optional): CrowdTangle API token. Despite the `None`
            default it is mandatory.
        raw (bool, optional): When truthy, return the post exactly as found
            in the payload instead of passing it through `format_post`.
            Defaults to False.

    Returns:
        The post (formatted unless `raw`), or None when nothing is found
        under `result.posts[0]` in the payload.

    Raises:
        CrowdTangleMissingTokenError: If `token` is None.
        CrowdTangleInvalidTokenError: If the API answers with a 401 status.
        CrowdTangleInvalidRequestError: If the API answers with any other
            status >= 400.
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    # Fetching
    endpoint = URL_TEMPLATE % (post_id, token)
    error, http_response, payload = request_json(endpoint, pool=pool)

    if error is not None:
        raise error

    status = http_response.status

    # A rejected token gets its own error before the generic 4xx/5xx one.
    if status == 401:
        raise CrowdTangleInvalidTokenError

    if status >= 400:
        raise CrowdTangleInvalidRequestError(endpoint)

    found = getpath(payload, ['result', 'posts', 0])

    if found is None:
        return None

    return found if raw else format_post(found)
Example #4
0
def mediacloud_topic_stories(pool, token, topic_id, link_id=None, media_id=None,
                             from_media_id=None, raw=False):
    """
    Lazily iterate over the stories of a Media Cloud topic, page by page.

    Each iteration of the loop requests one page (the url is built by
    `url_forge`) and yields its stories. Pagination follows the link id
    returned by `get_next_link_id`, and stops when a page has no stories or
    when there is no next link id.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        token: Media Cloud API key, handed to `url_forge`.
        topic_id: Identifier of the topic, handed to `url_forge`.
        link_id (optional): Link id of the first page to request.
        media_id (optional): Forwarded to `url_forge`.
        from_media_id (optional): Forwarded to `url_forge`.
        raw (bool, optional): When truthy, yield stories as found in the
            payload instead of `format_topic_story(story, next_link_id)`.

    Yields:
        One item per story, formatted unless `raw`.

    Raises:
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    cursor = link_id

    while True:
        page_url = url_forge(
            token,
            topic_id=topic_id,
            link_id=cursor,
            media_id=media_id,
            from_media_id=from_media_id,
        )

        error, _, payload = request_json(page_url, pool=pool)

        if error:
            raise error

        has_stories = 'stories' in payload and len(payload['stories']) > 0

        if not has_stories:
            return

        # The next link id is resolved before yielding because the formatter
        # takes it as an argument for every story of the page.
        following = get_next_link_id(payload)

        for item in payload['stories']:
            yield item if raw else format_topic_story(item, following)

        if following is None:
            return

        cursor = following
Example #5
0
def crowdtangle_lists(pool, token=None, raw=False):
    """
    Fetch the lists exposed by the CrowdTangle API for the given token.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        token (str, optional): CrowdTangle API token, interpolated into
            `URL_TEMPLATE`. Despite the `None` default it is mandatory.
        raw (bool, optional): When truthy, return the value found under
            `result.lists` untouched instead of formatting each list with
            `format_list`. Defaults to False.

    Returns:
        list: The formatted lists (empty when the payload holds nothing under
        `result.lists`). When `raw`, the value found under `result.lists`,
        returned as-is.

    Raises:
        CrowdTangleMissingTokenError: If `token` is None.
        CrowdTangleInvalidTokenError: If the API answers with a 401 status.
        CrowdTangleInvalidRequestError: If the API answers with any other
            status >= 400.
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    # Fetching
    api_url = URL_TEMPLATE % token

    err, response, data = request_json(api_url, pool=pool)

    if err is not None:
        raise err

    # 401 is checked first so that a bad token gets its dedicated error
    # rather than the generic invalid-request one.
    if response.status == 401:
        raise CrowdTangleInvalidTokenError

    if response.status >= 400:
        raise CrowdTangleInvalidRequestError(api_url)

    lists = getpath(data, ['result', 'lists'])

    if raw:
        return lists

    # `lists or []`: a payload without `result.lists` must not crash the
    # comprehension with "'NoneType' object is not iterable".
    return [format_list(item) for item in lists or []]
Example #6
0
def crowdtangle_summary(pool, link, token=None, start_date=None, with_top_posts=False,
                        sort_by=CROWDTANGLE_SUMMARY_DEFAULT_SORT_TYPE, format='csv_dict_row', platforms=None):
    """
    Fetch the CrowdTangle summary statistics of a link.

    Only the `facebook` entry of the payload's `result.summary` is read.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        link: Link to summarize, handed to `url_forge`.
        token (str, optional): CrowdTangle API token. Despite the `None`
            default it is mandatory.
        start_date (str, optional): Must be a `str`; since the default is
            None, omitting it raises a TypeError.
        with_top_posts (bool, optional): When truthy, the posts found under
            `result.posts` are returned alongside the stats.
        sort_by (str, optional): Must belong to
            `CROWDTANGLE_SUMMARY_SORT_TYPES`.
        format (str, optional): Must belong to `CROWDTANGLE_OUTPUT_FORMATS`.
            'csv_dict_row' and 'csv_row' run the stats through
            `format_summary` and the posts through `format_post` (with and
            without `as_dict=True` respectively); any other accepted value
            leaves the payload untouched.
        platforms (optional): Forwarded to `url_forge`.

    Returns:
        The stats alone when `with_top_posts` is falsy, else a
        `(stats, posts)` tuple. Either member is None when the payload does
        not hold it.

    Raises:
        CrowdTangleMissingTokenError: If `token` is None.
        TypeError: On an unknown `format`, a non-string `start_date` or an
            unknown `sort_by`.
        CrowdTangleInvalidTokenError: If the API answers with a 401 status.
        CrowdTangleInvalidRequestError: If the API answers with any other
            status >= 400.
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    # Argument validation, in the same order as the checks are reported
    if token is None:
        raise CrowdTangleMissingTokenError

    if format not in CROWDTANGLE_OUTPUT_FORMATS:
        raise TypeError('minet.crowdtangle.summary: unkown `format`.')

    if not isinstance(start_date, str):
        raise TypeError('minet.crowdtangle.summary: expecting a `start_date` kwarg.')

    if sort_by not in CROWDTANGLE_SUMMARY_SORT_TYPES:
        raise TypeError('minet.crowdtangle.summary: unknown `sort_by`.')

    # Fetching
    endpoint = url_forge(link, token, start_date, sort_by, platforms, with_top_posts)
    error, http_response, payload = request_json(endpoint, pool=pool)

    if error is not None:
        raise error

    status = http_response.status

    if status == 401:
        raise CrowdTangleInvalidTokenError

    if status >= 400:
        raise CrowdTangleInvalidRequestError(endpoint)

    # Extraction
    summary = nested_get(['result', 'summary', 'facebook'], payload)
    top_posts = None

    if with_top_posts:
        top_posts = nested_get(['result', 'posts'], payload)

    # Formatting the stats (skipped when the payload holds none)
    if summary is not None:
        if format == 'csv_dict_row':
            summary = format_summary(summary, as_dict=True)
        elif format == 'csv_row':
            summary = format_summary(summary)

    if not with_top_posts:
        return summary

    # Formatting the posts (skipped when the payload holds none)
    if top_posts is not None:
        if format == 'csv_dict_row':
            top_posts = [format_post(item, as_dict=True) for item in top_posts]
        elif format == 'csv_row':
            top_posts = [format_post(item) for item in top_posts]

    return summary, top_posts
Example #7
0
def crowdtangle_summary(pool,
                        link,
                        token=None,
                        start_date=None,
                        with_top_posts=False,
                        sort_by=CROWDTANGLE_SUMMARY_DEFAULT_SORT_TYPE,
                        raw=False,
                        platforms=None):
    """
    Fetch the CrowdTangle summary statistics of a link.

    Only the `facebook` entry of the payload's `result.summary` is read.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        link: Link to summarize, handed to `url_forge` and to `format_post`.
        token (str, optional): CrowdTangle API token. Despite the `None`
            default it is mandatory.
        start_date (str, optional): Must be a `str`; since the default is
            None, omitting it raises a TypeError.
        with_top_posts (bool, optional): When truthy, the posts found under
            `result.posts` are returned alongside the stats.
        sort_by (str, optional): Must belong to
            `CROWDTANGLE_SUMMARY_SORT_TYPES`.
        raw (bool, optional): When truthy, stats and posts are returned as
            found in the payload instead of being run through
            `format_summary` / `format_post`.
        platforms (optional): Forwarded to `url_forge`.

    Returns:
        The stats alone when `with_top_posts` is falsy, else a
        `(stats, posts)` tuple. Either member is None when the payload does
        not hold it.

    Raises:
        CrowdTangleMissingTokenError: If `token` is None.
        TypeError: On a non-string `start_date` or an unknown `sort_by`.
        CrowdTangleInvalidTokenError: If the API answers with a 401 status.
        CrowdTangleInvalidRequestError: If the API answers with any other
            status >= 400.
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    if token is None:
        raise CrowdTangleMissingTokenError

    if not isinstance(start_date, str):
        raise TypeError(
            'minet.crowdtangle.summary: expecting a `start_date` kwarg.')

    if sort_by not in CROWDTANGLE_SUMMARY_SORT_TYPES:
        raise TypeError('minet.crowdtangle.summary: unknown `sort_by`.')

    # Fetching
    api_url = url_forge(link, token, start_date, sort_by, platforms,
                        with_top_posts)

    err, response, data = request_json(api_url, pool=pool)

    if err is not None:
        raise err

    if response.status == 401:
        raise CrowdTangleInvalidTokenError

    if response.status >= 400:
        raise CrowdTangleInvalidRequestError(api_url)

    stats = getpath(data, ['result', 'summary', 'facebook'])
    posts = getpath(data, ['result', 'posts']) if with_top_posts else None

    if stats is not None and not raw:
        stats = format_summary(stats)

    if not with_top_posts:
        return stats

    # Same guard as for `stats`: a payload without `result.posts` must yield
    # `(stats, None)` rather than crash while iterating over None.
    if posts is not None and not raw:
        posts = [format_post(post, link=link) for post in posts]

    return stats, posts
Example #8
0
    def request_json(self, url):
        """
        Fetch `url` as JSON using the instance's pool and cookie.

        The call is delegated to the module-level `request_json` with a
        spoofed user agent and the instance's cookie sent as the `Cookie`
        header.

        Args:
            url: Url to request.

        Returns:
            The decoded data handed back by the module-level `request_json`.

        Raises:
            InstagramPublicAPIInvalidResponseError: If the response status
                is >= 400.
            Exception: Whatever error the module-level `request_json`
                reports is re-raised as-is.
        """
        options = {
            'pool': self.pool,
            'spoof_ua': True,
            'headers': {'Cookie': self.cookie}
        }

        error, reply, payload = request_json(url, **options)

        if error:
            raise error

        if reply.status >= 400:
            raise InstagramPublicAPIInvalidResponseError

        return payload
Example #9
0
def make_simple_call(pool,
                     token,
                     route,
                     formatter,
                     format='csv_dict_row',
                     arg=None,
                     query=None,
                     single=False):
    """
    Call a Media Cloud API route and return its formatted items.

    The url is `MEDIACLOUD_API_BASE_URL + route`, optionally followed by
    `/<arg>`, then `?key=<token>` and the `query` pairs.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        token: Media Cloud API key, sent as the `key` query parameter.
        route (str): Route appended to `MEDIACLOUD_API_BASE_URL`.
        formatter (callable): Called on every item of the response as
            `formatter(item, as_dict=True)` for 'csv_dict_row' and
            `formatter(item)` for 'csv_row'.
        format (str, optional): 'csv_dict_row' (default), 'csv_row', or
            anything else to get the items untouched.
        arg (optional): Extra path segment, stringified and appended to the
            route.
        query (dict, optional): Extra query parameters.
        single (bool, optional): When truthy, return only the first item.

    Returns:
        list: One entry per item of the response, or the first entry alone
        when `single` is truthy.

    Raises:
        MediacloudServerError: If the response status is >= 500.
        IndexError: If `single` is truthy and the response holds no item.
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    url = MEDIACLOUD_API_BASE_URL + route

    if arg is not None:
        url += '/' + str(arg)

    url += '?key=%s' % token

    if query is not None:
        # NOTE(review): keys and values are interpolated verbatim, without
        # URL-encoding. Presumably callers only pass url-safe values; confirm
        # before feeding user input here.
        url += '&' + '&'.join(
            '%s=%s' % (str(k), str(v)) for k, v in query.items())

    err, response, data = request_json(url, pool=pool)

    if err:
        raise err

    if response.status >= 500:
        # The body of a failed call is not guaranteed to be a JSON object:
        # only read its `error` key when it is one, so that the server error
        # is always the exception raised here.
        server_error = data.get('error') if isinstance(data, dict) else None
        raise MediacloudServerError(server_error=server_error)

    if format == 'csv_dict_row':
        results = [formatter(item, as_dict=True) for item in data]
    elif format == 'csv_row':
        results = [formatter(item) for item in data]
    else:
        results = list(data)

    if single:
        return results[0]

    return results
Example #10
0
    def generator():
        """
        Lazily iterate over the stories matching the enclosing call's query.

        Relies on `token`, `query`, `collections`, `medias`, `publish_day`,
        `publish_month`, `publish_year`, `count`, `format` and `pool` from
        the enclosing scope. Pages are requested one after the other, each
        resuming from the id returned by `get_last_processed_stories_id` for
        the previous page, until that helper returns None.

        Yields:
            When `count` is truthy, the single value found under the
            `count` key of the response. Otherwise one item per story,
            formatted with `format_story` according to `format`
            ('csv_dict_row', 'csv_row', or untouched for anything else).

        Raises:
            MediacloudServerError: If a response status is >= 500.
            Exception: Whatever error `request_json` reports is re-raised
                as-is.
        """
        last_processed_stories_id = None

        while True:
            url = url_forge(
                token,
                query,
                collections=collections,
                medias=medias,
                publish_day=publish_day,
                publish_month=publish_month,
                publish_year=publish_year,
                count=count,
                last_processed_stories_id=last_processed_stories_id)

            err, response, data = request_json(url, pool=pool)

            if err:
                raise err

            if response.status >= 500:
                # The body of a failed call is not guaranteed to be a JSON
                # object: only read its `error` key when it is one, so that
                # the server error is always the exception raised here.
                server_error = (
                    data.get('error') if isinstance(data, dict) else None)
                raise MediacloudServerError(server_error=server_error)

            # Counting needs a single request and a single yielded value.
            if count:
                yield data['count']
                return

            for story in data:
                if format == 'csv_dict_row':
                    yield format_story(story, as_dict=True)
                elif format == 'csv_row':
                    yield format_story(story)
                else:
                    yield story

            last_processed_stories_id = get_last_processed_stories_id(data)

            if last_processed_stories_id is None:
                return
Example #11
0
def mediacloud_topic_stories(pool,
                             token,
                             topic_id,
                             link_id=None,
                             media_id=None,
                             from_media_id=None,
                             format='csv_dict_row'):
    """
    Lazily iterate over the stories of a Media Cloud topic, page by page.

    Each iteration of the loop requests one page (the url is built by
    `url_forge`) and yields its stories. Pagination follows the link id
    returned by `get_next_link_id`, and stops when a page has no stories or
    when there is no next link id.

    Args:
        pool: Connection pool forwarded to `request_json` as `pool=`.
        token: Media Cloud API key, handed to `url_forge`.
        topic_id: Identifier of the topic, handed to `url_forge`.
        link_id (optional): Link id of the first page to request.
        media_id (optional): Forwarded to `url_forge`.
        from_media_id (optional): Forwarded to `url_forge`.
        format (str, optional): 'csv_dict_row' (default) yields
            `format_topic_story(story, next_link_id, as_dict=True)`,
            'csv_row' yields `format_topic_story(story, next_link_id)`, and
            anything else yields the stories untouched.

    Yields:
        One item per story, in the requested format.

    Raises:
        Exception: Whatever error `request_json` reports is re-raised as-is.
    """
    cursor = link_id

    while True:
        page_url = url_forge(
            token,
            topic_id=topic_id,
            link_id=cursor,
            media_id=media_id,
            from_media_id=from_media_id,
        )

        error, _, payload = request_json(page_url, pool=pool)

        if error:
            raise error

        if 'stories' not in payload:
            return

        stories = payload['stories']

        if len(stories) == 0:
            return

        # The next link id is resolved before yielding because the formatter
        # takes it as an argument for every story of the page.
        following = get_next_link_id(payload)

        if format == 'csv_dict_row':
            rows = (
                format_topic_story(item, following, as_dict=True)
                for item in stories
            )
        elif format == 'csv_row':
            rows = (format_topic_story(item, following) for item in stories)
        else:
            rows = iter(stories)

        for row in rows:
            yield row

        if following is None:
            return

        cursor = following
Example #12
0
    def request_json(self, url):
        """
        Fetch `url` as JSON through the instance's pool, waiting out 403s.

        On a 403 status the call sleeps for
        `seconds_to_midnight_pacific_time() + 10` seconds, after handing that
        duration to `self.before_sleep` when it is callable, and then retries
        the same url.

        Args:
            url: Url to request.

        Returns:
            The decoded data handed back by the module-level `request_json`.

        Raises:
            YouTubeInvalidAPIKeyError: If the status is >= 400 and the
                payload's `error.message` contains 'API key not valid'.
            YouTubeInvalidAPICall: On any other status >= 400 (403 excepted).
            Exception: Whatever error the module-level `request_json`
                reports is re-raised as-is.
        """
        error, reply, payload = request_json(url, pool=self.pool)

        if error:
            raise error

        status = reply.status

        if status == 403:
            delay = seconds_to_midnight_pacific_time() + 10

            # Give the owner a chance to report the upcoming pause
            if callable(self.before_sleep):
                self.before_sleep(delay)

            time.sleep(delay)

            return self.request_json(url)

        if status < 400:
            return payload

        if payload is not None:
            message = nested_get(['error', 'message'], payload, '')

            if 'API key not valid' in message:
                raise YouTubeInvalidAPIKeyError

        raise YouTubeInvalidAPICall(url, status, payload)
Example #13
0
 def request_json(self, url, headers=None):
     """
     Delegate to the module-level `request_json` with the instance's pool.

     The user agent is spoofed (`spoof_ua=True`) and `headers` is forwarded
     untouched. The helper's result is returned as-is: no error handling is
     done here.
     """
     options = {
         'pool': self.pool,
         'spoof_ua': True,
         'headers': headers
     }

     return request_json(url, **options)