Example 1
import time

import requests


def fetch_listing(path, limit=1000, batch_size=100):
    """Fetch a reddit listing from reddit.com."""

    session = requests.Session()
    session.headers.update({
        "User-Agent": "reddit-test-data-generator/1.0",
    })

    base_url = "https://api.reddit.com" + path

    after = None
    count = 0
    while count < limit:
        params = {"limit": batch_size, "count": count}
        if after:
            params["after"] = after

        print("> {}-{}".format(count, count + batch_size))
        response = session.get(base_url, params=params)
        response.raise_for_status()

        listing = get_requests_resp_json(response)["data"]
        for child in listing["children"]:
            yield child["data"]
            count += 1

        after = listing["after"]
        if not after:
            break

        # obey reddit.com's ratelimits
        # see: https://github.com/reddit/reddit/wiki/API#rules
        time.sleep(2)
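
Every example in this listing calls get_requests_resp_json, a project helper whose definition is not shown here. Judging only from how it is used, it takes a requests response and returns the decoded JSON body. The sketch below is an assumed stand-in written for illustration, not the original helper.

def get_requests_resp_json(response):
    # Hypothetical stand-in for the project helper used throughout this
    # listing: decode the JSON body of a requests response, turning an
    # invalid payload into a readable error. The real helper may differ.
    try:
        return response.json()
    except ValueError as ex:
        raise ValueError(
            "expected a JSON body from {}: {}".format(response.url, ex))
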
Example 2
def fetch_listing(path, limit=1000, batch_size=100):
    """Fetch a listing from reddit.com."""

    session = requests.Session()
    session.headers.update({
        "User-Agent": "saidit-test-data-generator/1.0",
    })

    base_url = "https://api.reddit.com" + path

    after = None
    count = 0
    while count < limit:
        params = {"limit": batch_size, "count": count}
        if after:
            params["after"] = after

        print("> {}-{}".format(count, count + batch_size))
        response = session.get(base_url, params=params)
        response.raise_for_status()

        listing = get_requests_resp_json(response)["data"]
        for child in listing["children"]:
            yield child["data"]
            count += 1

        after = listing["after"]
        if not after:
            break

        # obey reddit.com's ratelimits
        # see: https://github.com/reddit-archive/reddit/wiki/API#rules
        time.sleep(2)
Example 3
def fetch_listing(path, limit=1000, batch_size=100):
    """Fetch a reddit listing from reddit.com."""

    session = requests.Session()
    session.headers.update({
        "User-Agent": "reddit-test-data-generator/1.0",
    })

    base_url = "https://api.reddit.com" + path

    after = None
    count = 0
    while count < limit:
        params = {"limit": batch_size, "count": count}
        if after:
            params["after"] = after

        print("> {}-{}".format(count, count + batch_size))

        try:
            response = session.get(base_url, params=params)
            response.raise_for_status()
        except requests.exceptions.HTTPError as ex:
            print("Exception fetching (%r), continuing on" % (ex,))
            # just take the data we have
            return

        listing = get_requests_resp_json(response)["data"]
        for child in listing["children"]:
            yield child["data"]
            count += 1

        after = listing["after"]
        if not after:
            break

        # obey reddit.com's ratelimits
        # see: https://github.com/reddit/reddit/wiki/API#rules
        time.sleep(2)
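
fetch_listing is a generator, so a caller iterates over it and consumes items as they arrive, with pagination and rate limiting handled inside the function. The following is a minimal usage sketch, not part of the quoted code; the listing path and the fields it prints are assumptions about the shape of a reddit link listing.

# Hypothetical caller: stream up to 200 links from a subreddit listing.
# The path and the "id"/"title" fields are assumptions about the reddit
# listing payload, not something the examples above guarantee.
for link in fetch_listing("/r/programming/hot", limit=200, batch_size=100):
    print("{} {}".format(link.get("id"), link.get("title")))
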
Example 4
def _fetch_embedly_service_data():
    resp = requests.get("https://api.embed.ly/1/services/python")
    return get_requests_resp_json(resp)
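
The Embedly services endpoint is generally described as returning a JSON array of provider descriptions. The sketch below shows one way that data might be consumed; the "regex" field name is an assumption about Embedly's payload rather than something shown in the example.

# Hypothetical consumer of the Embedly services payload: collect the URL
# patterns each provider advertises. The "regex" key is an assumption about
# the response format and may not match the live API.
services = _fetch_embedly_service_data()
url_patterns = []
for service in services:
    url_patterns.extend(service.get("regex", []))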