コード例 #1
0
def main(notify):
    """Refresh every stored Subreddit row from reddit, least recently scraped first.

    Rows are read in ascending ``scraped_time`` order through ``DBIterator``.
    Each one is updated from the matching praw subreddit object; if reddit
    answers with an HTTP error or reports the subreddit as invalid, the error
    is printed and the row is only ``touch()``-ed instead.
    NOTE(review): ``touch()`` presumably bumps the scrape timestamp so the row
    is not retried immediately -- confirm against the model.

    Work is committed every 10 rows and once more at the end.

    Args:
        notify: callable taking a single message string, or ``None`` to
            run silently. It receives one start-up message and a progress
            message every 2000 rows.
    """
    session = Session()
    total = session.query(Subreddit).count()

    # The progress report further down already tolerates ``notify=None``;
    # the start-up message has to as well, otherwise a silent run dies
    # with a TypeError before doing any work.
    if notify is not None:
        notify("starting update of %d subs" % total)

    query = session.query(Subreddit).order_by("scraped_time asc")
    dbi = DBIterator(query=query, use_offset=None)

    for count, subreddit in enumerate(dbi.results_iter(), 1):

        try:
            # The third '/'-separated component of the stored url is used as
            # the subreddit name (e.g. '/r/<name>/' -> '<name>').
            name = subreddit.url.split('/')[2]
            subreddit.update_from_praw(r.get_subreddit(name))

        except (praw.requests.exceptions.HTTPError, praw.errors.InvalidSubreddit) as e:
            # Single parenthesised string: prints the same text under
            # Python 2 and Python 3.
            print("ERROR " + str(e))
            subreddit.touch()

        # Both the updated and the merely touched row get persisted.
        session.add(subreddit)

        if count % 2000 == 0 and notify is not None:
            notify("at %d of %d" % (count, total))

        # Commit in small batches rather than once per row.
        if count % 10 == 0:
            session.commit()

    # Flush whatever is left from the last partial batch.
    session.commit()
コード例 #2
0
ファイル: get_subreddits.py プロジェクト: TomDunn/RedditGraph
def get_subreddit(display_name):
    """Fetch the raw JSON data praw holds for the subreddit ``display_name``.

    Returns:
        The praw object's ``_json_data`` on success.
        ``None`` when praw raises ``InvalidSubreddit`` or when the HTTP
        error is classified as a 400 by ``Util.is_400_exception``.
        ``False`` for any other HTTP error.
    """
    try:
        sub = r.get_subreddit(display_name)
        # Touching an attribute forces praw to actually load the object.
        sub.title
        payload = sub._json_data
    except praw.errors.InvalidSubreddit:
        return None
    except praw.requests.exceptions.HTTPError as err:
        # ``None`` for 400 errors, ``False`` for every other HTTP failure,
        # so callers can tell the two cases apart.
        return None if Util.is_400_exception(err) else False
    return payload
コード例 #3
0
ファイル: get_posts.py プロジェクト: TomDunn/RedditGraph
def main(notify, limit=3000):
    """Pull the top posts of r/all into the database and report how many were added.

    For every post yielded by praw, the matching ``Post`` row is fetched or
    created, updated from the praw object, linked to its author and
    subreddit through the ``Util`` helpers, and committed immediately.

    Args:
        notify: callable taking a single message string; it receives the
            initial post count and, at the end, the number of rows added.
        limit: maximum number of top posts to request from reddit.
            Defaults to 3000, the value that used to be hard-coded.
    """
    session = Session()
    gen = r.get_subreddit("all").get_top(limit=limit)

    start = session.query(Post).count()
    notify("Getting posts, initial count: %d" % start)

    for post in gen:
        p = Post.get_or_create(session, post.id)
        p.update_from_praw(post)

        author_name = Util.patch_author(post.author)
        Util.update_user(User, p, session, author_name)
        Util.update_subreddit(Subreddit, p, session, post.subreddit.display_name)

        # One commit per post, as before.
        session.add(p)
        session.commit()

    # Report the growth of the table, not the number of posts iterated:
    # posts that already existed are updated in place and do not count.
    diff = session.query(Post).count() - start
    notify("Added %d posts" % diff)
コード例 #4
0
ファイル: get_posts.py プロジェクト: TomDunn/RedditGraph
def get_submissions(subreddit_name, limit=50):
    """Return the raw JSON data of the top submissions of ``subreddit_name``.

    Args:
        subreddit_name: name passed to ``r.get_subreddit``.
        limit: maximum number of top submissions to request (default 50).

    Returns:
        A list holding each submission's ``_json_data``, in the order praw
        yields them.
    """
    top_listing = r.get_subreddit(subreddit_name).get_top(limit=limit)

    payloads = []
    for submission in top_listing:
        payloads.append(submission._json_data)
    return payloads