def main(notify):
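    """Re-scrape every stored Subreddit from the Reddit API, oldest scrape first.

    Relies on the module-level praw Reddit client `r` and the SQLAlchemy
    Session/model classes defined elsewhere in the script.
    """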

    session = Session()
    total = session.query(Subreddit).count()
    count = 0

    notify("starting update of %d subs" % total)

    query   = session.query(Subreddit).order_by(Subreddit.scraped_time.asc())
    dbi     = DBIterator(query=query, use_offset=None)

    for subreddit in dbi.results_iter():

        count += 1

        try:
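            # "/r/<name>/" -> "<name>"; refresh the row from the live subreddit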
            subreddit.update_from_praw(r.get_subreddit(subreddit.url.split('/')[2]))
            session.add(subreddit)

        except (praw.requests.exceptions.HTTPError, praw.errors.InvalidSubreddit) as e:
            print "ERROR", str(e)
            subreddit.touch()
            session.add(subreddit)

        if count % 2000 == 0 and notify is not None:
            notify("at %d of %d" % (count, total))

        if count % 10 == 0:
            session.commit()

    session.commit()

def main(notify):
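    """Scan stored subreddit descriptions for /r/ links and record any
    subreddits not already tracked as DiscoveredSub rows."""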

    parser = HTMLParser()
    session = Session()

    subreddit_count = session.query(Subreddit).count()
    start_count = session.query(DiscoveredSub).count()
    notify("discovering from %d exiting" % subreddit_count)

    discovered_subs = set()
    query = session.query(Subreddit.description_html).filter(Subreddit.description_html != None)
    dbi = DBIterator(query=query)

    for sub in dbi.results_iter():
        # normalize each linked sub to the canonical "/r/<name>/" form used for URL comparisons
        links = set(map(lambda s: u'/r/' + s.lower().strip() + u'/',
                        find_sub_links(parser.unescape(sub.description_html))))

        if len(links) == 0:
            continue

        # drop links already tracked, either as Subreddit rows or earlier discoveries
        existing = set(map(lambda s: s.url.lower().strip(),
                           session.query(Subreddit.url).filter(Subreddit.url.in_(links))))
        found    = set(map(lambda s: s.url.lower().strip(),
                           session.query(DiscoveredSub.url).filter(DiscoveredSub.url.in_(links))))
        new_subs = (links - existing) - found

        if len(new_subs) > 0:
            discovered_subs.update(new_subs)

        # flush discoveries in batches, then reset so the same URLs are not re-added later
        if len(discovered_subs) > 25:
            add_new_subs(session, discovered_subs)
            discovered_subs.clear()

    if len(discovered_subs) > 0:
        add_new_subs(session, discovered_subs)

    end_count = session.query(DiscoveredSub).count()
    notify("found additional %d" % (end_count - start_count))

def main(notify):
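    """Build an undirected subreddit graph, weighting each edge by how many
    times the two subreddits link to each other in their descriptions, and
    write the result to a GEXF file."""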

    g = nx.Graph()
    out_filename = "data/subreddits_edged_by_description_links.gexf"
    parser = HTMLParser()
    session = Session()
    query = session.query(Subreddit)
    dbi = DBIterator(query=query)

    for subreddit in dbi.results_iter():
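        # node id is the subreddit's short name, extracted from its "/r/<name>/" URL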
        sub = subreddit.url.split("/")[2].lower()

        initialize_node(g, sub)

        if not subreddit.description_html:
            continue

        html = parser.unescape(subreddit.description_html)
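        # add (or strengthen) an edge for every subreddit linked from this description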
        for linked_sub in find_sub_links(html):
            if g.has_edge(sub, linked_sub):
                g[sub][linked_sub]["weight"] += 1
            else:
                g.add_edge(sub, linked_sub, weight=1)

    nx.write_gexf(g, out_filename)