Esempio n. 1
0
def crawl_room(room, http):
    """
    See if there's a subreddit corresponding to this room. If there is,
    fill in the model's moderator list, subscriber count, shortname_display,
    image URL and title.

    The room's ``last_crawled`` timestamp is saved before any request is
    made. If the subreddit does not exist, the room's moderators and
    subscribers are cleared and saved. On an HTTP error or a missing
    moderator table the function logs and returns without saving the
    scraped fields.

    :param room: room model; ``shortname`` and ``image_url`` are read, the
        scraped fields are assigned and ``save()`` is called.
    :param http: HTTP client; ``http.request('GET', url)`` must return an
        object exposing ``status`` and ``data``.
    """
    room.last_crawled = datetime.datetime.now()
    room.save()  # Save immediately so even if it errors we don't try again too fast
    subreddit = room.shortname
    log(logger, 'debug', 'Crawling', subreddit)
    url = "http://www.reddit.com/r/%s/about/moderators/" % subreddit
    r = http.request('GET', url)
    if r.status != 200:
        if subreddit not in ['tester']:
            # We know that "tester" gives a 403 for whatever reason
            log(logger, 'error', 'Request got error:', r.status, "on url", url)
        return
    soup = BeautifulSoup(r.data)

    # Check whether subreddit exists:
    if soup.find(id='noresults'):
        # Clear the fields on the room itself (not on the HTTP response),
        # otherwise the save below persists nothing.
        room.moderators = room.subscribers = None
        room.save()
        return

    # Get display shortname
    moderator_table = soup.find(id='moderator-table')
    try:
        title = moderator_table.h1.text
    except AttributeError:
        # We couldn't find the moderator table
        log(logger, 'info', 'Could not find moderator table for:', room.shortname)
        return
    assert title.startswith('moderators of ')
    shortname_display = title.replace('moderators of ', '')
    room.shortname_display = shortname_display

    # Get number of subscribers
    number = soup.find('div', 'side').find('span', 'number').text
    number = int(re.sub('[^0-9]', '', number))
    room.subscribers = number

    # Get moderator list
    mods = moderator_table.find_all('span', 'user')
    room.moderators = [m.a.text for m in mods]

    # Get image URL; fall back to the stored URL (or '') when the header
    # element is absent or carries no src attribute.
    header_img = soup.find(id='header-img')
    header_src = header_img.get('src') if header_img is not None else None
    room.image_url = header_src or room.image_url or ''

    # We need another URL to get the title
    url = "http://www.reddit.com/r/%s/" % subreddit
    r = http.request('GET', url)
    if r.status != 200:
        log(logger, 'error', 'Request got error:', r.status, "on url", url)
        return
    soup = BeautifulSoup(r.data)
    room.title = soup.title.text

    # Write
    log(logger, 'debug', 'Setting', subreddit, 'to', room.to_dict())
    room.save()
def main():
    """
    Sync XMPP room affiliations for every online subreddit room.

    For each online room JID accepted by ``is_subreddit_room``: make ``ME``
    the room owner if it is not already, then give every reddit moderator
    of the room the ``ADMIN`` affiliation. Rooms that have no database
    record are skipped after the owner step.
    """
    room_jids = get_online_rooms()
    log(logger, "debug", "Online jids:", room_jids)
    # Build a real list: a lazy filter object would be logged as an opaque
    # object on Python 3 instead of showing the JIDs.
    room_jids = [rjid for rjid in room_jids if is_subreddit_room(rjid)]
    log(logger, "debug", "Valid online jids:", room_jids)
    for rjid in room_jids:
        room_node, room_host = rjid.split("@")
        assert room_host == XMPP_MUC_DOMAIN
        affiliations = get_mnesia_affiliations(room_node)
        # Make me owner
        if affiliations.get(ME) != OWNER:
            set_room_affiliation(ME, room_node, OWNER)
        # Make moderators admins
        try:
            room = get_room_by_node(room_node)
        except Room.DoesNotExist:
            # Some rooms may not be in the DB. User created (hax),
            # or our DB got emptied.
            continue
        reddit_mods = room.get_reddit_moderators()
        log(logger, "debug", "Current affiliations for room:", room.title, affiliations)
        log(logger, "debug", "Found mods for room:", room.title, reddit_mods)
        # TODO: don't set again if already set
        for mod_username in reddit_mods:
            set_room_affiliation(mod_username, room_node, ADMIN)
Esempio n. 3
0
def crawl_room(room, http):
    """
    See if there's a subreddit corresponding to this room. If there is,
    fill in the model's moderator list, subscriber count, shortname_display,
    image URL and title.

    The room's ``last_crawled`` timestamp is saved before any request is
    made. If the subreddit does not exist, the room's moderators and
    subscribers are cleared and saved. On an HTTP error or a missing
    moderator table the function logs and returns without saving the
    scraped fields.

    :param room: room model; ``shortname`` and ``image_url`` are read, the
        scraped fields are assigned and ``save()`` is called.
    :param http: HTTP client; ``http.request("GET", url)`` must return an
        object exposing ``status`` and ``data``.
    """
    room.last_crawled = datetime.datetime.now()
    room.save()  # Save immediately so even if it errors we don't try again too fast
    subreddit = room.shortname
    log(logger, "debug", "Crawling", subreddit)
    url = "http://www.reddit.com/r/%s/about/moderators/" % subreddit
    r = http.request("GET", url)
    if r.status != 200:
        if subreddit not in ["tester"]:
            # We know that "tester" gives a 403 for whatever reason
            log(logger, "error", "Request got error:", r.status, "on url", url)
        return
    soup = BeautifulSoup(r.data)

    # Check whether subreddit exists:
    if soup.find(id="noresults"):
        # Clear the fields on the room itself (not on the HTTP response),
        # otherwise the save below persists nothing.
        room.moderators = room.subscribers = None
        room.save()
        return

    # Get display shortname
    moderator_table = soup.find(id="moderator-table")
    try:
        title = moderator_table.h1.text
    except AttributeError:
        # We couldn't find the moderator table; log and give up on this
        # room instead of crashing the crawl.
        log(logger, "info", "Could not find moderator table for:", room.shortname)
        return
    assert title.startswith("moderators of ")
    shortname_display = title.replace("moderators of ", "")
    room.shortname_display = shortname_display

    # Get number of subscribers
    number = soup.find("div", "side").find("span", "number").text
    number = int(re.sub("[^0-9]", "", number))
    room.subscribers = number

    # Get moderator list
    mods = moderator_table.find_all("span", "user")
    room.moderators = [m.a.text for m in mods]

    # Get image URL; fall back to the stored URL (or "") when the header
    # element is absent or carries no src attribute.
    header_img = soup.find(id="header-img")
    header_src = header_img.get("src") if header_img is not None else None
    room.image_url = header_src or room.image_url or ""

    # We need another URL to get the title
    url = "http://www.reddit.com/r/%s/" % subreddit
    r = http.request("GET", url)
    if r.status != 200:
        log(logger, "error", "Request got error:", r.status, "on url", url)
        return
    soup = BeautifulSoup(r.data)
    room.title = soup.title.text

    # Write
    log(logger, "debug", "Setting", subreddit, "to", room.to_dict())
    room.save()
Esempio n. 4
0
def main():
    """Fetch the rooms from get_rooms_to_crawl(), log them, and pass them to crawl_rooms()."""
    pending_rooms = get_rooms_to_crawl()
    log(logger, "debug", "Got rooms to crawl:", pending_rooms)
    crawl_rooms(pending_rooms)
Esempio n. 5
0
def add_rooms(room_names):
    """
    Call ``Room.get_or_create_by_shortname`` once for every entry of
    ``room_names``, emitting a debug log line before each call.
    """
    for shortname in room_names:
        log(logger, 'debug', 'get or create', shortname)
        Room.get_or_create_by_shortname(shortname)
def set_room_affiliation(username, room_node, affiliation):
    """
    Set one user's affiliation in one MUC room via ejabberdctl.

    The user's JID is built as ``username@XMPP_DOMAIN`` and lowercased, then
    the ``set_room_affiliation`` command is run for ``room_node`` on
    ``XMPP_MUC_DOMAIN``.

    :param username: local part of the user's JID.
    :param room_node: local part of the room's JID.
    :param affiliation: affiliation value handed to ejabberdctl unchanged.
    """
    log(logger, "debug", "Setting room affiliation:", username, room_node, affiliation)
    user_jid = (username + "@" + XMPP_DOMAIN).lower()
    args = ["set_room_affiliation", room_node, XMPP_MUC_DOMAIN, user_jid, affiliation]
    ejabberdctl_command(args)


def main():
    """
    Sync XMPP room affiliations for every online subreddit room.

    This loop must live in its own function: when it ran inside
    ``set_room_affiliation`` every single affiliation change re-enumerated
    all rooms and called ``set_room_affiliation`` again without a base case.

    For each online room JID accepted by ``is_subreddit_room``: make ``ME``
    the room owner if it is not already, then give every reddit moderator
    of the room the ``ADMIN`` affiliation. Rooms that have no database
    record are skipped after the owner step.
    """
    room_jids = get_online_rooms()
    log(logger, "debug", "Online jids:", room_jids)
    # Build a real list so the log line below shows the JIDs rather than a
    # lazy filter object on Python 3.
    room_jids = [rjid for rjid in room_jids if is_subreddit_room(rjid)]
    log(logger, "debug", "Valid online jids:", room_jids)
    for rjid in room_jids:
        room_node, room_host = rjid.split("@")
        assert room_host == XMPP_MUC_DOMAIN
        affiliations = get_mnesia_affiliations(room_node)
        # Make me owner
        if affiliations.get(ME) != OWNER:
            set_room_affiliation(ME, room_node, OWNER)
        # Make moderators admins
        try:
            room = get_room_by_node(room_node)
        except Room.DoesNotExist:
            # Some rooms may not be in the DB. User created (hax),
            # or our DB got emptied.
            continue
        reddit_mods = room.get_reddit_moderators()
        log(logger, "debug", "Current affiliations for room:", room.title, affiliations)
        log(logger, "debug", "Found mods for room:", room.title, reddit_mods)
        # TODO: don't set again if already set
        for mod_username in reddit_mods:
            set_room_affiliation(mod_username, room_node, ADMIN)


# Script entry point: only when executed directly (not imported), log a
# start marker, run main(), then log a completion marker.
if __name__ == "__main__":
    log(logger, "debug", "set_room_affiliations starting")
    main()
    log(logger, "debug", "set_room_affiliations done")
Esempio n. 8
0
def main():
    """Fetch the rooms from get_rooms_to_crawl(), log them, and pass them to crawl_rooms()."""
    pending_rooms = get_rooms_to_crawl()
    log(logger, 'debug', 'Got rooms to crawl:', pending_rooms)
    crawl_rooms(pending_rooms)