def archive_topic(self, t):
    """Scrape the given topic and save any of its posts not yet in the db.

    The scraped posts are walked in reverse of the order the scraper
    returned them, and the walk stops at the first post that is already
    stored (presumably the scraper yields oldest-first, so everything
    before that post was saved by an earlier run -- confirm against
    TopicScraper).  When the topic's status is in ``Topic.POLL_STATUSES``
    the stored copy of the first scraped post is then refreshed with the
    newly scraped contents.

    t: the models.Topic to archive
    """
    ts = TopicScraper(t, self.gfaqs_client)
    logger.info("Archiving Topic (%s) started", t.gfaqs_id)
    posts_examined, posts_saved = 0, 0

    # Materialise the scrape up front: the list is needed both for the
    # reverse walk and for the poll refresh further down.
    posts = list(ts.retrieve())
    for p in reversed(posts):
        posts_examined += 1
        with transaction.atomic():
            # A plain existence test; the stored row itself is not needed.
            already_saved = Post.objects.filter(
                topic=t, post_num=p.post_num).exists()
            if already_saved:
                # we already have the rest of the posts in the db
                break
            p.creator = self.add_user(p.creator)
            p.save()
        # Counted and logged only once the atomic block has exited cleanly.
        posts_saved += 1
        # Logs the topic (not the individual post), as this message always has.
        logger.debug("Added Post %s", t)

    # NOTE(review): throttling once per topic, after the post loop --
    # confirm this is the intended placement.
    throttle_thread()

    # update poll results if applicable
    if posts and t.status in Topic.POLL_STATUSES:
        first_post = posts[0]
        # Raises ObjectDoesNotExist if the first post was never stored.
        p_db = Post.objects.filter(topic=t).get(post_num=first_post.post_num)
        p_db.contents = first_post.contents
        p_db.save()
        logger.debug("Updated Post [%s] for poll", first_post.topic)

    logger.debug(
        "Archiving Topic (%s) finished; %s posts examined, %s new",
        t.gfaqs_id, posts_examined, posts_saved)
def archive_board(self, b, recursive=True):
    """Scrape a board and persist its topics to the db.

    Topics are taken in the order the scraper yields them.  The walk ends
    at the first topic whose status is in ``Topic.ARCHIVED_STATUSES``, or
    at the first already-stored, non-sticky topic whose post count has not
    changed; sticky topics with an unchanged post count are skipped.

    b: the models.Board to archive
    recursive: when true, each saved topic is also queued on ``self.pool``
        so that ``archive_topic`` stores its posts
    """
    board_scraper = BoardScraper(b, self.gfaqs_client)
    logger.info("Archiving Board (%s) started" % b.alias)
    topics_examined = 0
    topics_saved = 0

    for t in board_scraper.retrieve():
        topics_examined += 1

        if t.status in Topic.ARCHIVED_STATUSES:
            # archived topics reached; nothing further to look at
            break

        try:
            stored = Topic.objects.get(gfaqs_id=t.gfaqs_id)
        except ObjectDoesNotExist:
            # not stored yet: make sure no primary key is carried over
            t.pk = None
        else:
            # known topic: reuse the stored row's primary key
            t.pk = stored.pk
            if stored.number_of_posts == t.number_of_posts:
                if t.status not in Topic.STICKY_STATUSES:
                    # first topic that hasn't been updated since the
                    # last archive run, so stop here
                    break
                # unchanged sticky topic: skip it but keep going
                continue

        with transaction.atomic():
            t.creator = self.add_user(t.creator)
            t.save()
            topics_saved += 1
            logger.debug("Saved topic %s" % t)

        if recursive:
            self.pool.add_task(self.archive_topic, t)

        # NOTE(review): throttling once per saved topic -- confirm this is
        # the intended placement.
        throttle_thread()

    logger.info(
        "Archiving Board (%s) finished; %s topics examined, %s new"
        % (b.alias, topics_examined, topics_saved))
def __init__(self, email, password):
    """Build the URL opener and log in.

    email: address reported in the start-up log line
    password: accepted but not used in this method

    NOTE(review): neither argument is stored or handed to ``login()``
    here, and the closing log line reports ``settings.GFAQS_LOGIN_EMAIL``
    rather than ``email`` -- confirm where ``login()`` gets its
    credentials from.
    """
    startup_message = "Creating Authenticated GFAQSClient with email=%s" % email
    logger.info(startup_message)

    self.opener = build_opener()
    self.login()

    # Read the configured address only after login() has returned.
    configured_email = settings.GFAQS_LOGIN_EMAIL
    logger.info("Logged in as %s" % configured_email)