Esempio n. 1
0
    def archive_topic(self, t):
        """Save the scraped posts of topic ``t`` that are not in the db yet.

        Posts are fetched through a ``TopicScraper`` and walked from the end
        of the scraped list towards its start. A post with no stored
        counterpart (matched on ``post_num`` within the topic) is saved; the
        walk ends at the first post that is already stored. If the topic's
        status is in ``Topic.POLL_STATUSES``, the stored copy of the first
        scraped post gets its ``contents`` overwritten with the scraped ones.

        t: the topic whose posts should be archived
        """
        scraper = TopicScraper(t, self.gfaqs_client)
        logger.info("Archiving Topic (%s) started" % t.gfaqs_id)

        scraped = list(scraper.retrieve())
        posts_examined = 0
        posts_saved = 0

        # Walk backwards so the loop can stop at the first post a previous
        # run already stored.
        # NOTE(review): this relies on every post before that one being
        # stored too; a gap left by an interrupted run would not be refilled.
        for posts_examined, post in enumerate(reversed(scraped), start=1):
            with transaction.atomic():
                stored_for_topic = Post.objects.filter(topic=t)
                try:
                    stored_for_topic.get(post_num=post.post_num)
                except ObjectDoesNotExist:
                    # not stored yet: attach the creator and add the post
                    post.creator = self.add_user(post.creator)
                    post.save()
                    posts_saved += 1
                    logger.debug("Added Post %s" % t)
                else:
                    # the remaining posts are taken to be in the db already
                    break
            throttle_thread()

        # refresh the first post of poll topics with the scraped contents
        if scraped and t.status in Topic.POLL_STATUSES:
            first_post = scraped[0]
            stored_first = Post.objects.filter(topic=t).get(
                post_num=first_post.post_num)
            stored_first.contents = first_post.contents
            stored_first.save()
            logger.debug("Updated Post [%s] for poll" % first_post.topic)

        logger.debug(
            "Archiving Topic (%s) finished; %s posts examined, %s new"
            % (t.gfaqs_id, posts_examined, posts_saved)
        )
Esempio n. 2
0
    def archive_board(self, b, recursive=True):
        """Scrape the topics of a board and save new or changed ones to the db.

        Topics are processed in the order the ``BoardScraper`` yields them.
        The scan stops at the first topic whose status is in
        ``Topic.ARCHIVED_STATUSES``, or at the first non-sticky topic whose
        post count equals the stored copy's. Sticky topics with an unchanged
        post count are skipped without ending the scan.

        b: the models.Board to archive
        recursive: archives the posts of each saved topic as well, by
            queueing ``archive_topic`` on ``self.pool``
        """
        bs = BoardScraper(b, self.gfaqs_client)
        logger.info("Archiving Board (%s) started" % b.alias)
        topics_examined, topics_saved = 0, 0

        for t in bs.retrieve():
            topics_examined += 1
            if t.status in Topic.ARCHIVED_STATUSES:
                # we reached archived topics; don't continue
                break

            # Only the lookup can raise ObjectDoesNotExist, so keep the try
            # body down to that single call.
            try:
                t_db = Topic.objects.get(gfaqs_id=t.gfaqs_id)
            except ObjectDoesNotExist:
                # unknown topic: clear the pk so save() inserts a new row
                t.pk = None
            else:
                # known topic: reuse the pk so save() updates the stored row
                t.pk = t_db.pk
                if t_db.number_of_posts == t.number_of_posts:
                    if t.status in Topic.STICKY_STATUSES:
                        continue
                    # this is the first topic that hasn't been updated since
                    # last archive run, so we stop
                    break

            # Keep the transaction limited to the writes themselves.
            with transaction.atomic():
                t.creator = self.add_user(t.creator)
                t.save()
            topics_saved += 1
            logger.debug("Saved topic %s" % t)

            # Queue the topic only after the atomic block has exited, so the
            # row is committed (when this is the outermost atomic block)
            # before a pool worker starts saving posts that point at it.
            if recursive:
                self.pool.add_task(self.archive_topic, t)
            # Throttle outside the transaction so it is not held open
            # while this thread waits.
            throttle_thread()

        logger.info(
            "Archiving Board (%s) finished; %s topics examined, %s new"
            % (b.alias, topics_examined, topics_saved)
        )
Esempio n. 3
0
 def __init__(self, email, password):
     """Set up the client: store an opener on ``self.opener`` and log in.

     email: only used in the first log message within this method
     password: not referenced within this method

     NOTE(review): login() is called without arguments and the final log
     line reports settings.GFAQS_LOGIN_EMAIL rather than ``email``, so the
     credentials presumably come from settings -- confirm against login().
     """
     logger.info("Creating Authenticated GFAQSClient with email=%s" % email)
     # the opener is created before login() runs
     self.opener = build_opener()
     self.login()
     logger.info("Logged in as %s" % settings.GFAQS_LOGIN_EMAIL)