Ejemplo n.º 1
0
    def read_posts(self):
        """Fetch posts in successive requests and return them as one list.

        Each request asks ``self.read_posts_with_limit`` for at most
        ``LIMIT_THRESHOLD - 1`` new posts (fewer when ``self.limit`` leaves
        less room). Every request after the first passes the last collected
        post as the starting point, asks for one extra entry, and discards
        the first entry of the response because it is the end of the
        previous request.

        Fetching stops when a request yields no new posts, when the helper
        reports ``days_done``, or when ``self.limit`` (if truthy) posts have
        been collected. The total count is always printed before returning.

        Returns:
            list: the collected posts, in the order they were received.
        """
        posts = []
        number_per_request = LIMIT_THRESHOLD - 1
        index = 0

        while True:
            if self.limit:
                limit = min(self.limit - len(posts), number_per_request)
            else:
                limit = number_per_request

            if index > 0:
                # One extra slot for the overlapping head that is dropped
                # below; the previous page was non-empty, so posts[-1] is
                # its last element.
                limit += 1
                last_post = posts[-1]
            else:
                last_post = None

            res = self.read_posts_with_limit(limit, last_post)
            new_posts = res["posts"]
            days_done = res["days_done"]

            # Remove the head because it's the end of the last request.
            # Guarded so an empty follow-up response ends the loop instead
            # of raising IndexError from pop() on an empty list.
            if index > 0 and new_posts:
                new_posts.pop(0)
            if not new_posts:
                break
            posts.extend(new_posts)

            # If the time window is exhausted, stop.
            if days_done:
                if settings.is_debug():
                    print('{} posts in {} days are fetched'.format(
                        len(posts), self.days))
                break

            # If the requested number of posts is reached, stop.
            index += 1
            if self.limit and self.limit <= len(posts):
                if settings.is_debug():
                    print('{} posts of {} target posts are fetched'.format(
                        len(posts), self.limit))
                break

        print('{} posts are fetched'.format(len(posts)))
        return posts
Ejemplo n.º 2
0
 def subset(self, item, keys=[]):
     """Strip ``item`` down to the entries named in ``keys``, in place.

     Every key of ``item`` that is not in ``keys`` is deleted. If a
     ``url`` entry survives, ``STEEM_HOST`` is prepended to it; otherwise
     the item is logged when debug mode is enabled.

     Returns:
         The same ``item`` object that was passed in, after modification.
     """
     # Collect first, delete afterwards, so the dict is never resized
     # while it is being iterated.
     unwanted = [name for name in item if name not in keys]
     for name in unwanted:
         del item[name]

     if u'url' not in item:
         if settings.is_debug():
             logger.info("the item may have issue: {} with keys: {}".format(
                 item, list(item.keys())))
         return item

     item[u'url'] = STEEM_HOST + item[u'url']
     return item