def read_posts(self):
    """Fetch posts in successive requests until a stop condition is met.

    Repeatedly calls ``self.read_posts_with_limit(limit, last_post)`` and
    accumulates the ``"posts"`` list of each response. Every request after
    the first starts from the last post of the previous response, so its
    first element repeats that post and is dropped.

    Fetching stops when:
      * a request yields no new posts,
      * the response reports ``"days_done"`` as truthy, or
      * ``self.limit`` is set (truthy) and at least that many posts have
        been collected.

    Returns:
        list: all posts collected, in the order the requests returned them.
    """
    posts = []
    # NOTE(review): presumably one below the threshold so that the extra
    # overlapping post requested on follow-up calls still fits -- confirm.
    number_per_request = LIMIT_THRESHOLD - 1
    index = 0
    new_posts = []
    while True:
        if self.limit:
            # Never ask for more than what is still missing.
            limit = min(self.limit - len(posts), number_per_request)
        else:
            limit = number_per_request

        is_followup = index > 0
        if is_followup:
            # One extra slot for the overlapping head post.
            limit += 1
            last_post = new_posts[-1]
        else:
            last_post = None

        res = self.read_posts_with_limit(limit, last_post)
        new_posts = res["posts"]
        days_done = res["days_done"]

        # Remove the head because it is the end of the last request. The
        # emptiness guard keeps an empty response from raising IndexError;
        # it then falls through to the "no new posts" exit below.
        if is_followup and new_posts:
            new_posts.pop(0)

        if not new_posts:
            break
        posts.extend(new_posts)

        # If the time window is exceeded, stop.
        if days_done:
            if settings.is_debug():
                print('{} posts in {} days are fetched'.format(
                    len(posts), self.days))
            break

        # If the limit is reached, stop.
        index += 1
        if self.limit and self.limit <= len(posts):
            if settings.is_debug():
                print('{} posts of {} target posts are fetched'.format(
                    len(posts), self.limit))
            break

    print('{} posts are fetched'.format(len(posts)))
    return posts
def subset(self, item, keys=()):
    """Reduce ``item`` to the given keys and prefix its URL with the host.

    The mapping is modified in place: every key not listed in ``keys`` is
    deleted. If a ``u'url'`` entry remains afterwards, ``STEEM_HOST`` is
    prepended to it; otherwise, in debug mode, the stripped item is logged
    as possibly problematic.

    Args:
        item: mutable mapping to filter; it is mutated, not copied.
        keys: collection of keys to keep. Defaults to an empty tuple
            (an immutable default instead of a shared mutable list),
            which removes every key.

    Returns:
        The same ``item`` object, after filtering.
    """
    # Iterate over a snapshot because entries are deleted during the loop.
    for key in list(item):
        if key not in keys:
            del item[key]

    if u'url' in item:
        item[u'url'] = STEEM_HOST + item[u'url']
    elif settings.is_debug():
        logger.info("the item may have issue: {} with keys: {}".format(
            item, list(item.keys())))
    return item