def run(self):
    """Crawl tweets for every configured keyword and save them.

    For each keyword in ``self.keywords`` the search is repeated, passing
    the current ``self.get_max_id()`` value, until ``self.get_search``
    returns an empty result. Each returned tweet is wrapped in ``Tweet``,
    converted with ``format_save()`` and handed to ``self.save_tweet``.
    The number of processed tweets is logged and printed once all keywords
    are done.
    """
    count = 0
    for keyword in self.keywords:
        while True:
            # Re-read the marker on every pass so each search uses the
            # latest value.
            # NOTE(review): this loop only terminates if saving a batch
            # eventually makes get_search return nothing — presumably
            # save_tweet advances get_max_id(); confirm.
            max_id = self.get_max_id()
            tweets = self.get_search(keyword, 10, max_id)
            if not tweets:
                self.logging.debug(
                    'INFO: no new tweets for {keyword}'.format(keyword=keyword)
                )
                break
            for tweet in tweets:
                item = Tweet(tweet._json)
                count += 1
                self.save_tweet(item.format_save())
    self.logging.debug('END: {nb} tweets were crawled'.format(nb=count))
    print(count, " tweets were crawled")
def refresher(self, item):
    """Re-format an already stored tweet and save it again.

    The ``"_id"`` key is removed from ``item`` in place (the caller's
    mapping is modified; a missing key raises ``KeyError``). A copy of the
    remaining data is wrapped in ``Tweet``, converted with
    ``format_save()``, announced on stdout and passed to
    ``self.save_tweet``, whose result is returned.
    """
    del item["_id"]
    refreshed = Tweet(item.copy()).format_save()
    message = "Updated tweet {id}".format(id=refreshed["id"])
    print(message)
    return self.save_tweet(refreshed)