Esempio n. 1
0
 def run(self):
     """Crawl tweets for every keyword in ``self.keywords`` and save them.

     For each keyword, batches are requested with
     ``self.get_search(keyword, 10, max_id)`` until an empty batch comes
     back. Every returned tweet is wrapped in ``Tweet`` (built from the
     tweet's ``_json`` payload), converted with ``format_save()`` and
     handed to ``self.save_tweet``.

     The total number of saved tweets is written to ``self.logging`` at
     debug level and printed to stdout. Returns ``None``.

     NOTE(review): the inner loop only ends on an empty batch; this
     presumably relies on ``save_tweet`` advancing the value returned by
     ``get_max_id`` -- confirm, otherwise the loop cannot terminate.
     """
     count = 0
     for keyword in self.keywords:
         while True:
             # Re-read the cursor on every pass: it is fetched fresh before
             # each search rather than once up front, so a stale value is
             # never reused across batches.
             max_id = self.get_max_id()
             tweets = self.get_search(keyword, 10, max_id)
             if not tweets:
                 self.logging.debug('INFO: no new tweets for {keyword}'.format(keyword=keyword))
                 break
             for tweet in tweets:
                 self.save_tweet(Tweet(tweet._json).format_save())
                 count += 1
     self.logging.debug('END: {nb} tweets were crawled'.format(nb=count))
     print(count, " tweets were crawled")
Esempio n. 2
0
 def refresher(self, item):
     """Rebuild a stored tweet through ``Tweet.format_save`` and save it again.

     Args:
         item: mapping holding a previously stored tweet. It is left
             untouched; the work is done on a shallow copy.

     Returns:
         Whatever ``self.save_tweet`` returns for the re-formatted tweet.
     """
     # Work on a copy so the caller's document keeps its "_id"; the key is
     # dropped from the copy only (presumably the storage-layer identifier
     # that must not be fed back into Tweet -- confirm). A missing "_id" is
     # tolerated instead of raising KeyError.
     payload = item.copy()
     payload.pop("_id", None)
     refreshed = Tweet(payload).format_save()
     print("Updated tweet {id}".format(id=refreshed["id"]))
     return self.save_tweet(refreshed)