def collectTweets(self, twitterhandle):
    """ Collects the latest tweets of a user and saves them to the database.

    Runs a live search for 'from:<handle>' and walks the loaded tweet
    boxes in order, inserting each one. Collection stops as soon as
    database_commands.insertTweet returns a falsy value, or when loading
    more results no longer changes the number of tweet boxes.

    A tweet box that raises while being parsed or inserted is logged and
    skipped; it neither stops the collection nor counts as inserted.

    @param twitterhandle {String} name of users twitter handle; when it
        contains '@', only the text after the first '@' is used
    """
    if not self.signedIn:
        self.signin()
    if '@' in twitterhandle:
        twitterhandle = twitterhandle.split('@')[1]

    # Store the user handle if the database does not know it yet.
    handles = database_commands.getHandler(twitterhandle)
    if len(handles) < 1:
        self.saveTwitterHandle(twitterhandle)
    # Pause for one second before issuing the search.
    sleep(1)

    self.liveSearch('from:' + twitterhandle)
    boxInd = 0          # index of the next tweet box to process
    numInserted = 0     # tweets for which insertTweet returned truthy
    lastNumBoxes = 0
    self.tweetboxes = self._loadAllTweets(numTimes=1)
    numBoxes = len(self.tweetboxes)
    inserted = True
    while lastNumBoxes != numBoxes and inserted:
        while boxInd < len(self.tweetboxes) and inserted:
            tweetbox = self.tweetboxes[boxInd]
            try:
                tweet = self._getTweetStats(tweetbox)
                tweet['handle'] = twitterhandle
                inserted = database_commands.insertTweet(tweet)
            except Exception as e:
                # Best effort: skip this box and keep going. Catching
                # Exception (not a bare except) lets KeyboardInterrupt
                # and SystemExit stop the run.
                self.logger.error('Skipping tweet box ' + str(boxInd) +
                                  ' for ' + twitterhandle + ': ' + repr(e))
                inserted = True
            else:
                if inserted:
                    numInserted += 1
            boxInd += 1
        if inserted:
            # Every box so far was handled; try to load more results.
            self.tweetboxes = self._loadAllTweets(numTimes=5)
        lastNumBoxes = numBoxes
        numBoxes = len(self.tweetboxes)
        self.logger.info('Completed ' + str(boxInd) + ' tweets, loaded ' +
                         str(numBoxes - lastNumBoxes) +
                         ' more tweets for ' + twitterhandle)
    # Report real insertions rather than boxInd - 1, which was wrong when
    # boxes ran out, were skipped, or none loaded (it then logged -1).
    self.logger.info(
        'Inserted ' + str(numInserted) + ' tweets for ' + twitterhandle)
def collectAllTweets(self, twitterhandle):
    """ Collects all tweets of a user and saves them to the database.

    Walks the date windows in utils.allTwitterDates (or
    utils.allTwitterDatesByDay when self.phantom is set) from the last
    window to the first, running one 'from:<handle> since:<a> until:<b>'
    live search per window and inserting every tweet found with
    insertDuplicates=False.

    Stops early once more than drySpan consecutive windows produced no
    *new* tweets (drySpan is 6, or 180 when self.phantom is set).
    NOTE(review): the previous docstring said "6 consecutive months";
    the comparison is numZeros > drySpan, so the search actually stops
    on the 7th dry window - confirm which one is intended.

    A tweet box that raises while being parsed or inserted is logged and
    skipped. An error anywhere else in a window is logged and the loop
    moves on to the next window.

    @param twitterhandle {String} name of users twitter handle; when it
        contains '@', only the text after the first '@' is used
    """
    if not self.signedIn:
        self.signin()
    if '@' in twitterhandle:
        twitterhandle = twitterhandle.split('@')[1]

    # Store the user handle if the database does not know it yet.
    handles = database_commands.getHandler(twitterhandle)
    if len(handles) < 1:
        self.saveTwitterHandle(twitterhandle)
    # Pause for one second before issuing the first search.
    sleep(1)

    numZeros = 0        # consecutive windows without a new tweet
    totalInserted = 0
    twitterDates = utils.allTwitterDates
    drySpan = 6
    if self.phantom:
        twitterDates = utils.allTwitterDatesByDay
        drySpan = 180

    # Adjacent entries form one [since, until) window; iterate backwards
    # starting from the last pair in the list.
    for i in range(len(twitterDates) - 2, -1, -1):
        try:
            cmd = ('from:' + twitterhandle + ' since:' + twitterDates[i] +
                   ' until:' + twitterDates[i + 1])
            # The closing quote used to be "'" and did not match the
            # opening '"'.
            self.logger.info('searching: "' + cmd + '"')
            self.liveSearch(cmd)

            boxInd = 0
            lastNumBoxes = 0
            self.tweetboxes = self._loadAllTweets(numTimes=5)
            numBoxes = len(self.tweetboxes)
            numInserted = 0
            # Keep loading until another load adds no further boxes.
            while lastNumBoxes != numBoxes:
                while boxInd < len(self.tweetboxes):
                    tweetbox = self.tweetboxes[boxInd]
                    try:
                        tweet = self._getTweetStats(tweetbox)
                        tweet['handle'] = twitterhandle
                        if database_commands.insertTweet(
                                tweet, insertDuplicates=False):
                            numInserted += 1
                    except Exception as e:
                        # Best effort: skip this box. Catching Exception
                        # (not a bare except) lets KeyboardInterrupt and
                        # SystemExit stop the run.
                        self.logger.error(
                            'Skipping tweet box ' + str(boxInd) + ' for ' +
                            twitterhandle + ': ' + repr(e))
                    boxInd += 1
                self.tweetboxes = self._loadAllTweets(numTimes=5)
                lastNumBoxes = numBoxes
                numBoxes = len(self.tweetboxes)
                self.logger.info('Inserted ' + str(numInserted) +
                                 ' tweets, loaded ' +
                                 str(numBoxes - lastNumBoxes) +
                                 ' more tweets for ' + twitterhandle)

            if numInserted == 0:
                numZeros += 1
            else:
                numZeros = 0
            if numZeros > drySpan:
                break
            totalInserted += numInserted
            self.logger.info('Inserted ' + str(totalInserted) +
                             ' TOTAL tweets for ' + twitterhandle +
                             '. Zeros in-a-row: ' + str(numZeros))
        except Exception as e:
            # A failed window must not abort the whole crawl.
            traceback.print_exc()
            traceback.print_stack()
            self.logger.error('Error scrolling through tweets!')
            self.logger.error(e)