def post(self):
    """Drain the tweet queue, run Jaccard LSH on every tweet, then close.

    Opens the node, feeds each queued tweet through shingling, min-hashing
    and ``self.lshj``, logs any result returned by the LSH run, and finally
    calls ``self.close(save=True)``.

    If start-up fails for any reason the ``tw_auth`` session entry is reset
    to None and the client is redirected to ``/``.
    """
    try:
        self.open()
        self.tweets = []
        self.lshj = LshJaccard(num_bands=20, rows_per_band=10)
    except Exception:
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit and other BaseException subclasses
        # still propagate, and record why the start-up failed.
        logging.exception('TwitterReadNode.post could not start, resetting tw_auth')
        self.session['tw_auth'] = None
        self.redirect('/')
        return

    while True:
        try:
            for shingles_list, original_document in shingle_generator(self.get_next()):
                # get minhash signatures for each shingle list
                min_hash_signatures = minhash.run(shingles_list)

                # create document and run LSH for Jaccard Distance
                doc_obj = JaccardDocument(original_document, shingles_list,
                                          min_hash_signatures)
                logging.info('Running Jaccard LSH Current Tweet: %s', original_document)
                results = self.lshj.run(doc_obj)
                if not results:
                    continue

                logging.info('.....RESULTS.....')
                logging.info('.....score: %s', results['score'])
                logging.info('.....match_found: %s', results['match_found'])
                logging.info(results['document_1'])
                logging.info(results['document_2'])
                logging.info('---------------------------------------------------')
                logging.info('Results: %s', results['score'])
                # TODO: update the code to read this and print out the score,
                # the documents and the match boolean flag.
        except NotFound as nf:
            # get_next() raises NotFound once the queue is drained; this is
            # the normal way out of the loop, so it is logged at INFO level.
            logging.info('TwitterReadNode.GetNext completed, %s', nf.value)
            break

    self.close(save=True)
class TwitterReadNode(TwitterGetTweets, PipeNode):
    """Pipe node that reads queued tweets and runs Jaccard LSH on each one.

    ``post`` drives the node through ``open`` -> ``get_next`` -> ``close``.
    """

    def open(self):
        """Check the Twitter login stored in the session and start listening.

        Raises:
            NotLoggedIn: if the session has no usable ``tw_auth`` entry, or
                if no tweepy API object is obtained for it.
        """
        # ``post`` resets ``tw_auth`` to None after a failed start, so a key
        # that is present but None must be treated as "not logged in" too.
        if 'tw_auth' not in self.session or self.session['tw_auth'] is None:
            logging.error("Not logged in into twitter, tw_auth key not found in session dict")
            raise NotLoggedIn("Not logged in into twitter...auth not in session dict")

        auth = self.session['tw_auth']
        api = tweepy.API(auth)
        if not api:
            logging.error("Not logged in into twitter, no tweepy api")
            raise NotLoggedIn("Not logged in into twitter...no tweepy api")

        # Read tweets from the stream
        self.twitter_listener = super(TwitterReadNode, self).get()
        logging.info('TwitterReadNode.Open completed')

    def get_next(self):
        """Yield tweets from the listener queue until it is empty.

        Raises:
            NotFound: once the queue has been drained, to tell the consumer
                that there is nothing left to read.
        """
        while not self.twitter_listener.queue.empty():
            yield self.twitter_listener.queue.dequeue()
        raise NotFound('Tweets exhausted')

    def close(self, save=False):
        """Publish the collected tweets and a status banner, then redirect.

        Args:
            save: when true, the joined tweets are also stored through a
                ``TwitterStreamDump`` entity.
        """
        tweets = '<br/>\n— '.join(self.tweets)
        if save:
            all_the_tweets = TwitterStreamDump(content=tweets)
            all_the_tweets.put()
        logging.info('TwitterReadNode.Close completed')

        num_tweets_status = "Number of tweets received - %s" % str(self.twitter_listener.tweet_counter)
        # NOTE(review): the separator between the two banner parts appears as
        # a line break in the source; it is written as an escape here so the
        # literal is valid -- confirm against the rendered page.
        banner = 'Done getting tweets at %s. \n%s' % (
            time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime()),
            num_tweets_status,
        )
        self.session['tw_status'] = banner
        self.session['tweets'] = tweets
        self.redirect('/')

    def post(self):
        """Drain the tweet queue, run Jaccard LSH on every tweet, then close.

        If start-up fails for any reason the ``tw_auth`` session entry is
        reset to None and the client is redirected to ``/``.
        """
        try:
            self.open()
            self.tweets = []
            self.lshj = LshJaccard(num_bands=20, rows_per_band=10)
        except Exception:
            # Catch Exception rather than using a bare ``except`` so that
            # KeyboardInterrupt / SystemExit and other BaseException
            # subclasses still propagate, and record why start-up failed.
            logging.exception('TwitterReadNode.post could not start, resetting tw_auth')
            self.session['tw_auth'] = None
            self.redirect('/')
            return

        while True:
            try:
                for shingles_list, original_document in shingle_generator(self.get_next()):
                    # get minhash signatures for each shingle list
                    min_hash_signatures = minhash.run(shingles_list)

                    # create document and run LSH for Jaccard Distance
                    doc_obj = JaccardDocument(original_document, shingles_list,
                                              min_hash_signatures)
                    logging.info('Running Jaccard LSH Current Tweet: %s', original_document)
                    results = self.lshj.run(doc_obj)
                    if not results:
                        continue

                    logging.info('.....RESULTS.....')
                    logging.info('.....score: %s', results['score'])
                    logging.info('.....match_found: %s', results['match_found'])
                    logging.info(results['document_1'])
                    logging.info(results['document_2'])
                    logging.info('---------------------------------------------------')
                    logging.info('Results: %s', results['score'])
                    # TODO: update the code to read this and print out the
                    # score, the documents and the match boolean flag.
            except NotFound as nf:
                # get_next() raises NotFound once the queue is drained; this
                # is the normal way out of the loop, so log it at INFO level.
                logging.info('TwitterReadNode.GetNext completed, %s', nf.value)
                break

        self.close(save=True)