def get_tweets_sentiment(self, tweets: List, sentiment=None) -> List:
    """Analyze every tweet and optionally keep only one sentiment class.

    Args:
        tweets: Items passed one by one to ``self.sentiment_analyze``.
        sentiment: ``None`` (or any falsy value) to get every analysis
            result, or one of ``"neg"``, ``"pos"``, ``"neutros"`` to get
            only the matching sentiment codes.

    Returns:
        Without a filter, the list of results exactly as returned by
        ``self.sentiment_analyze`` (one entry per tweet). With a filter,
        the list of ``"code"`` values that are ``< 0``, ``> 0`` or ``== 0``
        respectively. An unknown filter is logged and yields ``[]``.
    """
    tweets_sentiment = [self.sentiment_analyze(tweet) for tweet in tweets]
    if not sentiment:
        return tweets_sentiment

    # Tuple membership compares with ``==`` so unhashable values are
    # rejected gracefully instead of raising.
    if sentiment not in ("neg", "pos", "neutros"):
        logger.error("Error invalid sentiment paramater")
        return []

    def matches(code) -> bool:
        if sentiment == "neg":
            return code < 0
        if sentiment == "pos":
            return code > 0
        return code == 0

    # A failed analysis comes back as None; skip it rather than crash on
    # ``None["code"]``.
    return [
        result["code"]
        for result in tweets_sentiment
        if result is not None and matches(result["code"])
    ]
 def sentiment_analyze(self, tweet: List) -> Dict:
     """Classify one tweet by the polarity TextBlob assigns to its text.

     Args:
         tweet: Indexable pair where ``tweet[0]`` is the tweet id and
             ``tweet[1]`` is the text handed to ``TextBlob``.

     Returns:
         A dict with ``"code"`` (1, 0 or -1), ``"sentiment"`` (label) and
         ``"tweet_id"``. If anything raises, the error is logged and the
         method returns ``None``.
     """
     try:
         polarity = TextBlob(tweet[1]).sentiment.polarity
         if polarity > 0:
             code, label = 1, "positivo"
         elif polarity == 0:
             code, label = 0, "neutro"
         else:
             code, label = -1, "negative"
         result = {"code": code, "sentiment": label, "tweet_id": tweet[0]}
     except Exception as error:
         logger.error(f"Error sentiment analyze - {error}")
     else:
         return result
def consumer() -> None:
    """Log every message delivered by the tweets-topic consumer.

    Builds a ``TwitterConsumer`` for ``broker`` / ``tweets_topic`` and, if
    its underlying consumer reports a bootstrap connection, iterates it and
    logs each message's ``value``. Otherwise logs a connection failure and
    returns.
    """
    logger.info(f"This is {realtime}...")
    # Named differently from the function so the local does not shadow it.
    topic_consumer = TwitterConsumer(broker, tweets_topic).consumer
    if not topic_consumer.bootstrap_connected():
        logger.error("Failed to connect to service")
        return
    logger.info("Connection okay.")
    for message in topic_consumer:
        logger.info(message.value)
Esempio n. 4
0
 def search(self, **query) -> List:
     """Run a Twitter search and return only the truthy results.

     Args:
         **query: Keyword arguments forwarded unchanged to
             ``self.twitter.search``.

     Returns:
         A list of the truthy items produced by the search. If the search
         (or iterating its result) raises, the error is logged and ``None``
         is returned.
     """
     try:
         results = list(filter(None, self.twitter.search(**query)))
     except Exception as error:
         logger.error(f"Error twitter action search - {error}")
         return None
     return results
 def stream_tweets(self, languages: List, track=None) -> None:
     """Keep a tweet stream running until interrupted from the keyboard.

     If the producer reports no bootstrap connection, an error is logged
     and nothing else happens. Otherwise ``self._stream`` is called in a
     loop: an ``IncompleteRead`` restarts it, and a ``KeyboardInterrupt``
     disconnects the current stream (if one was created), closes the
     producer and ends the loop.

     Args:
         languages: Forwarded to ``self._stream``.
         track: Forwarded to ``self._stream``.
     """
     if not self.producer.producer.bootstrap_connected():
         logger.error("Failed to connect to service")
         return

     # Pre-bind so an interrupt raised before the first stream exists does
     # not fail with UnboundLocalError inside the handler.
     stream = None
     while True:
         try:
             stream = self._stream(languages, track)
         except IncompleteRead:
             logger.info("I'm Here!")
             continue
         except KeyboardInterrupt:
             if stream is not None:
                 stream.disconnect()
             self.producer.producer.close()
             break
Esempio n. 6
0
 def get_cleaned_text(self, text: Text) -> Text:
     """Strip quoting, mentions, retweet markers, emoji and links from text.

     Steps, in order: remove newlines and single/double quotes and turn
     hyphens into spaces; prepend whatever ``self.get_claned_retweet_text``
     returns for that text; drop ``@mentions``; drop an ``RT : `` marker;
     pass the text through ``self.give_emoji_free_text``; drop anything
     starting with ``http`` up to the next whitespace.

     Args:
         text: Raw tweet text.

     Returns:
         The cleaned text (possibly an empty string). If any step raises,
         the error is logged and ``None`` is returned.
     """
     try:
         cleaned_text = (text.replace("\n", "").replace('\"', '')
                         .replace('\'', '').replace('-', ' '))
         retweet_info = self.get_claned_retweet_text(cleaned_text)
         cleaned_text = re.sub(r"@[a-zA-Z0-9_]+", "",
                               (retweet_info + cleaned_text)).strip()
         # lstrip() already guarantees no leading space, so no further
         # first-character check is needed (indexing [0] here used to
         # raise IndexError when the text had been cleaned down to "").
         cleaned_text = re.sub(r"RT\s:\s", "", cleaned_text).lstrip()
         cleaned_text = self.give_emoji_free_text(cleaned_text)
         # "http\S+" also matches every "https..." link.
         cleaned_text = re.sub(r"http\S+", "", cleaned_text)
         return cleaned_text
     except Exception as error:
         logger.error(f"Error get cleaned tweet text - {error}")
 def on_data(self, data: Text) -> NoReturn:
   """Clean one incoming payload and forward it to the tweets topic.

   The payload is run through ``TweetCleaner().filter_tweet`` and sent
   with ``self.producer.send_message``; the method then sleeps 3 seconds.

   Args:
     data: Raw payload handed over by the stream.

   Returns:
     ``True`` when the message was sent; ``None`` when a
     ``ProtocolError`` or any other ``Exception`` was raised and logged.
   """
   # NOTE(review): the NoReturn annotation is inaccurate (the method
   # returns True or None); it should become Optional[bool] once
   # typing.Optional is imported in this module.
   try:
     tweet = TweetCleaner().filter_tweet(data)
     logger.info("Send message to kafka producer...")
     self.producer.send_message(tweets_topic, tweet)
     logger.info("Sleeping 3 seconds...")
     sleep(3)
   # Most specific handler first. BaseException is deliberately not
   # caught, so KeyboardInterrupt / SystemExit reach the caller instead of
   # being swallowed here.
   except ProtocolError as error:
     logger.error(f"ProtocolError Twitter on data - {error}")
   except Exception as error:
     logger.error(f"Exception Twitter on data - {error}")
   else:
     return True
 def get_twitter_client(self) -> Callable:
     """Build an ``API`` client from ``self.auth`` and verify its credentials.

     Returns:
         The client when ``verify_credentials()`` succeeds. If verification
         raises ``RateLimitError``, ``TweepError`` or any other
         ``Exception``, the error is logged and ``None`` is returned.
     """
     client = API(
         self.auth,
         wait_on_rate_limit=True,
         wait_on_rate_limit_notify=True,
     )
     try:
         logger.info("Checking twitter credentials")
         client.verify_credentials()
     except RateLimitError as error:
         logger.error(f"Tweepy RateLimitError - {error}")
         return None
     except TweepError as error:
         logger.error(f"Tweepy TweepError - {error}")
         return None
     except Exception as error:
         logger.error(f"Error general exception - {error}")
         return None
     logger.info("Successful twitter authentication!")
     return client
 def get(self):
     """Search covid tweets and store the ones not yet in MongoDB.

     Tweets come from ``Twitter().search(**query_tweets)`` filtered through
     ``tweet_clean.filter_tweets``; each one whose ``_id`` is not already in
     the ``twitter.covid`` collection is inserted.

     Returns:
         ``({"message": ..., "count": n}, 200)`` with the number of tweets
         found, or ``({"message": ..., "count": 0}, 400)`` when there are
         no tweets or an insert hits ``DuplicateKeyError``.
     """
     logger.info("GET - covid tweets...")
     tweets = tweet_clean.filter_tweets(Twitter().search(**query_tweets))
     mongo_client = mongo.get_connection()
     # try/finally: every path below returns, so this is the only place the
     # connection can reliably be closed.
     try:
         db = mongo_client["twitter"]
         collection = db["covid"]
         if not tweets:
             logger.error("400 - GET - no covid tweets")
             return {"message": responses[400], "count": 0}, 400

         logger.info("200 - GET - successfully get covid tweets")
         for tweet in tweets:
             try:
                 # find_one replaces find().limit(1).count(), which no
                 # longer exists in PyMongo 4.
                 existing = collection.find_one(
                     {"_id": dict(tweet)["_id"]}, {"_id": 1})
                 if existing is not None:
                     logger.info(
                         "Data alredy exist in MongoDB. Continue...")
                     continue
                 insert = collection.insert_one(tweet)
                 if insert.inserted_id:
                     logger.info(
                         "Insert data covid tweet in MongoDB - Successfully insert data!"
                     )
                 else:
                     logger.error(
                         "Insert data covid tweet in MongoDB - Bad insert data..."
                     )
             except DuplicateKeyError as error:
                 logger.error("400 - GET - no covid tweets")
                 logger.error(f"DuplicateKeyError - {error}")
                 return {"message": responses[400], "count": 0}, 400
         return {"message": responses[200], "count": len(tweets)}, 200
     finally:
         if mongo_client is not None:
             mongo.close_connection(mongo_client)
 def get_connection(self) -> MongoClient:
   """Open a MongoDB client for ``self.uri`` and probe the server.

   Returns:
     The connected client once ``server_info()`` has succeeded; failures
     of the follow-up ``ismaster`` / ``serverStatus`` probes are only
     logged. Returns ``None`` (after logging) when the initial connection
     raises ``ServerSelectionTimeoutError``.
   """
   try:
     # NOTE(review): a 1 ms server-selection timeout looks very short -
     # confirm this value is intended.
     conn = MongoClient(self.uri, serverSelectionTimeoutMS=1)
     conn.server_info()
   except ServerSelectionTimeoutError as error:
     logger.error(f"Server Timeout Error - {error}")
     return None

   # The probes below are best-effort: the client is returned regardless.
   # They are separate try blocks (rather than a ``return`` inside
   # ``finally``) so that nothing beyond Exception is silently discarded.
   try:
     conn.admin.command("ismaster")
   except ConnectionFailure as error:
     logger.error(f"Could not connect to MongoDB: {error}")
   except Exception as error:
     logger.error(f"General Exception Mongo - {error}")

   try:
     conn["admin"].command("serverStatus")
   except Exception as error:
     logger.error(f"General Exception Mongo - {error}")
   else:
     logger.info("You are connected!")
   return conn
 def get(self):
     """Run sentiment analysis on stored covid tweets and persist new results.

     Reads ``_id`` and ``full_text`` of every document in ``twitter.covid``,
     analyzes them with ``analyzer.get_tweets_sentiment`` and inserts each
     result whose ``tweet_id`` is not yet in ``twitter.sentimental``.

     Returns:
         ``({"message": ..., "count": n}, 200)`` with the number of
         analysis results, or a 400 tuple when reading the tweets fails,
         there are no tweets, there are no analysis results, or an insert
         hits ``DuplicateKeyError``.
     """
     logger.info("GET - sentimental analysis of covid tweets...")
     mongo_client = mongo.get_connection()
     # try/finally: every path below returns, so this is the only place the
     # connection can reliably be closed.
     try:
         db = mongo_client["twitter"]
         collection_covid = db["covid"]
         collection_sentimental = db["sentimental"]

         # Diagnostics only. find_one() is used as the emptiness check
         # because Collection.count() no longer exists in PyMongo 4.
         if collection_sentimental.find_one() is None:
             logger.info("Sentimental collection is empty")
             if "sentimental" in db.list_collection_names():
                 logger.info("Sentimental collection exist")
             else:
                 logger.info("Sentimental collection not exist")
         else:
             logger.info("Sentimental collection is not empty")

         try:
             tweets = [[tweet["_id"], tweet["full_text"]]
                       for tweet in collection_covid.find()]
             if not tweets:
                 logger.warning("No covid tweets. Empty information")
         except Exception as error:
             logger.error("400 - GET - {}".format(error))
             return {"message": str(error), "count": 0}, 400

         if not tweets:
             logger.info("400 - GET - no covid tweets")
             return {
                 "message": responses[400],
                 "data": [],
                 "count": 0,
             }, 400

         # One analysis pass is enough: the per-class lists were never
         # used. Entries that are None (failed analyses) are dropped so
         # they cannot break the lookup below.
         sentiments = [
             item for item in analyzer.get_tweets_sentiment(tweets,
                                                            sentiment=None)
             if item is not None
         ]
         if not sentiments:
             logger.error(
                 "400 - GET - no data sentimental analysis of covid tweets")
             return {
                 "message": responses[400],
                 "count": 0,
             }, 400

         logger.info(
             "200 - GET - successfully get sentimental analysis of covid tweets"
         )
         for tweet in sentiments:
             try:
                 existing = collection_sentimental.find_one(
                     {"tweet_id": dict(tweet)["tweet_id"]}, {"_id": 1})
                 if existing is not None:
                     logger.info(
                         "Data alredy exist in MongoDB. Continue...")
                     continue
                 insert = collection_sentimental.insert_one(tweet)
                 if insert.inserted_id:
                     logger.info(
                         "Insert data sentimental analysis in MongoDB - Successfully insert data!"
                     )
                 else:
                     logger.error(
                         "Insert data sentimental analysis in MongoDB - Bad insert data..."
                     )
             except pymongo.errors.DuplicateKeyError as error:
                 logger.error(
                     "400 - GET - no data sentimental analysis of covid tweets"
                 )
                 logger.error(f"DuplicateKeyError - {error}")
                 return {"message": responses[400], "count": 0}, 400
         return {
             "message": responses[200],
             "count": len(sentiments)
         }, 200
     finally:
         if mongo_client is not None:
             mongo.close_connection(mongo_client)
 def on_timeout(self) -> bool:
   """Log the timeout and return ``True``."""
   message = "Twitter timeout..."
   logger.error(message)
   return True
 def on_error(self, status_code) -> bool:
   """Log a stream error and report the outcome as a boolean.

   Args:
     status_code: Status code reported with the error.

   Returns:
     ``False`` when ``status_code`` is 420, ``True`` for any other code.
   """
   rate_limited = status_code == 420
   if not rate_limited:
     logger.error(f"Error received in kafka producer - {status_code}")
     return True
   logger.error("Returning False on_data method in case rate limit occurs")
   return False