Ejemplo n.º 1
0
    def process_twitter_data(
            status: Status,
            twitter_credentials: dict,
            add_sentiment: bool = True,
            add_bot_analysis: bool = True) -> TwitterDataOutput:
        """Build the status and user documents for a single status.

        The status is converted with ``TwitterDataProcessor.process_status``
        and its ``user`` attribute with ``TwitterDataProcessor.process_user``.
        Optional sentiment and Botometer results are attached to the
        respective documents before both are wrapped in a
        ``TwitterDataOutput``.

        Args:
            status: Status object to process; its ``user`` attribute is read.
            twitter_credentials: Forwarded unchanged to
                ``TwitterDataProcessor.process_botometer_analysis``.
            add_sentiment: When true, sentiment analysis is run on the status
                text, provided its length is at least
                ``TwitterDataProcessor.min_char``. Otherwise a default
                ``SentimentAnalysisOutput()`` is stored.
            add_bot_analysis: When true, the Botometer analysis is run for the
                user and stored on the user document. When false, the user
                document is left exactly as ``process_user`` produced it.

        Returns:
            A ``TwitterDataOutput`` holding the ``__dict__`` of the status and
            user documents. If any step raises, the error is logged and an
            output with empty ``status`` and ``user`` dicts is returned.
        """
        # Fallback result returned unchanged when any step below fails.
        output: TwitterDataOutput = TwitterDataOutput(status={}, user={})
        try:
            # 1. Retrieve status document
            status_doc: StatusDoc = TwitterDataProcessor.process_status(
                status=status)

            # 2. Add additional parameters related to sentiment analysis.
            # Start from a default result so the document always carries the
            # sentiment fields, even when the analysis is skipped.
            response_sent: SentimentAnalysisOutput = SentimentAnalysisOutput()
            if add_sentiment and len(status_doc.text) >= TwitterDataProcessor.min_char:
                response_sent = TwitterDataProcessor.process_sentiment_analysis(
                    doc=status_doc.text)
            status_doc.sentiment_analysis = vars(response_sent)

            # 3. Get the user
            user: User = status.user
            user_doc: UserAccountDoc = TwitterDataProcessor.process_user(
                user=user)

            # 4. Add additional params
            if add_bot_analysis:
                response_botometer_analysis: BotometerAnalysisOutput = (
                    TwitterDataProcessor.process_botometer_analysis(
                        user_id=user_doc.id,
                        twitter_credentials=twitter_credentials))
                user_doc.botometer_analysis = vars(response_botometer_analysis)

            # 5. Get the output
            output = TwitterDataOutput(
                status=vars(status_doc), user=vars(user_doc))
        except Exception as e:
            # Best-effort by design: keep the empty fallback, but record the
            # traceback so the failing step can be diagnosed.
            logger.exception(e)
        return output
Ejemplo n.º 2
0
    def on_status(self, status: Status):
        """Process and store an incoming status unless it is already stored.

        The status ``id`` is first looked up through
        ``self.check_data_in_storage``. When that check reports the status as
        not yet stored, the status is processed with ``self.process_status``
        and the result is handed to ``self.storage_data``. Otherwise nothing
        further is done.

        Args:
            status: Status object to handle; its ``id`` attribute is read.
        """
        # Read the identifier once; the lookup and every log line reuse it.
        status_id = status.id

        logger.info(f"1. Loading Status with ID {status_id}")

        # 1. Check whether the status is already in the storage
        is_new: bool = self.check_data_in_storage(
            entity_id=status_id,
            storage=self.storage,
            collection_name=self.collection_names.get("status"),
            identifier_key="id")

        if not is_new:
            return

        # 2. Process Tweets and Users
        logger.info(f"2. Pre-processing Status with ID {status_id}")
        data: TwitterDataOutput = self.process_status(
            status=status, add_sentiment=self.add_sentiment,
            add_bot_analysis=self.add_bot_analysis)

        # 3. Store the data
        logger.info(f"3. Storing Status with ID {status_id} in {self.storage.title()}")
        self.storage_data(data=data, collection_names=self.collection_names,
                          storage=self.storage, mongodb_connector=self.mongodb_connector,
                          elasticsearch_connector=self.elasticsearch_connector,
                          identifier_key=self.identifier_key)