Ejemplo n.º 1
0
    def get_tweets_by_user_id_time_restricted(self,
                                              user_id: str) -> List[Tweet]:
        """
        Return the tweets of a user that were created within the last
        twelve months.

        @param user_id the id of the user to retrieve tweets from

        @return a list of Tweet objects whose user_id matches and whose
            created_at is on or after the cutoff date
        """
        # The cutoff is twelve months before the moment of the call.
        cutoff = datetime.today() + relativedelta(months=-12)
        author_clause = {"user_id": bson.int64.Int64(user_id)}
        recency_clause = {"created_at": {"$gte": cutoff}}
        matching_docs = self.collection.find(
            {"$and": [author_clause, recency_clause]})
        return [Tweet.fromDict(entry) for entry in matching_docs]
Ejemplo n.º 2
0
    def get_retweets_of_user_by_user_id(self, user_id: str) -> List[Tweet]:
        """
        Return the stored tweets whose retweet_user_id matches the given
        user id.

        @param user_id the id to match against retweet_user_id

        @return a list of Tweet objects built from the matching documents
        """
        query = {"retweet_user_id": bson.int64.Int64(user_id)}
        return [Tweet.fromDict(entry) for entry in self.collection.find(query)]
Ejemplo n.º 3
0
    def get_tweet_by_id(self, id: str) -> Tweet:
        """
        Look up a single tweet by its id.

        @param id the id of the tweet to get

        @return the Tweet built from the matching document, or None when no
            document has the given id
        """
        found = self.collection.find_one({"id": bson.int64.Int64(id)})
        return None if found is None else Tweet.fromDict(found)
Ejemplo n.º 4
0
    def get_tweets_by_user_id(self, user_id, num_tweets=0):
        """
        Collect tweets from a user's timeline through the Twitter API.

        The request excludes replies and passes a fixed since_id. If a
        TweepError is raised it is logged, and the tweets gathered before
        the error are still returned.

        @param user_id the id of the user whose timeline is read
        @param num_tweets not referenced by this method's body

        @return a list of Tweet objects built from the timeline results
        """
        collected = []
        try:
            timeline = Cursor(self.twitter_api.user_timeline,
                              user_id=user_id,
                              count=200,
                              since_id='1277627227954458624',
                              exclude_replies=True)
            # Append one at a time so partial results survive an API error.
            for status in timeline.items():
                collected.append(Tweet.fromTweepyJSON(status._json))
        except TweepError as err:
            log.error(err)

        return collected
Ejemplo n.º 5
0
    def get_tweets_by_user_id(self, user_id: str) -> List[Tweet]:
        """
        Return every stored tweet whose user_id matches the given user id.

        @param user_id the id of the user to retrieve tweets from

        @return a list of Tweet objects built from the matching documents
        """
        query = {"user_id": bson.int64.Int64(user_id)}
        return [Tweet.fromDict(entry) for entry in self.collection.find(query)]
Ejemplo n.º 6
0
    def get_retweets_of_user_by_user_id_time_restricted(
            self, user_id: str, from_date=None) -> List[Tweet]:
        """
        Return the stored tweets whose retweet_user_id matches the given
        user id and that were created on or after a cutoff date.

        @param user_id the id to match against retweet_user_id
        @param from_date optional datetime cutoff; only documents whose
            created_at is on or after it are returned. When omitted, the
            cutoff is 2020-06-30, the date this method previously had
            hard-coded.

        @return a list of Tweet objects built from the matching documents
        """
        if from_date is None:
            # Keep the historical fixed cutoff for callers that pass nothing.
            from_date = datetime(2020, 6, 30)

        retweet_doc_list = self.collection.find({
            "$and": [{
                "retweet_user_id": bson.int64.Int64(user_id)
            }, {
                "created_at": {
                    "$gte": from_date
                }
            }]
        })
        return [Tweet.fromDict(doc) for doc in retweet_doc_list]
Ejemplo n.º 7
0
    def _process_tweet_text(self, tweet: Tweet):
        """
        Turn a tweet's text into a list of cleaned, stemmed words.

        The text is lower-cased; https links, standalone digit runs, and
        every character that is not a word character, whitespace, '@' or
        '#' are stripped. Words containing '#' or '@' are then dropped, the
        remaining words are stemmed, and English stop words (plus 'amp')
        are removed.

        @param tweet the raw, unprocessed tweet; its text is read through
            tweet.get_text()
        @return the list of processed words, in their original order
        """
        text = tweet.get_text().lower()

        # Filter links, numbers, and emojis/punctuation
        text = re.sub(r"\bhttps:\S*\b", "", text)
        text = re.sub(r"\b\d*\b", "", text)
        text = re.sub(r"[^\w\s@#]", "", text)

        # Drop hashtags and usernames. str.split() never yields empty
        # strings, so no separate empty-string filter is needed.
        words = [
            word for word in text.split()
            if '#' not in word and '@' not in word
        ]

        # Stem before removing stop words, so stop words are matched against
        # the stemmed forms.
        stemmer = nltk.stem.SnowballStemmer('english')
        words = [stemmer.stem(word) for word in words]

        # Remove stop words in a single pass using set membership.
        stopwords = set(nltk.corpus.stopwords.words('english'))
        stopwords.add('amp')
        return [word for word in words if word not in stopwords]
Ejemplo n.º 8
0
 def on_status(self, data):
     """
     Convert an incoming status into a Tweet and store it.

     @param data status object exposing its raw payload as data._json
     """
     incoming = Tweet.fromTweepyJSON(data._json)
     setter = self.raw_tweet_setter
     setter.store_tweet(incoming)