def test_count_words_in_tweets():
    """Counting words over several tweets yields the combined total.

    Newlines and punctuation-only tokens must not inflate the count.
    """
    sample_tweets = [
        Tweet(full_text='word'),
        Tweet(full_text='word\nword'),
        Tweet(full_text=' wor''d. , ! * ( ) = + ` ~ " '' word word'),
    ]
    service = TweetsProcessingService(sample_tweets)
    service.count_tweet_words()
    total = sum(tweet.count for tweet in sample_tweets)
    assert total == 6
def test_detail_builder_init():
    """DetailBuilder keeps the tweet and defaults the detail type to 'blog'."""
    source = Tweet()
    builder = DetailBuilder(source)
    assert builder.tweet == source
    assert builder.default_detail_type == 'blog'
def test_tweet():
    """Tweet stores its constructor arguments and renders them via __str__.

    Also checks defaults: details starts empty, count stays 0, and
    is_fully_classified is unset (None) on a fresh instance.
    """
    screen_name = 'screen_name'
    created_at = datetime.now()
    full_text = 'full_text'
    _type = 'type'
    hashtags = 'hashtags'
    urls = None
    count = 0
    classification = 'classification'
    # NOTE(review): dropped unused local `expected_raw_data` (b'raw_data');
    # nothing in this test asserted against it.
    expected_str = (
        '<Tweet(id=1, '
        f'screen_name={screen_name}, '
        f'created_at={created_at}, '
        f'full_text={full_text}, '
        f'type={_type}, '
        f'hashtags={hashtags}, '
        f'urls={urls}, '
        f'count={count}, '
        f'classification={classification})>'
    )

    tweet = Tweet(screen_name=screen_name,
                  id=1,
                  created_at=created_at,
                  full_text=full_text,
                  hashtags=hashtags,
                  type=_type,
                  count=count,
                  classification=classification)

    assert [] == tweet.details
    assert 0 == tweet.count
    # Fixed E711: comparison with None must use identity, not equality.
    assert tweet.is_fully_classified is None
    assert expected_str == str(tweet)
def test_classify(hashtags, full_text, expected_classification):
    """classify() writes the expected classification onto the tweet."""
    subject = Tweet(hashtags=hashtags, full_text=full_text)
    classifier = TweetClassifier(
        subject, classification_model=test_classification_model)

    classifier.classify()

    assert subject.classification == expected_classification
def test_weights(tag_count, not_tag_count, full_text, hashtags, weight_text, weight_hashtag):
    """Hashtag and full-text classifications combine per the given weights.

    Only non-zero expected counts appear in the expected mapping.
    """
    expected = {}
    if tag_count:
        expected['tag'] = tag_count
    if not_tag_count:
        expected['not_tag'] = not_tag_count

    classifier = TweetClassifier(
        Tweet(full_text=full_text, hashtags=hashtags),
        classification_model=test_classification_model,
        weight_text=weight_text,
        weight_hashtag=weight_hashtag)

    # Classify hashtags first, then fold in the full-text result
    # (same evaluation order as production usage).
    combined = classifier._classify_hashtags()
    combined += classifier._classify_full_text()

    assert expected == combined
def test_get_with_retweeted_status(mock_call, mock_extract_hashtags, mock_tweet_cacher, mock_byte_serialize_object, tweets):
    """When a response carries retweeted_status, its full_text is used."""
    tweets._cached_tweets = []
    retweet_text = 'full_text'
    response = MagicMock(
        id=1, retweeted_status=MagicMock(full_text=retweet_text))
    mock_call.return_value = [response]
    expected_model = Tweet(
        id=response.id,
        created_at=response.created_at,
        full_text=retweet_text,
        type='favorite',
        hashtags=mock_extract_hashtags.return_value,
        tweet_raw_data=mock_byte_serialize_object.return_value)

    result = tweets.get()

    assert result == [expected_model]
    assert mock_call.called
    mock_tweet_cacher.assert_called_with(tweets.screen_name, expected_model)
    assert mock_tweet_cacher.return_value.cache.called
def test_build_sub_classification_model():
    """Words from classified tweets are merged into a new sub-model.

    The original classification model must be left untouched.
    """
    label = 'classification'
    expected = {label: {'full', 'text', 'words', 'swords'}}
    classified_tweets = [
        Tweet(hashtags='hashtag', full_text='full text words',
              classification=label),
        Tweet(hashtags='hashtag', full_text='full swords',
              classification=label),
    ]
    base_model = {label: set()}
    service = TweetsProcessingService(
        classified_tweets, classification_model=base_model)

    sub_model = service._build_sub_classification_model()

    assert base_model != sub_model
    assert expected == sub_model
def test_classify_tweets(expected_classification_values, hashtags, full_texts):
    """classify_tweets assigns the expected classification per tweet."""
    built_tweets = [
        Tweet(hashtags=tag, full_text=text)
        for tag, text in zip(hashtags, full_texts)
    ]
    service = TweetsProcessingService(
        built_tweets, classification_model=test_classification_model)

    service.classify_tweets()

    for index, expected in enumerate(expected_classification_values):
        assert expected == service.tweets[index].classification
def test_sub_classify_unclassified_tweets():
    """Unclassified tweets whose words appear in the model get labelled.

    'spoon' matches nothing, so it ends up with an empty-string label.
    """
    label = 'classification'
    model = {label: {'text'}}
    expected_labels = [label, label, label, '']
    sample_tweets = [
        Tweet(full_text='full text words', classification=label),
        Tweet(full_text='text', classification=None),
        Tweet(full_text='words', classification=None),
        Tweet(full_text='spoon', classification=None),
    ]
    service = TweetsProcessingService(
        sample_tweets, classification_model=model)

    service.sub_classify_unclassified_tweets()

    actual_labels = [t.classification for t in service.tweets]
    assert expected_labels == actual_labels
def test_tweet_classifier_init():
    """TweetClassifier stores the tweet and defaults to the global model."""
    tweet = Tweet()

    default_classifier = TweetClassifier(tweet)
    assert default_classifier.tweet == tweet
    assert default_classifier.classification_model == global_classification_model

    custom_classifier = TweetClassifier(
        tweet, classification_model=test_classification_model)
    assert custom_classifier.tweet == tweet
    assert custom_classifier.classification_model == test_classification_model
def test_raw_data_init():
    """TweetRawData accepts id, FK fields, a Tweet link and a binary payload.

    Schema reference (raw_data table): id is the integer primary key,
    tweet_id is a ForeignKey to tweet.id with a 'Tweet' relationship,
    and raw_data is a Binary column defaulting to None.
    """
    raw = TweetRawData(
        id=1, tweet_id=2, tweet=Tweet(id=2), raw_data=b'raw_data')
def test_add_words_to_classification_model():
    """Words from full_text land in the model lower-cased, punctuation split."""
    label = 'classification'
    expected = {label: {'full', 'text', 'words'}}
    model = {label: set()}
    tweet = Tweet(full_text='full text"Words', classification=label)

    add_words_to_classification_model(model, tweet)

    assert expected == model
def test_classify_words(tag_count, not_tag_count, words, delimiter):
    """_classify_words tallies per-classification matches for given words."""
    expected = {}
    if tag_count:
        expected['tag'] = tag_count
    if not_tag_count:
        expected['not_tag'] = not_tag_count

    classifier = TweetClassifier(
        Tweet(), classification_model=test_classification_model)
    result = classifier._classify_words(words, weight=1, delimiter=delimiter)

    assert expected == result
def _get_tweet(self, call_response):
    """Build a Tweet model from an API response and attach its raw data.

    Extracts text, urls and hashtags from the response, joins urls with
    '|', serializes the whole response and appends it as TweetRawData.
    """
    text = self.extract_full_text(call_response)
    joined_urls = '|'.join(self.extract_urls(call_response))
    model = Tweet(
        screen_name=self.screen_name,
        id=call_response.id,
        created_at=call_response.created_at,
        full_text=text,
        hashtags=self.extract_hashtags(call_response),
        urls=joined_urls,
        type=self.tweet_type,
    )
    # Keep the serialized raw response alongside the tweet record.
    serialized = serialize(call_response)
    model.tweet_raw_data.append(
        TweetRawData(tweet=model, raw_data=serialized))
    return model
def test_build_details():
    """build() copies tweet fields into the detail with 'blog' as type.

    The title is the first line of the tweet's full_text.
    """
    title = 'full_text'
    tweet = Tweet(id=1,
                  full_text=f'{title}\ntest',
                  urls='urls',
                  classification='classification')
    builder = DetailBuilder(tweet)

    detail = builder.build()

    assert detail.title == title
    assert detail.tweet_id == tweet.id
    assert detail.url == tweet.urls
    assert detail.type == 'blog'
    assert not detail.is_fully_classified
    assert detail.classification == tweet.classification
    assert detail.count == builder.default_detail_size
def test_get(mock_call, mock_tweet_cacher, mock_byte_serialize_object, tweets):
    """Plain (non-retweet) responses become Tweet models with joined hashtags."""
    tweets._cached_tweets = []
    tweet_id = 1
    created_at = datetime.now()
    text = 'full_text'
    entities = {
        'hashtags': [{'text': 'some_text'}, {'text': 'other_text'}],
        'urls': [],
    }
    response = MagicMock(id=tweet_id,
                         created_at=created_at,
                         full_text=text,
                         entities=entities)
    # A plain tweet response must not expose a retweeted_status attribute.
    delattr(response, 'retweeted_status')
    mock_call.return_value = [response]
    expected_model = Tweet(
        id=tweet_id,
        created_at=created_at,
        full_text=text,
        hashtags='some_text|other_text',
        type='favorite',
        tweet_raw_data=mock_byte_serialize_object.return_value)

    result = tweets.get()

    assert result == [expected_model]
    assert mock_call.called
    mock_tweet_cacher.assert_called_with(tweets.screen_name, expected_model)
    assert mock_tweet_cacher.return_value.cache.called
def _tweet_cacher_tuple(mock_init_cache_dir):
    """Build a TweetCacher and confirm cache-dir initialization ran.

    Returns the (cacher, tweet) pair for use by the calling test.
    """
    cached_tweet = Tweet(id=1)
    cacher = TweetCacher(expected_screen_name, cached_tweet)
    assert mock_init_cache_dir.called
    return cacher, cached_tweet
def _tweet():
    """Fixture helper: a minimal Tweet carrying only an id."""
    return Tweet(id=1)
def test_init():
    """VideoExtractor keeps a reference to the tweet it was given."""
    source_tweet = Tweet()
    extractor = VideoExtractor(source_tweet)
    assert extractor.tweet == source_tweet
def test_title(expected_title, full_text):
    """DetailBuilder.title is derived from the tweet's full_text."""
    builder = DetailBuilder(Tweet(full_text=full_text))
    assert expected_title == builder.title