Ejemplo n.º 1
0
def test_count_words_in_tweets():
    """The per-tweet word counts add up to six once counting has run."""
    samples = [
        Tweet(full_text='word'),
        Tweet(full_text='word\nword'),
        # Three adjacent string literals; Python joins them into one string.
        Tweet(full_text=' wor''d. , ! * ( ) = + ` ~ " '' word word'),
    ]

    service = TweetsProcessingService(samples)
    service.count_tweet_words()

    total_words = sum(tweet.count for tweet in samples)
    assert 6 == total_words
Ejemplo n.º 2
0
def test_detail_builder_init():
    """A new DetailBuilder exposes its tweet and a default detail type of 'blog'."""
    source_tweet = Tweet()

    builder = DetailBuilder(source_tweet)

    assert source_tweet == builder.tweet
    assert 'blog' == builder.default_detail_type
def test_tweet():
    """Check a freshly built Tweet's default attributes and its ``str()`` form."""
    screen_name = 'screen_name'
    created_at = datetime.now()
    full_text = 'full_text'
    _type = 'type'
    hashtags = 'hashtags'
    # Not passed to Tweet below; only used to spell out the expected string.
    urls = None
    count = 0
    classification = 'classification'

    expected_str = (
        '<Tweet(id=1, '
        f'screen_name={screen_name}, '
        f'created_at={created_at}, '
        f'full_text={full_text}, '
        f'type={_type}, '
        f'hashtags={hashtags}, '
        f'urls={urls}, '
        f'count={count}, '
        f'classification={classification})>'
    )

    tweet = Tweet(screen_name=screen_name,
                  id=1,
                  created_at=created_at,
                  full_text=full_text,
                  hashtags=hashtags,
                  type=_type,
                  count=count,
                  classification=classification)

    assert [] == tweet.details
    assert 0 == tweet.count
    # Identity check: comparisons against the None singleton should use ``is``.
    assert tweet.is_fully_classified is None
    assert expected_str == str(tweet)
def test_classify(hashtags, full_text, expected_classification):
    """Running ``classify()`` stores the expected label on the tweet itself.

    The arguments are supplied from outside this function (presumably a
    parametrize decorator that is not visible here).
    """
    subject = Tweet(hashtags=hashtags, full_text=full_text)

    classifier = TweetClassifier(
        subject, classification_model=test_classification_model)
    classifier.classify()

    assert expected_classification == subject.classification
def test_weights(tag_count, not_tag_count, full_text, hashtags, weight_text,
                 weight_hashtag):
    """Hashtag and full-text classifications, added together, match expectations.

    A falsy ``tag_count`` / ``not_tag_count`` means that key is left out of
    the expected mapping altogether.
    """
    expected_classification = {
        label: amount
        for label, amount in (('tag', tag_count), ('not_tag', not_tag_count))
        if amount
    }

    classifier = TweetClassifier(
        Tweet(full_text=full_text, hashtags=hashtags),
        classification_model=test_classification_model,
        weight_text=weight_text,
        weight_hashtag=weight_hashtag)

    # Accumulate in place: hashtags first, then the full-text result on top.
    combined = classifier._classify_hashtags()
    combined += classifier._classify_full_text()

    assert expected_classification == combined
def test_get_with_retweeted_status(mock_call, mock_extract_hashtags,
                                   mock_tweet_cacher,
                                   mock_byte_serialize_object, tweets):
    """``get()`` is expected to take the full text from ``retweeted_status``."""
    tweets._cached_tweets = []
    retweet_text = 'full_text'

    # Only the nested retweeted status carries the known text.
    retweeted = MagicMock(full_text=retweet_text)
    api_response = MagicMock(id=1, retweeted_status=retweeted)
    mock_call.return_value = [api_response]

    expected_tweet = Tweet(
        id=api_response.id,
        created_at=api_response.created_at,
        full_text=retweet_text,
        type='favorite',
        hashtags=mock_extract_hashtags.return_value,
        tweet_raw_data=mock_byte_serialize_object.return_value)

    fetched = tweets.get()

    assert [expected_tweet] == fetched
    assert mock_call.called

    mock_tweet_cacher.assert_called_with(tweets.screen_name, expected_tweet)
    assert mock_tweet_cacher.return_value.cache.called
Ejemplo n.º 7
0
def test_build_sub_classification_model():
    """The sub-model holds the full-text words of the classified tweets.

    It must also differ from the (empty) model the service was given.
    """
    label = 'classification'
    seed_model = {label: set()}
    expected_model = {label: {'full', 'text', 'words', 'swords'}}

    classified_tweets = [
        Tweet(hashtags='hashtag', full_text='full text words', classification=label),
        Tweet(hashtags='hashtag', full_text='full swords', classification=label),
    ]

    service = TweetsProcessingService(classified_tweets, classification_model=seed_model)
    built_model = service._build_sub_classification_model()

    assert seed_model != built_model
    assert expected_model == built_model
Ejemplo n.º 8
0
def test_classify_tweets(expected_classification_values, hashtags, full_texts):
    """Each tweet, by position, ends up with the expected classification."""
    paired_inputs = zip(hashtags, full_texts)
    tweets = [Tweet(hashtags=tag, full_text=text) for tag, text in paired_inputs]

    service = TweetsProcessingService(tweets, classification_model=test_classification_model)
    service.classify_tweets()

    for index, expected in enumerate(expected_classification_values):
        assert expected == service.tweets[index].classification
Ejemplo n.º 9
0
def test_sub_classify_unclassified_tweets():
    """Sub-classification labels the 'text' and 'words' tweets; 'spoon' gets ''."""
    label = 'classification'
    model = {label: {'text'}}

    # (full_text, initial classification) for each tweet, in order.
    tweet_specs = (
        ('full text words', label),
        ('text', None),
        ('words', None),
        ('spoon', None),
    )
    tweets = [Tweet(full_text=text, classification=initial)
              for text, initial in tweet_specs]

    expected_labels = [label, label, label, '']

    service = TweetsProcessingService(tweets, classification_model=model)
    service.sub_classify_unclassified_tweets()

    actual_labels = [tweet.classification for tweet in service.tweets]
    assert expected_labels == actual_labels
def test_tweet_classifier_init():
    """TweetClassifier keeps its tweet and uses the global model unless given one."""
    subject = Tweet()

    # No model passed: the classifier should fall back to the global one.
    default_classifier = TweetClassifier(subject)
    assert subject == default_classifier.tweet
    assert global_classification_model == default_classifier.classification_model

    # Explicit model passed: that model should be the one in use.
    custom_classifier = TweetClassifier(
        subject, classification_model=test_classification_model)
    assert subject == custom_classifier.tweet
    assert test_classification_model == custom_classifier.classification_model
def test_raw_data_init():
    """Construct a TweetRawData with every field set.

    There are no assertions here; the test only verifies that construction
    does not raise.
    """
    # Model sketch left by the original author for reference
    # (NOTE(review): confirm it still matches the real TweetRawData model):
    #
    # class RawData(Base):
    #     __tablename__ = 'raw_data'
    #
    #     id = Column(Integer, primary_key=True)
    #
    #     tweet_id = Column(Integer, ForeignKey('tweet.id'))
    #     tweet = relationship('Tweet', back_populates='raw_data')
    #
    #     raw_data = Column(Binary, default=None)
    owner = Tweet(id=2)
    tweet_raw_data = TweetRawData(
        id=1,
        tweet_id=2,
        tweet=owner,
        raw_data=b'raw_data',
    )
Ejemplo n.º 12
0
def test_add_words_to_classification_model():
    """Words from the tweet text land, lower-cased, under the tweet's label.

    The input text contains a double quote between two words and a
    capitalised word; the expected set has three separate lower-case words.
    """
    label = 'classification'
    model = {label: set()}
    labelled_tweet = Tweet(full_text='full text"Words', classification=label)

    add_words_to_classification_model(model, labelled_tweet)

    expected_model = {label: {'full', 'text', 'words'}}
    assert expected_model == model
def test_classify_words(tag_count, not_tag_count, words, delimiter):
    """``_classify_words`` with weight 1 yields the expected per-label counts.

    A falsy ``tag_count`` / ``not_tag_count`` means that key is absent from
    the expected mapping.
    """
    expected_classification = {
        label: amount
        for label, amount in (('tag', tag_count), ('not_tag', not_tag_count))
        if amount
    }

    classifier = TweetClassifier(
        Tweet(), classification_model=test_classification_model)
    result = classifier._classify_words(words, weight=1, delimiter=delimiter)

    assert expected_classification == result
Ejemplo n.º 14
0
    def _get_tweet(self, call_response):
        """Build a Tweet model from one API response and attach its raw data.

        The response is serialized and wrapped in a TweetRawData record,
        which is appended to the new tweet's ``tweet_raw_data`` collection.

        :param call_response: a single response object; its ``id`` and
            ``created_at`` attributes are read directly.
        :return: the populated Tweet model.
        """
        text = self.extract_full_text(call_response)
        url_list = self.extract_urls(call_response)

        # Values are evaluated top to bottom, same as keyword arguments would be.
        tweet_fields = {
            'screen_name': self.screen_name,
            'id': call_response.id,
            'created_at': call_response.created_at,
            'full_text': text,
            'hashtags': self.extract_hashtags(call_response),
            'urls': '|'.join(url_list),  # URLs are stored pipe-separated
            'type': self.tweet_type,
        }
        model = Tweet(**tweet_fields)

        serialized = serialize(call_response)
        raw_record = TweetRawData(tweet=model, raw_data=serialized)
        model.tweet_raw_data.append(raw_record)

        return model
Ejemplo n.º 15
0
def test_build_details():
    """``build()`` produces a detail derived from the tweet's fields.

    The tweet text has two lines; the expected title is the first one.
    """
    expected_title = 'full_text'
    expected_type = 'blog'

    source_tweet = Tweet(
        id=1,
        full_text=f'{expected_title}\ntest',
        urls='urls',
        classification='classification',
    )
    builder = DetailBuilder(source_tweet)

    built = builder.build()

    assert expected_title == built.title
    assert source_tweet.id == built.tweet_id
    assert source_tweet.urls == built.url
    assert expected_type == built.type
    assert not built.is_fully_classified
    assert source_tweet.classification == built.classification
    assert builder.default_detail_size == built.count
def test_get(mock_call, mock_tweet_cacher, mock_byte_serialize_object, tweets):
    """``get()`` turns a plain (non-retweet) response into the expected Tweet.

    The two hashtag entities are expected to come back joined with ``|``,
    and the resulting tweet is expected to be handed to the tweet cacher.
    """
    tweets._cached_tweets = []
    # Named ``tweet_id`` rather than ``id`` so the builtin is not shadowed.
    tweet_id = 1
    created_at = datetime.now()
    full_text = 'full_text'
    entities = {
        'hashtags': [{
            'text': 'some_text'
        }, {
            'text': 'other_text'
        }],
        'urls': []
    }

    tweet_response = MagicMock(id=tweet_id,
                               created_at=created_at,
                               full_text=full_text,
                               entities=entities)
    # Deleting the attribute makes the mock raise AttributeError on access,
    # so the response has no ``retweeted_status`` at all.
    del tweet_response.retweeted_status
    mock_call.return_value = [tweet_response]

    expected_tweet_model = Tweet(
        id=tweet_id,
        created_at=created_at,
        full_text=full_text,
        hashtags='some_text|other_text',
        type='favorite',
        tweet_raw_data=mock_byte_serialize_object.return_value)

    tweets_list = tweets.get()

    assert [expected_tweet_model] == tweets_list
    assert mock_call.called

    mock_tweet_cacher.assert_called_with(tweets.screen_name,
                                         expected_tweet_model)
    assert mock_tweet_cacher.return_value.cache.called
Ejemplo n.º 17
0
def _tweet_cacher_tuple(mock_init_cache_dir):
    """Return a ``(TweetCacher, Tweet)`` pair for a tweet with id 1.

    Also asserts that the cache-dir mock was called while the cacher was
    being constructed.
    """
    cached_tweet = Tweet(id=1)
    cacher = TweetCacher(expected_screen_name, cached_tweet)
    assert mock_init_cache_dir.called

    return cacher, cached_tweet
Ejemplo n.º 18
0
def _tweet():
    """Return a Tweet whose id is 1."""
    tweet = Tweet(id=1)
    return tweet
def test_init():
    """A new VideoExtractor exposes the tweet it was constructed with."""
    source_tweet = Tweet()

    extractor = VideoExtractor(source_tweet)

    assert source_tweet == extractor.tweet
Ejemplo n.º 20
0
def test_title(expected_title, full_text):
    """``DetailBuilder.title`` matches the expected title for the given text."""
    builder = DetailBuilder(Tweet(full_text=full_text))

    assert expected_title == builder.title