Ejemplo n.º 1
0
	def test_mongo_collection_custom_filter_filters(self):
		"""Check that two complementary custom filters split a read in two.

		Counts the items returned after ``set_limit(10)``, then counts the
		items accepted by a "is a retweet" predicate and by its negation, and
		asserts that the two filtered counts sum to the unfiltered count.
		"""
		def open_collection():
			# Build a MongoCollection from the 'mongo' section of config.
			mongo_settings = config['mongo']
			return MongoCollection(
				mongo_settings['host'],
				mongo_settings['port'],
				mongo_settings['user'],
				mongo_settings['password'],
				mongo_settings['database'],
				mongo_settings['collection']
			)

		def is_tweet_a_retweet(tweet):
			# True only when the 'retweeted' key is present and truthy.
			return bool('retweeted' in tweet and tweet['retweeted'])

		def is_not_a_retweet(tweet):
			# Logical negation of is_tweet_a_retweet.
			return not ('retweeted' in tweet and tweet['retweeted'])

		collectionone = open_collection()
		unfiltered_items = list(collectionone.set_limit(10).get_iterator())
		full_collection_len = len(unfiltered_items)

		# The same collection object is reused for the retweet count.
		retweet_items = list(
			collectionone.set_limit(10).set_custom_filter(is_tweet_a_retweet).get_iterator()
		)
		num_retweets = len(retweet_items)

		# A second, freshly built collection is used for the complementary count.
		collectiontwo = open_collection()
		non_retweet_items = list(
			collectiontwo.set_limit(10).set_custom_filter(is_not_a_retweet).get_iterator()
		)
		num_non_retweets = len(non_retweet_items)

		# Retweets plus non-retweets must account for every item read.
		self.assertEqual(num_retweets + num_non_retweets, full_collection_len)
Ejemplo n.º 2
0
 def test_pass_in_mongo(self):
     """Check that MongoCollection works with an externally created client.

     Creates a ``pymongo.MongoClient`` from the configured host and port,
     hands it to ``MongoCollection`` through the ``passed_mongo`` keyword,
     and asserts that iterating after ``set_limit(10)`` returns at least
     one item.
     """
     mongo_to_pass = pymongo.MongoClient(config['mongo']['host'],
                                         int(config['mongo']['port']))
     # This test created the client, so it is responsible for closing it;
     # addCleanup runs the close whether the assertion passes or fails.
     self.addCleanup(mongo_to_pass.close)
     collection = MongoCollection(config['mongo']['user'],
                                  config['mongo']['password'],
                                  config['mongo']['database'],
                                  config['mongo']['collection'],
                                  passed_mongo=mongo_to_pass)
     # Materialize the iterator before asserting so the read is finished
     # while the client is still open.
     fetched_items = list(collection.set_limit(10).get_iterator())
     self.assertTrue(len(fetched_items) > 0)
Ejemplo n.º 3
0
 def test_iterator_returns_tweets(self):
     """Check that iterating a configured MongoCollection returns items.

     Builds a ``MongoCollection`` from the 'mongo' section of ``config`` and
     asserts that the iterator obtained after ``set_limit(10)`` is not empty.
     """
     mongo_settings = config['mongo']
     collection = MongoCollection(
         mongo_settings['host'],
         mongo_settings['port'],
         mongo_settings['user'],
         mongo_settings['password'],
         mongo_settings['database'],
         mongo_settings['collection']
     )
     fetched_items = list(collection.set_limit(10).get_iterator())
     self.assertTrue(len(fetched_items) > 0)
Ejemplo n.º 4
0
	def test_iterator_returns_tweets(self):
		"""Check that iterating a configured MongoCollection returns items.

		Builds a ``MongoCollection`` from the 'mongo' section of ``config`` and
		asserts that the iterator obtained after ``set_limit(10)`` is not empty.
		"""
		mongo_settings = config['mongo']
		collection = MongoCollection(
			mongo_settings['host'],
			mongo_settings['port'],
			mongo_settings['user'],
			mongo_settings['password'],
			mongo_settings['database'],
			mongo_settings['collection']
		)
		fetched_items = list(collection.set_limit(10).get_iterator())
		self.assertTrue(len(fetched_items) > 0)
Ejemplo n.º 5
0
	def test_pass_in_mongo(self):
		"""Check that MongoCollection works with an externally created client.

		Creates a ``pymongo.MongoClient`` from the configured host and port,
		hands it to ``MongoCollection`` through the ``passed_mongo`` keyword,
		and asserts that iterating after ``set_limit(10)`` returns at least
		one item.
		"""
		mongo_to_pass = pymongo.MongoClient(config['mongo']['host'], int(config['mongo']['port']))
		# This test created the client, so it is responsible for closing it;
		# addCleanup runs the close whether the assertion passes or fails.
		self.addCleanup(mongo_to_pass.close)
		collection = MongoCollection(
			config['mongo']['user'],
			config['mongo']['password'],
			config['mongo']['database'],
			config['mongo']['collection'],
			passed_mongo=mongo_to_pass
		)
		# Materialize the iterator before asserting so the read is finished
		# while the client is still open.
		fetched_items = list(collection.set_limit(10).get_iterator())
		self.assertTrue(len(fetched_items) > 0)
Ejemplo n.º 6
0
    def test_mongo_collection_custom_filter_filters(self):
        """Check that two complementary custom filters split a read in two.

        Counts the items returned after ``set_limit(10)``, then counts the
        items accepted by a "is a retweet" predicate and by its negation, and
        asserts that the two filtered counts sum to the unfiltered count.
        """

        def open_collection():
            # Build a MongoCollection from the 'mongo' section of config.
            mongo_settings = config['mongo']
            return MongoCollection(
                mongo_settings['host'],
                mongo_settings['port'],
                mongo_settings['user'],
                mongo_settings['password'],
                mongo_settings['database'],
                mongo_settings['collection'])

        def is_tweet_a_retweet(tweet):
            # True only when the 'retweeted' key is present and truthy.
            return bool('retweeted' in tweet and tweet['retweeted'])

        def is_not_a_retweet(tweet):
            # Logical negation of is_tweet_a_retweet.
            return not ('retweeted' in tweet and tweet['retweeted'])

        collectionone = open_collection()
        unfiltered_items = list(collectionone.set_limit(10).get_iterator())
        full_collection_len = len(unfiltered_items)

        # The same collection object is reused for the retweet count.
        retweet_query = collectionone.set_limit(10).set_custom_filter(
            is_tweet_a_retweet)
        num_retweets = len(list(retweet_query.get_iterator()))

        # A second, freshly built collection is used for the complementary
        # count.
        collectiontwo = open_collection()
        non_retweet_query = collectiontwo.set_limit(10).set_custom_filter(
            is_not_a_retweet)
        num_non_retweets = len(list(non_retweet_query.get_iterator()))

        # Retweets plus non-retweets must account for every item read.
        self.assertEqual(num_retweets + num_non_retweets, full_collection_len)
Ejemplo n.º 7
0
	def test_strip_tweets_keeps_fields(self):
		"""Check that strip_tweets keeps the fields it was asked to keep.

		Requests three dotted field names through ``strip_tweets`` and asserts
		that, for every item the iterator returns, each corresponding key
		(written as a list of path components) appears among the keys produced
		by ``TweetParser.flatten_dict``.
		"""
		tweet_parser = TweetParser()
		mongo_settings = config['mongo']
		collection = MongoCollection(
			mongo_settings['host'],
			mongo_settings['port'],
			mongo_settings['user'],
			mongo_settings['password'],
			mongo_settings['database'],
			mongo_settings['collection']
		)
		self.maxDiff = None

		requested_fields = ['id', 'entities.user_mentions', 'user.profile_image_url_https']
		it = collection.set_limit(10).strip_tweets(requested_fields).get_iterator()

		def tweets_have_right_keys(iterator, fields):
			# Returns False at the first tweet missing any field; an iterator
			# that yields nothing returns True.
			for tweet in iterator:
				# flatten_dict is iterated as (key, value) pairs; only keys matter.
				present_keys = [key for key, _ in tweet_parser.flatten_dict(tweet)]
				if not all(field in present_keys for field in fields):
					return False
			return True

		# Same fields as requested above, split into path components.
		expected_key_paths = [['id'], ['entities', 'user_mentions'], ['user', 'profile_image_url_https']]
		self.assertTrue(tweets_have_right_keys(it, expected_key_paths))
Ejemplo n.º 8
0
    def test_strip_tweets_keeps_fields(self):
        """Check that strip_tweets keeps the fields it was asked to keep.

        Requests three dotted field names through ``strip_tweets`` and asserts
        that, for every item the iterator returns, each corresponding key
        (written as a list of path components) appears among the keys
        produced by ``TweetParser.flatten_dict``.
        """
        tweet_parser = TweetParser()
        mongo_settings = config['mongo']
        collection = MongoCollection(
            mongo_settings['host'],
            mongo_settings['port'],
            mongo_settings['user'],
            mongo_settings['password'],
            mongo_settings['database'],
            mongo_settings['collection'])
        self.maxDiff = None

        requested_fields = [
            'id',
            'entities.user_mentions',
            'user.profile_image_url_https',
        ]
        it = collection.set_limit(10).strip_tweets(
            requested_fields).get_iterator()

        def tweets_have_right_keys(iterator, fields):
            # Returns False at the first tweet missing any field; an iterator
            # that yields nothing returns True.
            for tweet in iterator:
                # flatten_dict is iterated as (key, value) pairs; only the
                # keys matter here.
                present_keys = [
                    key for key, _ in tweet_parser.flatten_dict(tweet)
                ]
                if not all(field in present_keys for field in fields):
                    return False
            return True

        # Same fields as requested above, split into path components.
        expected_key_paths = [
            ['id'],
            ['entities', 'user_mentions'],
            ['user', 'profile_image_url_https'],
        ]
        self.assertTrue(tweets_have_right_keys(it, expected_key_paths))