예제 #1
0
	def test_tokenize_reviews(self):
		"""Check that tokenize_reviews_mapper emits the 'all' category counts as given."""
		category_counts = {CATEGORY: 1}
		expected = [('all', {CATEGORY: 1})]

		predictor = CategoryPredictor()
		# The mapper is a generator; materialize it so it can be compared to a list.
		emitted = list(predictor.tokenize_reviews_mapper('all', category_counts))

		self.assertEqual(emitted, expected)
예제 #2
0
	def test_categories_to_reviews(self):
		"""Check the pairs add_categories_to_reviews_reducer emits for one business."""
		# Values grouped under a single business id: its categories and one review.
		grouped_values = [('categories', [CATEGORY]), ('review', TEXT)]
		expected = [('all', {CATEGORY: 1}), (CATEGORY, TEXT)]

		predictor = CategoryPredictor()
		emitted = list(
			predictor.add_categories_to_reviews_reducer(BIZ_ID, grouped_values)
		)

		self.assertEqual(emitted, expected)
예제 #3
0
	def test_review_category(self):
		"""Check that review_category_mapper keys both record types by business id."""
		business_json = BUSINESS_TEMPLATE % (CATEGORY, BIZ_ID)
		review_json = REVIEW_TEMPLATE % (TEXT, BIZ_ID)
		predictor = CategoryPredictor()

		# Feed each decoded record through the mapper; the input key is unused (None).
		review_record = json.loads(review_json)
		from_review = list(predictor.review_category_mapper(None, review_record))
		business_record = json.loads(business_json)
		from_business = list(predictor.review_category_mapper(None, business_record))

		expected_review = [(BIZ_ID, ('review', TEXT))]
		expected_business = [(BIZ_ID, ('categories', [CATEGORY]))]
		self.assertEqual(from_review, expected_review)
		self.assertEqual(from_business, expected_business)
예제 #4
0
	def test_smoke(self):
		"""Run the whole job inline on mock input and check the first output value."""
		business_line = BUSINESS_TEMPLATE % (CATEGORY, BIZ_ID)
		review_line = REVIEW_TEMPLATE % (LONG_TEXT, BIZ_ID)
		mock_stdin = StringIO(business_line + review_line)

		# '-' makes the job read its input from the sandboxed stdin.
		predictor = CategoryPredictor(['-r', 'inline', '--no-conf', '-'])
		predictor.sandbox(stdin=mock_stdin)

		with predictor.make_runner() as runner:
			runner.run()
			# Keep only the value half of every (key, value) output pair.
			output_values = [
				value
				for _, value in map(predictor.parse_output_line, runner.stream_output())
			]

		# The first value should be the probability of that category being chosen.
		self.assertEqual(output_values[0], {CATEGORY: 1})