def test_create_frequency_term_matrix(self):
		"""
		Once the vocabulary has been indexed, create the frequency-term matrix.

		``count_vect.fit_transform`` is run on the training split first, then
		``frequency_term_matrix`` is built from that same split. The check is
		deliberately loose: the result only has to expose ``transpose``.
		"""
		processor = TextPreprocessor(self.articles)
		splits = processor.split_data()

		# Index the vocabulary on the training split; the value returned by
		# fit_transform is not used by this test.
		processor.count_vect.fit_transform(splits['train'])
		matrix = processor.frequency_term_matrix(splits['train'])

		exposes_transpose = hasattr(matrix, 'transpose')
		self.assertTrue(exposes_transpose)
	def test_tfidf_weighting(self):
		"""
		Turn the training term-frequency matrix into a tf-idf weight matrix.

		``tf_transformer`` is fitted on the term-frequency matrix and then
		used to transform that same matrix. The test only checks that the
		dense form of the result has a ``shape`` attribute.
		"""
		processor = TextPreprocessor(self.articles)
		splits = processor.split_data()
		counts = processor.frequency_term_matrix(splits['train'])

		# Step 1: fit() calculates the idf for the term-frequency matrix.
		processor.tf_transformer.fit(counts)
		# Step 2: transform() converts the term-frequency matrix
		# into the tf-idf weight matrix.
		weighted = processor.tf_transformer.transform(counts)

		dense = weighted.todense()
		self.assertTrue(hasattr(dense, 'shape'))
	def test_term_frequency_features(self):
		"""
		tf-idf helper test: the last step before classification.

		Fits ``tf_transformer`` on the training term-frequency matrix and
		checks that the object returned by ``fit`` reports ``norm == 'l2'``.
		"""
		# NOTE(review): tf_transformer is presumably a TfidfTransformer -- confirm
		processor = TextPreprocessor(self.articles)
		splits = processor.split_data()

		counts = processor.frequency_term_matrix(splits['train'])

		fitted = processor.tf_transformer.fit(counts)
		self.assertEqual(fitted.norm, 'l2')