def test_normal_run(self):
    """Check that scaled feature weights scale the matrix data accordingly.

    The extractor arguments are merged into ``self.docfreq_args`` and every
    argument whose name contains ``"_weight"`` is multiplied by 2.  The
    matrices returned by ``modify_feature_weights`` must then keep the
    ``indices`` and ``indptr`` of the input batches while their ``data``
    equals the input ``data`` multiplied by 2.
    """
    self.docfreq_args.update(self.extractor_args)
    weight = 2
    for name in self.docfreq_args:
        if "_weight" not in name:
            continue
        # make not 1 (presumably the fixture weights start at 1 - TODO confirm)
        self.docfreq_args[name] *= weight
    arguments = dict_to_arguments(self.docfreq_args)

    registry = sourced.ml.extractors.__extractors__
    with patch.dict(registry, self.extractors, clear=True):
        result = modify_feature_weights(deepcopy(self.batches), arguments)
        self.assertEqual(len(result), len(self.batches))
        for weighted, original in zip(result, self.batches):
            # Sort both sides so the CSR arrays can be compared element-wise.
            weighted.matrix.sort_indices()
            original.matrix.sort_indices()

            same_indices = numpy.array_equal(
                weighted.matrix.indices, original.matrix.indices)
            self.assertTrue(same_indices)

            scaled_data = numpy.array_equal(
                weighted.matrix.data, original.matrix.data * weight)
            self.assertTrue(scaled_data)

            same_indptr = numpy.array_equal(
                weighted.matrix.indptr, original.matrix.indptr)
            self.assertTrue(same_indptr)
def test_empty_batches(self):
    """Check that an empty list of batches produces an empty result."""
    merged_args = self.docfreq_args
    merged_args.update(self.extractor_args)
    arguments = dict_to_arguments(merged_args)

    registry = sourced.ml.extractors.__extractors__
    with patch.dict(registry, self.extractors, clear=True):
        weighted = modify_feature_weights([], arguments)
        produced = len(weighted)
        self.assertEqual(produced, 0)
def test_no_docfreq(self):
    """Check that ``modify_feature_weights`` raises when the docfreq file is missing.

    A temporary file is created and removed right away, so its name is a path
    that does not exist.  That path is passed as the ``docfreq`` argument,
    together with the extractor arguments, and the call is expected to raise.
    """
    # Leaving the ``with`` block closes and deletes the file; only the name of
    # the now non-existent file is kept.
    with tempfile.NamedTemporaryFile(prefix="test_weighting") as no_file:
        missing_path = no_file.name

    no_docfreq = {"docfreq": missing_path}
    no_docfreq.update(self.extractor_args)
    # The arguments must come from the dict pointing at the missing file,
    # otherwise the valid fixture path is used and nothing is being tested.
    arguments = dict_to_arguments(no_docfreq)

    with patch.dict(sourced.ml.extractors.__extractors__, self.extractors, clear=True):
        # Context-manager form: the call has to run *inside* assertRaises.
        # Passing the already-evaluated call result as the "callable" would
        # either leak the exception or pass spuriously on a TypeError.
        with self.assertRaises(Exception):
            modify_feature_weights(self.batches, arguments)
def test_empty_extractors(self):
    """Check that batches are unchanged when no extractor arguments are given.

    Only ``self.docfreq_args`` is turned into arguments (the extractor
    arguments are not merged in).  The matrices returned by
    ``modify_feature_weights`` must have the same ``indices``, ``data`` and
    ``indptr`` as the input batches.
    """
    arguments = dict_to_arguments(self.docfreq_args)
    registry = sourced.ml.extractors.__extractors__
    with patch.dict(registry, self.extractors, clear=True):
        result = modify_feature_weights(deepcopy(self.batches), arguments)
        self.assertEqual(len(result), len(self.batches))
        for weighted, original in zip(result, self.batches):
            # Sort both sides so the CSR arrays can be compared element-wise.
            weighted.matrix.sort_indices()
            original.matrix.sort_indices()
            for field in ("indices", "data", "indptr"):
                unchanged = numpy.array_equal(
                    getattr(weighted.matrix, field),
                    getattr(original.matrix, field))
                self.assertTrue(unchanged)
def test_empty_extractors(self):
    """Check that the batch matrices pass through unmodified.

    The arguments are built from ``self.docfreq_args`` alone, without the
    extractor arguments.  Every returned matrix is compared to its input
    counterpart: ``indices``, ``data`` and ``indptr`` must all be equal.
    """
    arguments = dict_to_arguments(self.docfreq_args)
    with patch.dict(sourced.ml.extractors.__extractors__, self.extractors, clear=True):
        result = modify_feature_weights(deepcopy(self.batches), arguments)
        expected_count = len(self.batches)
        self.assertEqual(len(result), expected_count)
        for actual, reference in zip(result, self.batches):
            # Canonical index order on both sides before comparing raw arrays.
            actual.matrix.sort_indices()
            reference.matrix.sort_indices()

            indices_match = numpy.array_equal(
                actual.matrix.indices, reference.matrix.indices)
            self.assertTrue(indices_match)

            data_match = numpy.array_equal(
                actual.matrix.data, reference.matrix.data)
            self.assertTrue(data_match)

            indptr_match = numpy.array_equal(
                actual.matrix.indptr, reference.matrix.indptr)
            self.assertTrue(indptr_match)
def test_normal_run(self):
    """Check that multiplying the feature weights by 2 doubles the matrix data.

    After merging the extractor arguments into ``self.docfreq_args``, every
    argument whose name contains ``"_weight"`` is multiplied by 2.  Each matrix
    returned by ``modify_feature_weights`` must share ``indices`` and
    ``indptr`` with its input batch, and its ``data`` must equal the input
    ``data`` times 2.
    """
    self.docfreq_args.update(self.extractor_args)
    weight = 2
    weight_keys = [key for key in self.docfreq_args if "_weight" in key]
    for key in weight_keys:
        self.docfreq_args[key] *= weight  # make not 1
    arguments = dict_to_arguments(self.docfreq_args)

    with patch.dict(sourced.ml.extractors.__extractors__, self.extractors, clear=True):
        result = modify_feature_weights(deepcopy(self.batches), arguments)
        self.assertEqual(len(result), len(self.batches))
        for actual, reference in zip(result, self.batches):
            # Canonical index order on both sides before comparing raw arrays.
            actual.matrix.sort_indices()
            reference.matrix.sort_indices()
            self.assertTrue(numpy.array_equal(
                actual.matrix.indices, reference.matrix.indices))
            self.assertTrue(numpy.array_equal(
                actual.matrix.data, reference.matrix.data * weight))
            self.assertTrue(numpy.array_equal(
                actual.matrix.indptr, reference.matrix.indptr))