def test_thresholdWeights(self):
    w = FisherFeatureWeights()
    w.names = ['a', 'b', 'c']
    w.values = [1.0, 2.0, 4.0]

    w1 = w.Threshold(2)
    self.assertEqual(w1.names, ['c', 'b'])
    # assertAlmostEqual doesn't accept sequences, so compare element-wise
    for expected, actual in zip([4.0, 2.0], w1.values):
        self.assertAlmostEqual(expected, actual)

def test_NewFromFeatureSet(self):
    """Fisher score calculation"""
    feature_set = FeatureSet_Discrete.NewFromFitFile(self.test_fit_path)
    feature_set.Normalize()

    result_weights = FisherFeatureWeights.NewFromFeatureSet(feature_set)

    # test weights generated from test-l.fit:
    # wndchrm classify -l -f1.0 -vtest_fit-l.weights test-l.fit test-l.fit
    target_weights = FisherFeatureWeights.NewFromFile(self.test_feat_weight_path)

    for target_val, res_val in zip(target_weights.values, result_weights.values):
        self.assertAlmostEqual(target_val, res_val, delta=self.epsilon)

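# For orientation, a minimal sketch of the Fisher discriminant score that the
# test above cross-checks against wndchrm's own output: the variance of a
# feature's per-class means divided by the mean of its per-class variances.
# This is an illustration under assumptions, not the library's
# implementation; the helper name fisher_score and the zero fallback are
# hypothetical.
def fisher_score(feature_column, class_labels):
    """Between-class variance over mean within-class variance (sketch)."""
    import numpy as np
    col = np.asarray(feature_column, dtype=float)
    labels = np.asarray(class_labels)
    classes = sorted(set(class_labels))
    class_means = np.array([col[labels == c].mean() for c in classes])
    class_vars = np.array([col[labels == c].var() for c in classes])
    within = class_vars.mean()
    if within == 0.0:
        return 0.0  # no intra-class scatter: the ratio is undefined
    return class_means.var() / within
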
def test_fisherFeatureWeights(self):
    sig1, sig2, sig3, sig4 = self.createSignatures()
    fts = FeatureSet_Discrete()
    fts.AddSignature(sig1, 0)
    fts.AddSignature(sig2, 0)
    fts.AddSignature(sig3, 1)
    fts.AddSignature(sig4, 1)
    tmp = fts.ContiguousDataMatrix()

    # TODO: weight[1]==0, presumably because the intra-class variance=0,
    # even though feature[1] is a perfect discriminator?
    fts.Normalize()
    wts = FisherFeatureWeights.NewFromFeatureSet(fts)
    np.testing.assert_almost_equal(wts.values, [4.0, 0.0])
    self.assertEqual(wts.names, ['ft [0]', 'ft [1]'])

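# The TODO above is consistent with the degenerate branch in the fisher_score
# sketch earlier: when every sample in a class takes the same value for a
# feature, the within-class variance is 0 and the Fisher ratio is undefined,
# so a score of 0 is presumably what the library falls back to, even though
# such a feature separates the classes perfectly. Hypothetical illustration:
#
#     fisher_score([-1.0, -1.0, 1.0, 1.0], [0, 0, 1, 1])  # -> 0.0
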
def test_predictDiscrete(self):
    s1 = self.createSignature(1, -1)
    s2 = self.createSignature(2, -2)
    s3 = self.createSignature(3, -3)
    s4 = self.createSignature(4, -4)

    trainFts = FeatureSet_Discrete()
    trainFts.AddSignature(s1, 0)
    trainFts.AddSignature(s2, 0)
    trainFts.AddSignature(s3, 1)
    trainFts.AddSignature(s4, 1)
    tmp = trainFts.ContiguousDataMatrix()

    # Values are chosen so that different weights give different predictions
    s5 = self.createSignature(0, -5)
    s6 = self.createSignature(5, 0)
    testFts = FeatureSet_Discrete()
    testFts.AddSignature(s5, 0)
    testFts.AddSignature(s6, 0)
    tmp = testFts.ContiguousDataMatrix()

    weights = FisherFeatureWeights()
    weights.names = ['ft [0]', 'ft [1]']
    weights.values = [2.0, 1.0]

    pred = DiscreteBatchClassificationResult.New(trainFts, testFts, weights)
    self.assertEqual(len(pred.individual_results), 2)
    r1, r2 = pred.individual_results
    np.testing.assert_almost_equal(
        r1.marginal_probabilities, [0.975, 0.025], decimal=3)
    np.testing.assert_almost_equal(
        r2.marginal_probabilities, [0.025, 0.975], decimal=3)

    weights = FisherFeatureWeights()
    weights.names = ['ft [0]', 'ft [1]']
    weights.values = [1.0, 2.0]

    pred = DiscreteBatchClassificationResult.New(trainFts, testFts, weights)
    self.assertEqual(len(pred.individual_results), 2)
    r1, r2 = pred.individual_results
    np.testing.assert_almost_equal(
        r1.marginal_probabilities, [0.025, 0.975], decimal=3)
    np.testing.assert_almost_equal(
        r2.marginal_probabilities, [0.975, 0.025], decimal=3)

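# For reference, a minimal sketch of the WND5-style rule the expected values
# above were hand-checked against (an illustration under assumptions, not the
# library's code): the weighted squared distance from the test sample to each
# training sample is raised to the power -5, the similarities are averaged
# per class, and the class averages are normalized into marginal
# probabilities. The helper name wnd5_marginal_probs is hypothetical.
def wnd5_marginal_probs(train_samples, train_classes, test_sample, weights):
    """Marginal probabilities for one test sample (sketch)."""
    import numpy as np
    w2 = np.asarray(weights, dtype=float) ** 2
    x = np.asarray(test_sample, dtype=float)
    class_sims = {}
    for t, c in zip(train_samples, train_classes):
        d = np.sum(w2 * (x - np.asarray(t, dtype=float)) ** 2)
        class_sims.setdefault(c, []).append(d ** -5)
    sims = np.array([np.mean(class_sims[c]) for c in sorted(class_sims)])
    return sims / sims.sum()

# e.g. for the first test sample above with weights [2.0, 1.0]:
#     wnd5_marginal_probs([(1, -1), (2, -2), (3, -3), (4, -4)],
#                         [0, 0, 1, 1], (0, -5), [2.0, 1.0])
#     # -> approximately [0.975, 0.025]
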
class TestGraphs(unittest.TestCase):
    """Test WND-CHARM's graph-making functionality."""

    fs = CreateArtificialFeatureSet_Discrete(n_samples=1000, n_classes=10,
        initial_noise_sigma=100, noise_gradient=10, random_state=43)
    train_set, test_set = fs.Split(randomize=False, quiet=True)
    train_set.Normalize(quiet=True)
    fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()
    reduced_train_set = train_set.FeatureReduce(fw.names)
    reduced_test_set = test_set.FeatureReduce(fw.names)
    reduced_test_set.Normalize(reduced_train_set, quiet=True)
    batch_result = DiscreteBatchClassificationResult.New(
        reduced_train_set, reduced_test_set, fw, quiet=True)

    def setUp(self):
        self.tempdir = mkdtemp()

    def tearDown(self):
        rmtree(self.tempdir)

    def CompareGraphs(self, graph, testfilename):
        """Helper function to check output graphs"""
        tempfile = self.tempdir + sep + testfilename
        graph.SaveToFile(tempfile)
        try:
            self.assertTrue(filecmp.cmp(testfilename, tempfile))
        except AssertionError:
            print "Files not equal: {0} and {1}".format(testfilename, tempfile)
            raise

    @unittest.skipIf(HasMatplotlib, "Skipped if matplotlib IS installed")
    def test_ErrMsgIfMatplotlibNotInstalled(self):
        """Fail gracefully with an informative message if matplotlib isn't installed"""
        graph = PredictedValuesGraph(self.batch_result)
        with self.assertRaises(ImportError):
            graph.RankOrderedPredictedValuesGraph()
        with self.assertRaises(ImportError):
            graph.KernelSmoothedDensityGraph()

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOT installed")
    def test_RankOrderedFromBatchClassificationResult(self):
        """Rank Ordered Predicted values graph from a single split"""
        testfilename = 'test_graph_rank_ordered.png'
        graph = PredictedValuesGraph(self.batch_result)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOT installed")
    def test_KernelSmoothedFromBatchClassificationResult(self):
        """Kernel Smoothed Probability density graph from a single split"""
        testfilename = 'test_graph_kernel_smoothed.png'
        graph = PredictedValuesGraph(self.batch_result)
        graph.KernelSmoothedDensityGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skip("Skip until ShuffleSplit has a RandomState param to pass in")
    def test_FromDiscreteClassificationExperimentResults(self):
        """Rank Ordered Predicted values graph from an experiment result (multiple splits)"""
        testfilename = 'test_graph_rank_ordered_experiment.png'
        small_fs = CreateArtificialFeatureSet_Discrete(n_samples=100, n_classes=5,
            initial_noise_sigma=100, noise_gradient=10, random_state=42)
        experiment = DiscreteClassificationExperimentResult.NewShuffleSplit(
            small_fs, quiet=True)
        graph = PredictedValuesGraph(experiment)
        graph.RankOrderedPredictedValuesGraph()
        # graph.SaveToFile( tempfile ) # remove after RandomState for ShuffleSplit is implemented
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib, "Skipped if matplotlib IS NOT installed")
    def test_FromHTML(self):
        """Rank Ordered Predicted values graph from an experiment result loaded from an HTML report"""
        testfilename = 'test_graph_fromHTML.png'
        # Inflate the zipped html file into a temp file.
        # The zip was originally created like this:
        #zipped_file_path = pychrm_test_dir + sep + 'c_elegans_terminal_bulb.html'
        #import zlib
        #zf = zipfile.ZipFile( zipped_file_path + '.zip', mode='w' )
        #zf.write( zipped_file_path, compress_type=zipfile.ZIP_DEFLATED )
        #zf.close()
        import zipfile
        zipped_file_path = pychrm_test_dir + sep + 'c_elegans_terminal_bulb.html.zip'
        zf = zipfile.ZipFile(zipped_file_path, mode='r')
        zf.extractall(self.tempdir)

        tempfile = self.tempdir + sep + testfilename
        htmlfilepath = self.tempdir + sep + zf.namelist()[0]
        exp_result = DiscreteClassificationExperimentResult.NewFromHTMLReport(
            htmlfilepath)
        graph = PredictedValuesGraph(exp_result)
        graph.RankOrderedPredictedValuesGraph()
        graph.SaveToFile(tempfile)
        try:
            self.assertTrue(filecmp.cmp(testfilename, tempfile))
        except AssertionError:
            print "Files not equal: {0} and {1}".format(testfilename, tempfile)
            raise

    @unittest.skip("Skip until a test training set with uninterpolatable class labels is made available.")
    #@unittest.skipUnless( HasMatplotlib, "Skipped if matplotlib IS NOT installed" )
    def test_IfNotInterpolatable(self):
        """You can't graph predicted values if the classes aren't interpolatable."""
        testfilename = 'ShouldntBeGraphable.png'
        fitfilepath = wndchrm_test_dir + sep + 'test-l.fit'
        fs = FeatureSet_Discrete.NewFromFitFile(fitfilepath)
        train_set, test_set = fs.Split(randomize=False, quiet=True)
        train_set.Normalize()
        fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()
        reduced_train_set = train_set.FeatureReduce(fw.names)
        reduced_test_set = test_set.FeatureReduce(fw.names)
        reduced_test_set.Normalize(reduced_train_set, quiet=True)

        batch_result = DiscreteBatchClassificationResult.New(
            reduced_train_set, reduced_test_set, fw, quiet=True)
        graph = PredictedValuesGraph(batch_result)

        tempfile = self.tempdir + sep + testfilename
        with self.assertRaises(ValueError):
            graph.RankOrderedPredictedValuesGraph()
            graph.SaveToFile(tempfile)

class TestWND5Classification(unittest.TestCase):
    """WND5 Classification"""

    epsilon = 0.00001

    # Define paths to original files
    test_sig_path = join(test_dir, 't1_s01_c05_ij-l_precalculated.sig')
    test_fit_path = join(test_dir, 'test-l.fit')
    test_feat_wght_path = join(test_dir, 'test_fit-l.weights')
    test_tif_path = join(test_dir, 't1_s01_c05_ij.tif')

    # Here are the correct values that the Python API needs to return:
    # wndchrm classify -l -f1.0 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif    1.6e-27    0.083    0.917    *    4cell    3.835
    # wndchrm classify -l -f0.14765 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif    3.23e-27   0.076    0.924    *    4cell    3.848
    # wndchrm classify -l -f0.0685 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif    7.05e-27   0.069    0.931    *    4cell    3.862
    # (the -f fraction -> feature-count mapping is sketched after this class)
    correct_marg_probs = {}
    correct_marg_probs[2919] = [0.083, 0.917]
    correct_marg_probs[431] = [0.076, 0.924]
    #correct_marg_probs[200] = [0.044, 0.956]
    # slight difference in marg probs due to my use of round() below
    correct_marg_probs[200] = [0.069, 0.931]

    # Load the original files once and only once for all this class's tests
    feature_set = FeatureSet_Discrete.NewFromFitFile(test_fit_path)
    feature_set.Normalize()
    test_sample = Signatures.NewFromSigFile(test_sig_path, test_tif_path)
    test_sample.Normalize(feature_set)
    all_weights = FisherFeatureWeights.NewFromFile(test_feat_wght_path)

    # --------------------------------------------------------------------------
    def Check(self, num_feats=None):
        weights = self.all_weights.Threshold(num_feats)
        feat_set = self.feature_set.FeatureReduce(weights.names)
        sample = self.test_sample.FeatureReduce(weights.names)
        result = DiscreteImageClassificationResult.NewWND5(
            feat_set, weights, sample)
        result_marg_probs = [round(val, 3)
                             for val in result.marginal_probabilities]
        self.assertSequenceEqual(self.correct_marg_probs[num_feats],
                                 result_marg_probs)

    # --------------------------------------------------------------------------
    def test_WND5_all_features(self):
        """WND5 classification with entire large feature set (2919 features)"""
        self.Check(2919)

    # --------------------------------------------------------------------------
    def test_WND5_15percent_threshold(self):
        """WND5 classification with large feature set & 15% threshold (431 features)"""
        self.Check(431)

    # --------------------------------------------------------------------------
    def test_WND5_200_feat_threshold(self):
        """WND5 classification with large feature set & 200 feature threshold"""
        self.Check(200)

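# The -f flag in the wndchrm commands quoted in TestWND5Classification is a
# fraction of the full 2919-feature set; the feature counts used as keys in
# correct_marg_probs follow from it. A sketch of the assumed mapping (the
# exact rounding rule is a guess):
#
#     >>> [int(round(f * 2919)) for f in (1.0, 0.14765, 0.0685)]
#     [2919, 431, 200]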