def test_Normalize(self):
    """FIXME: THIS TEST BELONGS IN TEST_FEATURESET.PY"""
    from numpy.testing import assert_allclose
    result_fs = FeatureSet_Discrete.NewFromFitFile(self.test_fit_path)
    result_fs.Normalize()
    target_fs = FeatureSet_Discrete.NewFromFitFile(
        self.test_normalized_fit_path)
    assert_allclose(result_fs.data_matrix, target_fs.data_matrix,
                    rtol=self.epsilon)
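# For reference, a minimal sketch of what a no-argument Normalize() is expected
# to do: rescale each feature column to the range [0, 100] using that column's
# own min/max (the WND-CHARM convention). The helper name and the plain-numpy
# implementation are illustrative assumptions, not the library's actual code.
import numpy as np

def normalize_to_0_100(data_matrix):
    """Rescale each feature column of `data_matrix` to [0, 100] in place."""
    mins = data_matrix.min(axis=0)
    maxs = data_matrix.max(axis=0)
    spans = maxs - mins
    spans[spans == 0] = 1  # constant columns map to 0 instead of dividing by zero
    data_matrix -= mins
    data_matrix *= 100.0 / spans
    return data_matrix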
def test_IfNotInterpolatable(self):
    """You can't graph predicted values if the classes aren't interpolatable."""
    testfilename = 'ShouldntBeGraphable.png'
    fitfilepath = wndchrm_test_dir + sep + 'test-l.fit'
    fs = FeatureSet_Discrete.NewFromFitFile(fitfilepath)
    train_set, test_set = fs.Split(randomize=False, quiet=True)
    train_set.Normalize()
    fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()
    # Normalize the test set against the training set *before* reducing it;
    # FeatureReduce returns a copy, so normalizing afterwards would leave the
    # reduced test set un-normalized.
    test_set.Normalize(train_set, quiet=True)
    reduced_train_set = train_set.FeatureReduce(fw.names)
    reduced_test_set = test_set.FeatureReduce(fw.names)
    batch_result = DiscreteBatchClassificationResult.New(
        reduced_train_set, reduced_test_set, fw, quiet=True)
    graph = PredictedValuesGraph(batch_result)
    tempfile = self.tempdir + sep + testfilename
    with self.assertRaises(ValueError):
        graph.RankOrderedPredictedValuesGraph()
        graph.SaveToFile(tempfile)
def crossValidate(ftb, project, featureThreshold, imagesOnly, numSplits):
    message = ''
    fullSet = FeatureSet_Discrete()
    fullSet.source_path = project.getName()

    # Pool every dataset in the project into one feature set, one class per dataset
    classId = 0
    for ds in project.listChildren():
        message += 'Processing dataset id:%d\n' % ds.getId()
        message += addToFeatureSet(ftb, ds, fullSet, classId, imagesOnly)
        classId += 1
    tmp = fullSet.ContiguousDataMatrix()

    experiment = DiscreteClassificationExperimentResult(training_set=fullSet)
    for i in range(numSplits):
        trainSet, testSet = fullSet.Split()
        trainSet.Normalize()
        testSet.Normalize(trainSet)
        weights = FisherFeatureWeights.NewFromFeatureSet(trainSet)
        nFeatures = int(ceil(len(weights.names) * featureThreshold))
        message += 'Selecting top %d features\n' % nFeatures
        weights = weights.Threshold(nFeatures)
        reducedTrainSet = reduceFeatures(trainSet, weights)
        reducedTestSet = reduceFeatures(testSet, weights)
        batchResult = DiscreteBatchClassificationResult.New(
            reducedTrainSet, reducedTestSet, weights, batch_number=i)
        experiment.individual_results.append(batchResult)

    out = StringIO()
    experiment.Print(output_stream=out)
    experiment.PerSampleStatistics(output_stream=out)
    pid = project.getId()
    WndcharmStorage.addTextFileAnnotationTo(
        ftb.conn, out.getvalue(), 'Project', pid,
        'Wndcharm_Cross_Validation_Results.txt',
        'Wndcharm Cross Validation Results for Project:%d' % pid)
    message += 'Attached cross-validation results\n'

    #return experiment
    return message
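# For reference, a minimal sketch of the same per-split pipeline without the
# OMERO plumbing above, driven straight from a WND-CHARM .fit file. The
# function name and default arguments are illustrative; every API call is one
# that already appears in this module.
def crossValidateFitFile(fitfilepath, featureThreshold=0.15, numSplits=5):
    fullSet = FeatureSet_Discrete.NewFromFitFile(fitfilepath)
    experiment = DiscreteClassificationExperimentResult(training_set=fullSet)
    for i in range(numSplits):
        trainSet, testSet = fullSet.Split()
        trainSet.Normalize()
        testSet.Normalize(trainSet)
        weights = FisherFeatureWeights.NewFromFeatureSet(trainSet)
        weights = weights.Threshold(
            int(ceil(len(weights.names) * featureThreshold)))
        batchResult = DiscreteBatchClassificationResult.New(
            trainSet.FeatureReduce(weights.names),
            testSet.FeatureReduce(weights.names),
            weights, batch_number=i)
        experiment.individual_results.append(batchResult)
    return experiment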
def test_predictDiscrete(self):
    s1 = self.createSignature(1, -1)
    s2 = self.createSignature(2, -2)
    s3 = self.createSignature(3, -3)
    s4 = self.createSignature(4, -4)
    trainFts = FeatureSet_Discrete()
    trainFts.AddSignature(s1, 0)
    trainFts.AddSignature(s2, 0)
    trainFts.AddSignature(s3, 1)
    trainFts.AddSignature(s4, 1)
    tmp = trainFts.ContiguousDataMatrix()

    # Values are chosen so that different weights give different predictions
    s5 = self.createSignature(0, -5)
    s6 = self.createSignature(5, 0)
    testFts = FeatureSet_Discrete()
    testFts.AddSignature(s5, 0)
    testFts.AddSignature(s6, 0)
    tmp = testFts.ContiguousDataMatrix()

    weights = FisherFeatureWeights()
    weights.names = ['ft [0]', 'ft [1]']
    weights.values = [2.0, 1.0]
    pred = DiscreteBatchClassificationResult.New(trainFts, testFts, weights)
    self.assertEqual(len(pred.individual_results), 2)
    r1, r2 = pred.individual_results
    np.testing.assert_almost_equal(
        r1.marginal_probabilities, [0.975, 0.025], decimal=3)
    np.testing.assert_almost_equal(
        r2.marginal_probabilities, [0.025, 0.975], decimal=3)

    weights = FisherFeatureWeights()
    weights.names = ['ft [0]', 'ft [1]']
    weights.values = [1.0, 2.0]
    pred = DiscreteBatchClassificationResult.New(trainFts, testFts, weights)
    self.assertEqual(len(pred.individual_results), 2)
    r1, r2 = pred.individual_results
    np.testing.assert_almost_equal(
        r1.marginal_probabilities, [0.025, 0.975], decimal=3)
    np.testing.assert_almost_equal(
        r2.marginal_probabilities, [0.975, 0.025], decimal=3)
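# A worked check of the expected values above, assuming the usual WND5 rule:
# for a test sample x, the distance to each training sample t is
# d = sum_f w_f**2 * (x_f - t_f)**2, its similarity is d**-5, and each class's
# marginal probability is that class's mean similarity normalized across
# classes. (The [a, b] feature layout for createSignature(a, b) is an
# assumption here.) With weights [2.0, 1.0] and test sample s5 = [0, -5]:
import numpy as np

train = np.array([[1, -1], [2, -2], [3, -3], [4, -4]], dtype=float)
labels = np.array([0, 0, 1, 1])
w = np.array([2.0, 1.0])
x = np.array([0.0, -5.0])

d = np.sum(w ** 2 * (train - x) ** 2, axis=1)   # [20, 25, 40, 65]
sim = d ** -5.0
class_sims = np.array([sim[labels == c].mean() for c in (0, 1)])
marg = class_sims / class_sims.sum()
print(np.round(marg, 3))                        # -> [0.975, 0.025]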
def test_PerSampleStatisticsWITHOUTPredictedValue(self):
    """DISCRETE ShuffleSplit/PerSampleStatistics w/ mini binucleate test set
    (no predicted value)"""
    fs = FeatureSet_Discrete.NewFromFitFile('../wndchrm_tests/test-l.fit')
    exp = DiscreteClassificationExperimentResult.NewShuffleSplit(fs, quiet=True)
    exp.PerSampleStatistics()
    # Reaching this point without an exception is the pass condition.
    self.assertTrue(True)
def test_NewFromFeatureSet(self):
    """Fisher score calculation"""
    feature_set = FeatureSet_Discrete.NewFromFitFile(self.test_fit_path)
    feature_set.Normalize()
    result_weights = FisherFeatureWeights.NewFromFeatureSet(feature_set)

    # test weights generated from test-l.fit:
    # wndchrm classify -l -f1.0 -vtest_fit-l.weights test-l.fit test-l.fit
    target_weights = FisherFeatureWeights.NewFromFile(
        self.test_feat_weight_path)

    for target_val, res_val in zip(target_weights.values,
                                   result_weights.values):
        self.assertAlmostEqual(target_val, res_val, delta=self.epsilon)
def test_fisherFeatureWeights(self):
    sig1, sig2, sig3, sig4 = self.createSignatures()
    fts = FeatureSet_Discrete()
    fts.AddSignature(sig1, 0)
    fts.AddSignature(sig2, 0)
    fts.AddSignature(sig3, 1)
    fts.AddSignature(sig4, 1)
    tmp = fts.ContiguousDataMatrix()

    # TODO: weight[1]==0, presumably because the intra-class variance=0,
    # even though feature[1] is a perfect discriminator?
    fts.Normalize()
    wts = FisherFeatureWeights.NewFromFeatureSet(fts)
    np.testing.assert_almost_equal(wts.values, [4.0, 0.0])
    self.assertEqual(wts.names, ['ft [0]', 'ft [1]'])
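# Regarding the TODO above: a sketch of a Fisher discriminant score as
# typically computed (variance of the class means divided by the mean
# within-class variance). If a feature has zero variance inside every class,
# the denominator is zero, and an implementation that guards the division by
# returning 0 would score even a perfect discriminator as 0, which would
# explain weight[1]==0. This helper is illustrative, not the library's code.
import numpy as np

def fisher_scores(data, labels):
    """Per-feature Fisher score for `data` (n_samples x n_features)."""
    classes = np.unique(labels)
    class_means = np.array([data[labels == c].mean(axis=0) for c in classes])
    class_vars = np.array([data[labels == c].var(axis=0) for c in classes])
    numerator = class_means.var(axis=0)       # spread of the class means
    denominator = class_vars.mean(axis=0)     # average within-class variance
    scores = np.zeros_like(numerator)
    nonzero = denominator > 0
    scores[nonzero] = numerator[nonzero] / denominator[nonzero]
    return scores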
def test_createFeatureSet(self):
    sig1, sig2, sig3, sig4 = self.createSignatures()
    fts = FeatureSet_Discrete()

    # Add classes out of order
    fts.AddSignature(sig3, 1)
    self.assertEqual(fts.num_classes, 2)
    self.assertEqual(fts.num_features, 2)
    self.assertEqual(fts.num_images, 1)
    self.assertEqual(len(fts.data_list), 2)
    self.assertIsNone(fts.data_list[0])
    #self.assertSequenceEqual
    np.testing.assert_almost_equal(fts.data_list[1], sig3.values)
    self.assertEqual(fts.classsizes_list, [0, 1])
    self.assertEqual(fts.classnames_list, ['UNKNOWN1', 'UNKNOWN2'])

    fts.AddSignature(sig1, 0)
    self.assertEqual(fts.num_classes, 2)
    self.assertEqual(fts.num_features, 2)
    self.assertEqual(fts.num_images, 2)
    self.assertEqual(len(fts.data_list), 2)
    np.testing.assert_almost_equal(fts.data_list[0], sig1.values)
    np.testing.assert_almost_equal(fts.data_list[1], sig3.values)
    self.assertEqual(fts.classsizes_list, [1, 1])
    self.assertEqual(fts.classnames_list, ['UNKNOWN1', 'UNKNOWN2'])

    # fts.ContiguousDataMatrix() fails unless there are at least two images
    # per class, is this really necessary?
    #tmp = fts.ContiguousDataMatrix()

    fts.AddSignature(sig2, 0)
    fts.AddSignature(sig4, 1)
    self.assertEqual(fts.classsizes_list, [2, 2])
    self.assertEqual(fts.num_images, 4)
    tmp = fts.ContiguousDataMatrix()
    self.assertEqual(fts.data_matrix.shape, (4, 2))
    np.testing.assert_almost_equal(fts.data_matrix[0], sig1.values)
    np.testing.assert_almost_equal(fts.data_matrix[1], sig2.values)
    np.testing.assert_almost_equal(fts.data_matrix[2], sig3.values)
    np.testing.assert_almost_equal(fts.data_matrix[3], sig4.values)
class TestWND5Classification(unittest.TestCase):
    """WND5 Classification"""

    epsilon = 0.00001

    # Define paths to original files
    test_sig_path = join(test_dir, 't1_s01_c05_ij-l_precalculated.sig')
    test_fit_path = join(test_dir, 'test-l.fit')
    test_feat_wght_path = join(test_dir, 'test_fit-l.weights')
    test_tif_path = join(test_dir, 't1_s01_c05_ij.tif')

    # Here are the correct values that the Python API needs to return:
    # wndchrm classify -l -f1.0 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif  1.6e-27   0.083  0.917  *  4cell  3.835
    # wndchrm classify -l -f0.14765 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif  3.23e-27  0.076  0.924  *  4cell  3.848
    # wndchrm classify -l -f0.0685 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif  7.05e-27  0.069  0.931  *  4cell  3.862
    correct_marg_probs = {}
    correct_marg_probs[2919] = [0.083, 0.917]
    correct_marg_probs[431] = [0.076, 0.924]
    #correct_marg_probs[200] = [0.044, 0.956]
    # slight difference in marg probs due to my use of round() below
    correct_marg_probs[200] = [0.069, 0.931]

    # Load the original files once and only once for all of this class's tests
    feature_set = FeatureSet_Discrete.NewFromFitFile(test_fit_path)
    feature_set.Normalize()
    test_sample = Signatures.NewFromSigFile(test_sig_path, test_tif_path)
    test_sample.Normalize(feature_set)
    all_weights = FisherFeatureWeights.NewFromFile(test_feat_wght_path)

    # --------------------------------------------------------------------------
    def Check(self, num_feats=None):
        weights = self.all_weights.Threshold(num_feats)
        feat_set = self.feature_set.FeatureReduce(weights.names)
        sample = self.test_sample.FeatureReduce(weights.names)
        result = DiscreteImageClassificationResult.NewWND5(
            feat_set, weights, sample)
        result_marg_probs = [round(val, 3)
                             for val in result.marginal_probabilities]
        self.assertSequenceEqual(self.correct_marg_probs[num_feats],
                                 result_marg_probs)

    # --------------------------------------------------------------------------
    def test_WND5_all_features(self):
        """WND5 classification with entire large feature set (2919 features)"""
        self.Check(2919)

    # --------------------------------------------------------------------------
    def test_WND5_15percent_threshold(self):
        """WND5 classification with large feature set, 15% threshold (431 features)"""
        self.Check(431)

    # --------------------------------------------------------------------------
    def test_WND5_200_feat_threshold(self):
        """WND5 classification with large feature set & 200 feature threshold"""
        self.Check(200)
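# The CLI -f flags in the comments above map onto Threshold() feature counts:
# a fraction f keeps roughly round(f * 2919) of the 2919 features. A quick
# consistency check (the mapping rule is inferred from the numbers above, not
# taken from the wndchrm source):
for frac, expected in [(1.0, 2919), (0.14765, 431), (0.0685, 200)]:
    assert int(round(frac * 2919)) == expected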
def test_incompatibleFeatureVersion(self):
    s = self.createSignature(1, 10)
    fts = FeatureSet_Discrete()
    fts.feature_vector_version = '0.0'
    self.assertRaises(ValueError, fts.AddSignature, s, 1)