def test_Normalize(self):
        """FIXME: THIS TEST BELONGS IN TEST_FEATURESET.PY

        Normalize the feature set loaded from ``self.test_fit_path`` and
        compare its data matrix with the data matrix of the feature set
        loaded from ``self.test_normalized_fit_path``, using
        ``self.epsilon`` as the relative tolerance.
        """

        from numpy.testing import assert_allclose

        # Feature set under test: loaded raw, then normalized by the API.
        normalized = FeatureSet_Discrete.NewFromFitFile(self.test_fit_path)
        normalized.Normalize()

        # Reference feature set, loaded as-is from the second fit file.
        reference = FeatureSet_Discrete.NewFromFitFile(
            self.test_normalized_fit_path)

        assert_allclose(actual=normalized.data_matrix,
                        desired=reference.data_matrix,
                        rtol=self.epsilon)
Example #2
0
    def test_IfNotInterpolatable(self):
        """You can't graph predicted values if the classes aren't interpolatable.

        Builds a batch classification result from a train/test split of
        ``test-l.fit`` and expects ``ValueError`` when a rank-ordered
        predicted-values graph is requested (or saved) from that result.
        """

        testfilename = 'ShouldntBeGraphable.png'
        fitfilepath = wndchrm_test_dir + sep + 'test-l.fit'
        fs = FeatureSet_Discrete.NewFromFitFile(fitfilepath)
        train_set, test_set = fs.Split(randomize=False, quiet=True)

        # Normalize both halves *before* reducing them, so that the reduced
        # sets handed to the classifier are derived from normalized data.
        # Normalizing test_set only after FeatureReduce() had been called
        # came too late to be reflected in a reduced set that was already
        # built.
        # NOTE(review): assumes FeatureReduce() returns a new feature set
        # rather than a live view of the original -- confirm.
        train_set.Normalize()
        test_set.Normalize(train_set, quiet=True)

        fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()
        reduced_train_set = train_set.FeatureReduce(fw.names)
        reduced_test_set = test_set.FeatureReduce(fw.names)

        batch_result = DiscreteBatchClassificationResult.New(reduced_train_set,
                                                             reduced_test_set,
                                                             fw,
                                                             quiet=True)
        graph = PredictedValuesGraph(batch_result)

        # Named output_path (not "tempfile") to avoid shadowing the stdlib
        # module of that name.
        output_path = self.tempdir + sep + testfilename

        # A ValueError from either call satisfies the test; SaveToFile() is
        # only reached if building the graph did not raise.
        with self.assertRaises(ValueError):
            graph.RankOrderedPredictedValuesGraph()
            graph.SaveToFile(output_path)
def crossValidate(ftb, project, featureThreshold, imagesOnly, numSplits):
    """Cross-validate a project's datasets and attach the report to it.

    Every child returned by ``project.listChildren()`` is added to a single
    feature set as its own class (class ids are assigned in listing order,
    starting at 0).  The combined set is then split ``numSplits`` times;
    for each split the training part is normalized, the test part is
    normalized against it, Fisher weights are computed from the training
    part and thresholded, and both parts are reduced to the selected
    features before being classified.  The printed experiment results are
    attached to the project as a text file annotation.

    :param ftb: forwarded to ``addToFeatureSet``; its ``conn`` attribute is
        used when attaching the results.
    :param project: object providing ``getName()``, ``getId()`` and
        ``listChildren()``.
    :param featureThreshold: multiplied by the number of weighted features
        (rounded up) to decide how many features to keep.
    :param imagesOnly: forwarded unchanged to ``addToFeatureSet``.
    :param numSplits: number of split/classify iterations to run.
    :return: a log string describing the processing steps.
    """
    log = []
    fullSet = FeatureSet_Discrete()
    fullSet.source_path = project.getName()

    for classId, ds in enumerate(project.listChildren()):
        log.append('Processing dataset id:%d\n' % ds.getId())
        log.append(addToFeatureSet(ftb, ds, fullSet, classId, imagesOnly))

    # Called for its effect on fullSet; the return value is not needed.
    fullSet.ContiguousDataMatrix()
    experiment = DiscreteClassificationExperimentResult(training_set=fullSet)

    for batchNumber in range(numSplits):
        trainSet, testSet = fullSet.Split()
        trainSet.Normalize()
        testSet.Normalize(trainSet)

        weights = FisherFeatureWeights.NewFromFeatureSet(trainSet)

        # ceil() can return a float (Python 2 math.ceil, numpy.ceil); a
        # feature count has to be an integer.
        nFeatures = int(ceil(len(weights.names) * featureThreshold))
        log.append('Selecting top %d features\n' % nFeatures)
        weights = weights.Threshold(nFeatures)

        # NOTE(review): each split is reduced exactly once here; this
        # assumes reduceFeatures() is a plain selection of the weighted
        # features, so a second pass with the same weights adds nothing
        # -- confirm.
        reducedTestSet = reduceFeatures(testSet, weights)
        reducedTrainSet = reduceFeatures(trainSet, weights)

        # Classification belongs inside the loop: one batch result per
        # split.  Indented with spaces only (tab-indented lines here are a
        # TabError on Python 3).
        batchResult = DiscreteBatchClassificationResult.New(
            reducedTrainSet, reducedTestSet, weights,
            batch_number=batchNumber)
        experiment.individual_results.append(batchResult)

    out = StringIO()
    experiment.Print(output_stream=out)
    experiment.PerSampleStatistics(output_stream=out)

    pid = project.getId()
    WndcharmStorage.addTextFileAnnotationTo(
        ftb.conn, out.getvalue(), 'Project', pid,
        'Wndcharm_Cross_Validation_Results.txt',
        'Wndcharm Cross Validation Results for Project:%d' % pid)

    log.append('Attached cross-validation results\n')
    return ''.join(log)
Example #4
0
    def test_predictDiscrete(self):
        """Batch classification with two opposite Fisher weightings.

        Four training signatures (two per class) and two test signatures
        are classified twice.  Swapping the two feature weights is
        expected to swap the marginal probabilities reported for the two
        test samples.
        """
        training_signatures = [
            self.createSignature(1, -1),
            self.createSignature(2, -2),
            self.createSignature(3, -3),
            self.createSignature(4, -4),
        ]

        training = FeatureSet_Discrete()
        for signature, class_index in zip(training_signatures, (0, 0, 1, 1)):
            training.AddSignature(signature, class_index)
        training.ContiguousDataMatrix()

        # Values are chosen so that different weights give different predictions
        held_out_signatures = [
            self.createSignature(0, -5),
            self.createSignature(5, 0),
        ]

        held_out = FeatureSet_Discrete()
        for signature in held_out_signatures:
            held_out.AddSignature(signature, 0)
        held_out.ContiguousDataMatrix()

        # (weight values, expected marginal probabilities per test sample)
        scenarios = (
            ([2.0, 1.0], ([0.975, 0.025], [0.025, 0.975])),
            ([1.0, 2.0], ([0.025, 0.975], [0.975, 0.025])),
        )

        for weight_values, (expected_first, expected_second) in scenarios:
            fisher = FisherFeatureWeights()
            fisher.names = ['ft [0]', 'ft [1]']
            fisher.values = weight_values

            batch = DiscreteBatchClassificationResult.New(training, held_out,
                                                          fisher)
            self.assertEqual(len(batch.individual_results), 2)

            first, second = batch.individual_results
            np.testing.assert_almost_equal(first.marginal_probabilities,
                                           expected_first,
                                           decimal=3)
            np.testing.assert_almost_equal(second.marginal_probabilities,
                                           expected_second,
                                           decimal=3)
    def test_PerSampleStatisticsWITHOUTPredictedValue(self):
        """DISCRETE ShuffleSplit/PerSampleStatistics w/ mini binucleate test set (no predicted value)

        Smoke test: it passes as long as building the shuffle-split
        experiment and printing its per-sample statistics do not raise.
        """

        feature_set = FeatureSet_Discrete.NewFromFitFile(
            '../wndchrm_tests/test-l.fit')
        experiment = DiscreteClassificationExperimentResult.NewShuffleSplit(
            feature_set, quiet=True)
        experiment.PerSampleStatistics()

        # Nothing to compare against; reaching this line is the success
        # criterion.
        self.assertTrue(True)
    def test_NewFromFeatureSet(self):
        """Fisher score calculation

        Computes Fisher weights from the normalized feature set loaded from
        ``self.test_fit_path`` and compares them, pairwise and in order,
        with the weights read from ``self.test_feat_weight_path``.
        """

        normalized_set = FeatureSet_Discrete.NewFromFitFile(self.test_fit_path)
        normalized_set.Normalize()
        computed = FisherFeatureWeights.NewFromFeatureSet(normalized_set)

        # test weights generated from test-l.fit:
        # wndchrm classify -l -f1.0 -vtest_fit-l.weights test-l.fit test-l.fit
        expected = FisherFeatureWeights.NewFromFile(
            self.test_feat_weight_path)

        # Pairs are formed positionally; zip() stops at the shorter sequence.
        value_pairs = zip(expected.values, computed.values)
        for expected_value, computed_value in value_pairs:
            self.assertAlmostEqual(expected_value, computed_value,
                                   delta=self.epsilon)
Example #7
0
    def test_fisherFeatureWeights(self):
        """Fisher weights for a four-signature, two-class feature set.

        The first two signatures go into class 0 and the last two into
        class 1; after normalization the weights are expected to be
        ``[4.0, 0.0]`` for features ``'ft [0]'`` and ``'ft [1]'``.
        """
        first, second, third, fourth = self.createSignatures()

        feature_set = FeatureSet_Discrete()
        assignments = ((first, 0), (second, 0), (third, 1), (fourth, 1))
        for signature, class_index in assignments:
            feature_set.AddSignature(signature, class_index)
        feature_set.ContiguousDataMatrix()

        # TODO: weight[1]==0, presumably because the intra-class variance=0,
        # even though feature[1] is a perfect discriminator?
        feature_set.Normalize()

        fisher = FisherFeatureWeights.NewFromFeatureSet(feature_set)

        np.testing.assert_almost_equal(fisher.values, [4.0, 0.0])
        self.assertEqual(fisher.names, ['ft [0]', 'ft [1]'])
Example #8
0
    def test_createFeatureSet(self):
        """Grow a feature set one signature at a time and check its state.

        Signatures are added with class 1 first, then class 0, and the
        class/feature/image counts, per-class data, class sizes and class
        names are checked after each step.  Once every class holds two
        signatures the contiguous data matrix is built and compared row by
        row with the signatures.
        """
        first, second, third, fourth = self.createSignatures()
        feature_set = FeatureSet_Discrete()

        def check_counts(expected_images):
            # Only the image count varies between checkpoints.
            self.assertEqual(feature_set.num_classes, 2)
            self.assertEqual(feature_set.num_features, 2)
            self.assertEqual(feature_set.num_images, expected_images)
            self.assertEqual(len(feature_set.data_list), 2)

        # Add classes out of order
        feature_set.AddSignature(third, 1)

        check_counts(1)
        self.assertIsNone(feature_set.data_list[0])
        np.testing.assert_almost_equal(feature_set.data_list[1], third.values)

        self.assertEqual(feature_set.classsizes_list, [0, 1])
        self.assertEqual(feature_set.classnames_list,
                         ['UNKNOWN1', 'UNKNOWN2'])

        feature_set.AddSignature(first, 0)

        check_counts(2)
        np.testing.assert_almost_equal(feature_set.data_list[0], first.values)
        np.testing.assert_almost_equal(feature_set.data_list[1], third.values)

        self.assertEqual(feature_set.classsizes_list, [1, 1])
        self.assertEqual(feature_set.classnames_list,
                         ['UNKNOWN1', 'UNKNOWN2'])

        # ContiguousDataMatrix() fails unless there are at least two images
        # per class, is this really necessary?  It is therefore only called
        # once two more signatures have been added.
        feature_set.AddSignature(second, 0)
        feature_set.AddSignature(fourth, 1)
        self.assertEqual(feature_set.classsizes_list, [2, 2])
        self.assertEqual(feature_set.num_images, 4)

        feature_set.ContiguousDataMatrix()
        self.assertEqual(feature_set.data_matrix.shape, (4, 2))

        # Rows are expected in class order: both class-0 signatures first.
        for row, signature in enumerate((first, second, third, fourth)):
            np.testing.assert_almost_equal(feature_set.data_matrix[row],
                                           signature.values)
Example #9
0
class TestWND5Classification(unittest.TestCase):
    """WND5 Classification

    Classifies one precalculated sample against the ``test-l.fit`` feature
    set with three different feature counts and compares the marginal
    probabilities, rounded to three decimals, with stored expectations.
    """

    epsilon = 0.00001

    # Locations of the reference files, all inside test_dir.
    test_sig_path = join(test_dir, 't1_s01_c05_ij-l_precalculated.sig')
    test_fit_path = join(test_dir, 'test-l.fit')
    test_feat_wght_path = join(test_dir, 'test_fit-l.weights')
    test_tif_path = join(test_dir, 't1_s01_c05_ij.tif')

    # Here are the correct values that Python API needs to return:
    # wndchrm classify -l -f1.0 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif  1.6e-27  0.083  0.917  *  4cell  3.835
    # wndchrm classify -l -f0.14765 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif  3.23e-27  0.076  0.924  *  4cell  3.848
    # wndchrm classify -l -f0.0685 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif  7.05e-27  0.069  0.931  *  4cell  3.862

    # Expected marginal probabilities, keyed by number of features used.
    # An alternative entry for 200 features, [0.044, 0.956], is disabled;
    # slight difference in marg probs due to the use of round() in Check().
    correct_marg_probs = {
        2919: [0.083, 0.917],
        431: [0.076, 0.924],
        200: [0.069, 0.931],
    }

    # Load the original files once and only once for all this class's tests
    feature_set = FeatureSet_Discrete.NewFromFitFile(test_fit_path)
    feature_set.Normalize()

    test_sample = Signatures.NewFromSigFile(test_sig_path, test_tif_path)
    test_sample.Normalize(feature_set)

    all_weights = FisherFeatureWeights.NewFromFile(test_feat_wght_path)

    # --------------------------------------------------------------------------
    def Check(self, num_feats=None):
        """Classify the shared sample using ``num_feats`` features.

        The class-level weights are thresholded to ``num_feats``, the
        feature set and the sample are reduced to the selected feature
        names, and the rounded marginal probabilities of the WND5 result
        are compared with ``correct_marg_probs[num_feats]``.
        """
        selected = self.all_weights.Threshold(num_feats)
        reduced_set = self.feature_set.FeatureReduce(selected.names)
        reduced_sample = self.test_sample.FeatureReduce(selected.names)

        classification = DiscreteImageClassificationResult.NewWND5(
            reduced_set, selected, reduced_sample)

        # Round to the three decimals the expected values are stored with.
        rounded = [round(probability, 3)
                   for probability in classification.marginal_probabilities]

        self.assertSequenceEqual(self.correct_marg_probs[num_feats], rounded)

    # --------------------------------------------------------------------------
    def test_WND5_all_features(self):
        """WND5 classification with entire large feature set (2919 features)"""
        self.Check(2919)

    # --------------------------------------------------------------------------
    def test_WND5_15percent_threshold(self):
        """WND5 classification with large feature set 15% threshold (431 features)"""
        self.Check(431)

    # --------------------------------------------------------------------------
    def test_WND5_200_feat_threshold(self):
        """WND5 classification with large feature set & 200 feature threshold"""
        self.Check(200)
Example #10
0
 def test_incompatibleFeatureVersion(self):
     """Adding a signature to a feature set whose version is "0.0" must raise ValueError."""
     signature = self.createSignature(1, 10)

     feature_set = FeatureSet_Discrete()
     feature_set.feature_vector_version = "0.0"

     # Only the AddSignature call itself is expected to raise.
     with self.assertRaises(ValueError):
         feature_set.AddSignature(signature, 1)
Example #11
0
 def test_incompatibleFeatureVersion(self):
     """Expect ValueError from AddSignature when the feature set's version is '0.0'."""
     sample = self.createSignature(1, 10)

     versioned_set = FeatureSet_Discrete()
     versioned_set.feature_vector_version = '0.0'

     # Setup stays outside the context manager so that only AddSignature
     # can satisfy the expectation.
     with self.assertRaises(ValueError):
         versioned_set.AddSignature(sample, 1)