Example #1
    def test_thresholdWeights(self):
        w = FisherFeatureWeights()
        w.names = ['a', 'b', 'c']
        w.values = [1.0, 2.0, 4.0]

        w1 = w.Threshold(2)
        self.assertEqual(w1.names, ['c', 'b'])
        # assertAlmostEqual cannot compare lists element-wise; the values
        # survive Threshold unchanged, so an exact comparison is sufficient
        self.assertEqual(w1.values, [4.0, 2.0])
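The expected output implies that Threshold(n) sorts features by descending weight and keeps the top n. A minimal standalone sketch of that behavior (the function below is inferred from this test alone, not taken from the library):

def threshold_sketch(names, values, n=None):
    """Keep the n highest-weighted features, best first."""
    # Pair each weight with its name and sort by weight, largest first
    ranked = sorted(zip(values, names), reverse=True)
    if n is not None:
        ranked = ranked[:n]
    return [name for _, name in ranked], [value for value, _ in ranked]

# threshold_sketch(['a', 'b', 'c'], [1.0, 2.0, 4.0], 2)
# -> (['c', 'b'], [4.0, 2.0])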
Example #4
    def test_NewFromFeatureSet(self):
        """Fisher score calculation"""

        feature_set = FeatureSet_Discrete.NewFromFitFile(self.test_fit_path)
        feature_set.Normalize()
        result_weights = FisherFeatureWeights.NewFromFeatureSet(feature_set)

        # test weights generated from test-l.fit:
        # wndchrm classify -l -f1.0 -vtest_fit-l.weights test-l.fit test-l.fit
        target_weights = FisherFeatureWeights.NewFromFile(
            self.test_feat_weight_path)

        for target_val, res_val in zip(target_weights.values,
                                       result_weights.values):
            self.assertAlmostEqual(target_val, res_val, delta=self.epsilon)
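Note that zip truncates to the shorter of the two sequences, so the loop above passes vacuously if result_weights comes back short. A length assertion before the loop would close that gap (a hypothetical strengthening, not part of the original test):

        self.assertEqual(len(target_weights.values), len(result_weights.values))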
Example #5
    def test_IfNotInterpolatable(self):
        """You can't graph predicted values if the classes aren't interpolatable."""

        testfilename = 'ShouldntBeGraphable.png'
        fitfilepath = wndchrm_test_dir + sep + 'test-l.fit'
        fs = FeatureSet_Discrete.NewFromFitFile(fitfilepath)
        train_set, test_set = fs.Split(randomize=False, quiet=True)
        train_set.Normalize()

        fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()
        reduced_train_set = train_set.FeatureReduce(fw.names)
        reduced_test_set = test_set.FeatureReduce(fw.names)
        # Normalize the reduced test set against the reduced training set
        reduced_test_set.Normalize(reduced_train_set, quiet=True)

        batch_result = DiscreteBatchClassificationResult.New(reduced_train_set,
                                                             reduced_test_set,
                                                             fw,
                                                             quiet=True)
        graph = PredictedValuesGraph(batch_result)

        tempfile = self.tempdir + sep + testfilename

        with self.assertRaises(ValueError):
            # If the graph call raises as expected, SaveToFile is never reached
            graph.RankOrderedPredictedValuesGraph()
            graph.SaveToFile(tempfile)
Example #6
    def test_predictDiscrete(self):
        s1 = self.createSignature(1, -1)
        s2 = self.createSignature(2, -2)
        s3 = self.createSignature(3, -3)
        s4 = self.createSignature(4, -4)

        trainFts = FeatureSet_Discrete()
        trainFts.AddSignature(s1, 0)
        trainFts.AddSignature(s2, 0)
        trainFts.AddSignature(s3, 1)
        trainFts.AddSignature(s4, 1)
        tmp = trainFts.ContiguousDataMatrix()

        # Values are chosen so that different weights give different predictions
        s5 = self.createSignature(0, -5)
        s6 = self.createSignature(5, 0)

        testFts = FeatureSet_Discrete()
        testFts.AddSignature(s5, 0)
        testFts.AddSignature(s6, 0)
        tmp = testFts.ContiguousDataMatrix()

        weights = FisherFeatureWeights()
        weights.names = ['ft [0]', 'ft [1]']
        weights.values = [2.0, 1.0]

        pred = DiscreteBatchClassificationResult.New(trainFts, testFts,
                                                     weights)
        self.assertEqual(len(pred.individual_results), 2)
        r1, r2 = pred.individual_results
        np.testing.assert_almost_equal(r1.marginal_probabilities,
                                       [0.975, 0.025],
                                       decimal=3)
        np.testing.assert_almost_equal(r2.marginal_probabilities,
                                       [0.025, 0.975],
                                       decimal=3)

        weights = FisherFeatureWeights()
        weights.names = ['ft [0]', 'ft [1]']
        weights.values = [1.0, 2.0]

        pred = DiscreteBatchClassificationResult.New(trainFts, testFts,
                                                     weights)
        self.assertEqual(len(pred.individual_results), 2)
        r1, r2 = pred.individual_results
        np.testing.assert_almost_equal(r1.marginal_probabilities,
                                       [0.025, 0.975],
                                       decimal=3)
        np.testing.assert_almost_equal(r2.marginal_probabilities,
                                       [0.975, 0.025],
                                       decimal=3)
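For orientation: as published, WND5 ("weighted neighbor distance", exponent 5) scores a test sample against each class by computing a feature-weighted squared distance to every training sample, converting each distance d into a similarity d**-5, and normalizing the per-class mean similarities into marginal probabilities. A rough numpy sketch of that scheme, not the library's actual code path:

import numpy as np

def wnd5_marginals_sketch(class_train_sets, weights, x, p=5.0):
    """Rough WND5: weighted distance -> similarity d**-p -> normalized scores."""
    weights = np.asarray(weights, dtype=float)
    x = np.asarray(x, dtype=float)
    scores = []
    for train in class_train_sets:  # one (n_samples, n_features) array per class
        train = np.asarray(train, dtype=float)
        # Feature-weighted squared distance from x to each training sample
        d = np.sum((weights ** 2) * (train - x) ** 2, axis=1)
        scores.append(np.mean(d ** -p))  # assumes no exact matches (all d > 0)
    scores = np.array(scores)
    return scores / scores.sum()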
Example #7
    def test_fisherFeatureWeights(self):
        sig1, sig2, sig3, sig4 = self.createSignatures()

        fts = FeatureSet_Discrete()
        fts.AddSignature(sig1, 0)
        fts.AddSignature(sig2, 0)
        fts.AddSignature(sig3, 1)
        fts.AddSignature(sig4, 1)
        tmp = fts.ContiguousDataMatrix()

        # TODO: weight[1]==0, presumably because the intra-class variance=0,
        # even though feature[1] is a perfect discriminator?
        fts.Normalize()

        wts = FisherFeatureWeights.NewFromFeatureSet(fts)

        np.testing.assert_almost_equal(wts.values, [4.0, 0.0])
        self.assertEqual(wts.names, ['ft [0]', 'ft [1]'])
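One plausible reading of that TODO: the Fisher score divides between-class variance by mean within-class variance, and an implementation that zeroes the score when the denominator is zero (rather than returning infinity) would yield exactly weight[1] == 0 for a feature whose classes have no internal spread, even when that feature discriminates perfectly. A hedged numpy sketch of that interpretation:

import numpy as np

def fisher_score_sketch(class_columns):
    """Between-class variance over mean within-class variance, one feature."""
    means = np.array([np.mean(c) for c in class_columns])
    within = np.mean([np.var(c) for c in class_columns])
    if within == 0.0:
        return 0.0  # guard against division by zero; matches the observed 0.0
    return np.var(means) / within

# e.g. feature values [1.0, 2.0] in class 0 and [3.0, 4.0] in class 1:
# fisher_score_sketch([np.array([1.0, 2.0]), np.array([3.0, 4.0])])
# -> 1.0 / 0.25 = 4.0, in line with the 4.0 asserted above
# (the library's exact normalization and scaling may differ)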
Example #9
class TestGraphs(unittest.TestCase):
    """Test WND-CHARM's graph-making functionality."""

    fs = CreateArtificialFeatureSet_Discrete(n_samples=1000,
                                             n_classes=10,
                                             initial_noise_sigma=100,
                                             noise_gradient=10,
                                             random_state=43)
    train_set, test_set = fs.Split(randomize=False, quiet=True)
    train_set.Normalize(quiet=True)
    fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()

    reduced_train_set = train_set.FeatureReduce(fw.names)
    reduced_test_set = test_set.FeatureReduce(fw.names)
    reduced_test_set.Normalize(reduced_train_set, quiet=True)

    batch_result = DiscreteBatchClassificationResult.New(reduced_train_set,
                                                         reduced_test_set,
                                                         fw,
                                                         quiet=True)

    def setUp(self):
        self.tempdir = mkdtemp()

    def tearDown(self):
        rmtree(self.tempdir)

    def CompareGraphs(self, graph, testfilename):
        """Helper function to check output graphs"""

        tempfile = self.tempdir + sep + testfilename
        graph.SaveToFile(tempfile)

        try:
            self.assertTrue(filecmp.cmp(testfilename, tempfile))
        except AssertionError:
            print "Files not equal: {0} and {1}".format(testfilename, tempfile)
            raise

    @unittest.skipIf(HasMatplotlib, "Skipped if matplotlib IS installed")
    def test_ErrMsgIfMatplotibNotInstalled(self):
        """Fail gracefully with informative message if matplotlib"""

        graph = PredictedValuesGraph(self.batch_result)
        with self.assertRaises(ImportError):
            graph.RankOrderedPredictedValuesGraph()
        with self.assertRaises(ImportError):
            graph.KernelSmoothedDensityGraph()

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_RankOrderedFromBatchClassificationResult(self):
        """Rank Ordered Predicted values graph from a single split"""

        testfilename = 'test_graph_rank_ordered.png'
        graph = PredictedValuesGraph(self.batch_result)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_KernelSmoothedFromBatchClassificationResult(self):
        """Kernel Smoothed Probability density graph from a single split"""

        testfilename = 'test_graph_kernel_smoothed.png'
        graph = PredictedValuesGraph(self.batch_result)
        graph.KernelSmoothedDensityGraph()

        self.CompareGraphs(graph, testfilename)

    @unittest.skip("Skip until ShuffleSplit has a RandomState param to pass in"
                   )
    def test_FromDiscreteClassificationExperimentResults(self):
        """Rank Ordered Predicted values graph from an experiment result (multiple splits)"""

        testfilename = 'test_graph_rank_ordered_experiment.png'

        small_fs = CreateArtificialFeatureSet_Discrete(n_samples=100,
                                                       n_classes=5,
                                                       initial_noise_sigma=100,
                                                       noise_gradient=10,
                                                       random_state=42)
        experiment = DiscreteClassificationExperimentResult.NewShuffleSplit(
            small_fs, quiet=True)
        # Graph the multi-split experiment result built above
        graph = PredictedValuesGraph(experiment)
        graph.RankOrderedPredictedValuesGraph()
        # graph.SaveToFile( tempfile ) # remove after RandomState for ShuffleSplit is implemented
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_FromHTML(self):
        """Rank Ordered Predicted values graph from an experiment result (multiple splits)"""

        testfilename = 'test_graph_fromHTML.png'
        # Inflate the zipped html file into a temp file
        import zipfile

        #zipped_file_path = pychrm_test_dir + sep + 'c_elegans_terminal_bulb.html'
        #import zlib
        #zf = zipfile.ZipFile( zipped_file_path + '.zip', mode='w' )
        #zf.write( zipped_file_path, compress_type=zipfile.ZIP_DEFLATED )
        #zf.close()

        zipped_file_path = pychrm_test_dir + sep + 'c_elegans_terminal_bulb.html.zip'
        zf = zipfile.ZipFile(zipped_file_path, mode='r')
        zf.extractall(self.tempdir)
        tempfile = self.tempdir + sep + testfilename
        htmlfilepath = self.tempdir + sep + zf.namelist()[0]
        exp_result = DiscreteClassificationExperimentResult.NewFromHTMLReport(
            htmlfilepath)
        graph = PredictedValuesGraph(exp_result)
        graph.RankOrderedPredictedValuesGraph()
        graph.SaveToFile(tempfile)
        try:
            self.assertTrue(filecmp.cmp(testfilename, tempfile))
        except AssertionError:
            print "Files not equal: {0} and {1}".format(testfilename, tempfile)
            raise

    @unittest.skip(
        "Skip until test training set w/ uninterpolatable class labels is made avail."
    )
    #@unittest.skipUnless( HasMatplotlib, "Skipped if matplotlib IS NOT installed" )
    def test_IfNotInterpolatable(self):
        """You can't graph predicted values if the classes aren't interpolatable."""

        testfilename = 'ShouldntBeGraphable.png'
        fitfilepath = wndchrm_test_dir + sep + 'test-l.fit'
        fs = FeatureSet_Discrete.NewFromFitFile(fitfilepath)
        train_set, test_set = fs.Split(randomize=False, quiet=True)
        train_set.Normalize()

        fw = FisherFeatureWeights.NewFromFeatureSet(train_set).Threshold()
        reduced_train_set = train_set.FeatureReduce(fw.names)
        reduced_test_set = test_set.FeatureReduce(fw.names)
        # Normalize the reduced test set against the reduced training set,
        # matching the class-level setup above
        reduced_test_set.Normalize(reduced_train_set, quiet=True)

        batch_result = DiscreteBatchClassificationResult.New(reduced_train_set,
                                                             reduced_test_set,
                                                             fw,
                                                             quiet=True)
        graph = PredictedValuesGraph(batch_result)

        tempfile = self.tempdir + sep + testfilename

        with self.assertRaises(ValueError):
            graph.RankOrderedPredictedValuesGraph()
            graph.SaveToFile(tempfile)
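A side note on CompareGraphs above: byte-exact filecmp.cmp comparisons against checked-in PNGs tend to break whenever the matplotlib version changes. A tolerance-based pixel comparison is a more forgiving alternative; a minimal sketch, assuming both paths are readable PNGs:

import matplotlib.image as mpimg
import numpy as np

def images_close(path_a, path_b, tolerance=1e-3):
    """Compare two images pixel-wise rather than byte-wise."""
    a = mpimg.imread(path_a)  # PNGs load as float arrays in [0, 1]
    b = mpimg.imread(path_b)
    return a.shape == b.shape and np.allclose(a, b, atol=tolerance)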
Example #10
class TestWND5Classification(unittest.TestCase):
    """WND5 Classification"""

    epsilon = 0.00001

    # Define paths to original files
    test_sig_path = join(test_dir, 't1_s01_c05_ij-l_precalculated.sig')
    test_fit_path = join(test_dir, 'test-l.fit')
    test_feat_wght_path = join(test_dir, 'test_fit-l.weights')
    test_tif_path = join(test_dir, 't1_s01_c05_ij.tif')

    # Here are the correct values that Python API needs to return:
    # wndchrm classify -l -f1.0 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif	1.6e-27	0.083	0.917	*	4cell	3.835
    # wndchrm classify -l -f0.14765 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif	3.23e-27	0.076	0.924	*	4cell	3.848
    # wndchrm classify -l -f0.0685 test-l.fit t1_s01_c05_ij.tif
    # t1_s01_c05_ij.tif	7.05e-27	0.069	0.931	*	4cell	3.862

    correct_marg_probs = {}
    correct_marg_probs[2919] = [0.083, 0.917]
    correct_marg_probs[431] = [0.076, 0.924]
    #correct_marg_probs[200] = [0.044, 0.956]
    # slight difference in marg probs due to my use of round() below
    correct_marg_probs[200] = [0.069, 0.931]

    # Load the original files once and only once for all this class's tests
    feature_set = FeatureSet_Discrete.NewFromFitFile(test_fit_path)
    feature_set.Normalize()

    test_sample = Signatures.NewFromSigFile(test_sig_path, test_tif_path)
    test_sample.Normalize(feature_set)

    all_weights = FisherFeatureWeights.NewFromFile(test_feat_wght_path)

    # --------------------------------------------------------------------------
    def Check(self, num_feats=None):
        weights = self.all_weights.Threshold(num_feats)
        feat_set = self.feature_set.FeatureReduce(weights.names)
        sample = self.test_sample.FeatureReduce(weights.names)
        result = DiscreteImageClassificationResult.NewWND5(
            feat_set, weights, sample)
        result_marg_probs = [round(val, 3)
                             for val in result.marginal_probabilities]
        self.assertSequenceEqual(self.correct_marg_probs[num_feats],
                                 result_marg_probs)

    # --------------------------------------------------------------------------
    def test_WND5_all_features(self):
        """WND5 classification with entire large feature set (2919 features)"""
        self.Check(2919)

    # --------------------------------------------------------------------------
    def test_WND5_15percent_threshold(self):
        """WND5 classification with large feature set 15% threshold (431 features)"""
        self.Check(431)

    # --------------------------------------------------------------------------
    def test_WND5_200_feat_threshold(self):
        """WND5 classification with large feature set & 200 feature threshold"""
        self.Check(200)
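The three thresholds map directly onto the -f fractions quoted in the class comment: each count is the fraction of the 2919-feature total, rounded to the nearest integer.

total = 2919
for fraction in (1.0, 0.14765, 0.0685):
    print(int(round(fraction * total)))  # prints 2919, then 431, then 200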