Example #1
0
    def test_NewFromFeatureSet(self):
        """Fisher score calculation, plus the FisherFeatureWeights slice operator.

        Computes Fisher weights from the normalized test fit file, compares
        them value-by-value against the reference weights file, then checks
        that contiguous and stepped slices of the weights object line up with
        the corresponding entries of the unsliced object.
        """

        fs = FeatureSpace.NewFromFitFile(
            self.test_fit_path).Normalize(inplace=True)
        fw = FisherFeatureWeights.NewFromFeatureSpace(fs)

        # test weights generated from test-l.fit:
        # wndchrm classify -l -f1.0 -vtest_fit-l.weights test-l.fit test-l.fit
        target_fw = FisherFeatureWeights.NewFromFile(
            self.test_feat_weight_path)

        for target_val, res_val in zip(target_fw.values, fw.values):
            self.assertAlmostEqual(target_val, res_val, delta=self.epsilon)

        # test slice operator

        orig_len = len(fw)

        # Contiguous slice: the first 10 weights.
        sliced = fw[:10]
        self.assertEqual(len(sliced), 10)
        self.assertEqual(len(sliced.feature_names), 10)
        self.assertEqual(len(sliced.values), 10)

        for i in range(10):
            self.assertEqual(sliced.feature_names[i], fw.feature_names[i])
            self.assertEqual(sliced.values[i], fw.values[i])

        # Stepped slice. The expected source indices are derived by applying
        # the very same slice to the index list, and the length is asserted
        # explicitly so that a short or empty result cannot pass vacuously.
        expected_indices = list(range(orig_len))[50:100:2]
        sliced = fw[50:100:2]
        self.assertEqual(len(sliced), len(expected_indices))
        self.assertEqual(len(sliced.feature_names), len(expected_indices))
        self.assertEqual(len(sliced.values), len(expected_indices))

        for i, j in enumerate(expected_indices):
            self.assertEqual(sliced.feature_names[i], fw.feature_names[j])
            self.assertEqual(sliced.values[i], fw.values[j])
Example #2
0
    def test_NewFromFeatureSet(self):
        """Fisher scores from the normalized fit file match the reference weights."""

        normalized_space = FeatureSpace.NewFromFitFile(self.test_fit_path)
        normalized_space.Normalize(inplace=True)
        computed = FisherFeatureWeights.NewFromFeatureSpace(normalized_space)

        # Reference weights were generated from test-l.fit with:
        # wndchrm classify -l -f1.0 -vtest_fit-l.weights test-l.fit test-l.fit
        expected = FisherFeatureWeights.NewFromFile(self.test_feat_weight_path)

        # Compare pairwise, within the class-wide tolerance.
        value_pairs = zip(expected.values, computed.values)
        for expected_val, computed_val in value_pairs:
            self.assertAlmostEqual(
                expected_val, computed_val, delta=self.epsilon)
Example #3
0
    def test_DiscreteTrainTestSplitWithTiling( self ):
        """Uses a curated subset of the IICBU 2008 Lymphoma dataset, preprocessed as follows:
        auto-deconvolved, eosin channel only, tiled 5x6, 3 classes, 10 imgs per class,
        300 samples per class.

        Smoke test: loads the tiled fit file, splits it into train/test sets,
        normalizes and feature-reduces both. It makes no assertions beyond
        "nothing raised".
        """

        import zipfile
        from numpy.random import RandomState

        zipped_file_path = pychrm_test_dir + sep + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_t5x6_v3.2features.fit.zip'

        # The temp dir is created before the try block so that *any* failure
        # afterwards (including a failed extraction) still removes it.
        tempdir = mkdtemp()
        try:
            # Inflate the zipped test fit into the temp dir; the context
            # manager closes the archive handle once extraction is done.
            with zipfile.ZipFile( zipped_file_path, mode='r' ) as zf:
                zf.extractall( tempdir )
                fitfilepath = tempdir + sep + zf.namelist()[0]

            fs = FeatureSpace.NewFromFitFile( fitfilepath, tile_num_rows=5, tile_num_cols=6 )

            # Fixed seed so the split is reproducible.
            prng = RandomState(42)
            train, test = fs.Split( random_state=prng, quiet=True )
            train.Normalize( inplace=True, quiet=True )
            fw = FisherFeatureWeights.NewFromFeatureSpace( train ).Threshold()
            train.FeatureReduce( fw, inplace=True )
            test.FeatureReduce( fw, inplace=True ).Normalize( train, inplace=True, quiet=True )

        finally:
            rmtree( tempdir )
    def test_FitOnFitClassification(self):
        """Compare a Python-API fit-on-fit classification against an HTML report.

        Classifies test-l.fit against itself using the top 438 Fisher-ranked
        features, then checks each per-sample result against the matching
        result parsed from the reference HTML report.
        """

        fitfile_path = wndchrm_test_dir + sep + 'test-l.fit'
        fs = FeatureSpace.NewFromFitFile(fitfile_path)
        fs.Normalize(inplace=True, quiet=True)
        fw = FisherFeatureWeights.NewFromFeatureSpace(fs).Threshold(438)
        fw.Print(50)
        fs.FeatureReduce(fw, inplace=True)
        pychrm_split = FeatureSpaceClassification.NewWND5(fs,
                                                          fs,
                                                          fw,
                                                          quiet=False)

        from wndcharm.FeatureSpacePredictionExperiment import FeatureSpaceClassificationExperiment
        html_path = pychrm_test_dir + sep + 'test-l_training_error_result.html'
        html_exp = FeatureSpaceClassificationExperiment.NewFromHTMLReport(
            html_path, quiet=False)
        # single split in this html
        html_split = html_exp.individual_results[0]

        result_pairs = zip(html_split.individual_results,
                           pychrm_split.individual_results)
        for i, (html_result, pychrm_result) in enumerate(result_pairs):
            # Same equality test assertEqual performs, but the diagnostic is
            # only formatted on a mismatch and is actually reported with the
            # failure (it used to be built and then thrown away).
            if not html_result == pychrm_result:
                outstr = "Error in comparison # {0}:\n".format(i)
                outstr += "HTML result:\n{0}\n Python API res:\n{1}".format(
                    html_result, pychrm_result)
                self.fail(outstr)
Example #5
0
    def test_WND5_all_features(self):
        """WND5 marginal probabilities for one sample at three feature counts."""
        tolerance = 0.00001

        # Locations of the reference inputs
        sig_path = join(test_dir, 't1_s01_c05_ij-l_precalculated.sig')
        fit_path = join(test_dir, 'test-l.fit')
        weights_path = join(test_dir, 'test_fit-l.weights')
        tif_path = join(test_dir, 't1_s01_c05_ij.tif')

        # Values the Python API has to reproduce, keyed by number of features:
        # wndchrm classify -l -f0.75 test-l.fit t1_s01_c05_ij.tif
        # t1_s01_c05_ij.tif    1.6e-27    0.083    0.917    *    4cell    3.835
        # wndchrm classify -l test-l.fit t1_s01_c05_ij.tif
        # t1_s01_c05_ij.tif    3.19e-27    0.076    0.924    *    4cell    3.848
        # wndchrm classify -l -f0.05 test-l.fit t1_s01_c05_ij.tif
        # t1_s01_c05_ij.tif    1.06e-26    0.066    0.934    *    4cell    3.869
        expected_by_count = {
            2189: [0.083, 0.917],
            438: [0.076, 0.924],
            146: [0.066, 0.934],
        }

        # Normalizing the training set must leave the feature names untouched.
        training_set = FeatureSpace.NewFromFitFile(fit_path)
        names_before = training_set.feature_names
        training_set.Normalize()
        names_after = training_set.feature_names
        self.assertSequenceEqual(names_before, names_after)

        # The sample carries the same features as the training set and is
        # normalized against it.
        unknown = FeatureVector(source_filepath=tif_path, long=True)
        unknown.LoadSigFile(sig_path)
        self.assertSequenceEqual(training_set.feature_names,
                                 unknown.feature_names)
        unknown.Normalize(training_set)

        full_weights = FisherFeatureWeights.NewFromFile(weights_path)

        for feature_count, expected_probs in expected_by_count.items():
            top_weights = full_weights.Threshold(feature_count)
            reduced_training = training_set.FeatureReduce(top_weights)
            reduced_unknown = unknown.FeatureReduce(top_weights)
            outcome = SingleSampleClassification.NewWND5(
                reduced_training, top_weights, reduced_unknown)

            # The reference values above carry three decimal places.
            rounded_probs = []
            for prob in outcome.marginal_probabilities:
                rounded_probs.append(round(prob, 3))

            for expected, actual in zip(expected_probs, rounded_probs):
                self.assertAlmostEqual(expected, actual, delta=tolerance)
Example #6
0
    def test_DiscreteTrainTestSplitNoTiling( self ):
        """Train/test split and WND5 classification on the binucleate test set.

        Makes no assertions; the test passes as long as nothing raises.
        """

        from numpy.random import RandomState

        whole_space = FeatureSpace.NewFromFitFile(
            wndchrm_test_dir + sep + 'test-l.fit' )

        # Seeded generator keeps the split reproducible.
        seeded_rng = RandomState(42)
        train_part, test_part = whole_space.Split(
            random_state=seeded_rng, quiet=True )

        # Rank features on the normalized training half only.
        train_part.Normalize( quiet=True )
        all_weights = FisherFeatureWeights.NewFromFeatureSpace( train_part )
        top_weights = all_weights.Threshold()
        train_reduced = train_part.FeatureReduce( top_weights )

        # The test half is reduced first, then normalized against the
        # reduced training half.
        test_reduced = test_part.FeatureReduce( top_weights )
        test_reduced.Normalize( train_reduced, quiet=True )

        batch_result = FeatureSpaceClassification.NewWND5(
            train_reduced, test_reduced, top_weights, quiet=True )
Example #7
0
    def test_IfNotInterpolatable( self ):
        """You can't graph predicted values if the classes aren't interpolatable.

        Builds a small two-class, non-interpolatable artificial feature space,
        classifies a train/test split of it, and expects constructing a
        PredictedValuesGraph from the result to raise ValueError.
        """

        small_fs = CreateArtificialFeatureSpace_Discrete(
                        n_samples=20, n_classes=2, random_state=42, interpolatable=False )
        train_set, test_set = small_fs.Split( random_state=False, quiet=True )
        train_set.Normalize()

        fw = FisherFeatureWeights.NewFromFeatureSpace( train_set ).Threshold()
        reduced_train_set = train_set.FeatureReduce( fw )
        reduced_test_set = test_set.FeatureReduce( fw )
        # Normalize the set that is actually classified below. This used to
        # normalize the unreduced test_set, leaving reduced_test_set untouched.
        reduced_test_set.Normalize( reduced_train_set, quiet=True )

        batch_result = FeatureSpaceClassification.NewWND5(
                                    reduced_train_set, reduced_test_set, fw, quiet=True )
        with self.assertRaises( ValueError ):
            PredictedValuesGraph( batch_result )
    def test_TiledTrainTestSplit(self):
        """Uses a fake FeatureSpace

        Builds a 10-class artificial feature space with 4 samples per group,
        classifies a train/test split of it with WND5, and requires every
        class's self-similarity (the diagonal of the similarity matrix) to be
        exactly 1.
        """

        from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete
        fs_kwargs = {
            'name': "DiscreteArtificialFS 10-class",
            'n_samples': 1000,
            'n_classes': 10,  # 100 samples per class
            'num_features_per_signal_type': 25,
            'initial_noise_sigma': 40,
            'noise_gradient': 20,
            'n_samples_per_group': 4,  # 25 images, 2x2 tiling scheme
            'interpolatable': True,
            'random_state': 43,
            'singularity': False,
            'clip': False,
        }

        fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

        train, test = fs.Split(random_state=False, quiet=True)
        train.Normalize(inplace=True, quiet=True)
        fw = FisherFeatureWeights.NewFromFeatureSpace(train).Threshold()

        train.FeatureReduce(fw, inplace=True)
        test.FeatureReduce(fw, inplace=True,
                           quiet=True).Normalize(train,
                                                 inplace=True,
                                                 quiet=True)

        result = FeatureSpaceClassification.NewWND5(train, test, fw)
        result.Print()

        for class_name in result.test_set.class_names:
            # Look the value up once and put the diagnostic in the assertion
            # message, instead of a bare except that repeated the lookup and
            # printed with a Python-2-only print statement.
            self_similarity = result.similarity_matrix[class_name][class_name]
            self.assertEqual(
                self_similarity, 1.0,
                msg="offending class: {0}, val: {1}".format(
                    class_name, self_similarity))
class TestGraphs(unittest.TestCase):
    """Test WND-CHARM's graph-making functionality."""

    # Shared fixture. It is built once, when this class body executes, and is
    # read by the tests through ``self.batch_result``.
    fs_kwargs = {
        'name': "DiscreteArtificialFS 10-class",
        'n_samples': 1000,
        'n_classes': 10,
        'num_features_per_signal_type': 25,
        'initial_noise_sigma': 40,
        'noise_gradient': 20,
        'n_samples_per_group': 1,
        'interpolatable': True,
        'random_state': 43,
        'singularity': False,
        'clip': False,
    }

    fs = CreateArtificialFeatureSpace_Discrete(**fs_kwargs)

    train_set, test_set = fs.Split(random_state=False, quiet=True)
    train_set.Normalize(quiet=True)
    fw = FisherFeatureWeights.NewFromFeatureSpace(train_set).Threshold()

    reduced_train_set = train_set.FeatureReduce(fw)
    reduced_test_set = test_set.FeatureReduce(fw)
    reduced_test_set.Normalize(reduced_train_set, quiet=True)

    batch_result = FeatureSpaceClassification.NewWND5(reduced_train_set,
                                                      reduced_test_set,
                                                      fw,
                                                      quiet=True)

    def setUp(self):
        """Create a scratch directory for the test to use."""
        self.tempdir = mkdtemp()

    def tearDown(self):
        """Remove the scratch directory created in setUp."""
        rmtree(self.tempdir)

    @staticmethod
    def _MakeSmallFeatureSpace():
        """Return the small 5-class artificial feature space used for multi-split tests."""
        return CreateArtificialFeatureSpace_Discrete(
            name="DiscreteArtificialFS RANK ORDERED SHUFFLE SPLIT",
            n_samples=100,  # smaller
            n_classes=5,  # smaller, 20 samples per class
            num_features_per_signal_type=10,  # smaller
            initial_noise_sigma=50,
            noise_gradient=20,
            n_samples_per_group=1,
            interpolatable=True,
            random_state=42,
            singularity=False,
            clip=False)

    def CompareGraphs(self, graph, testfilename):
        """Helper function to check output graphs

        graph -- object whose ``figure`` attribute is the matplotlib figure
            to inspect.
        testfilename -- path of the .npy file holding the reference
            coordinates.

        Fails the test when the figure has neither lines nor point
        collections, or when its coordinates do not match the reference array
        in shape and (within np.allclose tolerance) in value.
        """

        # Uncomment to see what graph looks like!
        #graph.SaveToFile( testfilename + 'GRAPH.png' )

        # We used to output the graphs to a png file and do a binary diff on a reference png
        # but there are superficial differences between matplotlib versions that result in
        # the points still being in the right place, but the font is slightly larger,
        # or the text is subtly offset. So now, we interrogate the matplotlib.figure
        # object and retrieve its coordinates and check them against blessed numpy arrays
        # saved to a npy file.

        axessubplot = graph.figure.gca()

        if len(axessubplot.lines) > 0:
            # line plot
            try:
                coord_groups = [
                    group._path._vertices for group in axessubplot.lines
                ]
            except AttributeError:
                # older version of matplotlib didn't include leading underscore in attribute
                # "_vertices"
                coord_groups = [
                    group._path.vertices for group in axessubplot.lines
                ]
        elif len(axessubplot.collections) > 0:
            # scatter plot
            coord_groups = [
                group._offsets for group in axessubplot.collections
            ]
        else:
            self.fail("Graph doesn't have any lines nor points")

        all_coords = np.dstack(tuple(coord_groups))

        # uncomment to replace old coords
        #np.save( testfilename, all_coords )
        reference_array = np.load(testfilename)

        # Check the shape first: np.allclose would either raise ValueError or
        # silently broadcast when the two arrays differ in shape. Exact
        # equality implies allclose, so a single allclose check suffices.
        shapes_match = all_coords.shape == reference_array.shape
        if not (shapes_match and np.allclose(all_coords, reference_array)):
            errmsg = 'Reference graph "{0}" coordinates '.format(testfilename) + \
                'do not concur with coordinates generated by this test.'
            self.fail(errmsg)

    @unittest.skipIf(HasMatplotlib, "Skipped if matplotlib IS installed")
    def test_ErrMsgIfMatplotibNotInstalled(self):
        """Fail gracefully with informative message if matplotlib is not installed"""

        graph = PredictedValuesGraph(self.batch_result)
        with self.assertRaises(ImportError):
            graph.RankOrderedPredictedValuesGraph()
        with self.assertRaises(ImportError):
            graph.KernelSmoothedDensityGraph()

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    @unittest.expectedFailure
    def test_RankOrderedFromBatchClassificationResult(self):
        """Rank Ordered Predicted values graph from a single split"""

        testfilename = 'test_graph_rank_ordered_interpolated_discrete.npy'
        graph = PredictedValuesGraph(self.batch_result)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    @unittest.expectedFailure
    def test_KernelSmoothedFromBatchClassificationResult(self):
        """Kernel Smoothed Probability density graph from a single split"""

        testfilename = 'test_graph_kernel_smoothed.npy'
        graph = PredictedValuesGraph(self.batch_result)
        graph.KernelSmoothedDensityGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_FromDiscreteClassificationExperimentResults(self):
        """Rank Ordered Predicted values graph from an experiment result (multiple splits)"""

        testfilename = 'test_graph_rank_ordered_experiment.npy'

        # Make a smaller featureset to do multiple splits
        small_fs = self._MakeSmallFeatureSpace()

        ss_kwargs = {
            'quiet': True,
            'n_iter': 10,
            'train_size': 18,  # per-class
            'test_size': 2,  # per-class
            'random_state': 42,
        }
        exp = FeatureSpaceClassificationExperiment.NewShuffleSplit(
            small_fs, **ss_kwargs)
        graph = PredictedValuesGraph(exp, use_averaged_results=False)
        graph.RankOrderedPredictedValuesGraph()
        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_HyperparameterOptimizationGraph(self):
        """Accuracy vs. # features or samples with and without LDA feature space transform

        Smoke test: runs the grid search over 'features' and then over
        'samples'; nothing is compared against a reference.
        """

        # Make a smaller featureset to do multiple splits
        small_fs = self._MakeSmallFeatureSpace()

        ss_kwargs = {
            'quiet': False,
            'n_iter': 10,
            'train_size': 18,  # per-class
            'test_size': 2,  # per-class
            'random_state': 42,
            'show_raw': True,
            'show_lda': True,
            'param': 'features',
            'text_angle': -30,
        }

        graph = HyperparameterOptimizationGraph(small_fs)
        graph.GridSearch(**ss_kwargs)

        # Same search again, this time varying the number of samples.
        ss_kwargs['param'] = 'samples'
        graph = HyperparameterOptimizationGraph(small_fs)
        graph.GridSearch(**ss_kwargs)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_FromHTML(self):
        """Rank Ordered Predicted values graph from a zipped HTML report"""

        testfilename = 'test_graph_fromHTML.npy'
        import zipfile

        # Inflate the zipped html file into this test's temp dir; the context
        # manager closes the archive handle afterwards.
        zipped_file_path = pychrm_test_dir + sep + 'c_elegans_terminal_bulb.html.zip'
        with zipfile.ZipFile(zipped_file_path, mode='r') as zf:
            zf.extractall(self.tempdir)
            htmlfilepath = self.tempdir + sep + zf.namelist()[0]

        graph = PredictedValuesGraph.NewFromHTMLReport(
            htmlfilepath, use_averaged_results=False)
        graph.RankOrderedPredictedValuesGraph()

        self.CompareGraphs(graph, testfilename)

    @unittest.skipUnless(HasMatplotlib,
                         "Skipped if matplotlib IS NOT installed")
    def test_IfNotInterpolatable(self):
        """You can't graph predicted values if the classes aren't interpolatable."""

        small_fs = CreateArtificialFeatureSpace_Discrete(n_samples=20,
                                                         n_classes=2,
                                                         random_state=42,
                                                         interpolatable=False)
        train_set, test_set = small_fs.Split(random_state=False, quiet=True)
        train_set.Normalize()

        fw = FisherFeatureWeights.NewFromFeatureSpace(train_set).Threshold()
        reduced_train_set = train_set.FeatureReduce(fw)
        reduced_test_set = test_set.FeatureReduce(fw)
        # Normalize the set that is actually classified below, the same way
        # the class-level fixture does. This used to normalize the unreduced
        # test_set, leaving reduced_test_set untouched.
        reduced_test_set.Normalize(reduced_train_set, quiet=True)

        batch_result = FeatureSpaceClassification.NewWND5(reduced_train_set,
                                                          reduced_test_set,
                                                          fw,
                                                          quiet=True)
        with self.assertRaises(ValueError):
            PredictedValuesGraph(batch_result)
    def test_FitOnFit(self):
        """Uses a curated subset of the IICBU 2008 Lymphoma dataset, preprocessed as follows:
        auto-deconvolved, eosin channel only, tiled 5x6, 3 classes, 10 imgs per class,
        300 samples per class.

        Runs a fit-on-fit WND5 classification twice, first without tiling and
        then with 30-sample groups, and compares the marginal probabilities
        against the matching reference HTML reports to within 0.003.
        """

        import zipfile
        from numpy.testing import assert_allclose

        n_classes = 3
        num_samples_per_group = 30  # one group per image: 5 x 6 tiles

        def flatten_marg_probs(results, report_precision=False):
            """Concatenate every result's marginal probabilities into one 1-D array.

            With report_precision=True each value is first rounded to three
            decimal places, because the HTML report only has 3 decimal places.
            """
            flat = np.empty((n_classes * len(results)))
            for i, single_result in enumerate(results):
                probs = single_result.marginal_probabilities
                if report_precision:
                    probs = [float("{0:0.3f}".format(val)) for val in probs]
                flat[i * n_classes:(i + 1) * n_classes] = probs
            return flat

        zipped_file_path = pychrm_test_dir + sep + 'lymphoma_iicbu2008_subset_EOSIN_ONLY_t5x6_v3.2features.fit.zip'

        # The temp dir is created before the try block so that any failure
        # afterwards (including a failed extraction) still removes it.
        tempdir = mkdtemp()
        try:
            # Inflate the zipped test fit into the temp dir; the context
            # manager closes the archive handle once extraction is done.
            with zipfile.ZipFile(zipped_file_path, mode='r') as zf:
                zf.extractall(tempdir)
                fitfilepath = tempdir + sep + zf.namelist()[0]

            # Do fit on fit WITHOUT tiling and compare with fit on fit results
            # generated with wndchrm 1.60
            fs = FeatureSpace.NewFromFitFile(fitfilepath).Normalize(
                inplace=True, quiet=True)
            fw = FisherFeatureWeights.NewFromFeatureSpace(fs).Threshold()
            fs.FeatureReduce(fw, inplace=True)
            pychrm_res = FeatureSpaceClassification.NewWND5(fs, fs, fw)
            pychrm_res.Print()

            self.maxDiff = None

            html_path = pychrm_test_dir + sep + 'lymphoma_iicbu2008_subset_eosin_t5x6_v3.2feats_REFERENCE_RESULTS_900_samples_TRAINING_ERROR.html'
            wres = FeatureSpaceClassificationExperiment.NewFromHTMLReport(
                html_path)
            wres.Print()
            # only 1 split in fit-on-fit
            wc_batch_result = wres.individual_results[0]

            # Comparing the result objects with assertSequenceEqual takes WAY
            # too long, so compare flattened marginal probabilities instead.
            wc_result = flatten_marg_probs(wc_batch_result.individual_results)
            pc_result = flatten_marg_probs(pychrm_res.individual_results,
                                           report_precision=True)
            assert_allclose(actual=pc_result, desired=wc_result, atol=0.003)

            # ==========================================================
            # Now do the same with tiling, reusing fs from before:

            # Floor division: the group count must stay an integer to be
            # usable as a range bound, whatever division semantics are active.
            n_groups = fs.num_samples // num_samples_per_group
            new_sg_ids = [
                group_id for group_id in range(n_groups)
                for _ in range(num_samples_per_group)
            ]
            fs.Update(tile_num_rows=5,
                      tile_num_cols=6,
                      num_samples_per_group=num_samples_per_group,
                      _contiguous_sample_group_ids=new_sg_ids)._RebuildViews()
            with_tile_pychrm_result = FeatureSpaceClassification.NewWND5(
                fs, fs, fw)
            html_path = pychrm_test_dir + sep + 'lymphoma_iicbu2008_subset_eosin_t5x6_v3.2feats_REFERENCE_RESULTS_30_samples_tiled_TRAINING_ERROR.html'
            with_tile_wndchrm_result = \
                FeatureSpaceClassificationExperiment.NewFromHTMLReport(
                    html_path).individual_results[0]

            # The reference report's per-sample results are compared against
            # the Python API's *averaged* results.
            wc_result = flatten_marg_probs(
                with_tile_wndchrm_result.individual_results)
            pc_result = flatten_marg_probs(
                with_tile_pychrm_result.averaged_results,
                report_precision=True)
            assert_allclose(actual=pc_result, desired=wc_result, atol=0.003)

        finally:
            rmtree(tempdir)
Example #11
0
        train_set.ToFitFile()

# Translate an optional feature-usage fraction into an absolute feature count.
if feature_usage_fraction:
    if feature_usage_fraction > 1.0 or feature_usage_fraction < 0:
        raise Exception('Feature usage fraction must be on interval [0,1]')
    num_features = int( train_set.num_features * feature_usage_fraction )

# Report how many features will be used.
# NOTE(review): the 15% figure is presumably Threshold()'s default -- confirm.
if not num_features:
    print("Using top 15% Fisher-ranked features.")
else:
    print("Using top {0} Fisher-ranked features.".format( num_features ))

experiment = FeatureSpaceClassificationExperiment( training_set=train_set )

# Rank features on the normalized training set and keep only the top ones.
train_set.Normalize( inplace=True )
all_weights = FisherFeatureWeights.NewFromFeatureSpace( train_set )
weights = all_weights.Threshold( num_features )
train_set.FeatureReduce( weights, inplace=True )

# A separate test set gets the same feature reduction, then is normalized
# against the training set.
if train_set != test_set:
    reduced_test_set = test_set.FeatureReduce( weights, inplace=True )
    reduced_test_set.Normalize( train_set )

# One WND5 classification per requested split, collected on the experiment.
for i in range( num_splits ):
    split = FeatureSpaceClassification.NewWND5(
        train_set, test_set, weights, batch_number=i )
    experiment.individual_results.append( split )

# Write the report to outpath when one was given, otherwise use Print()'s
# default destination.
if not outpath:
    experiment.Print()
else:
    experiment.Print( output_filepath=outpath, mode='w' )
Example #12
0
    sys.exit(0)

if from_scratch:
    # I preprocessed your training set and feature weights and pickled them for speed.
    # Pickle files are binary files that are super fast to load.
    # You don't need to use a pickle file though, you can make one from scratch
    # Here's how:

    # 1. Load the raw c-charm fit file
    full_training_set = FeatureSet_Discrete.NewFromFitFile(input_filename)

    # 3. Normalize the features:
    full_training_set.Normalize()

    # 4. Make Fisher scores based on the normalized training set
    full_fisher_weights = FisherFeatureWeights.NewFromFeatureSet(
        full_training_set)

    # 5. Take only the top 200 features
    reduced_fisher_weights = full_fisher_weights.Threshold(num_features)

    # 6. Reduce the training set feature space to contain only those top 200 features
    reduced_training_set = full_training_set.FeatureReduce(
        reduced_fisher_weights.names)

    # 7. Save your work:
    reduced_training_set.PickleMe(
        os.path.splitext(input_filename)[0] + ".fit.pickled")
    reduced_fisher_weights.PickleMe(
        os.path.splitext(input_filename)[0] + "_w" + str(num_features) +
        ".weights.pickled")
else: