def test_NewShuffleSplitLeastSquares(self):
        """CONTINUOUS SHUFFLE SPLIT LEAST SQUARES

        Positive control: least-squares regression on the full artificial
        feature set should correlate almost perfectly with ground truth.
        Negative control: the bottom quintile of Pearson-ranked features
        should show near-zero linear correlation.
        """

        # Synthetic continuous feature space; fixed random_state keeps the
        # regression statistics reproducible across runs.
        fs_kwargs = {}
        fs_kwargs['name'] = "CONTINUOUS PerSampleStatistics_TESTFS"
        fs_kwargs['n_samples'] = 100
        fs_kwargs['num_features_per_signal_type'] = 5
        fs_kwargs['initial_noise_sigma'] = 5
        fs_kwargs['noise_gradient'] = 5
        fs_kwargs['n_samples_per_group'] = 1
        fs_kwargs['random_state'] = 43
        fs_kwargs['singularity'] = True
        fs_kwargs['clip'] = True

        fs = CreateArtificialFeatureSpace_Continuous(**fs_kwargs)

        ss_kwargs = {}
        ss_kwargs['n_iter'] = 5
        ss_kwargs[
            'name'] = "Continuous Shuffle Split Least Squares POSITIVE CONTROL"
        ss_kwargs['quiet'] = True
        ss_kwargs['random_state'] = 43
        exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(fs, **ss_kwargs)

        exp.GenerateStats()
        #exp.Print()

        # len( exp ) is supposed to be the number of batch results (split results).
        # Compare by value: assertIs tests object identity and only passed
        # by accident via CPython small-int interning.
        self.assertEqual(len(exp), ss_kwargs['n_iter'])

        # Positive control - Artificial data with defaults should correlate almost perfectly
        self.assertAlmostEqual(exp.pearson_coeff, 1.0, delta=0.02)

        # Negative control - take the bottom quintile of the artificial features
        # which ARE functions of ground truth but should score low on linear correlation,
        # e.g., sin, x^2, etc.

        # With LSTSQ regression of noise features, pearson coeffs tend to be around -0.34 +/- .045
        max_allowable_pearson_coeff = 0.4

        # Rank features on a normalized copy, but reduce the ORIGINAL
        # (unnormalized) space - NewShuffleSplit normalizes per split.
        temp_normalized_fs = fs.Normalize(inplace=False)
        ranked_nonzero_features = \
            PearsonFeatureWeights.NewFromFeatureSpace( temp_normalized_fs ).Threshold(_all='nonzero')

        quintile = int(len(ranked_nonzero_features) / 5)
        crappy_features = ranked_nonzero_features.Slice(
            quintile * 4, len(ranked_nonzero_features))
        #crappy_features.Print()
        crap_featureset = fs.FeatureReduce(crappy_features, inplace=False)

        ss_kwargs[
            'name'] = "Continuous Shuffle Split Least Squares NEGATIVE CONTROL"
        exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(
            crap_featureset, **ss_kwargs)
        exp.GenerateStats()
        exp.PerSampleStatistics()
        #exp.Print()
        self.assertAlmostEqual(exp.pearson_coeff,
                               0.0,
                               delta=max_allowable_pearson_coeff)
# --- Example #2 ---
    def test_NewShuffleSplitLinearMultivariateRegression(self):
        """CONTINUOUS SHUFFLE SPLIT LINEAR MULTIVARIATE METHOD

        Positive control: linear multivariate regression on the full
        artificial feature set should correlate almost perfectly with
        ground truth.  Negative control: the bottom quintile of
        Pearson-ranked features should show near-zero linear correlation.
        """

        # Synthetic continuous feature space; fixed random_state keeps the
        # regression statistics reproducible across runs.
        fs_kwargs = {}
        fs_kwargs['name'] = "CONTINUOUS PerSampleStatistics_TESTFS"
        fs_kwargs['n_samples'] = 100
        fs_kwargs['num_features_per_signal_type'] = 5
        fs_kwargs['initial_noise_sigma'] = 5
        fs_kwargs['noise_gradient'] = 5
        fs_kwargs['n_samples_per_group'] = 1
        fs_kwargs['random_state'] = 43
        fs_kwargs['singularity'] = True
        fs_kwargs['clip'] = False

        fs = CreateArtificialFeatureSpace_Continuous(**fs_kwargs)

        ss_kwargs = {}
        ss_kwargs['n_iter'] = 5
        ss_kwargs[
            'name'] = "Continuous Shuffle Split Multivariate-Regression POSITIVE CONTROL"
        ss_kwargs['quiet'] = True
        ss_kwargs['random_state'] = 43
        ss_kwargs['classifier'] = 'linear'
        exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(fs, **ss_kwargs)

        exp.GenerateStats()
        #exp.Print()

        # len( exp ) is the number of split results.  Compare by value:
        # assertIs tests object identity and only passed by accident via
        # CPython small-int interning.
        self.assertEqual(len(exp), ss_kwargs['n_iter'])

        # Positive control - Artificial data with defaults should correlate almost perfectly
        self.assertAlmostEqual(exp.pearson_coeff, 1.0, delta=0.03)

        # Negative control - take the bottom quintile of the artificial features
        # which ARE functions of ground truth but should score low on linear correlation,
        # e.g., sin, x^2, etc.

        # Voting method with crap features tends to be around 0.14 +/- 0.04
        max_allowable_pearson_coeff = 0.2

        temp_normalized_fs = fs.Normalize(inplace=False)
        ranked_nonzero_features = \
            PearsonFeatureWeights.NewFromFeatureSpace( temp_normalized_fs ).Threshold(_all='nonzero')

        quintile = int(len(ranked_nonzero_features) / 5)
        # Bottom quintile of the ranking (an open-ended slice is equivalent
        # to slicing up to len()).
        crappy_features = ranked_nonzero_features[quintile * 4:]
        #crappy_features.Print()
        crap_featureset = fs.FeatureReduce(crappy_features)

        # NOTE: the original had a trailing comma here, which silently made
        # the experiment name a 1-tuple instead of a string.
        ss_kwargs[
            'name'] = "Continuous Shuffle Split Linear Multivariate-Regression NEGATIVE CONTROL"
        exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(
            crap_featureset, **ss_kwargs)
        exp.GenerateStats()
        #exp.Print()
        self.assertAlmostEqual(exp.pearson_coeff,
                               0.0,
                               delta=max_allowable_pearson_coeff)
# --- Example #3 ---
    def test_MultivariateLinearFitOnFitNoTiling(self):
        """Smoke test: fit-on-fit multivariate-linear regression, untiled.

        Builds a synthetic continuous feature space, normalizes it in
        place, keeps the Pearson-thresholded features, and runs the
        regression; the test passes as long as nothing raises.
        """

        synthetic_fs = CreateArtificialFeatureSpace_Continuous(
            n_samples=100,
            num_features_per_signal_type=5,
            noise_gradient=5,
            initial_noise_sigma=10,
            n_samples_per_group=1)

        synthetic_fs.Normalize(quiet=True)

        # Rank every feature by Pearson correlation, then drop the ones
        # below threshold before regressing.
        ranked_weights = PearsonFeatureWeights.NewFromFeatureSpace(synthetic_fs)
        kept_weights = ranked_weights.Threshold()
        kept_fs = synthetic_fs.FeatureReduce(kept_weights)

        FeatureSpaceRegression.NewMultivariateLinear(
            test_set=kept_fs, feature_weights=kept_weights, quiet=True)
# --- Example #4 ---
    def test_LeastSquaresFitOnFitLeaveOneOutNoTiling(self):
        """Smoke test: least-squares fit-on-fit (leave-one-out), untiled.

        Ranks features on a normalized copy, reduces the ORIGINAL
        (unnormalized) feature space to the kept features, and runs
        least-squares with test_set=None (leave-one-out on the training
        set); the test passes as long as nothing raises.
        """

        synthetic_fs = CreateArtificialFeatureSpace_Continuous(
            n_samples=100,
            num_features_per_signal_type=5,
            noise_gradient=5,
            initial_noise_sigma=10,
            n_samples_per_group=1)

        # Weights are computed on a normalized copy; the original space
        # is left untouched and reduced below.
        normalized_copy = synthetic_fs.Normalize(inplace=False, quiet=True)
        kept_weights = PearsonFeatureWeights.NewFromFeatureSpace(
            normalized_copy).Threshold()
        kept_fs = synthetic_fs.FeatureReduce(kept_weights)

        FeatureSpaceRegression.NewLeastSquares(
            training_set=kept_fs,
            test_set=None,
            feature_weights=kept_weights,
            quiet=True)