def test_NewShuffleSplitLeastSquares(self):
        """CONTINUOUS SHUFFLE SPLIT LEAST SQUARES

        Builds an artificial continuous feature space and runs
        FeatureSpaceRegressionExperiment.NewShuffleSplit on it twice:

        * Positive control: the full feature space; the experiment's Pearson
          coefficient must be within 0.02 of 1.0.
        * Negative control: only the last fifth of the ranked nonzero Pearson
          feature weights; the Pearson coefficient must be within 0.4 of 0.0.
        """

        fs_kwargs = {
            'name': "CONTINUOUS PerSampleStatistics_TESTFS",
            'n_samples': 100,
            'num_features_per_signal_type': 5,
            'initial_noise_sigma': 5,
            'noise_gradient': 5,
            'n_samples_per_group': 1,
            'random_state': 43,
            'singularity': True,
            'clip': True,
        }
        fs = CreateArtificialFeatureSpace_Continuous(**fs_kwargs)

        ss_kwargs = {
            'n_iter': 5,
            'name': "Continuous Shuffle Split Least Squares POSITIVE CONTROL",
            'quiet': True,
            'random_state': 43,
        }
        exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(fs, **ss_kwargs)
        exp.GenerateStats()

        # len( exp ) is supposed to be the number of batch results (split results).
        # Compare by value: identity comparison of ints only works by accident
        # of the interpreter's small-integer caching.
        self.assertEqual(len(exp), ss_kwargs['n_iter'])

        # Positive control - Artificial data with defaults should correlate almost perfectly
        self.assertAlmostEqual(exp.pearson_coeff, 1.0, delta=0.02)

        # Negative control - take the bottom quintile of the artificial features
        # which ARE functions of ground truth but should score low on linear correlation,
        # e.g., sin, x^2, etc.

        # With LSTSQ regression of noise features, pearson coeffs tend to be around -0.34 +/- .045
        max_allowable_pearson_coeff = 0.4

        # Rank features on a normalized copy so that fs itself is left as-is.
        temp_normalized_fs = fs.Normalize(inplace=False)
        ranked_nonzero_features = PearsonFeatureWeights.NewFromFeatureSpace(
            temp_normalized_fs).Threshold(_all='nonzero')

        n_ranked = len(ranked_nonzero_features)
        quintile = n_ranked // 5
        crappy_features = ranked_nonzero_features.Slice(quintile * 4, n_ranked)
        crap_featureset = fs.FeatureReduce(crappy_features, inplace=False)

        ss_kwargs['name'] = \
            "Continuous Shuffle Split Least Squares NEGATIVE CONTROL"
        exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(
            crap_featureset, **ss_kwargs)
        exp.GenerateStats()
        exp.PerSampleStatistics()
        self.assertAlmostEqual(exp.pearson_coeff,
                               0.0,
                               delta=max_allowable_pearson_coeff)
Ejemplo n.º 2
0
    def test_ContinuousFitOnFit(self):
        """Write a discrete artificial feature space to a .fit file, reload it
        as a continuous feature space, and run multivariate linear regression.

        No assertions are made; the test passes if no step raises. The
        temporary directory is removed whether or not the steps succeed.
        """
        from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

        discrete_kwargs = dict(
            n_samples=1000,
            n_classes=10,
            num_features_per_signal_type=30,
            noise_gradient=5,
            initial_noise_sigma=10,
            n_samples_per_group=1,
            interpolatable=True)
        discrete_fs = CreateArtificialFeatureSpace_Discrete(**discrete_kwargs)

        scratch_dir = mkdtemp()
        fit_path = sep.join((scratch_dir, 'Artificial.fit'))

        try:
            # Round-trip through the on-disk .fit format, reloading as continuous.
            discrete_fs.ToFitFile(fit_path)
            continuous_fs = FeatureSpace.NewFromFitFile(fit_path, discrete=False)

            continuous_fs.Normalize(quiet=True)
            all_weights = PearsonFeatureWeights.NewFromFeatureSpace(continuous_fs)
            kept_weights = all_weights.Threshold()
            reduced_fs = continuous_fs.FeatureReduce(kept_weights)
            regression_result = FeatureSpaceRegression.NewMultivariateLinear(
                reduced_fs, kept_weights, quiet=True)
        finally:
            rmtree(scratch_dir)
Ejemplo n.º 3
0
    def test_ContinuousTrainTestSplitWithTiling(self):
        """Split a tiled synthetic continuous feature space into train/test
        sets, then normalize and feature-reduce both.

        The feature space is created with n_samples=500,
        num_features_per_signal_type=20 and n_samples_per_group=25.
        No assertions are made; the test passes if no step raises.
        """
        from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Continuous
        from numpy.random import RandomState

        synthetic_fs = CreateArtificialFeatureSpace_Continuous(
            n_samples=500, num_features_per_signal_type=20,
            n_samples_per_group=25)

        # Fixed seed so the split is reproducible.
        seeded_rng = RandomState(42)
        train_set, test_set = synthetic_fs.Split(random_state=seeded_rng,
                                                 quiet=True)

        # Weights come from the normalized training set only.
        train_set.Normalize(inplace=True, quiet=True)
        all_weights = PearsonFeatureWeights.NewFromFeatureSpace(train_set)
        kept_weights = all_weights.Threshold()
        train_set.FeatureReduce(kept_weights, inplace=True)

        # Reduce the test set to the same features, then normalize it
        # against the training set.
        reduced_test = test_set.FeatureReduce(kept_weights, inplace=True)
        reduced_test.Normalize(train_set, inplace=True, quiet=True)
Ejemplo n.º 4
0
    def test_NewShuffleSplitLinearMultivariateRegression(self):
        """CONTINUOUS SHUFFLE SPLIT LINEAR MULTIVARIATE METHOD

        Builds an artificial continuous feature space and runs
        FeatureSpaceRegressionExperiment.NewShuffleSplit with
        classifier='linear' on it twice:

        * Positive control: the full feature space; the experiment's Pearson
          coefficient must be within 0.03 of 1.0.
        * Negative control: only the last fifth of the ranked nonzero Pearson
          feature weights; the Pearson coefficient must be within 0.2 of 0.0.
        """

        fs_kwargs = {
            'name': "CONTINUOUS PerSampleStatistics_TESTFS",
            'n_samples': 100,
            'num_features_per_signal_type': 5,
            'initial_noise_sigma': 5,
            'noise_gradient': 5,
            'n_samples_per_group': 1,
            'random_state': 43,
            'singularity': True,
            'clip': False,
        }
        fs = CreateArtificialFeatureSpace_Continuous(**fs_kwargs)

        ss_kwargs = {
            'n_iter': 5,
            'name': "Continuous Shuffle Split Multivariate-Regression POSITIVE CONTROL",
            'quiet': True,
            'random_state': 43,
            'classifier': 'linear',
        }
        exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(fs, **ss_kwargs)
        exp.GenerateStats()

        # One result per shuffle-split iteration is expected. Compare by value:
        # identity comparison of ints only works by accident of the
        # interpreter's small-integer caching.
        self.assertEqual(len(exp), ss_kwargs['n_iter'])

        # Positive control - Artificial data with defaults should correlate almost perfectly
        self.assertAlmostEqual(exp.pearson_coeff, 1.0, delta=0.03)

        # Negative control - take the bottom quintile of the artificial features
        # which ARE functions of ground truth but should score low on linear correlation,
        # e.g., sin, x^2, etc.

        # Voting method with crap features tends to be around 0.14 +/- 0.04
        max_allowable_pearson_coeff = 0.2

        # Rank features on a normalized copy of the feature space.
        temp_normalized_fs = fs.Normalize(inplace=False)
        ranked_nonzero_features = PearsonFeatureWeights.NewFromFeatureSpace(
            temp_normalized_fs).Threshold(_all='nonzero')

        n_ranked = len(ranked_nonzero_features)
        quintile = n_ranked // 5
        crappy_features = ranked_nonzero_features[quintile * 4:n_ranked]
        crap_featureset = fs.FeatureReduce(crappy_features)

        # The name must be a plain string; a trailing comma here would
        # silently turn it into a 1-tuple.
        ss_kwargs['name'] = \
            "Continuous Shuffle Split Linear Multivariate-Regression NEGATIVE CONTROL"
        exp = FeatureSpaceRegressionExperiment.NewShuffleSplit(
            crap_featureset, **ss_kwargs)
        exp.GenerateStats()
        self.assertAlmostEqual(exp.pearson_coeff,
                               0.0,
                               delta=max_allowable_pearson_coeff)
Ejemplo n.º 5
0
    def test_MultivariateLinearFitOnFitNoTiling(self):
        """Run multivariate linear regression on an untiled artificial
        continuous feature space, passing the reduced space as the test set.

        No assertions are made; the test passes if no step raises.
        """
        artificial_kwargs = dict(
            n_samples=100,
            num_features_per_signal_type=5,
            noise_gradient=5,
            initial_noise_sigma=10,
            n_samples_per_group=1)
        artificial_fs = CreateArtificialFeatureSpace_Continuous(
            **artificial_kwargs)

        artificial_fs.Normalize(quiet=True)
        all_weights = PearsonFeatureWeights.NewFromFeatureSpace(artificial_fs)
        kept_weights = all_weights.Threshold()
        slimmed_fs = artificial_fs.FeatureReduce(kept_weights)

        regression_result = FeatureSpaceRegression.NewMultivariateLinear(
            test_set=slimmed_fs, feature_weights=kept_weights, quiet=True)
Ejemplo n.º 6
0
    def test_LeastSquaresFitOnFitLeaveOneOutNoTiling(self):
        """Run least-squares regression on an untiled artificial continuous
        feature space, with a training set and no test set.

        Feature weights are computed from a normalized copy, while the
        feature reduction is applied to the original feature space.
        No assertions are made; the test passes if no step raises.
        """
        artificial_kwargs = dict(
            n_samples=100,
            num_features_per_signal_type=5,
            noise_gradient=5,
            initial_noise_sigma=10,
            n_samples_per_group=1)
        artificial_fs = CreateArtificialFeatureSpace_Continuous(
            **artificial_kwargs)

        normalized_copy = artificial_fs.Normalize(inplace=False, quiet=True)
        all_weights = PearsonFeatureWeights.NewFromFeatureSpace(normalized_copy)
        kept_weights = all_weights.Threshold()
        slimmed_fs = artificial_fs.FeatureReduce(kept_weights)

        # test_set=None: presumably selects leave-one-out evaluation, going by
        # the test name -- confirm against NewLeastSquares.
        regression_result = FeatureSpaceRegression.NewLeastSquares(
            training_set=slimmed_fs,
            test_set=None,
            feature_weights=kept_weights,
            quiet=True)