def test_SplitOptions( self ):
    """Check FeatureSpace.Split() defaults, argument validation and class balancing.

    Builds a reproducible (random_state=42) artificial discrete feature space
    and verifies the shapes of the default split, the single-object return
    when test_size=0, that bad size arguments raise ValueError, and that an
    unbalanced input is split into equally sized classes.

    NOTE(review): another ``test_SplitOptions`` is defined later in this
    file. If both live in the same TestCase class, the later definition
    replaces this one and the balanced-classes assertions at the end of this
    method never run -- confirm, then rename or merge one of them.
    """
    from math import floor
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    fs_discrete = CreateArtificialFeatureSpace_Discrete(
        n_samples=1000, n_classes=10, num_features_per_signal_type=30,
        noise_gradient=5, initial_noise_sigma=10, n_samples_per_group=1,
        interpolatable=True, random_state=42 )

    # Default train/test sizes.
    train_set, test_set = fs_discrete.Split( random_state=42, quiet=True )
    self.assertEqual( train_set.shape, (750, 600) )
    self.assertEqual( test_set.shape, (250, 600) )

    # With test_size=0, Split() is supposed to return a single FeatureSpace
    # instead of a 2-tuple of FeatureSpace objects.
    train_size_per_class = 50
    retval = fs_discrete.Split( train_size=train_size_per_class, test_size=0,
                                random_state=42, quiet=True )
    self.assertEqual( type(retval), FeatureSpace )
    self.assertEqual( retval.num_samples,
                      train_size_per_class * fs_discrete.num_classes )

    # Dummyproofing: non-numeric sizes and fractions above 1 are rejected.
    self.assertRaises( ValueError, fs_discrete.Split, train_size='trash' )
    self.assertRaises( ValueError, fs_discrete.Split, train_size=1.1 )
    self.assertRaises( ValueError, fs_discrete.Split, test_size='trash' )
    self.assertRaises( ValueError, fs_discrete.Split, test_size=1.1 )

    # What if the number of groups within a class is less than called for
    # when specifying the size by integer?
    self.assertRaises( ValueError, test_set.Split, test_size=25 )

    # What happens when the input fs has unbalanced classes, some of which
    # have enough samples to satisfy the train_size/test_size params and
    # some of which don't?
    # list(...) is required: on Python 3 range objects cannot be concatenated
    # with "+"; on Python 2 the result is the same list as before.
    remove_these = list( range(250, 300) ) + list( range(700, 750) )
    fs_class_2_and_7_smaller = \
        fs_discrete.SampleReduce( leave_out_sample_group_ids=remove_these )

    self.assertRaises( ValueError, fs_class_2_and_7_smaller.Split,
                       train_size=80, test_size=20 )

    # Test balanced_classes: with default arguments every class in both
    # outputs is expected to be the same size, based on a 50-sample class.
    # NOTE(review): presumably 50 is the size of the two reduced classes
    # (50 of their sample groups were removed above) -- confirm.
    train_fs, test_fs = fs_class_2_and_7_smaller.Split()

    # Training set number rounds down (apparently).
    expected_num_samps_per_train_class = int( floor(50*0.75) )
    expected_num_samps_per_test_class = 50 - expected_num_samps_per_train_class

    err_msg = "Balanced classes {} set split error, class {}, expected {}, got {}"
    class_size_pairs = zip( train_fs.class_sizes, test_fs.class_sizes )
    for class_index, (n_train, n_test) in enumerate( class_size_pairs ):
        self.assertEqual( n_train, expected_num_samps_per_train_class,
            msg=err_msg.format( "TRAIN", class_index,
                                expected_num_samps_per_train_class, n_train ) )
        self.assertEqual( n_test, expected_num_samps_per_test_class,
            msg=err_msg.format( "TEST", class_index,
                                expected_num_samps_per_test_class, n_test ) )
def test_SplitOptions(self):
    """Check FeatureSpace.Split() defaults and argument validation.

    Builds a reproducible (random_state=42) artificial discrete feature space
    and verifies the shapes of the default split, the single-object return
    when test_size=0, and that bad or unsatisfiable size arguments raise
    ValueError.

    NOTE(review): an earlier, longer ``test_SplitOptions`` is defined above
    in this file. If both live in the same TestCase class, this definition
    replaces that one, so its extra balanced-classes assertions never run --
    confirm, then rename or merge one of them.
    """
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete

    fs_discrete = CreateArtificialFeatureSpace_Discrete(
        n_samples=1000, n_classes=10, num_features_per_signal_type=30,
        noise_gradient=5, initial_noise_sigma=10, n_samples_per_group=1,
        interpolatable=True, random_state=42)

    # Default train/test sizes.
    train_set, test_set = fs_discrete.Split(random_state=42, quiet=True)
    self.assertEqual(train_set.shape, (750, 600))
    self.assertEqual(test_set.shape, (250, 600))

    # With test_size=0, Split() is supposed to return a single FeatureSpace
    # instead of a 2-tuple of FeatureSpace objects.
    train_size_per_class = 50
    retval = fs_discrete.Split(train_size=train_size_per_class, test_size=0,
                               random_state=42, quiet=True)
    self.assertEqual(type(retval), FeatureSpace)
    self.assertEqual(retval.num_samples,
                     train_size_per_class * fs_discrete.num_classes)

    # Dummyproofing: non-numeric sizes and fractions above 1 are rejected.
    self.assertRaises(ValueError, fs_discrete.Split, train_size='trash')
    self.assertRaises(ValueError, fs_discrete.Split, train_size=1.1)
    self.assertRaises(ValueError, fs_discrete.Split, test_size='trash')
    self.assertRaises(ValueError, fs_discrete.Split, test_size=1.1)

    # What if the number of groups within a class is less than called for
    # when specifying the size by integer?
    self.assertRaises(ValueError, test_set.Split, test_size=25)

    # What happens when the input fs has unbalanced classes, some of which
    # have enough samples to satisfy the train_size/test_size params and
    # some of which don't?
    # list(...) is required: on Python 3 range objects cannot be concatenated
    # with "+"; on Python 2 the result is the same list as before.
    remove_these = list(range(250, 300)) + list(range(700, 750))
    fs_class_2_and_7_smaller = \
        fs_discrete.SampleReduce(leave_out_sample_group_ids=remove_these)

    self.assertRaises(ValueError, fs_class_2_and_7_smaller.Split,
                      train_size=80, test_size=20)
def test_SampleReduce( self ):
    """Check FeatureSpace.SampleReduce() in leave-in and leave-out modes.

    Eight sections cover every combination of discrete/continuous feature
    spaces, tiled/untiled sample groups (n_samples_per_group of 4 or 1) and
    leave-in (positional list of sample group ids) versus leave-out
    (leave_out_sample_group_ids=...). Each section checks the resulting
    sample count; bad ids are expected to raise ValueError or TypeError.

    Id sequences are built with list(range(...)) so that SampleReduce()
    receives a real list on Python 3 as well as on Python 2, where range()
    already returned a list.
    """
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Discrete
    from wndcharm.ArtificialFeatureSpace import CreateArtificialFeatureSpace_Continuous

    n_classes = 10
    num_tiles = 4

    #========================================================
    # Section 1: LEAVE IN, Untiled Discrete (w/ classes) FeatureSpace instances
    fs_discrete = CreateArtificialFeatureSpace_Discrete(
        n_samples=1000, n_classes=n_classes, num_features_per_signal_type=30,
        noise_gradient=5, initial_noise_sigma=10, n_samples_per_group=1,
        interpolatable=True )

    # Reduce to 9 classes from 10, one sample per class.
    # Drop the last class:
    desired = list( range(50, 950, 100) )
    one_per_class = fs_discrete.SampleReduce( desired )
    # Further reduce to 8 classes
    one_per_class.RemoveClass( "FakeClass-055.6", inplace=True )

    # Expected order follows the class values (negative to positive). The
    # actual alphanumeric sort order is different from the value sort order:
    # it would list the "+" names before the "-" names.
    correct_samplenames = [
        'FakeClass-100.0_050', 'FakeClass-077.8_050', 'FakeClass-033.3_050',
        'FakeClass-011.1_050', 'FakeClass+011.1_050', 'FakeClass+033.3_050',
        'FakeClass+055.6_050', 'FakeClass+077.8_050' ]
    self.assertEqual( correct_samplenames, one_per_class._contiguous_sample_names )

    correct_classnames = [
        'FakeClass-100.0', 'FakeClass-077.8', 'FakeClass-033.3',
        'FakeClass-011.1', 'FakeClass+011.1', 'FakeClass+033.3',
        'FakeClass+055.6', 'FakeClass+077.8' ]
    self.assertEqual( correct_classnames, one_per_class.class_names )
    del one_per_class

    #========================================================
    # Section 2: LEAVE OUT, UNTiled Feature sets, Discrete FeatureSpace instances
    undesired = list( range(50, 950, 100) )
    remainder = fs_discrete.SampleReduce( leave_out_sample_group_ids=undesired )
    self.assertEqual( remainder.num_samples,
                      fs_discrete.num_samples - len( undesired ) )

    # Single integers for leave_out_list is ok
    undesired = 50
    remainder = fs_discrete.SampleReduce( leave_out_sample_group_ids=undesired )
    self.assertEqual( remainder.num_samples, fs_discrete.num_samples - 1 )
    del remainder

    #========================================================
    # Section 3: LEAVE IN, Tiled Feature sets, Discrete FeatureSpace instances
    fs_discrete = CreateArtificialFeatureSpace_Discrete(
        n_samples=1000, n_classes=n_classes, num_features_per_signal_type=30,
        noise_gradient=5, initial_noise_sigma=10, n_samples_per_group=num_tiles,
        interpolatable=True )

    desired = list( range(5, 95, 10) ) # Rearrange into 9 classes
    tiled_subset = fs_discrete.SampleReduce( desired )
    # Total num samples should be 9 classes, 1 sample group per class,
    # 4 tiles per SG = 36
    self.assertEqual( num_tiles * len( desired ), tiled_subset.num_samples )
    del tiled_subset

    #========================================================
    # Section 4: LEAVE OUT, WITH Tiled Feature sets, Discrete FeatureSpace instances

    # You can't leave out a sample group that doesn't exist
    undesired = list( range(50000, 50010) )
    self.assertRaises( ValueError, fs_discrete.SampleReduce,
                       leave_out_sample_group_ids=undesired )

    # Can't leave out trash
    undesired = ['foo', 'bar']
    self.assertRaises( TypeError, fs_discrete.SampleReduce,
                       leave_out_sample_group_ids=undesired )

    # This input is ok:
    undesired = list( range(5, 95, 10) )
    tiled_remainder = fs_discrete.SampleReduce( leave_out_sample_group_ids=undesired )
    self.assertEqual( tiled_remainder.num_samples,
                      fs_discrete.num_samples - len( undesired ) * num_tiles )
    del tiled_remainder

    #========================================================
    # Section 5: LEAVE IN, Untiled Continuous FeatureSpace instances
    fs_cont = CreateArtificialFeatureSpace_Continuous(
        n_samples=1000, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=1 )

    # dummyproof
    desired = ['foo', 'bar']
    self.assertRaises( TypeError, fs_cont.SampleReduce, desired )

    desired = list( range(50, 950) )
    cont_subset = fs_cont.SampleReduce( desired )
    self.assertEqual( cont_subset.num_samples, len( desired ) )
    del cont_subset

    #========================================================
    # Section 6: LEAVE OUT, Untiled Continuous FeatureSpace instances
    undesired = list( range(50, 950) )
    cont_remainder = fs_cont.SampleReduce( leave_out_sample_group_ids=undesired )
    self.assertEqual( cont_remainder.num_samples,
                      fs_cont.num_samples - len( undesired ) )
    del cont_remainder

    # single int is ok
    cont_minus_one = fs_cont.SampleReduce( leave_out_sample_group_ids=998 )
    self.assertEqual( cont_minus_one.num_samples, fs_cont.num_samples - 1 )
    del cont_minus_one

    #========================================================
    # Section 7: LEAVE IN, TILED Continuous FeatureSpace instances
    fs_cont = CreateArtificialFeatureSpace_Continuous(
        n_samples=1000, num_features_per_signal_type=30, noise_gradient=5,
        initial_noise_sigma=10, n_samples_per_group=num_tiles )

    desired = list( range(50, 95) )
    tiled_cont_subset = fs_cont.SampleReduce( desired )
    self.assertEqual( tiled_cont_subset.num_samples, len( desired ) * num_tiles )
    del tiled_cont_subset

    # single int is ok, ALTHOUGH NOT SURE WHY YOU'D EVER WANT A FS WITH A SINGLE SAMPLE
    single_group = fs_cont.SampleReduce( 98 )
    self.assertEqual( single_group.num_samples, num_tiles )
    del single_group

    #========================================================
    # Section 8: LEAVE OUT, TILED Continuous FeatureSpace instances
    undesired = list( range(50, 95) )
    tiled_cont_remainder = fs_cont.SampleReduce( leave_out_sample_group_ids=undesired )
    self.assertEqual( tiled_cont_remainder.num_samples,
                      fs_cont.num_samples - len( undesired ) * num_tiles )
    del tiled_cont_remainder

    # single int is ok
    minus_one_group = fs_cont.SampleReduce( leave_out_sample_group_ids=98 )
    self.assertEqual( minus_one_group.num_samples,
                      fs_cont.num_samples - num_tiles )
    del minus_one_group