def kmeans_006_submission():
    # 2014-03-28 10:53:22 - Base - INFO - Cross validation completed in 1487.18687487. Scores:
    # 2014-03-28 10:53:22 - Base - INFO - [-0.11018943 -0.10946863]
    # Final submission
    crop = 150
    s = 15
    n_centroids = 3000

    images = get_images(crop=crop, s=s)
    kmeans_generator = train_kmeans_generator(images, n_centroids=n_centroids)

    train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_006_centroids_{}.npy'.format(n_centroids), memmap=True)
    train_y = classes.train_solutions.data
    # Unload some objects
    del images
    gc.collect()

    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
    # wrapper.cross_validation(train_x, train_y, sample=0.5, parallel_estimator=True)
    wrapper.fit(train_x, train_y)

    test_x_crop_scale = CropScaleImageTransformer(training=False,
                                                  result_path='data/data_test_crop_{}_scale_{}.npy'.format(crop, s),
                                                  crop_size=crop,
                                                  scaled_size=s,
                                                  n_jobs=-1,
                                                  memmap=True)

    test_images = test_x_crop_scale.transform()
    test_x = kmeans_generator.transform(test_images, save_to_file='data/data_test_kmeans_features_006_centroids_{}.npy'.format(n_centroids), memmap=True)
    res = wrapper.predict(test_x)
    sub = classes.Submission(res)
    sub.to_file('sub_kmeans_006.csv')
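# train_kmeans_generator is called above but not defined in this file. A plausible
# sketch, reconstructed from the explicit pipeline in the longer kmeans_006_submission
# below (PatchSampler -> KMeansFeatureGenerator.fit); the defaults and result_path here
# are assumptions, not necessarily the repo's actual implementation.
def train_kmeans_generator(images, n_centroids=3000, n_patches=400000, rf_size=5):
    kmeans_generator = KMeansFeatureGenerator(n_centroids=n_centroids,
                                              rf_size=rf_size,
                                              result_path='data/mdl_kmeans_006_centroids_{}'.format(n_centroids),
                                              n_iterations=20,
                                              n_jobs=-1)
    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    # Sample patches from the images, fit the centroids, then free the patches
    patches = patch_extractor.transform(images)
    kmeans_generator.fit(patches)
    del patches
    gc.collect()
    return kmeans_generator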
def extra_trees_submission():
    # Somehow the submission on the leaderboard scores 0.22
    crop = 150
    s = 15
    n_centroids = 3000

    images = get_images(crop=crop, s=s)
    kmeans_generator = train_kmeans_generator(images, n_centroids=n_centroids)

    train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_006_centroids_{}.npy'.format(n_centroids), memmap=True)
    train_y = classes.train_solutions.data
    # Unload some objects
    del images
    gc.collect()

    wrapper = ModelWrapper(models.Ridge.RidgeExtraTreesEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
    # wrapper.cross_validation(train_x, train_y, sample=0.5, parallel_estimator=True)
    wrapper.fit(train_x, train_y)

    test_x_crop_scale = CropScaleImageTransformer(training=False,
                                                  result_path='data/data_test_crop_{}_scale_{}.npy'.format(crop, s),
                                                  crop_size=crop,
                                                  scaled_size=s,
                                                  n_jobs=-1,
                                                  memmap=True)

    test_images = test_x_crop_scale.transform()
    test_x = kmeans_generator.transform(test_images, save_to_file='data/data_test_kmeans_features_006_centroids_{}.npy'.format(n_centroids), memmap=True)
    res = wrapper.predict(test_x)
    sub = classes.Submission(res)
    sub.to_file('sub_kmeans_008.csv')
def rbm_001():
    s = 15
    crop = 150
    n_patches = 400000
    rf_size = 5

    train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                   result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
                                                   crop_size=crop,
                                                   scaled_size=s,
                                                   n_jobs=-1,
                                                   memmap=True)
    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    images = train_x_crop_scale.transform()
    images = images.reshape((images.shape[0], 15 * 15 * 3))

    # rbm needs inputs to be between 0 and 1
    scaler = MinMaxScaler()
    images = scaler.fit_transform(images)

    # Training takes a long time - it reports 80 seconds per iteration, but seems longer than that
    # And this is only with the default 256 components
    rbm = BernoulliRBM(verbose=1)
    rbm.fit(images)

    train_x = rbm.transform(images)
    train_y = classes.train_solutions.data

    # 0.138 CV on 50% of the dataset
    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
    wrapper.cross_validation(train_x, train_y, sample=0.5, parallel_estimator=True)
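# Since the RBM requires inputs in [0, 1], the scaler and RBM could also be chained with
# sklearn's Pipeline, so the same preprocessing is reapplied automatically at transform
# time. A minimal sketch (n_components=256 is sklearn's default, stated explicitly here):
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.neural_network import BernoulliRBM

rbm_pipeline = Pipeline([
    ('scale', MinMaxScaler()),                            # map pixel values into [0, 1]
    ('rbm', BernoulliRBM(n_components=256, verbose=1)),   # binary-visible RBM features
])
# features = rbm_pipeline.fit_transform(images)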
def kmeans_007():
    """
    Increasing crop/scale size, rf size, centroids, and patches all at once.

    2014-02-18 02:45:15 - Base - INFO - Cross validation completed in 5426.04788399. Scores:
    2014-02-18 02:45:15 - Base - INFO - [-0.10834319 -0.10825868]
    """
    n_centroids = 5000
    s = 50
    crop = 200
    # Originally, 1600 centroids for 400,000 patches, or 250 patches per centroid
    # 800000 / 5000 will give us 160 patches per centroid
    n_patches = 800000
    rf_size = 20
    # 31 x 31 = 961 patches per image, which is 10x more patches than the original settings
    # If we set stride 2, then it's 16 x 16 = 256 patches, only twice as many patches
    stride = 2

    train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                   crop_size=crop,
                                                   scaled_size=s,
                                                   n_jobs=-1,
                                                   memmap=True)
    images = train_x_crop_scale.transform()
    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    patches = patch_extractor.transform(images)

    kmeans_generator = KMeansFeatureGenerator(n_centroids=n_centroids,
                                              rf_size=rf_size,
                                              result_path='data/mdl_kmeans_007',
                                              n_iterations=20,
                                              n_jobs=-1)
    kmeans_generator.fit(patches)
    del patches
    gc.collect()

    train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_007.npy', stride_size=stride, memmap=True)
    train_y = classes.train_solutions.data
    # Unload some objects
    del images
    gc.collect()

    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 250}, n_jobs=-1)
    wrapper.cross_validation(train_x, train_y, parallel_estimator=True)
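# Quick check of the patches-per-image arithmetic in kmeans_007: with a scaled image of
# side s and a receptive field of side rf_size, a sliding window yields
# ((s - rf_size) // stride + 1) ** 2 patches. A throwaway helper (not part of the repo):
def patches_per_image(s, rf_size, stride=1):
    # number of window positions along one axis, squared for both axes
    return ((s - rf_size) // stride + 1) ** 2

assert patches_per_image(50, 20, stride=1) == 961   # 31 x 31
assert patches_per_image(50, 20, stride=2) == 256   # 16 x 16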
def get_images(crop=150, s=15):
    """
    Iterates over each training image file, cropping it to `crop` pixels (default 150),
    then scaling it to `s` pixels (default 15).
    Returns an ndarray (possibly memmapped) of shape (n_images, s, s, 3)
    """
    train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                   result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
                                                   crop_size=crop,
                                                   scaled_size=s,
                                                   n_jobs=-1,
                                                   memmap=True)
    images = train_x_crop_scale.transform()
    return images
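# A minimal standalone sketch of the crop-then-scale step that CropScaleImageTransformer
# performs per image, assuming a centered crop and skimage for resizing; the actual
# transformer additionally parallelizes over files and memmaps the stacked result.
from skimage.transform import resize

def crop_and_scale(img, crop=150, s=15):
    # img: (H, W, 3) array; take the central crop x crop window, then resize to s x s
    h, w = img.shape[:2]
    top = (h - crop) // 2
    left = (w - crop) // 2
    cropped = img[top:top + crop, left:left + crop]
    return resize(cropped, (s, s))  # float image with values scaled to [0, 1]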
def kmeans_006_submission():
    # Final submission
    n_centroids = 3000
    s = 15
    crop = 150
    n_patches = 400000
    rf_size = 5

    logger.info("Training with n_centroids {}".format(n_centroids))

    train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                   result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
                                                   crop_size=crop,
                                                   scaled_size=s,
                                                   n_jobs=-1,
                                                   memmap=True)
    kmeans_generator = KMeansFeatureGenerator(n_centroids=n_centroids,
                                              rf_size=rf_size,
                                              result_path='data/mdl_kmeans_006_centroids_{}'.format(n_centroids),
                                              n_iterations=20,
                                              n_jobs=-1)
    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    images = train_x_crop_scale.transform()
    patches = patch_extractor.transform(images)
    kmeans_generator.fit(patches)
    del patches
    gc.collect()

    train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_006_centroids_{}.npy'.format(n_centroids), memmap=True)
    train_y = classes.train_solutions.data
    # Unload some objects
    del images
    gc.collect()

    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
    wrapper.fit(train_x, train_y)

    test_x_crop_scale = CropScaleImageTransformer(training=False,
                                                  result_path='data/data_test_crop_{}_scale_{}.npy'.format(crop, s),
                                                  crop_size=crop,
                                                  scaled_size=s,
                                                  n_jobs=-1,
                                                  memmap=True)
    test_images = test_x_crop_scale.transform()
    test_x = kmeans_generator.transform(test_images, save_to_file='data/data_test_kmeans_features_006_centroids_{}.npy'.format(n_centroids), memmap=True)
    res = wrapper.predict(test_x)
    sub = classes.Submission(res)
    sub.to_file('sub_kmeans_006.csv')
def rf_size_10():
    # Pretty bad as well
    # 2014-03-28 13:04:07 - Base - INFO - Cross validation completed in 1475.74401999. Scores:
    # 2014-03-28 13:04:07 - Base - INFO - [-0.12217214 -0.12209735]
    n_centroids = 3000
    s = 15
    crop = 150
    n_patches = 400000
    rf_size = 10

    train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                   result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
                                                   crop_size=crop,
                                                   scaled_size=s,
                                                   n_jobs=-1,
                                                   memmap=True)
    test_x_crop_scale = CropScaleImageTransformer(training=False,
                                                  result_path='data/data_test_crop_{}_scale_{}.npy'.format(crop, s),
                                                  crop_size=crop,
                                                  scaled_size=s,
                                                  n_jobs=-1,
                                                  memmap=True)
    kmeans_generator = KMeansFeatureGenerator(n_centroids=n_centroids,
                                              rf_size=rf_size,
                                              result_path='data/mdl_kmeans_008_rf10',
                                              n_iterations=20,
                                              n_jobs=-1)
    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    images = train_x_crop_scale.transform()
    patches = patch_extractor.transform(images)
    kmeans_generator.fit(patches)
    del patches
    gc.collect()

    train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_008_rf10.npy', memmap=True)
    train_y = classes.train_solutions.data
    del images
    gc.collect()

    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
    wrapper.cross_validation(train_x, train_y, sample=0.5, parallel_estimator=True)
def kmeans_006():
    """
    Testing number of centroids

    [(1000, array([-0.10926318, -0.10853047])),
     (2000, array([-0.10727502, -0.10710292])),
     (2500, array([-0.107019 , -0.10696262])),
     (3000, array([-0.10713973, -0.1066932 ]))]
    """
    n_centroids_vals = [1000, 2000, 2500, 3000]
    scores = []

    for n_centroids in n_centroids_vals:
        s = 15
        crop = 150
        n_patches = 400000
        rf_size = 5
        logger.info("Training with n_centroids {}".format(n_centroids))

        train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                       result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
                                                       crop_size=crop,
                                                       scaled_size=s,
                                                       n_jobs=-1,
                                                       memmap=True)
        test_x_crop_scale = CropScaleImageTransformer(training=False,
                                                      result_path='data/data_test_crop_{}_scale_{}.npy'.format(crop, s),
                                                      crop_size=crop,
                                                      scaled_size=s,
                                                      n_jobs=-1,
                                                      memmap=True)
        kmeans_generator = KMeansFeatureGenerator(n_centroids=n_centroids,
                                                  rf_size=rf_size,
                                                  result_path='data/mdl_kmeans_006_centroids_{}'.format(n_centroids),
                                                  n_iterations=20,
                                                  n_jobs=-1)
        patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                             patch_size=rf_size,
                                                             n_jobs=-1)
        images = train_x_crop_scale.transform()
        patches = patch_extractor.transform(images)
        kmeans_generator.fit(patches)
        del patches
        gc.collect()

        train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_006_centroids_{}.npy'.format(n_centroids), memmap=True)
        train_y = classes.train_solutions.data
        # Unload some objects
        del images
        gc.collect()

        wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 250}, n_jobs=-1)
        wrapper.cross_validation(train_x, train_y, n_folds=2, parallel_estimator=True)

        score = (n_centroids, wrapper.cv_scores)
        logger.info("Scores: {}".format(score))
        scores.append(score)

        del wrapper
        gc.collect()
classes.logstream.setLevel(classes.logging.DEBUG)

a = models.RandomForest.RandomForestCascadeModel(cv_sample=0.1)
a.run('cv')
a.run('train')

b = models.RandomForest.RandomForestModel(cv_sample=0.1)
b.estimator.set_params(n_estimators=10)
b.run('train')

import models
from models.Base import CropScaleImageTransformer
from models.KMeansFeatures import KMeansFeatureGenerator

train_x_crop_scale = CropScaleImageTransformer(training=True,
                                               # result_path='data/data_train_crop_150_scale_15.npy',
                                               crop_size=150,
                                               scaled_size=15,
                                               n_jobs=-1,
                                               memmap=True)
raw_images = train_x_crop_scale.transform()

patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=1000,
                                                     patch_size=5,
                                                     n_jobs=-1)
reds = raw_images[0:100, :, :, 0]
a = patch_extractor.transform(reds)

kmeans_generator = KMeansFeatureGenerator(n_centroids=10,
                                          rf_size=5,
                                          result_path='foo.npy')
def kmeans_004():
    """
    Tuning the scale/crop and RF size parameters

    First number is the scaling, cropped to 200, with rf size of 5.
    75 scaling took forever to transform, so killed
    [(30, array([-0.11374265, -0.1134896 ])),
     (50, array([-0.11677854, -0.11696837]))]

    Trying again with larger RF size of 10.
    As a note, scale to 30 with rf 10 takes about 25 minutes to extract features on the train set.
    Scale to 50 with rf 10 takes almost 90 minutes.
    [(30, array([-0.10828216, -0.1081058 ])),
     (50, array([-0.10840914, -0.10868195]))]
    Interesting that scale size of 50 does worse.

    Crop is not 150, so this is not really an apples to apples comparison with kmeans_003.
    It is possibly worth making a submission with scale 30 and rf size 10.
    """
    crops = [200]  # Should probably also add 250
    scales = [30, 50]  # Scaling is probably the most important part here

    scores = []
    for s in scales:
        crop = 200
        n_centroids = 1600
        n_patches = 400000
        # rf_size = int(round(s * .2))
        rf_size = 10
        logger.info("Training with crop {}, scale {}, patch size {}, patches {}, centroids {}".format(crop, s, rf_size, n_patches, n_centroids))

        train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                       result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
                                                       crop_size=crop,
                                                       scaled_size=s,
                                                       n_jobs=-1,
                                                       memmap=True)
        # spherical generator
        kmeans_generator = KMeansFeatureGenerator(n_centroids=n_centroids,
                                                  rf_size=rf_size,
                                                  result_path='data/mdl_kmeans_004_scale_{}_rf_{}'.format(s, rf_size),
                                                  n_iterations=20,
                                                  n_jobs=-1)
        patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                             patch_size=rf_size,
                                                             n_jobs=-1)
        images = train_x_crop_scale.transform()
        logger.info("Images ndarray shape: {}".format(images.shape))
        patches = patch_extractor.transform(images)
        logger.info("Patches ndarray shape: {}".format(patches.shape))
        kmeans_generator.fit(patches)
        del patches
        gc.collect()

        train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_004_scale_{}_rf_{}.npy'.format(s, rf_size), memmap=True)
        train_y = classes.train_solutions.data
        # Unload some objects
        del images
        gc.collect()
        logger.info("Train X ndarray shape: {}".format(train_x.shape))

        wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 250}, n_jobs=-1)
        wrapper.cross_validation(train_x, train_y, n_folds=2, parallel_estimator=True)
        scores.append((s, wrapper.cv_scores))

        del wrapper
        gc.collect()
def kmeans_003():
    """
    Grid search for Ridge RF parameters
    Not sure whether to use spherical or minibatch, so maybe do one run with both

    .106 on the leaderboard. So the difference in CV scores narrowed
    """
    train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                   result_path='data/data_train_crop_150_scale_15.npy',
                                                   crop_size=150,
                                                   scaled_size=15,
                                                   n_jobs=-1,
                                                   memmap=True)
    # spherical generator
    kmeans_generator = KMeansFeatureGenerator(n_centroids=1600,
                                              rf_size=5,
                                              result_path='data/mdl_kmeans_002_new',
                                              n_iterations=20,
                                              n_jobs=-1)
    # minibatch generator
    # kmeans_generator = models.KMeansFeatures.KMeansFeatureGenerator(n_centroids=1600,
    #                                                                 rf_size=5,
    #                                                                 result_path='data/mdl_kmeans_002_new_minibatch',
    #                                                                 method='minibatch',
    #                                                                 n_init=1,
    #                                                                 n_jobs=-1)

    # Don't need to fit, as already cached
    patches = ''
    kmeans_generator.fit(patches)

    images = train_x_crop_scale.transform()
    # Problematic here - memory usage spikes to ~ 11GB when threads return
    train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_002_new.npy', memmap=True)
    train_y = classes.train_solutions.data
    # Unload some objects
    del images
    gc.collect()

    # mdl = models.Ridge.RidgeRFEstimator(alpha=14, n_estimators=250, n_jobs=-1)
    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 14, 'n_estimators': 500}, n_jobs=-1)
    params = {
        'alpha': [150, 250, 500, 750, 1000],
        'n_estimators': [250]
    }

    # 500 trees and alpha 25 gives cv of .10972 on 2-fold CV, but 25 was on the upper range of the search space,
    # so need to re-run with a larger range of alpha
    # Will hit 30GB of ram with 500 trees.
    wrapper.grid_search(train_x, train_y, params, refit=False, parallel_estimator=True)

    # [mean: -0.11024, std: 0.00018, params: {'n_estimators': 250, 'alpha': 20.0},
    #  mean: -0.11000, std: 0.00019, params: {'n_estimators': 250, 'alpha': 25.0},
    #  mean: -0.10969, std: 0.00018, params: {'n_estimators': 250, 'alpha': 35},
    #  mean: -0.10934, std: 0.00019, params: {'n_estimators': 250, 'alpha': 50},
    #  mean: -0.10892, std: 0.00025, params: {'n_estimators': 250, 'alpha': 75},
    #  mean: -0.10860, std: 0.00025, params: {'n_estimators': 250, 'alpha': 100},
    #  mean: -0.10828, std: 0.00019, params: {'n_estimators': 250, 'alpha': 150},
    #  mean: -0.10789, std: 0.00016, params: {'n_estimators': 250, 'alpha': 250},
    #  mean: -0.10775, std: 0.00024, params: {'n_estimators': 250, 'alpha': 500},
    #  mean: -0.10779, std: 0.00022, params: {'n_estimators': 250, 'alpha': 750},
    #  mean: -0.10784, std: 0.00023, params: {'n_estimators': 250, 'alpha': 1000}]

    # Fit the final model
    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
    wrapper.fit(train_x, train_y)

    test_x_crop_scale = CropScaleImageTransformer(training=False,
                                                  result_path='data/data_test_crop_150_scale_15.npy',
                                                  crop_size=150,
                                                  scaled_size=15,
                                                  n_jobs=-1,
                                                  memmap=True)
    test_images = test_x_crop_scale.transform()
    test_x = kmeans_generator.transform(test_images, save_to_file='data/data_kmeans_test_features_003_new.npy', memmap=True)
    res = wrapper.predict(test_x)
    sub = classes.Submission(res)
    sub.to_file('sub_kmeans_003.csv')
def kmeans_005():
    """
    Testing whether extracting patches from train and test images works better

    [(500000, False, array([-0.10799986, -0.10744586])),
     (500000, True, array([-0.10790803, -0.10733288])),
     (600000, False, array([-0.10812188, -0.10735988])),
     (600000, True, array([-0.10778652, -0.10752664]))]
    """
    n_patches_vals = [500000, 600000, 700000]
    include_test_images = [False, True]

    scores = []
    for n_patches in n_patches_vals:
        for incl in include_test_images:
            s = 15
            crop = 150
            n_centroids = 1600
            rf_size = 5
            logger.info("Training with n_patches {}, with test images {}".format(n_patches, incl))

            train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                           result_path='data/data_train_crop_{}_scale_{}.npy'.format(crop, s),
                                                           crop_size=crop,
                                                           scaled_size=s,
                                                           n_jobs=-1,
                                                           memmap=True)
            test_x_crop_scale = CropScaleImageTransformer(training=False,
                                                          result_path='data/data_test_crop_{}_scale_{}.npy'.format(crop, s),
                                                          crop_size=crop,
                                                          scaled_size=s,
                                                          n_jobs=-1,
                                                          memmap=True)
            kmeans_generator = KMeansFeatureGenerator(n_centroids=n_centroids,
                                                      rf_size=rf_size,
                                                      result_path='data/mdl_kmeans_005_patches_{}_test{}'.format(n_patches, incl),
                                                      n_iterations=20,
                                                      n_jobs=-1)
            patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                                 patch_size=rf_size,
                                                                 n_jobs=-1)
            images = train_x_crop_scale.transform()
            if incl:
                test_images = test_x_crop_scale.transform()
                images = np.vstack([images, test_images])
            logger.info("Extracting patches from images ndarray shape: {}".format(images.shape))

            patches = patch_extractor.transform(images)
            logger.info("Patches ndarray shape: {}".format(patches.shape))
            kmeans_generator.fit(patches)
            del patches
            gc.collect()

            # Reload the original images
            images = train_x_crop_scale.transform()
            logger.info("Generating features on images ndarray shape: {}".format(images.shape))
            train_x = kmeans_generator.transform(images, save_to_file='data/data_kmeans_features_005_patches_{}_test_{}.npy'.format(n_patches, incl), memmap=True)
            train_y = classes.train_solutions.data
            # Unload some objects
            del images
            gc.collect()

            wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 250}, n_jobs=-1)
            wrapper.cross_validation(train_x, train_y, n_folds=2, parallel_estimator=True)

            score = (n_patches, incl, wrapper.cv_scores)
            logger.info("Score: {}".format(score))
            scores.append(score)

            del wrapper
            gc.collect()
def ensemble_001():
    """
    Ensemble of kmeans and random forest results
    Conducting some analysis of whether the errors from these two models for individual Ys are different

    Ensembled error is .1149. Kmeans is better on every class than RF.
    """
    n_centroids = 3000
    s = 15
    crop = 150
    n_patches = 400000
    rf_size = 5

    train_x_crop_scale = CropScaleImageTransformer(training=True,
                                                   crop_size=crop,
                                                   scaled_size=s,
                                                   n_jobs=-1,
                                                   memmap=True)
    kmeans_generator = KMeansFeatureGenerator(n_centroids=n_centroids,
                                              rf_size=rf_size,
                                              result_path='data/mdl_ensemble_001',
                                              n_iterations=20,
                                              n_jobs=-1)
    patch_extractor = models.KMeansFeatures.PatchSampler(n_patches=n_patches,
                                                         patch_size=rf_size,
                                                         n_jobs=-1)
    images = train_x_crop_scale.transform()
    patches = patch_extractor.transform(images)
    kmeans_generator.fit(patches)
    del patches
    gc.collect()

    X = kmeans_generator.transform(images, save_to_file='data/data_ensemble_001.npy', memmap=True)
    Y = classes.train_solutions.data
    # Unload some objects
    del images
    gc.collect()

    # Get the input for the RF so that we can split together
    sampler = SampleTransformer(training=True, steps=2, step_size=20, n_jobs=-1)
    pX = sampler.transform()

    # manual split of train and test
    train_x, test_x, ptrain_x, ptest_x, train_y, test_y = train_test_split(X, pX, Y, test_size=0.5)

    wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
    wrapper.fit(train_x, train_y)
    kmeans_preds = wrapper.predict(test_x)

    pWrapper = ModelWrapper(RandomForestRegressor, {'n_estimators': 500, 'verbose': 3}, n_jobs=-1)
    pWrapper.fit(ptrain_x, train_y)
    pixel_preds = pWrapper.predict(ptest_x)

    logger.info('Kmeans')
    classes.colwise_rmse(kmeans_preds, test_y)
    classes.rmse(kmeans_preds, test_y)
    logger.info('Pixel RF')
    classes.colwise_rmse(pixel_preds, test_y)
    classes.rmse(pixel_preds, test_y)

    logger.info("Ensembling predictions")
    etrain_x = np.hstack((wrapper.predict(train_x), pWrapper.predict(ptrain_x)))
    etest_x = np.hstack((kmeans_preds, pixel_preds))
    eWrapper = ModelWrapper(RandomForestRegressor, {'n_estimators': 500, 'verbose': 3}, n_jobs=-1)
    eWrapper.fit(etrain_x, train_y)
    ensemble_preds = eWrapper.predict(etest_x)
    classes.colwise_rmse(ensemble_preds, test_y)
    classes.rmse(ensemble_preds, test_y)
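# classes.rmse / classes.colwise_rmse are repo helpers; a minimal numpy equivalent of
# what they presumably compute (RMSE overall and per output column), for reference:
import numpy as np

def rmse(pred, actual):
    # overall root-mean-squared error across all cells
    return np.sqrt(np.mean((pred - actual) ** 2))

def colwise_rmse(pred, actual):
    # one RMSE per output column (per galaxy class)
    return np.sqrt(np.mean((pred - actual) ** 2, axis=0))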
# Unload some objects for memory
del images
gc.collect()

# ModelWrapper is a convenience class that we created to automate some of the typical tasks
# like logging, grid search and cross validation.
# For fit, it is basically equivalent to calling fit on the estimator.
# The estimator takes the X and y and trains a ridge regression (sklearn.linear_model.Ridge),
# predicts using the ridge regressor, then uses the results of the prediction to train a random forest.
wrapper = ModelWrapper(models.Ridge.RidgeRFEstimator, {'alpha': 500, 'n_estimators': 500}, n_jobs=-1)
wrapper.fit(train_x, train_y)

test_x_crop_scale = CropScaleImageTransformer(training=False,
                                              result_path='data/data_test_crop_{}_scale_{}.npy'.format(crop, s),
                                              crop_size=crop,
                                              scaled_size=s,
                                              n_jobs=-1,
                                              memmap=True)

# Crop and scale the test images
test_images = test_x_crop_scale.transform()

# Generate the test features
test_x = kmeans_generator.transform(test_images, save_to_file='data/data_test_kmeans_features_006_centroids_{}.npy'.format(n_centroids), memmap=True)

# Predict on the test features
res = wrapper.predict(test_x)

# Generate a submission file
sub = classes.Submission(res)
sub.to_file('sub_kmeans_006.csv')
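# A minimal sketch of what RidgeRFEstimator might look like, based only on the
# description in the comments above (a ridge regression whose predictions feed a
# random forest); the actual class in models.Ridge may differ in parameters and details.
from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor

class RidgeRFSketch(BaseEstimator, RegressorMixin):
    def __init__(self, alpha=500, n_estimators=500, n_jobs=1):
        self.alpha = alpha
        self.n_estimators = n_estimators
        self.n_jobs = n_jobs

    def fit(self, X, y):
        # Stage 1: ridge regression on the kmeans features
        self.ridge_ = Ridge(alpha=self.alpha)
        self.ridge_.fit(X, y)
        # Stage 2: random forest trained on the ridge predictions
        self.rf_ = RandomForestRegressor(n_estimators=self.n_estimators, n_jobs=self.n_jobs)
        self.rf_.fit(self.ridge_.predict(X), y)
        return self

    def predict(self, X):
        # Chain the two stages: ridge predictions become the forest's inputs
        return self.rf_.predict(self.ridge_.predict(X))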