    return (shuffled_input_data, shuffled_training_data)


def shuffle_feature(input_data: np.ndarray, iFeature: int) -> np.ndarray:
    # Split into per-band columns, shuffle the selected band in place,
    # then reassemble the sample matrix.
    features = np.split(input_data, input_data.shape[1], axis=1)
    shuffled_feature = np.copy(features[iFeature])
    np.random.shuffle(shuffled_feature)
    features[iFeature] = shuffled_feature
    result = np.stack(features, axis=1).squeeze()
    return result


if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data0 = read_csv_data("pts_merged_final.csv")
    x_data_norm0 = EstimatorBase.normalize(x_data_raw[:, 0:n_inputs])
    if make_plots:
        fig, ax = plt.subplots()
    else:
        fig, ax = None, None
    for iVersion in range(nVersions):
        x_data_norm, y_data = shuffle_data(x_data_norm0, y_data0)
        modParms = parameters[modelType]
        modParms['random_state'] = iVersion
        estimator: EstimatorBase = EstimatorBase.new(modelType)
        estimator.update_parameters(**modParms)
        print(f"Executing {modelType} estimator, parameters: {estimator.instance_parameters.items()}")
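# The body of shuffle_data is not shown in the excerpt above; the following
# is a minimal sketch of what its return statement implies, assuming it
# draws one random permutation and applies it jointly to the inputs and the
# targets so that sample pairs stay aligned. The internals are an
# assumption, not the original implementation.
import numpy as np

def shuffle_data(input_data: np.ndarray, training_data: np.ndarray):
    permutation = np.random.permutation(input_data.shape[0])  # one shared sample order
    shuffled_input_data = input_data[permutation]
    shuffled_training_data = training_data[permutation]
    return (shuffled_input_data, shuffled_training_data)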
def __init__(self, **kwargs):
    EstimatorBase.__init__(self, **kwargs)
xTrainFile = os.path.join(outDir, f"{aviris_tile}_corr_v2p9_{version}_{nbands}.nc")
x_dataset: xa.Dataset = xa.open_dataset(xTrainFile)
x_data_raw = x_dataset.band_data
# Flatten the (y, x) raster into a 1D sample axis and keep only valid pixels.
x_data_full = x_data_raw.stack(samples=('y', 'x')).transpose()
x_data = x_data_full.isel(samples=get_indices(valid_mask)).assign_coords(samples=samples_coord)
x_binned_data, y_binned_data = get_binned_sampling(x_data, y_data, n_bins, n_samples_per_bin)
x_data_train = x_binned_data.values
y_data_train = y_binned_data.values
modParms = parameters[modelType]
estimator: EstimatorBase = EstimatorBase.new(modelType)
estimator.update_parameters(**modParms)
print(f"Executing {modelType} estimator, parameters: {estimator.instance_parameters.items()}")
ts_percent = (y_data_train.size * 100.0) / y_data.size
print(f"Using {y_data_train.size} samples out of {y_data.size}: {ts_percent:.3f}%")
estimator.fit(x_data_train, y_data_train)
print(f"Performance {modelType}: ")
train_prediction = estimator.predict(x_data.values)
mse_train = mean_squared_error(y_data.values, train_prediction)
print(f" ----> TRAIN SCORE: MSE= {mse_train:.2f}")
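# get_binned_sampling is called above but defined elsewhere; here is a
# minimal sketch of the idea it names, assuming it histograms the target
# values into n_bins and draws up to n_samples_per_bin samples from each
# bin to balance the training set. Everything beyond the call signature is
# an assumption.
import numpy as np
import xarray as xa

def get_binned_sampling(x_data: xa.DataArray, y_data: xa.DataArray,
                        n_bins: int, n_samples_per_bin: int):
    y = y_data.values
    bin_edges = np.linspace(y.min(), y.max(), n_bins + 1)
    selected = []
    for iBin in range(n_bins):
        # indices of the samples whose target falls into this bin
        in_bin = np.where((y >= bin_edges[iBin]) & (y < bin_edges[iBin + 1]))[0]
        if in_bin.size > 0:
            n_take = min(n_samples_per_bin, in_bin.size)
            selected.append(np.random.choice(in_bin, n_take, replace=False))
    indices = np.concatenate(selected)
    return x_data.isel(samples=indices), y_data.isel(samples=indices)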
    return [nFeatures, f"{test_mse:.3f}", f"{gen_mse:.3f}"] + [int(x) for x in rfe.support_]


if __name__ == '__main__':
    print("Reading Data")
    n_folds = 4
    reduction_step = 1
    nTrials = 100
    n_estimators = 50
    max_depth = 20
    nFeatures = 4
    nproc = 8
    pts_data, x_data_raw, y_data_raw = read_csv_data("pts_merged_final.csv")
    x_data_norm: np.ndarray = EstimatorBase.normalize(x_data_raw)
    nFeaturesList = [nFeatures] * nTrials
    for iFold in range(n_folds):
        pts_train, pts_valid, x_data_train, x_data_test, y_data_train, y_data_test = getKFoldSplit(
            pts_data, x_data_norm, y_data_raw, n_folds, iFold)
        modParms = dict(n_estimators=n_estimators, max_depth=max_depth)
        estimator: EstimatorBase = EstimatorBase.new("rf")
        estimator.update_parameters(**modParms)
        print("Computing feature reductions")
        run_feature_reduction = functools.partial(feature_reduction, estimator,
                                                  x_data_train, y_data_train,
                                                  x_data_test, y_data_test,
                                                  reduction_step)
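# feature_reduction is the target of functools.partial above; only its
# return statement survives at the top of this excerpt. A minimal sketch
# under the assumption that it wraps sklearn.feature_selection.RFE, scoring
# the fit on the training fold (test_mse) and the held-out fold (gen_mse).
# The internals, and substituting a plain RandomForestRegressor for the
# EstimatorBase wrapper (whose API is not shown), are assumptions.
from sklearn.ensemble import RandomForestRegressor
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error

def feature_reduction(estimator, x_train, y_train, x_test, y_test,
                      reduction_step, nFeatures):
    # Recursively eliminate features down to nFeatures, dropping
    # reduction_step features per iteration.
    rfe = RFE(RandomForestRegressor(n_estimators=50, max_depth=20),
              n_features_to_select=nFeatures, step=reduction_step)
    rfe.fit(x_train, y_train)
    test_mse = mean_squared_error(y_train, rfe.predict(x_train))  # fit-set error
    gen_mse = mean_squared_error(y_test, rfe.predict(x_test))     # held-out error
    return [nFeatures, f"{test_mse:.3f}", f"{gen_mse:.3f}"] + [int(x) for x in rfe.support_]

# run_feature_reduction is plausibly mapped over nFeaturesList with a
# multiprocessing.Pool(nproc); the excerpt ends before that call, so this
# is an assumption.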
            padded_fe.append(feature_importances[iRBN])
            iRBN = iRBN + 1
        else:
            padded_fe.append(0.0)
    return np.array(padded_fe)


if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data_raw = read_csv_data("pts_merged_final.csv")
    n_inputs = x_data_raw.shape[1]
    band_names = [f"B-{iB}" for iB in range(1, n_inputs + 1)]
    n_total_samples = x_data_raw.shape[0]
    n_training_samples = int(n_total_samples * training_fraction)
    x_data_train: np.ndarray = EstimatorBase.normalize(x_data_raw[:n_training_samples])
    y_data_train = y_data_raw[:n_training_samples]
    x_data_test: np.ndarray = EstimatorBase.normalize(x_data_raw[n_training_samples:])
    y_data_test = y_data_raw[n_training_samples:]
    modParms = dict(n_estimators=70, max_depth=20)
    estimator: EstimatorBase = EstimatorBase.new("rf")
    estimator.update_parameters(**modParms)
    print("Computing base fit")
    predictions = []
    feature_importance = []
    scores = []
    train_data_reduced = x_data_train.copy()
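# The head of the padding helper whose tail opens this excerpt is cut off;
# a minimal sketch of the apparent intent, assuming it expands importances
# computed on a reduced band set back onto the full band list, writing 0.0
# for bands that were dropped. The name and signature are assumptions.
import numpy as np

def pad_feature_importances(feature_importances: np.ndarray,
                            support: np.ndarray) -> np.ndarray:
    padded_fe = []
    iRBN = 0
    for keep in support:  # boolean mask over the full band list
        if keep:
            padded_fe.append(feature_importances[iRBN])
            iRBN = iRBN + 1
        else:
            padded_fe.append(0.0)
    return np.array(padded_fe)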
def __init__(self, **kwargs):
    EstimatorBase.__init__(self, handles_validation=True, **kwargs)
    self.init_weights = None
    self.final_weights = None
    self.init_biases = None
    self.final_biases = None
    return (shuffled_input_data, shuffled_training_data)


def shuffle_feature(input_data: np.ndarray, iFeature: int) -> np.ndarray:
    # Split into per-band columns, shuffle the selected band in place,
    # then reassemble the sample matrix.
    features = np.split(input_data, input_data.shape[1], axis=1)
    shuffled_feature = np.copy(features[iFeature])
    np.random.shuffle(shuffled_feature)
    features[iFeature] = shuffled_feature
    result = np.stack(features, axis=1).squeeze()
    return result


if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data = read_csv_data("pts_merged_final.csv")
    x_data_norm = EstimatorBase.normalize(x_data_raw[:, 0:n_inputs])
    band_names = [f"B-{iB}" for iB in range(1, n_inputs + 1)]
    for modelType in modelTypes:
        barplots = MultiBar(f"{modelType} Feature Importance: Shuffle Method", band_names)
        feature_importances = []
        for iVersion in range(nVersions):
            saved_model_path = os.path.join(outDir, f"model.{modelType}.T{iVersion}.pkl")
            print(f"Loading estimator from {saved_model_path}")
            with open(saved_model_path, "rb") as filehandler:
                estimator = pickle.load(filehandler)
            baseline_prediction = estimator.predict(x_data_norm)
            baseline_mse = mean_squared_error(y_data, baseline_prediction)
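# The excerpt ends at the baseline MSE; a minimal sketch of how the shuffle
# method typically completes, assuming each band's importance is the MSE
# degradation when that band alone is shuffled across samples (using
# shuffle_feature above). This continuation is an assumption, not the
# original code.
import numpy as np
from sklearn.metrics import mean_squared_error

def shuffle_importance(estimator, x_data: np.ndarray, y_data: np.ndarray,
                       baseline_mse: float) -> np.ndarray:
    importances = []
    for iFeature in range(x_data.shape[1]):
        shuffled_prediction = estimator.predict(shuffle_feature(x_data, iFeature))
        # importance = error increase relative to the unshuffled baseline
        importances.append(mean_squared_error(y_data, shuffled_prediction) - baseline_mse)
    return np.array(importances)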