import functools
import numpy as np

# Tail of the feature_reduction worker; the function head is elided in this
# excerpt. Each call returns one result row: metrics plus a 0/1 mask of the
# features kept by recursive feature elimination (rfe.support_).
    return [nFeatures, f"{test_mse:.3f}", f"{gen_mse:.3f}"] + [int(x) for x in rfe.support_]


if __name__ == '__main__':
    print("Reading Data")

    # Run configuration
    n_folds = 4
    reduction_step = 1
    nTrials = 100
    n_estimators = 50
    max_depth = 20
    nFeatures = 4
    nproc = 8

    pts_data, x_data_raw, y_data_raw = read_csv_data("pts_merged_final.csv")
    x_data_norm: np.ndarray = EstimatorBase.normalize(x_data_raw)
    nFeaturesList = [nFeatures] * nTrials

    for iFold in range(n_folds):
        pts_train, pts_valid, x_data_train, x_data_test, y_data_train, y_data_test = \
            getKFoldSplit(pts_data, x_data_norm, y_data_raw, n_folds, iFold)

        # Use the configured max_depth (the original hardcoded max_depth=10 here,
        # leaving the max_depth setting above unused).
        modParms = dict(n_estimators=n_estimators, max_depth=max_depth)
        estimator: EstimatorBase = EstimatorBase.new("rf")
        estimator.update_parameters(**modParms)

        print("Computing feature reductions")
        run_feature_reduction = functools.partial(
            feature_reduction, estimator,
            x_data_train, y_data_train, x_data_test, y_data_test, reduction_step)
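# The body of feature_reduction is not shown above; the following is a minimal
# sketch, not the author's implementation. It assumes sklearn's RFE (inferred
# only from the rfe.support_ reference in the return statement) and assumes
# EstimatorBase exposes its wrapped sklearn model as `estimator.instance`
# (a hypothetical accessor). The argument order matches the functools.partial
# call above, with nFeatures supplied last by the caller.
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error

def feature_reduction(estimator, x_train, y_train, x_test, y_test, step, nFeatures):
    # Recursively eliminate features down to nFeatures, dropping `step` per pass.
    rfe = RFE(estimator.instance, n_features_to_select=nFeatures, step=step)
    rfe.fit(x_train, y_train)
    test_mse = mean_squared_error(y_train, rfe.predict(x_train))  # in-sample error
    gen_mse = mean_squared_error(y_test, rfe.predict(x_test))     # held-out error
    # Which split feeds which metric is an assumption; only the row format is
    # taken from the original return statement.
    return [nFeatures, f"{test_mse:.3f}", f"{gen_mse:.3f}"] + [int(x) for x in rfe.support_]

# The otherwise-unused nproc and nFeaturesList above suggest the partial is
# mapped over a process pool, e.g. (hypothetical usage):
#     with multiprocessing.Pool(nproc) as pool:
#         results = pool.map(run_feature_reduction, nFeaturesList)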
import numpy as np
import matplotlib.pyplot as plt

# Tail of shuffle_data; the function head is elided in this excerpt.
    return (shuffled_input_data, shuffled_training_data)


def shuffle_feature(input_data: np.ndarray, iFeature: int) -> np.ndarray:
    # Randomly permute a single feature column, leaving all others intact
    # (the standard perturbation for permutation feature importance).
    features = np.split(input_data, input_data.shape[1], axis=1)
    shuffled_feature = np.copy(features[iFeature])
    np.random.shuffle(shuffled_feature)
    features[iFeature] = shuffled_feature
    result = np.stack(features, axis=1).squeeze()
    return result


if __name__ == '__main__':
    print("Reading Data")

    # n_inputs, make_plots, nVersions, parameters, and modelType are
    # configuration globals defined earlier in the original script (elided here).
    pts_data, x_data_raw, y_data0 = read_csv_data("pts_merged_final.csv")
    x_data_norm0 = EstimatorBase.normalize(x_data_raw[:, 0:n_inputs])

    if make_plots:
        fig, ax = plt.subplots()
    else:
        fig, ax = None, None

    for iVersion in range(nVersions):
        x_data_norm, y_data = shuffle_data(x_data_norm0, y_data0)
        modParms = parameters[modelType]
        modParms['random_state'] = iVersion
        estimator: EstimatorBase = EstimatorBase.new(modelType)
        estimator.update_parameters(**modParms)
        print(f"Executing {modelType} estimator, parameters: {estimator.instance_parameters.items()}")
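# shuffle_data itself is not shown above; only its return statement survives at
# the top of the excerpt. A minimal sketch, assuming it applies one shared
# random permutation to the rows of both arrays so samples stay aligned with
# their labels across the nVersions runs:
import numpy as np

def shuffle_data(input_data: np.ndarray, training_data: np.ndarray):
    indices = np.random.permutation(input_data.shape[0])  # one shared row order
    shuffled_input_data = input_data[indices]
    shuffled_training_data = training_data[indices]
    return (shuffled_input_data, shuffled_training_data)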
import numpy as np

# Tail of the importance-padding helper; the function head is elided in this
# excerpt. It re-expands the importances of a reduced feature subset to the
# full band list, writing 0.0 for bands that were dropped.
            padded_fe.append(feature_importances[iRBN])
            iRBN = iRBN + 1
        else:
            padded_fe.append(0.0)
    return np.array(padded_fe)


if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data_raw = read_csv_data("pts_merged_final.csv")
    n_inputs = x_data_raw.shape[1]
    band_names = [f"B-{iB}" for iB in range(1, n_inputs + 1)]

    # Simple ordered train/test split; training_fraction is a configuration
    # global defined earlier in the original script (elided here).
    n_total_samples = x_data_raw.shape[0]
    n_training_samples = int(n_total_samples * training_fraction)
    x_data_train: np.ndarray = EstimatorBase.normalize(x_data_raw[:n_training_samples])
    y_data_train = y_data_raw[:n_training_samples]
    x_data_test: np.ndarray = EstimatorBase.normalize(x_data_raw[n_training_samples:])
    y_data_test = y_data_raw[n_training_samples:]

    modParms = dict(n_estimators=70, max_depth=20)
    estimator: EstimatorBase = EstimatorBase.new("rf")
    estimator.update_parameters(**modParms)

    print("Computing base fit")
    predictions = []
    feature_importance = []
    scores = []
    train_data_reduced = x_data_train.copy()
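# The head of the padding helper is elided above; the following is a minimal
# sketch of the whole function under that reading. The name
# pad_feature_importances and its signature are hypothetical; only the loop
# tail (the importance append, the iRBN counter, the 0.0 padding, and the
# np.array return) comes from the excerpt. It walks the full band list in
# order and advances iRBN only when a band was kept in the reduced set.
import numpy as np

def pad_feature_importances(feature_importances: np.ndarray,
                            reduced_band_names: list,
                            all_band_names: list) -> np.ndarray:
    padded_fe = []
    iRBN = 0  # index into the reduced (kept) band list
    for band in all_band_names:
        if iRBN < len(reduced_band_names) and band == reduced_band_names[iRBN]:
            padded_fe.append(feature_importances[iRBN])
            iRBN = iRBN + 1
        else:
            padded_fe.append(0.0)
    return np.array(padded_fe)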