예제 #1
0
    return [nFeatures, f"{test_mse:.3f}", f"{gen_mse:.3f}"
            ] + [int(x) for x in rfe.support_]


# Script entry point: for each of n_folds cross-validation folds, configure an
# estimator and bind it (with that fold's train/test data) into a
# feature-reduction callable.
if __name__ == '__main__':
    print("Reading Data")
    # Experiment configuration.
    n_folds = 4
    reduction_step = 1
    nTrials = 100
    n_estimators = 50
    # NOTE(review): max_depth is defined here, but the estimator below is
    # configured with the literal max_depth=10 — confirm which value is intended.
    max_depth = 20
    nFeatures = 4
    # Not used in the visible part of this script; presumably a worker count
    # consumed further down — TODO confirm.
    nproc = 8

    pts_data, x_data_raw, y_data_raw = read_csv_data("pts_merged_final.csv")
    # The full input set is normalized once, before any fold split is made.
    x_data_norm: np.ndarray = EstimatorBase.normalize(x_data_raw)
    # One entry per trial, every entry equal to nFeatures.
    nFeaturesList = [nFeatures] * nTrials

    for iFold in range(n_folds):
        # Partition points, inputs and targets for fold iFold of n_folds.
        pts_train, pts_valid, x_data_train, x_data_test, y_data_train, y_data_test = getKFoldSplit(
            pts_data, x_data_norm, y_data_raw, n_folds, iFold)

        # A fresh "rf" estimator (presumably a random forest — TODO confirm)
        # is created and configured for every fold.
        modParms = dict(n_estimators=n_estimators, max_depth=10)
        estimator: EstimatorBase = EstimatorBase.new("rf")
        estimator.update_parameters(**modParms)

        print("Computing feature reductions")
        # Pre-bind the estimator, this fold's data and the reduction step as
        # the leading positional arguments of feature_reduction; any remaining
        # arguments are supplied when run_feature_reduction is called.
        run_feature_reduction = functools.partial(feature_reduction, estimator,
                                                  x_data_train, y_data_train,
                                                  x_data_test, y_data_test,
                                                  reduction_step)
예제 #2
0
    return (shuffled_input_data, shuffled_training_data)


def shuffle_feature(input_data: np.ndarray, iFeature: int) -> np.ndarray:
    """Return a copy of ``input_data`` with one feature column permuted.

    The values of column ``iFeature`` (axis 1) are randomly reordered along
    axis 0 using NumPy's global random state; every other column is left
    unchanged. The input array itself is never modified.

    Args:
        input_data: Array with samples on axis 0 and features on axis 1.
        iFeature: Index along axis 1 of the feature to shuffle.

    Returns:
        A new array with the same shape and dtype as ``input_data``.

    Raises:
        IndexError: If ``input_data`` has fewer than two dimensions or
            ``iFeature`` is out of range for axis 1.
    """
    # Work on a copy so the caller's array stays intact.
    result = np.copy(input_data)
    # ``result[:, iFeature]`` is a view into ``result``, so shuffling it in
    # place permutes just that column. This avoids the split/stack/squeeze
    # round trip, whose ``squeeze()`` dropped every size-1 axis and returned a
    # 1-D array for single-row or single-column inputs.
    np.random.shuffle(result[:, iFeature])
    return result


# Script entry point: build nVersions estimators of type modelType, each on a
# freshly shuffled copy of the normalized data and with its own random_state.
# n_inputs, make_plots, nVersions, parameters and modelType are defined outside
# the visible part of this file.
if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data0 = read_csv_data("pts_merged_final.csv")
    # Only the first n_inputs columns of the raw inputs are kept and normalized.
    x_data_norm0 = EstimatorBase.normalize(x_data_raw[:, 0:n_inputs])

    # A figure and axes are created only when plotting is enabled; otherwise
    # both names are bound to None.
    if make_plots:
        fig, ax = plt.subplots()
    else:
        fig, ax = None, None

    for iVersion in range(nVersions):
        # shuffle_data is called once per version on the unshuffled base arrays.
        x_data_norm, y_data = shuffle_data(x_data_norm0, y_data0)
        # NOTE(review): modParms aliases parameters[modelType] rather than
        # copying it, so (assuming that entry is a mutable dict) the
        # random_state assignment below is written into the shared parameters
        # table as well — confirm this is intended.
        modParms = parameters[modelType]
        # The version index doubles as the estimator's random seed.
        modParms['random_state'] = iVersion
        estimator: EstimatorBase = EstimatorBase.new(modelType)
        estimator.update_parameters(**modParms)
        print(
            f"Executing {modelType} estimator, parameters: { estimator.instance_parameters.items() } "
        )
예제 #3
0
            padded_fe.append(feature_importances[iRBN])
            iRBN = iRBN + 1
        else:
            padded_fe.append(0.0)
    return np.array(padded_fe)


# Script entry point: read the data set, split it sequentially into training
# and test portions, and configure an "rf" estimator. training_fraction is
# defined outside the visible part of this file.
if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data_raw = read_csv_data("pts_merged_final.csv")
    n_inputs = x_data_raw.shape[1]
    # Labels "B-1" .. "B-<n_inputs>", one per input column (1-based).
    band_names = [f"B-{iB}" for iB in range(1, n_inputs + 1)]
    n_total_samples = x_data_raw.shape[0]
    # int() truncates, so the training count is rounded down.
    n_training_samples = int(n_total_samples * training_fraction)

    # The first n_training_samples rows form the training set and the rest form
    # the test set; the rows are not shuffled here.
    # NOTE(review): the two slices are normalized by separate calls. If
    # normalize() derives its scaling from the data it is given, train and test
    # end up on different scales — confirm against EstimatorBase.normalize.
    x_data_train: np.ndarray = EstimatorBase.normalize(
        x_data_raw[:n_training_samples])
    y_data_train = y_data_raw[:n_training_samples]
    x_data_test: np.ndarray = EstimatorBase.normalize(
        x_data_raw[n_training_samples:])
    y_data_test = y_data_raw[n_training_samples:]

    # "rf" is presumably a random-forest estimator — TODO confirm.
    modParms = dict(n_estimators=70, max_depth=20)
    estimator: EstimatorBase = EstimatorBase.new("rf")
    estimator.update_parameters(**modParms)
    print("Computing base fit")

    # Start out empty; presumably filled by code beyond the visible part of
    # the script — TODO confirm.
    predictions = []
    feature_importance = []
    scores = []

    # Independent copy, so later changes to it leave x_data_train untouched.
    train_data_reduced = x_data_train.copy()