Example #1
    return (shuffled_input_data, shuffled_training_data)


def shuffle_feature(input_data: np.ndarray, iFeature: int) -> np.ndarray:
    # Return a copy of input_data in which only column iFeature is randomly permuted.
    features = np.split(input_data, input_data.shape[1], axis=1)
    shuffled_feature = np.copy(features[iFeature])
    np.random.shuffle(shuffled_feature)
    features[iFeature] = shuffled_feature
    result = np.stack(features, axis=1).squeeze()
    return result


if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data0 = read_csv_data("pts_merged_final.csv")
    x_data_norm0 = EstimatorBase.normalize(x_data_raw[:, 0:n_inputs])

    if make_plots:
        fig, ax = plt.subplots()
    else:
        fig, ax = None, None

    for iVersion in range(nVersions):
        x_data_norm, y_data = shuffle_data(x_data_norm0, y_data0)
        modParms = parameters[modelType]
        modParms['random_state'] = iVersion
        estimator: EstimatorBase = EstimatorBase.new(modelType)
        estimator.update_parameters(**modParms)
        print(
            f"Executing {modelType} estimator, parameters: {estimator.instance_parameters.items()}"
        )
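
# Hedged sketch (not part of the original example, which is truncated above):
# the shuffle_data helper whose return statement opens this example could be a
# joint random permutation that keeps inputs and targets aligned, e.g.:
import numpy as np

def shuffle_data(input_data: np.ndarray, training_data: np.ndarray) -> tuple:
    # One shared permutation so rows of the two arrays stay paired.
    indices = np.random.permutation(input_data.shape[0])
    return (input_data[indices], training_data[indices])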
Example #2
    def __init__(self, **kwargs):
        EstimatorBase.__init__(self, **kwargs)
Example #3
    xTrainFile = os.path.join(
        outDir, f"{aviris_tile}_corr_v2p9_{version}_{nbands}.nc")
    x_dataset: xa.Dataset = xa.open_dataset(xTrainFile)
    x_data_raw = x_dataset.band_data
    x_data_full = x_data_raw.stack(samples=('y', 'x')).transpose()
    x_data = x_data_full.isel(samples=get_indices(valid_mask)).assign_coords(
        samples=samples_coord)

    x_binned_data, y_binned_data = get_binned_sampling(x_data, y_data, n_bins,
                                                       n_samples_per_bin)
    x_data_train = x_binned_data.values
    y_data_train = y_binned_data.values

    modParms = parameters[modelType]
    estimator: EstimatorBase = EstimatorBase.new(modelType)
    estimator.update_parameters(**modParms)
    print(
        f"Executing {modelType} estimator, parameters: {estimator.instance_parameters.items()}"
    )
    ts_percent = (y_data_train.size * 100.0) / y_data.size
    print(
        f"Using {y_data_train.size} samples out of {y_data.size}: {ts_percent:.3f}%"
    )
    estimator.fit(x_data_train, y_data_train)
    print(f"Performance {modelType}: ")

    train_prediction = estimator.predict(x_data.values)
    mse_train = mean_squared_error(y_data.values, train_prediction)
    print(f" ----> TRAIN SCORE: MSE= {mse_train:.2f}")
Example #4
    return ([nFeatures, f"{test_mse:.3f}", f"{gen_mse:.3f}"] +
            [int(x) for x in rfe.support_])


if __name__ == '__main__':
    print("Reading Data")
    n_folds = 4
    reduction_step = 1
    nTrials = 100
    n_estimators = 50
    max_depth = 20
    nFeatures = 4
    nproc = 8

    pts_data, x_data_raw, y_data_raw = read_csv_data("pts_merged_final.csv")
    x_data_norm: np.ndarray = EstimatorBase.normalize(x_data_raw)
    nFeaturesList = [nFeatures] * nTrials

    for iFold in range(n_folds):
        pts_train, pts_valid, x_data_train, x_data_test, y_data_train, y_data_test = getKFoldSplit(
            pts_data, x_data_norm, y_data_raw, n_folds, iFold)

        modParms = dict(n_estimators=n_estimators, max_depth=max_depth)
        estimator: EstimatorBase = EstimatorBase.new("rf")
        estimator.update_parameters(**modParms)

        print("Computing feature reductions")
        run_feature_reduction = functools.partial(feature_reduction, estimator,
                                                  x_data_train, y_data_train,
                                                  x_data_test, y_data_test,
                                                  reduction_step)
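
# Hedged sketch (an assumption): the rfe.support_ reference earlier in this
# example suggests feature_reduction wraps sklearn's recursive feature
# elimination; estimator.instance below is a hypothetical attribute standing in
# for the underlying sklearn regressor held by the EstimatorBase wrapper, and
# the train/test split behind the original test_mse/gen_mse values is a guess.
from sklearn.feature_selection import RFE
from sklearn.metrics import mean_squared_error

def feature_reduction(estimator, x_train, y_train, x_test, y_test,
                      reduction_step, nFeatures):
    # Eliminate features down to nFeatures, then score on train and test data.
    rfe = RFE(estimator.instance, n_features_to_select=nFeatures,
              step=reduction_step)
    rfe.fit(x_train, y_train)
    test_mse = mean_squared_error(y_train, rfe.predict(x_train))
    gen_mse = mean_squared_error(y_test, rfe.predict(x_test))
    return ([nFeatures, f"{test_mse:.3f}", f"{gen_mse:.3f}"] +
            [int(x) for x in rfe.support_])

# The partial bound above would then typically be mapped over nFeaturesList,
# e.g. with multiprocessing.Pool(nproc).map(run_feature_reduction, nFeaturesList).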
Example #5
            padded_fe.append(feature_importances[iRBN])
            iRBN = iRBN + 1
        else:
            padded_fe.append(0.0)
    return np.array(padded_fe)
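
# Hedged sketch (an assumption): the truncated loop above appears to pad a
# reduced set of importances back out to the full band list, writing 0.0 for
# eliminated bands; the names below (pad_importances, reduced_band_names) are
# hypothetical.
import numpy as np

def pad_importances(feature_importances, reduced_band_names, band_names):
    padded_fe, iRBN = [], 0
    for band in band_names:
        if iRBN < len(reduced_band_names) and band == reduced_band_names[iRBN]:
            padded_fe.append(feature_importances[iRBN])
            iRBN = iRBN + 1
        else:
            padded_fe.append(0.0)
    return np.array(padded_fe)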


if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data_raw = read_csv_data("pts_merged_final.csv")
    n_inputs = x_data_raw.shape[1]
    band_names = [f"B-{iB}" for iB in range(1, n_inputs + 1)]
    n_total_samples = x_data_raw.shape[0]
    n_training_samples = int(n_total_samples * training_fraction)

    x_data_train: np.ndarray = EstimatorBase.normalize(
        x_data_raw[:n_training_samples])
    y_data_train = y_data_raw[:n_training_samples]
    x_data_test: np.ndarray = EstimatorBase.normalize(
        x_data_raw[n_training_samples:])
    y_data_test = y_data_raw[n_training_samples:]

    modParms = dict(n_estimators=70, max_depth=20)
    estimator: EstimatorBase = EstimatorBase.new("rf")
    estimator.update_parameters(**modParms)
    print("Computing base fit")

    predictions = []
    feature_importance = []
    scores = []

    train_data_reduced = x_data_train.copy()
Example #6
    def __init__(self, **kwargs):
        EstimatorBase.__init__(self, handles_validation=True, **kwargs)
        self.init_weights = None
        self.final_weights = None
        self.init_biases = None
        self.final_biases = None

    return (shuffled_input_data, shuffled_training_data)


def shuffle_feature(input_data: np.ndarray, iFeature: int) -> np.ndarray:
    features = np.split(input_data, input_data.shape[1], axis=1)
    shuffled_feature = np.copy(features[iFeature])
    np.random.shuffle(shuffled_feature)
    features[iFeature] = shuffled_feature
    result = np.stack(features, axis=1).squeeze()
    return result


if __name__ == '__main__':
    print("Reading Data")
    pts_data, x_data_raw, y_data = read_csv_data("pts_merged_final.csv")
    x_data_norm = EstimatorBase.normalize(x_data_raw[:, 0:n_inputs])
    band_names = [f"B-{iB}" for iB in range(1, n_inputs + 1)]
    for modelType in modelTypes:
        barplots = MultiBar(f"{modelType} Feature Importance: Shuffle Method",
                            band_names)
        feature_importances = []
        for iVersion in range(nVersions):
            saved_model_path = os.path.join(
                outDir, f"model.{modelType}.T{iVersion}.pkl")
            print(f"Loading estimator from {saved_model_path}")
            with open(saved_model_path, "rb") as filehandler:
                estimator = pickle.load(filehandler)

            baseline_prediction = estimator.predict(x_data_norm)
            baseline_mse = mean_squared_error(y_data, baseline_prediction)
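
# Hedged sketch (not from the original, which is truncated here): a typical
# continuation scores each shuffled feature against the baseline to obtain
# shuffle-based importances for the bar plots set up above.
import numpy as np
from sklearn.metrics import mean_squared_error

def shuffle_importances(estimator, x_data_norm, y_data, baseline_mse, n_inputs):
    # Permute one input column at a time and record the resulting MSE increase.
    importances = []
    for iFeature in range(n_inputs):
        shuffled_input = shuffle_feature(x_data_norm, iFeature)
        shuffled_mse = mean_squared_error(y_data, estimator.predict(shuffled_input))
        importances.append(shuffled_mse - baseline_mse)
    return np.array(importances)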