예제 #1
0
                            (X.shape[0], 12, int(X.shape[-1] / (12))))
    return X_reshaped


# prepare data
# Derive the cube and array representations of X, then split all three
# feature views together with the labels in a single call (80/20 split).
X_cubes = make_data_cubes(X, order)
X_arrays = make_data_arrays(X, order)
(
    X_train, X_test,
    X_train_arrays, X_test_arrays,
    X_train_cubes, X_test_cubes,
    y_train, y_test,
) = train_test_split(
    X,
    X_arrays,
    X_cubes,
    y,
    test_size=0.20,
    random_state=random_state,
)

# set up experiments
## Random Forest
# Fit a 100-tree random forest on the flat training features, evaluate it on
# the held-out split, then persist the evaluation output and the fitted model.
rfc = RandomForestClassifier(n_estimators=100, random_state=random_state)
rfc.fit(X_train, y_train)
y_pred = rfc.predict(X_test)
rfc_results = reports(y_test, y_pred)
# Context managers make sure each pickle file is flushed and closed, even if
# serialization raises; a bare open() inside the call leaves the handle open.
with open(RESULTS_PATH + 'rfc_results.pkl', 'wb') as results_file:
    pickle.dump(rfc_results, results_file)
with open(MODELS_PATH + 'random_forest.pkl', 'wb') as model_file:
    pickle.dump(rfc, model_file)

## HybridSN
# The network is built from the shape of a single training cube and from
# y.max() + 1 (presumably the number of classes, assuming labels are coded
# 0..max -- TODO confirm).
# NOTE(review): the third argument looks like a weights/checkpoint path --
# confirm against PixelBasedHybridSpectralNet.
cnn = PixelBasedHybridSpectralNet(X_train_cubes[0].shape,
                                  y.max() + 1,
                                  MODELS_PATH + 'pixelbased_hybridsn.hdf5')
cnn.fit(X_train_cubes, y_train, epochs=200)
y_pred = cnn.predict(X_test_cubes)
cnn_results = reports(y_test, y_pred)
# Context manager closes the results file deterministically instead of
# leaking the handle returned by a bare open().
with open(RESULTS_PATH + 'HybridSN_results.pkl', 'wb') as results_file:
    pickle.dump(cnn_results, results_file)

## ResNet50
resnet = PixelBasedResNet50(X_train_arrays[0].shape,
                            y.max() + 1,
예제 #2
0
# Discard rows containing missing values, then separate the frame into the
# feature matrix (every column except 'X', 'Y', 'Object' and 'Label'), the
# label vector and the object vector.
df = df.dropna()

X = df.drop(columns=['X', 'Y', 'Object', 'Label']).values
y = df['Label'].values
obj = df['Object'].values

## Encode Labels
# label_map maps integer code -> original label, with codes assigned in the
# sorted order returned by np.unique. The inverse lookup is built once here;
# previously it was rebuilt inside the lambda for every element of y.
label_map = dict(enumerate(np.unique(y)))
label_codes = {label: code for code, label in label_map.items()}
y = np.fromiter((label_codes[label] for label in y), dtype=int)
## preprocess data
# Standardize the features; the scaler is fitted on the whole of X.
scaler = StandardScaler()
X = scaler.fit_transform(X)
## generate transfer map
# Train a default random forest on a 75/25 split and take element [1] of the
# report output (presumably a confusion-matrix table whose counts are
# formatted as strings with thousands separators -- TODO confirm).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.25)
rfc = RandomForestClassifier().fit(X_train, y_train)
y_pred = rfc.predict(X_test)
identity_names = {i: i for i in range(len(label_map))}
cm = reports(y_test, y_pred, identity_names)[1]

# Long format: one row per (level_0, level_1) cell, count in column 0.
cmun = cm.unstack().reset_index()

# Remove the summary rows/columns ('UA', 'PA', 'Total') ...
summary_labels = ['UA', 'PA', 'Total']
is_summary = cmun[['level_0', 'level_1']].isin(summary_labels).values.any(axis=1)
cmun = cmun[~is_summary]
# ... and the diagonal cells, leaving only the confusions.
is_off_diagonal = cmun['level_0'] != cmun['level_1']
cmun = cmun[is_off_diagonal]

# Strip the thousands separators so the counts can be sorted numerically.
cmun[0] = cmun[0].apply(lambda count: count.replace(',', '')).astype(int)

# For each level_0 value keep only its most frequent confusion partner.
cmun = cmun.sort_values(0, ascending=False)
cmun = cmun.drop_duplicates(['level_0'], keep='first')
transfer_map = dict(zip(cmun['level_0'], cmun['level_1']))

# Fixed seed value; presumably passed as `random_state` to later steps so
# runs are reproducible -- TODO confirm where it is consumed.
random_state = 0

## classifiers for filters
filts = (
        df_test[norm_pca_cols].values)
    autoencoders[label] = autoencoder

from sklearn.preprocessing import StandardScaler

# Columns holding one error value per class (presumably the reconstruction
# MSE of each per-class autoencoder -- TODO confirm).
mse_cols = [
    '0.0_mse', '1.0_mse', '2.0_mse', '4.0_mse', '5.0_mse', '6.0_mse',
    '7.0_mse', '8.0_mse'
]

# Prediction 1: standardize every error column first, then choose, per row,
# the class whose (scaled) error is smallest.
scaled_errors = StandardScaler().fit_transform(df_test[mse_cols])
y_pred1 = pd.DataFrame(
    scaled_errors,
    columns=np.unique(y_train),
    index=df_test.index,
).idxmin(axis=1)

# Prediction 2: same rule applied to the raw, unscaled errors.
raw_errors = df_test[mse_cols].values
y_pred2 = pd.DataFrame(
    raw_errors,
    columns=np.unique(y_train),
    index=df_test.index,
).idxmin(axis=1)

df_test['y_pred1'] = y_pred1
df_test['y_pred2'] = y_pred2

# Build the MLPEncoderClassifier from the collected autoencoders; its second
# argument is the largest training label plus one.
largest_label = int(np.unique(y_train).max())
mlp = MLPEncoderClassifier(autoencoders.values(), largest_label + 1)
mlp.fit(X_train, y_train)

# Predict on the test features, store the result alongside the other
# columns and export the whole test frame.
test_features = df_test[norm_pca_cols].values
df_test['mlp_pred'] = mlp.predict(test_features)
results_csv = PROCESSED_PATH + 'autoencoder_mlp_classifier_results.csv'
df_test.to_csv(results_csv)

# Bare expression: the report is only displayed in an interactive session.
reports(df_test['Megaclasse'], df_test['mlp_pred'])
예제 #4
0
# Predict in n_splits_cnn batches. StratifiedKFold serves only as a shuffled
# partitioner: the label vector is all zeros and only the second index array
# of each fold is used to pick the coordinates of the current batch.
skf = StratifiedKFold(n_splits=n_splits_cnn,
                      shuffle=True,
                      random_state=random_state)
dummy_labels = np.zeros(X_coords.shape[0])
for i, (_, split_indices) in enumerate(skf.split(X_coords, dummy_labels),
                                       start=1):
    print(f'Prediction progress: {(i/n_splits_cnn)*100}%')
    X_split = X_coords[split_indices]
    X_patches = get_patches(X_split, X_lookup, window_size)
    # Keep coordinates and predictions in matching order for later joining.
    indices.append(X_split)
    y_pre.append(ConvNet.predict(X_patches))

#df_final = coords.copy()
#y_pred = pd.Series(data=np.concatenate(y_pre), index=np.concatenate(indices), name='y_pred').sort_index()
#df_final = df_final.join(y_pred)

# Assemble one row per predicted sample: the prediction as the first column,
# followed by the two columns accumulated in `indices` (labelled 'y', 'x').
predictions_col = np.concatenate(y_pre)[:, np.newaxis]
coords_block = np.concatenate(indices)
y_pred = pd.DataFrame(
    data=np.concatenate([predictions_col, coords_block], axis=1),
    columns=['y_pred', 'y', 'x'],
)

# Attach the predictions to df by matching on the ('y', 'x') pair, then
# write the combined table to disk.
y_pred_by_coord = y_pred.set_index(['y', 'x'])
df = df.join(y_pred_by_coord, on=['y', 'x'])
df.to_csv(PROCESSED_PATH + 'classification_results.csv')

# Evaluate only rows outside the training set; rows with any missing value
# are dropped before scoring. The filtered frames are computed once and
# reused instead of being rebuilt for every argument.
held_out = df[~df['train_set']]
scored = held_out.dropna()
# Bare expression: the last element of the report output is only displayed
# when run interactively.
reports(scored['Megaclasse'],
        scored['y_pred'],
        {i: i
         for i in df['Megaclasse'].unique()})[-1]
# DataFrame.pivot only accepts keyword arguments since pandas 2.0; the
# keyword form also works on older versions.
plt.imshow(held_out.pivot(index='y', columns='x', values='y_pred'))
예제 #5
0
def _short_band_name(column):
    """Collapse a four-part 'a_b_c_d' column name to 'b_d'; keep others as is."""
    parts = column.split('_')
    if len(parts) == 4:
        return parts[1] + '_' + parts[-1]
    return column


# Coordinates and class label, kept apart from the feature columns.
df_meta = df[['x', 'y', 'Megaclasse']]

# drop least important features
# Keep the first 70 entries of the first column of the feature-rank file and
# select them from df after renaming its columns to the short form.
features = pd.read_csv(FEATURE_RANK_PATH).iloc[:70, 0]
cols_mapper = {column: _short_band_name(column) for column in df.columns}
df_bands = df.rename(columns=cols_mapper)[features.to_list()]

# get data in simple format
X = df_bands.values
y = df_meta['Megaclasse'].values.astype(int)

# make predictions
# Score the `clf` model on X and persist its report. The context manager
# closes the pickle file deterministically instead of leaking the handle.
y_pred = clf.predict(X)
cross_spatial_results = reports(y, y_pred)
with open(RESULTS_PATH+'near_final_cross_spatial_results.pkl', 'wb') as results_file:
    pickle.dump(cross_spatial_results, results_file)

# rfc
# Same evaluation for the `rfc` model, written to its own results file.
y_pred = rfc.predict(X)
cross_spatial_results = reports(y, y_pred)
with open(RESULTS_PATH+'near_final_RF_cross_spatial_results.pkl', 'wb') as results_file:
    pickle.dump(cross_spatial_results, results_file)

# ---------------------------------------------------------------------------- #
# Cross Temporal Validation
# ---------------------------------------------------------------------------- #

# read data
# Load the CSV and discard every row that has a missing value.
input_csv = DATA_PATH+'2020_01_RS_1_n_features_320.csv'
df = pd.read_csv(input_csv).dropna()
예제 #6
0
                                      file)).sort_values(['X', 'Y'])
        # Restrict to the training rows and keep only those whose selection
        # flag equals 1.0. If the configured selection column is missing
        # (KeyError), fall back to the 'cluster_status' column.
        try:
            df = df.iloc[train_id].loc[df[pixel_selection_col].astype(float) ==
                                       1.0]
        except KeyError:
            df = df.iloc[train_id].loc[df['cluster_status'].astype(float) ==
                                       1.0]

        # Feature matrix and labels of the retained rows.
        X = df[band_cols].values
        y = df['Label'].values
        print(f'Training Random Forest...')
        rf = RandomForestClassifier(n_estimators=100,
                                    random_state=random_state)
        rf.fit(X, y)
        # Predictions and true labels both pass through label_encoder before
        # scoring; the report is stored under the current `file` key.
        y_pred = label_encoder.transform(rf.predict(X_test))
        models[file] = reports(label_encoder.transform(y_test), y_pred,
                               target_names)

# Experiment on the merged CSV restricted to the training rows, stored under
# the 'no_selection' key.
file = 'no_selection'
print(f'Starting experiment {file}...')

merged = pd.read_csv(MERGED_CSV).sort_values(['X', 'Y'])
df = merged.dropna().iloc[train_id]
X = df[band_cols].values
y = df['Label'].values

print('Training Random Forest...')
rf = RandomForestClassifier(n_estimators=100, random_state=random_state)
rf.fit(X, y)

# Encode predictions and true labels with the same encoder before scoring.
y_pred = label_encoder.transform(rf.predict(X_test))
y_true_encoded = label_encoder.transform(y_test)
models[file] = reports(y_true_encoded, y_pred, target_names)

# Keep only the last element of each experiment's report output.
scores = {name: results[-1] for name, results in models.items()}
예제 #7
0
# Standardize the features with statistics learned from the training set.
# The transformed values are assigned back explicitly: the previous code
# discarded the return value of transform(..., copy=False) and relied on
# in-place mutation of `.values`, which scikit-learn does not guarantee and
# which silently does nothing when `.values` returns a copy.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = pd.DataFrame(scaler.transform(X_train),
                       index=X_train.index,
                       columns=X_train.columns)
X_test = pd.DataFrame(scaler.transform(X_test),
                      index=X_test.index,
                      columns=X_test.columns)

# For every column of features_selected (one per selection method, used as
# a mask over feature names), train a random forest on the masked features
# and collect the last element of its report, renamed after the method.
scores = []
for method in features_selected.columns:
    features = features_selected[method]
    chosen_cols = features[features].index
    rfc = RandomForestClassifier(100, random_state=0)
    rfc.fit(X_train[chosen_cols], y_train.copy())
    predicted = rfc.predict(X_test[chosen_cols])
    method_score = reports(y_test.copy(), predicted)[-1]
    scores.append(method_score.rename({'Score': method}))

# Number of features flagged by each method, as a one-column frame.
feature_counts = pd.DataFrame(
    features_selected[features_selected].count(),
    columns=['# features used'],
)
# `axis` must be passed by keyword: pandas 2.0 made every pd.concat argument
# except `objs` keyword-only, so the positional form raises a TypeError.
method_scores = pd.concat(scores, axis=1).T
# One row per method, ordered by feature count; the 'Selected' row is
# relabelled 'Intersect' before export.
(
    feature_counts
    .join(method_scores)
    .sort_values('# features used', ascending=False)
    .rename(index={'Selected': 'Intersect'})
    .to_csv('feature_selection_results.csv')
)

################################################################################
## define noise introduction procedure

## define filters

## define classifiers
예제 #8
0
    'x', 'y', 'Megaclasse').values  # to plot entire study area's ground truth
# DataFrame.pivot only accepts keyword arguments since pandas 2.0, so the
# index/columns/values roles are spelled out in every call below.
train_gt = df_pre_train.pivot(
    index='x', columns='y',
    values='Megaclasse').values  # to plot training area's ground truth
test_gt = df_test.pivot(
    index='x', columns='y',
    values='Megaclasse').values  # to plot test area's ground truth

train_rgb  # to plot training area
ps_rgb = pivot_rgb(df_selected, xy_cols, rgb_cols)  # to plot selected pixels
ps_gt = df_selected.pivot(
    index='x', columns='y',
    values='Megaclasse').values  # to plot selected pixels' ground truth

test_rgb  # to plot test area
test_gt  # to plot ground truth
pred_labels = df_final.pivot(
    index='x', columns='y', values='y_pred').values  # to plot predictions

# Bare expression: the last element of the report output is only displayed
# when run interactively.
reports(df_final['y_true'], df_final['y_pred'], labels)[-1]

rgbrgb = pivot_rgb(df_final, xy_cols, rgb_cols)

# Panels to display: test-area RGB, test-area ground truth and predictions.
# The first two have their first two axes swapped.
# NOTE(review): pred_labels is passed without that axis swap -- confirm this
# is intended.
# Other panel rows that were previously toggled here (all with axes 0 and 1
# swapped): total/train/test RGB; total/train/test ground truth;
# train RGB / selected-pixel RGB / selected-pixel ground truth.
# plot_image also took optional `num_rows` / `figsize` arguments there.
panels = [
    np.moveaxis(test_rgb, 0, 1),
    np.moveaxis(test_gt, 0, 1),
    pred_labels,
]
plot_image(panels)