def predict():
    """Restore the serialized DNN (architecture YAML + HDF5 weights), run it
    over the full prediction dataset, report Kappa / classification metrics,
    and write the predicted class map to OUT_RASTER as a GeoTIFF.

    Side effects: prints metrics to stdout and writes one raster file.
    """
    # Context manager guarantees the handle is closed even if read() raises
    # (the original used open/read/close and could leak the file on error).
    with open("../sensing_data/models/dnn_tf_1_1.yaml", 'r') as yaml_file:
        loaded_model_yaml = yaml_file.read()
    dnn_pred = model_from_yaml(loaded_model_yaml)
    # load weights into new model
    dnn_pred.load_weights("../sensing_data/models/dnn_tf_1_1.h5")
    print("Loaded model from disk")
    # Keras requires compile() before predict/evaluate with metrics.
    dnn_pred.compile(loss='categorical_crossentropy',
                     optimizer='Adam',
                     metrics=['accuracy'])
    dnn_pred.summary()
    X, y, shape = data.load_prediction(ratio=1, normalize=False,
                                       osm_roads=False, split_struct=False,
                                       army_gt=False)
    # NOTE(review): the Normalizer is fit on the prediction data itself rather
    # than reusing training-time statistics — confirm this matches training.
    normalizer = preprocessing.Normalizer().fit(X)
    X = normalizer.transform(X)
    y_pred = dnn_pred.predict(X)
    # Collapse softmax probability rows to 0-based class indices.
    y_pred = [np.argmax(pred) for pred in tqdm(y_pred)]
    # Ground truth labels are 1-based; shift to align with argmax output.
    kappa = cohen_kappa_score(y - 1, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y - 1, y_pred))
    y_pred = np.array(y_pred)
    yr = y_pred.reshape(shape)
    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)
def test_pred():
    """Load the full prediction dataset and L2-normalise its feature matrix.

    NOTE(review): nothing is returned or asserted — this looks like a
    truncated benchmark/test stub; confirm whether the rest was lost.
    """
    t_start = time.time()
    features, labels, raster_shape = data.load_prediction(
        ratio=1, normalize=False, osm_roads=False, split_struct=False)
    scaler = preprocessing.Normalizer().fit(features)
    features = scaler.transform(features)
def main(argv):
    """Train an SVM land-cover classifier and print train/test Kappa scores.

    NOTE(review): the unconditional `return 0` below makes the model dump and
    the full-raster prediction unreachable dead code — confirm whether this
    early exit is a deliberate debug toggle before removing it.
    """
    real_start = time.time()
    split_struct=False
    osm_roads=False
    # train_size = int(100_000)
    train_size = int(19_386_625*0.2)  # 20% of the full pixel count
    X_train, y_train, X_test, y_test,_,_,_ = data.load(
        train_size, normalize=True, osm_roads=osm_roads,
        split_struct=split_struct)
    start = time.time()
    # Build a sv and compute the feature importances
    sv = svm.SVC(C=6.685338321430641, gamma=6.507029881541734)
    print("Fitting data...")
    sv.fit(X_train, y_train)
    end = time.time()
    elapsed = end-start
    print("Training time: " + str(timedelta(seconds=elapsed)))
    # Training-set metrics (optimistic estimate of fit quality).
    yt_pred = sv.predict(X_train)
    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))
    # Held-out test-set metrics.
    y_pred = sv.predict(X_test)
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    return 0
    # --- Everything below is unreachable because of the return above. ---
    dump(sv, '../sensing_data/models/svm_static_group3.joblib')
    print("Saved model to disk")
    # Testing trash
    X, y, shape = data.load_prediction(
        ratio=1, normalize=True, osm_roads=osm_roads,
        split_struct=split_struct)
    start_pred = time.time()
    y_pred = sv.predict(X)
    print("Predict time: " + str(timedelta(seconds=time.time()-start_pred)))
    kappa = cohen_kappa_score(y, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred))
    # Reshape flat per-pixel classes back to the raster grid and export.
    yr = y_pred.reshape(shape)
    viz.createGeotiff(OUT_RASTER, yr,
                      DS_FOLDER + "clipped_sentinel2_B08.vrt", gdal.GDT_Byte)
    end = time.time()
    elapsed = end-real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
def model(dfs):
    """Train a 1-D CNN on the sensing dataset, report Kappa on train /
    validation / full prediction data, and export the predicted class raster.

    NOTE(review): parameter `dfs` is never used in this body — confirm it can
    be dropped or is consumed by an out-of-view revision.
    """
    start = time.time()
    train_size = int(19386625 * 0.2)  # 20% of the full pixel count
    split_struct = True
    osm_roads = False
    X_train, y_train, X_test, y_test, _, _, normalizer = data.load(
        train_size, normalize=True, osm_roads=osm_roads,
        split_struct=split_struct)
    # Conv1D expects (samples, timesteps, channels): add a singleton channel.
    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)
    input_shape = X_train.shape[1]
    logits = 5  # number of target classes for one-hot encoding
    # Labels arrive 1-based; shift to 0-based for to_categorical/argmax.
    y_train = y_train - 1
    y_test = y_test - 1
    #class_weights = class_weight.compute_class_weight('balanced',
    #                                                  np.unique(y_train),
    #                                                  y_train)
    y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=logits)
    n_timesteps, n_features, n_outputs = X_train.shape[1], X_train.shape[
        2], y_train_onehot.shape[1]
    # Stacked Conv1D/MaxPool feature extractor followed by a dense head.
    model_cnn = Sequential()
    model_cnn.add(
        Conv1D(filters=64, kernel_size=3, activation='relu',
               input_shape=(n_timesteps, n_features)))
    model_cnn.add(MaxPooling1D(pool_size=2))
    model_cnn.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model_cnn.add(MaxPooling1D(pool_size=2))
    model_cnn.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model_cnn.add(Flatten())
    model_cnn.add(Dense(128, activation='relu'))
    model_cnn.add(Dropout(0.5))
    model_cnn.add(Dense(64, activation='relu'))
    model_cnn.add(Dense(n_outputs, activation='softmax'))
    model_cnn.compile(loss='categorical_crossentropy', optimizer='rmsprop',
                      metrics=['mae', 'acc'])
    model_cnn.summary()
    # Early stopping on validation loss; fit uses a 20% validation split.
    es = EarlyStopping(monitor='val_loss', min_delta=0.0001, patience=5,
                       verbose=0, mode='auto')
    model_cnn.fit(X_train, y_train_onehot, epochs=100, validation_split=0.2,
                  callbacks=[es])
    # Training-set metrics.
    yt_pred_onehot = model_cnn.predict(X_train)
    yt_pred = [np.argmax(pred) for pred in yt_pred_onehot]
    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))
    # Validation-set metrics.
    y_pred_onehot = model_cnn.predict(X_test)
    y_pred = [np.argmax(pred) for pred in y_pred_onehot]
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    # Testing trash
    # NOTE(review): here `normalize` receives the fitted normalizer object,
    # unlike the booleans used elsewhere — confirm data.load_prediction
    # accepts a transformer for this parameter.
    X, y, shape = data.load_prediction(ratio=1, normalize=normalizer,
                                       osm_roads=osm_roads,
                                       split_struct=split_struct,
                                       army_gt=False)
    print(X.shape, y.shape)
    y_pred = model_cnn.predict(X)
    y_pred = [np.argmax(pred) for pred in y_pred]
    # Prediction ground truth is 1-based; shift to match argmax output.
    kappa = cohen_kappa_score(y - 1, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y - 1, y_pred))
    y_pred = np.array(y_pred)
    yr = y_pred.reshape(shape)
    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)
    end = time.time()
    elapsed = end - start
    print("Run time: " + str(timedelta(seconds=elapsed)))
def main(argv):
    """Train a balanced RandomForest land-cover classifier and print
    train/validation Kappa scores.

    NOTE(review): the unconditional `return 0` below makes the model dump and
    the full-raster prediction unreachable dead code — confirm whether this
    early exit is a deliberate debug toggle before removing it.
    """
    real_start = time.time()
    train_size = int(19386625 * 0.2)  # 20% of the full pixel count
    split_struct = True
    osm_roads = False
    X_train, y_train, X_test, y_test, _, _, _ = data.load(
        train_size, normalize=False, osm_roads=osm_roads,
        split_struct=split_struct)
    start = time.time()
    # Build a forest and compute the feature importances
    forest = RandomForestClassifier(n_estimators=500, min_samples_leaf=4,
                                    min_samples_split=2, max_depth=130,
                                    class_weight='balanced', n_jobs=-1,
                                    verbose=1)
    print("Fitting data...")
    forest.fit(X_train, y_train)
    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))
    # Training-set metrics (optimistic estimate of fit quality).
    yt_pred = forest.predict(X_train)
    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))
    # Held-out validation metrics.
    y_pred = forest.predict(X_test)
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    return 0
    # --- Everything below is unreachable because of the return above. ---
    dump(forest, '../sensing_data/models/forest_tsfull_group2.joblib')
    print("Saved model to disk")
    X, y, shape = data.load_prediction(ratio=1, normalize=None,
                                       osm_roads=osm_roads,
                                       split_struct=split_struct,
                                       army_gt=False)
    start_pred = time.time()
    y_pred_classes = forest.predict(X)
    # y_pred_proba = forest.predict_proba(X)
    # y_pred_classes = np.array(
    #     [np.argmax(yi, axis=-1) + 1 for yi in tqdm(y_pred_proba)])
    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))
    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))
    # Reshape flat per-pixel classes back to the raster grid and export.
    yr = y_pred_classes.reshape(shape)
    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)
    print("Creating uncertainty matrix...")
    start_matrix = time.time()
    # y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 3))
    # viz.createGeotiff(OUT_PROBA_RASTER + "estrutura.tiff",
    #                   y_pred_proba_reshaped[:, :, 0], REF_FILE, gdal.GDT_Float32)
    # # viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
    # #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "restante.tiff",
    #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
    #                   y_pred_proba_reshaped[:, :, 2], REF_FILE, gdal.GDT_Float32)
    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))
    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
def main(argv):
    """Train a GPU XGBoost classifier (optionally with OSM roads and feature
    selection), save it, predict the full raster in two batches on CPU, and
    export class + per-class probability GeoTIFFs.

    NOTE(review): when --roads is set the objective becomes 'binary:hinge',
    yet the probability reshape below assumes 3 classes — confirm the binary
    path is ever used with the uncertainty-matrix export.
    """
    parser = argparse.ArgumentParser(description='Trains a xgboost model.')
    parser.add_argument("--roads", type=str_2_bool, nargs='?', const=True,
                        default=False, help="Activate OSM roads")
    parser.add_argument("--fselect", type=str_2_bool, nargs='?', const=True,
                        default=False, help="Activate feature selection roads")
    args = parser.parse_args()
    road_flag = args.roads
    selector_flag = args.fselect
    if road_flag:
        print("Using roads...")
    # Binary objective when roads are used, multiclass softmax otherwise.
    obj = 'binary:hinge' if args.roads else 'multi:softmax'
    real_start = time.time()
    train_size = int(19386625 * 0.2)  # 20% of the full pixel count
    X, y, X_test, y_test = data.load(train_size, normalize=False,
                                     balance=False, osm_roads=road_flag)
    start = time.time()
    # Hyperparameters presumably from a prior tuning run — TODO confirm.
    forest = xgb.XGBClassifier(
        colsample_bytree=0.7553707061597048,
        gamma=5,
        gpu_id=0,
        learning_rate=0.2049732654267658,
        max_depth=8,
        min_child_weight=1,
        max_delta_step=9.075685204314162,
        n_estimators=1500,
        n_jobs=4,
        objective=obj,  # binary:hinge if binary classification
        predictor='gpu_predictor',
        tree_method='gpu_hist')
    if selector_flag:
        print("Feature importances running...")
        selector = fselector.Fselector(forest, mode="elastic", thold=0.25)
        transformer = selector.select(X, y)
        print("Transforming data...")
        # Reduce both train and test matrices to the selected feature subset.
        X = transformer.transform(X)
        X_test = transformer.transform(X_test)
    print("Fitting data...")
    forest.fit(X, y)
    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))
    # Held-out test-set metrics.
    y_pred = forest.predict(X_test)
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))
    dump(forest, '../sensing_data/models/boosted.joblib')
    print("Saved model to disk")
    # Testing trash
    X, y, shape = data.load_prediction(ratio=1, normalize=False,
                                       osm_roads=road_flag)
    start_pred = time.time()
    # batch test
    # Predict in two halves, presumably to limit peak memory — TODO confirm.
    X_h = X[:len(X) // 2]
    X_h1 = X[len(X) // 2:]
    # Switch the trained booster from GPU to CPU prediction.
    forest.get_booster().set_param('predictor', 'cpu_predictor')
    print("Predict 0%...")
    y_pred = forest.predict_proba(X_h)
    print("Predict 50%...")
    y_pred2 = forest.predict_proba(X_h1)
    print("Predict 100%...")
    y_pred_proba = np.concatenate((y_pred, y_pred2))
    # Argmax over class probabilities, shifted back to 1-based labels.
    y_pred_classes = np.array(
        [np.argmax(yi, axis=-1) + 1 for yi in tqdm(y_pred_proba)])
    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))
    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))
    y_pred_classes_reshaped = y_pred_classes.reshape(shape)
    viz.createGeotiff(OUT_RASTER, y_pred_classes_reshaped, REF_FILE,
                      gdal.GDT_Byte)
    print("Creating uncertainty matrix...")
    start_matrix = time.time()
    # One probability band per class (3 classes assumed here).
    y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 3))
    viz.createGeotiff(OUT_PROBA_RASTER + "estrutura.tiff",
                      y_pred_proba_reshaped[:, :, 0], REF_FILE,
                      gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
    #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "restante.tiff",
                      y_pred_proba_reshaped[:, :, 1], REF_FILE,
                      gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
                      y_pred_proba_reshaped[:, :, 2], REF_FILE,
                      gdal.GDT_Float32)
    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))
    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
def main(argv):
    """Train a binary XGBoost classifier on CPU, report train/validation
    Kappa, save the model, predict the full raster in two batches, and export
    the thresholded class map as a GeoTIFF.

    Command-line flags: --roads enables OSM roads, --fselect enables
    importance-based feature selection.
    """
    parser = argparse.ArgumentParser(description='Trains a xgboost model.')
    parser.add_argument("--roads", type=str_2_bool, nargs='?', const=True,
                        default=False, help="Activate OSM roads")
    parser.add_argument("--fselect", type=str_2_bool, nargs='?', const=True,
                        default=False, help="Activate feature selection")
    args = parser.parse_args()
    road_flag = args.roads
    selector_flag = args.fselect
    if road_flag:
        print("Using roads...")
    if selector_flag:
        print("Using feature selection...")
    # NOTE(review): `obj` is currently unused — the objective kwarg below is
    # commented out, so xgboost falls back to its default objective.
    obj = 'binary:hinge'
    real_start = time.time()
    split_struct = False
    osm_roads = False
    train_size = int(19386625 * 0.2)  # 20% of the full pixel count
    # train_size = int(1607*1015*0.2)
    X_train, y_train, X_test, y_test, _, _, _ = data.load(
        train_size, map_classes=False, normalize=False, osm_roads=osm_roads,
        split_struct=split_struct, gt_raster='cos_new_gt_2015t.tiff')
    start = time.time()
    #XGB_binary_building = {'colsample_bytree': 0.7343021353976351, 'gamma': 0, 'learning_rate': 0.16313076998849083, 'max_delta_step': 8.62355770678575, 'max_depth': 8, 'min_child_weight': 3, 'n_estimators': 1500, 'predictor': 'cpu_predictor', 'tree_method': 'hist'}
    forest = xgb.XGBClassifier(
        colsample_bytree=0.7343021353976351,
        gamma=0,
        gpu_id=0,
        learning_rate=0.16313076998849083,
        max_depth=8,
        min_child_weight=3,
        max_delta_step=8.62355770678575,
        n_estimators=1500,
        n_jobs=-1,
        #objective=obj,  # binary:hinge if binary classification
        predictor='cpu_predictor',
        tree_method='hist')
    if selector_flag:
        print("Feature importances running...")
        # svm cant handle full training data
        x_train_feature, _, y_train_feature, _ = train_test_split(
            X_test, y_test, test_size=0, train_size=100_000)
        selector = fselector.Fselector(forest, mode="importances", thold=0.80)
        transformer = selector.select(x_train_feature, y_train_feature)
        features = transformer.get_support()
        # feature_names = data.get_features()
        # feature_names = feature_names[features]
        print(features)
        print("Transforming data...")
        print("Before: ", X_train.shape)
        # BUG FIX: the original did `X = transformer.transform(X)`, but `X`
        # is undefined at this point (NameError) and the training matrix was
        # never reduced while X_test was — fit() would then see mismatched
        # feature sets. Transform X_train instead.
        X_train = transformer.transform(X_train)
        X_test = transformer.transform(X_test)
        print("After: ", X_train.shape)
    print("Fitting data...")
    forest.fit(X_train, y_train)
    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))
    # Training-set metrics; threshold raw outputs to hard {0, 1} labels.
    yt_pred = forest.predict(X_train)
    yt_pred[yt_pred > 0.5] = 1
    yt_pred[yt_pred <= 0.5] = 0
    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))
    # Held-out validation metrics with the same thresholding.
    y_pred = forest.predict(X_test)
    y_pred[y_pred > 0.5] = 1
    y_pred[y_pred <= 0.5] = 0
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    dump(forest, '../sensing_data/models/boosted_test_group1.joblib')
    print("Saved model to disk")
    # Testing trash
    # NOTE(review): gt_raster here ends in '.tif' while training above used
    # '.tiff' — confirm which filename actually exists on disk.
    X, y, shape = data.load_prediction(
        ratio=1, map_classes=False, normalize=False, osm_roads=osm_roads,
        split_struct=split_struct, gt_raster='cos_new_gt_2015t.tif')
    start_pred = time.time()
    # batch test
    # Predict in two halves, presumably to limit peak memory — TODO confirm.
    X_h = X[:len(X) // 2]
    X_h1 = X[len(X) // 2:]
    forest.get_booster().set_param('predictor', 'cpu_predictor')
    print("Predict 0%...")
    y_pred = forest.predict(X_h)
    print("Predict 50%...")
    y_pred2 = forest.predict(X_h1)
    print("Predict 100%...")
    y_pred_classes = np.concatenate((y_pred, y_pred2))
    y_pred_classes[y_pred_classes > 0.5] = 1
    y_pred_classes[y_pred_classes <= 0.5] = 0
    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))
    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))
    y_pred_classes_reshaped = y_pred_classes.reshape(shape)
    viz.createGeotiff(OUT_RASTER, y_pred_classes_reshaped, REF_FILE,
                      gdal.GDT_Byte)
    return
    # --- Unreachable because of the return above. NOTE(review): this tail
    # references `y_pred_proba`, which is never defined in this version, and
    # index [:, :, 4] would be out of bounds for a (..., 4) reshape — dead
    # code kept for reference only.
    print("Creating uncertainty matrix...")
    start_matrix = time.time()
    y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 4))
    viz.createGeotiff(OUT_PROBA_RASTER + "estrutura_urbana.tiff",
                      y_pred_proba_reshaped[:, :, 0], REF_FILE,
                      gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
                      y_pred_proba_reshaped[:, :, 1], REF_FILE,
                      gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "outras.tiff",
    #                   y_pred_proba_reshaped[:, :, 2], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "natural.tiff",
                      y_pred_proba_reshaped[:, :, 3], REF_FILE,
                      gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
                      y_pred_proba_reshaped[:, :, 4], REF_FILE,
                      gdal.GDT_Float32)
    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))
    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))