Code example #1
def predict():
    with open("../sensing_data/models/dnn_tf_1_1.yaml", 'r') as yaml_file:
        loaded_model_yaml = yaml_file.read()
    dnn_pred = model_from_yaml(loaded_model_yaml)
    # load weights into new model
    dnn_pred.load_weights("../sensing_data/models/dnn_tf_1_1.h5")
    print("Loaded model from disk")

    dnn_pred.compile(loss='categorical_crossentropy',
                     optimizer='Adam',
                     metrics=['accuracy'])

    dnn_pred.summary()

    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=False,
                                       osm_roads=False,
                                       split_struct=False,
                                       army_gt=False)

    normalizer = preprocessing.Normalizer().fit(X)
    X = normalizer.transform(X)

    y_pred = dnn_pred.predict(X)
    y_pred = [np.argmax(pred) for pred in tqdm(y_pred)]

    kappa = cohen_kappa_score(y - 1, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y - 1, y_pred))

    y_pred = np.array(y_pred)
    yr = y_pred.reshape(shape)

    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)
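The viz.createGeotiff helper these examples call everywhere is defined elsewhere in the project. As a reading aid, here is a minimal sketch of what such a helper plausibly looks like with the GDAL API; the body is assumed, not taken from the project:

# Hypothetical sketch of a createGeotiff-style helper: write a 2-D array to
# GeoTIFF, copying georeferencing from a reference raster.
from osgeo import gdal

def createGeotiff(out_path, data, ref_path, dtype):
    ref = gdal.Open(ref_path, gdal.GA_ReadOnly)
    rows, cols = data.shape
    driver = gdal.GetDriverByName("GTiff")
    out = driver.Create(out_path, cols, rows, 1, dtype)
    out.SetGeoTransform(ref.GetGeoTransform())
    out.SetProjection(ref.GetProjection())
    out.GetRasterBand(1).WriteArray(data)
    out.FlushCache()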
Code example #2
def main(argv):
    real_start = time.time()

    split_struct = False
    osm_roads = False

    # train_size = int(100_000)
    train_size = int(19_386_625 * 0.2)
    X_train, y_train, X_test, y_test, _, _, _ = data.load(
        train_size, normalize=True, osm_roads=osm_roads, split_struct=split_struct)

    start = time.time()
    # Build an SVM classifier
    sv = svm.SVC(C=6.685338321430641, gamma=6.507029881541734)

    print("Fitting data...")
    sv.fit(X_train, y_train)

    end = time.time()
    elapsed = end-start
    print("Training time: " + str(timedelta(seconds=elapsed)))

    yt_pred = sv.predict(X_train)
    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))

    y_pred = sv.predict(X_test)
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    # NOTE: everything below this early return is unreachable (kept for reference).
    return 0

    dump(sv, '../sensing_data/models/svm_static_group3.joblib')
    print("Saved model to disk")
    # Testing trash
    X, y, shape = data.load_prediction(
        ratio=1, normalize=True, osm_roads=osm_roads, split_struct=split_struct)

    start_pred = time.time()
    y_pred = sv.predict(X)
    print("Predict time: " + str(timedelta(seconds=time.time()-start_pred)))

    kappa = cohen_kappa_score(y, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred))

    yr = y_pred.reshape(shape)

    viz.createGeotiff(OUT_RASTER, yr, DS_FOLDER +
                      "clipped_sentinel2_B08.vrt", gdal.GDT_Byte)

    end = time.time()
    elapsed = end-real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
Code example #3
def test_pred():
    start = time.time()

    raster_ds = gdal.Open(DS_FOLDER + "/ignored/static/freq_cinza.tif",
                          gdal.GA_ReadOnly)
    band = raster_ds.GetRasterBand(1).ReadAsArray()

    # 8-neighbour sum kernel (not a true box blur: the centre weight is 0
    # and the result is unnormalized).
    filter_kernel = [[1, 1, 1], [1, 0, 1], [1, 1, 1]]
    band_convolved = scipy.signal.convolve2d(band,
                                             filter_kernel,
                                             mode='same',
                                             boundary='fill',
                                             fillvalue=0)

    viz.createGeotiff(DS_FOLDER + "/ignored/static/convolved.tif",
                      band_convolved,
                      DS_FOLDER + "/ignored/static/freq_cinza.tif",
                      gdal.GDT_Float32)

    end = time.time()
    elapsed = end - start
    print("Run time: " + str(timedelta(seconds=elapsed)))
Code example #4
def main(argv):
    src_dss = [f for f in os.listdir(SRC_FOLDER) if ".tif" in f]
    src_dss.sort()
    sets = [f.split("clipped")[0] for f in src_dss]
    sets = np.unique(sets)

    for f_id in tqdm(sets):
        data = [
            f for f in os.listdir(SRC_FOLDER) if f.split("clipped")[0] == f_id
        ]

        # Positional indices assume the per-date file list sorts
        # lexicographically as B01..B12 followed by B8A.
        ref = SRC_FOLDER + data[7]  # B08 used as the georeference

        nir = getBand(data[7])    # B08
        green = getBand(data[2])  # B03
        red = getBand(data[3])    # B04
        swir = getBand(data[10])  # B11

        id1 = ndvi(nir, red, ref)
        id2 = ndwi(green, nir, ref)
        id3 = ndbi(swir, nir, ref)
        id4 = evi(nir, red, ref)

        id1[~np.isfinite(id1)] = 0
        id2[~np.isfinite(id2)] = 0
        id3[~np.isfinite(id3)] = 0
        id4[~np.isfinite(id4)] = 0

        createGeotiff(SRC_FOLDER + f_id + "clipped_pad_pad_ndvi.tif", id1, ref,
                      gdal.GDT_Float32)
        createGeotiff(SRC_FOLDER + f_id + "clipped_pad_pad_ndwi.tif", id2, ref,
                      gdal.GDT_Float32)
        createGeotiff(SRC_FOLDER + f_id + "clipped_pad_pad_ndbi.tif", id3, ref,
                      gdal.GDT_Float32)
        createGeotiff(SRC_FOLDER + f_id + "clipped_pad_pad_evi.tif", id4, ref,
                      gdal.GDT_Float32)
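The index helpers (ndvi, ndwi, ndbi, evi) are defined elsewhere in the project. For reference, a sketch of ndvi using the standard formula; the ref argument is assumed to carry georeferencing for an output raster and is ignored here:

import numpy as np

def ndvi(nir, red, ref=None):
    # Standard normalized difference vegetation index; divisions by zero
    # yield inf/nan, which the caller zeroes out via its isfinite mask.
    return (nir - red) / (nir + red)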
Code example #5
def model(dfs):
    start = time.time()
    train_size = int(19386625 * 0.2)

    split_struct = True
    osm_roads = False

    X_train, y_train, X_test, y_test, _, _, normalizer = data.load(
        train_size,
        normalize=True,
        osm_roads=osm_roads,
        split_struct=split_struct)

    X_train = np.expand_dims(X_train, axis=2)
    X_test = np.expand_dims(X_test, axis=2)

    input_shape = X_train.shape[1]
    logits = 5

    y_train = y_train - 1
    y_test = y_test - 1

    # class_weights = class_weight.compute_class_weight('balanced',
    #                                                    np.unique(y_train),
    #                                                    y_train)

    y_train_onehot = tf.keras.utils.to_categorical(y_train, num_classes=logits)

    n_timesteps, n_features, n_outputs = (X_train.shape[1], X_train.shape[2],
                                          y_train_onehot.shape[1])

    model_cnn = Sequential()
    model_cnn.add(
        Conv1D(filters=64,
               kernel_size=3,
               activation='relu',
               input_shape=(n_timesteps, n_features)))
    model_cnn.add(MaxPooling1D(pool_size=2))

    model_cnn.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model_cnn.add(MaxPooling1D(pool_size=2))

    model_cnn.add(Conv1D(filters=128, kernel_size=3, activation='relu'))
    model_cnn.add(Flatten())
    model_cnn.add(Dense(128, activation='relu'))
    model_cnn.add(Dropout(0.5))
    model_cnn.add(Dense(64, activation='relu'))
    model_cnn.add(Dense(n_outputs, activation='softmax'))

    model_cnn.compile(loss='categorical_crossentropy',
                      optimizer='rmsprop',
                      metrics=['mae', 'acc'])
    model_cnn.summary()

    es = EarlyStopping(monitor='val_loss',
                       min_delta=0.0001,
                       patience=5,
                       verbose=0,
                       mode='auto')

    model_cnn.fit(X_train,
                  y_train_onehot,
                  epochs=100,
                  validation_split=0.2,
                  callbacks=[es])

    yt_pred_onehot = model_cnn.predict(X_train)
    yt_pred = [np.argmax(pred) for pred in yt_pred_onehot]

    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))

    y_pred_onehot = model_cnn.predict(X_test)
    y_pred = [np.argmax(pred) for pred in y_pred_onehot]

    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))

    # Testing trash
    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=normalizer,
                                       osm_roads=osm_roads,
                                       split_struct=split_struct,
                                       army_gt=False)
    print(X.shape, y.shape)

    y_pred = model_cnn.predict(X)
    y_pred = [np.argmax(pred) for pred in y_pred]

    kappa = cohen_kappa_score(y - 1, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y - 1, y_pred))

    y_pred = np.array(y_pred)
    yr = y_pred.reshape(shape)

    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)

    end = time.time()
    elapsed = end - start
    print("Run time: " + str(timedelta(seconds=elapsed)))
Code example #6
def main(argv):
    bands = {
        "B01": [],
        "B02": [],
        "B03": [],
        "B04": [],
        "B05": [],
        "B06": [],
        "B07": [],
        "B08": [],
        "B8A": [],
        "B09": [],
        "B10": [],
        "B11": [],
        "B12": [],
        "ndvi": [],
        "evi": [],
        "ndbi": [],
        "ndwi": []
    }

    src_dss = [
        f for f in os.listdir(SRC_FOLDER)
        if (".jp2" in f) or (".tif" in f) or (".img" in f)
    ]
    src_dss.sort()

    # Reference files
    for f in src_dss:
        try:
            bands[f.split("_")[3].split(".")[0]].append(SRC_FOLDER + f)
        except KeyError:
            print("ignoring")

    refDs = gdal.Open(
        "../sensing_data/clipped/" + ROI +
        "ignored/static/clipped_sentinel2_B08.vrt", gdal.GA_ReadOnly)
    band = refDs.GetRasterBand(1).ReadAsArray()
    ref_shape = band.shape

    for b in tqdm(bands):
        # change to np array (0,m) when possible timeseries.append([bandsData], axis=0), or faster (n,m) -> a[0..n] = [1,2,...]
        timeseries = []
        if (len(bands[b]) > 0):
            # Open raster dataset
            for raster in bands[b]:
                rasterDS = gdal.Open(raster, gdal.GA_ReadOnly)
                # Extract band's data and transform into a numpy array
                bandsData = rasterDS.GetRasterBand(1).ReadAsArray()
                # static fix for clip mismatch problem
                timeseries.append(bandsData[:ref_shape[0], :ref_shape[1]])

            timeseries = np.array(timeseries)
            timeseries[~np.isfinite(timeseries)] = 0

            # Using quartiles, change to 0.05 quantiles later if load isn't too much...
            mean_ts = np.mean(timeseries, axis=0)  # mean
            q0 = np.quantile(timeseries, 0.00, axis=0)  # minimum
            q1 = np.quantile(timeseries, 0.25, axis=0)  # first quartile
            q2 = np.quantile(timeseries, 0.50, axis=0)  # median
            q3 = np.quantile(timeseries, 0.75, axis=0)  # third quartile
            q4 = np.quantile(timeseries, 1.00, axis=0)  # maximum
            std = np.std(timeseries, axis=0)  # standard deviation
            variance = np.var(timeseries, axis=0)  # variance

            # Index rasters (ndvi/evi/ndbi/ndwi) need a float type; plain
            # reflectance bands fit in unsigned 16-bit.
            d_type = gdal.GDT_UInt16
            if "i" in b:
                d_type = gdal.GDT_Float32

            ref_file = ("../sensing_data/clipped/vila-de-rei/"
                        "ignored/static/clipped_sentinel2_B08.vrt")
            viz.createGeotiff(DST_FOLDER + b + "_mean.tiff", mean_ts, ref_file,
                              d_type)
            viz.createGeotiff(DST_FOLDER + b + "_q0.tiff", q0, ref_file, d_type)
            viz.createGeotiff(DST_FOLDER + b + "_q1.tiff", q1, ref_file, d_type)
            viz.createGeotiff(DST_FOLDER + b + "_q2.tiff", q2, ref_file, d_type)
            viz.createGeotiff(DST_FOLDER + b + "_q3.tiff", q3, ref_file, d_type)
            viz.createGeotiff(DST_FOLDER + b + "_q4.tiff", q4, ref_file, d_type)
            viz.createGeotiff(DST_FOLDER + b + "_var.tiff", variance, ref_file,
                              d_type)
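np.quantile also accepts a sequence of quantiles, so the five separate calls above can be fused into one pass over the stack. A small sketch with a stand-in array:

import numpy as np

ts = np.random.rand(12, 4, 4)  # (dates, rows, cols) stand-in
# Result has shape (5, rows, cols): min, Q1, median, Q3, max.
q0, q1, q2, q3, q4 = np.quantile(ts, [0.0, 0.25, 0.5, 0.75, 1.0], axis=0)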
Code example #7
def align_images(im, imReference):
    # Hedged reconstruction: the original matching steps were lost, so
    # standard ORB feature matching is assumed here; only the final
    # findHomography call and the return survive from the source.
    orb = cv2.ORB_create(5000)
    kp1, desc1 = orb.detectAndCompute(im, None)
    kp2, desc2 = orb.detectAndCompute(imReference, None)

    matcher = cv2.DescriptorMatcher_create(
        cv2.DESCRIPTOR_MATCHER_BRUTEFORCE_HAMMING)
    matches = sorted(matcher.match(desc1, desc2), key=lambda m: m.distance)
    matches = matches[:max(4, int(len(matches) * 0.15))]  # keep best matches

    points1 = np.float32([kp1[m.queryIdx].pt for m in matches])
    points2 = np.float32([kp2[m.trainIdx].pt for m in matches])

    h, mask = cv2.findHomography(points1, points2, cv2.RANSAC)
    return h


if __name__ == '__main__':

    # Read reference image
    refFilename = SRC_FOLDER + "0clipped_T29SND_20160430T112122_B08.tif"
    # The GDAL read here was immediately overwritten; cv2.imread(., 0) gives
    # the 8-bit grayscale that ORB expects.
    imReference = cv2.imread(refFilename, 0)

    # Read to align image
    imFilename = SRC_FOLDER + "6clipped_T29SND_20160629T112112_B08.tif"
    # Same here: read directly as 8-bit grayscale.
    im = cv2.imread(imFilename, 0)

    # The estimated homography will be stored in h.
    h = align_images(im, imReference)

    # Write aligned image to disk.
    outFilename = DST_FOLDER + "test.tif"
    height, width = im.shape  # numpy shape is (rows, cols) = (height, width)

    imReg = cv2.warpPerspective(im, h, (width, height))
    print(imReg.shape)

    createGeotiff(outFilename, imReg, refFilename, gdal.GDT_UInt16)
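One caveat in the block above: cv2.imread(path, 0) quantizes the 16-bit rasters to 8 bits, which is fine for feature detection (ORB expects 8-bit input) but loses radiometry if the warped result itself is written out. warpPerspective also works on 16-bit arrays, so the homography estimated on 8-bit copies can be applied to the full-depth band; a stand-in sketch:

import cv2
import numpy as np

im16 = (np.random.rand(100, 120) * 65535).astype(np.uint16)  # stand-in band
h = np.eye(3)                                                # stand-in homography
imReg16 = cv2.warpPerspective(im16, h, (im16.shape[1], im16.shape[0]))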
Code example #8
def main(argv):
    real_start = time.time()
    train_size = int(19386625 * 0.2)

    split_struct = True
    osm_roads = False

    X_train, y_train, X_test, y_test, _, _, _ = data.load(
        train_size,
        normalize=False,
        osm_roads=osm_roads,
        split_struct=split_struct)

    start = time.time()
    # Build a random forest classifier
    forest = RandomForestClassifier(n_estimators=500,
                                    min_samples_leaf=4,
                                    min_samples_split=2,
                                    max_depth=130,
                                    class_weight='balanced',
                                    n_jobs=-1,
                                    verbose=1)
    print("Fitting data...")
    forest.fit(X_train, y_train)

    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))

    yt_pred = forest.predict(X_train)
    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))

    y_pred = forest.predict(X_test)
    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    # NOTE: everything below this early return is unreachable (kept for reference).
    return 0

    dump(forest, '../sensing_data/models/forest_tsfull_group2.joblib')
    print("Saved model to disk")

    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=None,
                                       osm_roads=osm_roads,
                                       split_struct=split_struct,
                                       army_gt=False)

    start_pred = time.time()
    y_pred_classes = forest.predict(X)

    # y_pred_proba = forest.predict_proba(X)
    # y_pred_classes = np.array(
    #     [np.argmax(yi, axis=-1) + 1 for yi in tqdm(y_pred_proba)])
    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))

    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))

    yr = y_pred_classes.reshape(shape)

    viz.createGeotiff(OUT_RASTER, yr, REF_FILE, gdal.GDT_Byte)

    print("Creating uncertainty matrix...")
    start_matrix = time.time()

    # y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 3))

    # viz.createGeotiff(OUT_PROBA_RASTER + "estrutura.tiff",
    #                   y_pred_proba_reshaped[:, :, 0], REF_FILE, gdal.GDT_Float32)
    # # viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
    # #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "restante.tiff",
    #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
    #                   y_pred_proba_reshaped[:, :, 2], REF_FILE, gdal.GDT_Float32)

    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))

    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
Code example #9
def main(argv):
    bands = {"VV": [], "VH": [], "VVVH": []}

    src_dss = [
        f for f in os.listdir(SRC_FOLDER)
        if (".jp2" in f) or (".tif" in f) or (".img" in f)
    ]
    src_dss.sort()
    ref_dss = SRC_FOLDER + src_dss[0]  # Get the first one

    for vh, vv in pairwise(src_dss):
        raster_ds = gdal.Open(SRC_FOLDER + vv, gdal.GA_ReadOnly)
        vv_data = raster_ds.GetRasterBand(1).ReadAsArray()
        raster_ds = gdal.Open(SRC_FOLDER + vh, gdal.GA_ReadOnly)
        vh_data = raster_ds.GetRasterBand(1).ReadAsArray()
        vv_vh_data = np.divide(vv_data, vh_data)
        viz.createGeotiff(SRC_FOLDER + vv.split(".")[0] + "VH.tif", vv_vh_data,
                          ref_dss, gdal.GDT_Float32)

    src_dss = [
        f for f in os.listdir(SRC_FOLDER)
        if (".jp2" in f) or (".tif" in f) or (".img" in f)
    ]
    src_dss.sort()

    # 3clipped_Gamma0_VH.img
    for f in src_dss:
        try:
            key = f.split("_")[2].split(".")[0]
            bands[key].append(SRC_FOLDER + f)
        except KeyError:
            print("ignoring")

    ref_ds = gdal.Open(ref_dss, gdal.GA_ReadOnly)
    band = ref_ds.GetRasterBand(1).ReadAsArray()
    ref_shape = band.shape

    for b in tqdm(bands):
        # change to np array (0,m) when possible timeseries.append([bandsData], axis=0), or faster (n,m) -> a[0..n] = [1,2,...]
        timeseries = []

        if (len(bands[b]) > 0):
            # Open raster dataset
            for raster in bands[b]:
                raster_ds = gdal.Open(raster, gdal.GA_ReadOnly)
                # Extract band's data and transform into a numpy array
                bands_data = raster_ds.GetRasterBand(1).ReadAsArray()
                # static fix for clip mismatch problem
                timeseries.append(bands_data[:ref_shape[0], :ref_shape[1]])

            timeseries = np.array(timeseries)
            timeseries[~np.isfinite(timeseries)] = 0

            # Using quartiles, change to 0.05 quantiles later if load isn't too much...
            mean_ts = np.mean(timeseries, axis=0)  # mean
            q0 = np.quantile(timeseries, 0.00, axis=0)  # minimum
            q1 = np.quantile(timeseries, 0.25, axis=0)  # first quartile
            q2 = np.quantile(timeseries, 0.50, axis=0)  # median
            q3 = np.quantile(timeseries, 0.75, axis=0)  # third quartile
            q4 = np.quantile(timeseries, 1.00, axis=0)  # maximum
            std = np.std(timeseries, axis=0)  # standard deviation
            variance = np.var(timeseries, axis=0)  # variance

            viz.createGeotiff(DST_FOLDER + b + "_mean.tiff", mean_ts, ref_dss,
                              gdal.GDT_Float32)
            viz.createGeotiff(DST_FOLDER + b + "_q0.tiff", q0, ref_dss,
                              gdal.GDT_Float32)
            viz.createGeotiff(DST_FOLDER + b + "_q1.tiff", q1, ref_dss,
                              gdal.GDT_Float32)
            viz.createGeotiff(DST_FOLDER + b + "_q2.tiff", q2, ref_dss,
                              gdal.GDT_Float32)
            viz.createGeotiff(DST_FOLDER + b + "_q3.tiff", q3, ref_dss,
                              gdal.GDT_Float32)
            viz.createGeotiff(DST_FOLDER + b + "_q4.tiff", q4, ref_dss,
                              gdal.GDT_Float32)
            viz.createGeotiff(DST_FOLDER + b + "_var.tiff", variance, ref_dss,
                              gdal.GDT_Float32)
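The pairwise helper consumed above is not shown. Since the sorted list contributes one VH and one VV file per date, a disjoint-pair implementation fits the usage; this is an assumption, and note that the itertools recipe of the same name yields overlapping pairs instead:

def pairwise(iterable):
    # Hypothetical sketch: yields disjoint consecutive pairs
    # (s0, s1), (s2, s3), ... - one VH/VV file pair per date.
    # A trailing unpaired element is silently dropped.
    it = iter(iterable)
    return zip(it, it)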
Code example #10
def main(argv):
    parser = argparse.ArgumentParser(description='Trains a xgboost model.')
    parser.add_argument("--roads",
                        type=str_2_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="Activate OSM roads")
    parser.add_argument("--fselect",
                        type=str_2_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="Activate feature selection roads")

    args = parser.parse_args()

    road_flag = args.roads
    selector_flag = args.fselect

    if road_flag:
        print("Using roads...")

    obj = 'binary:hinge' if args.roads else 'multi:softmax'

    real_start = time.time()
    train_size = int(19386625 * 0.2)
    X, y, X_test, y_test = data.load(train_size,
                                     normalize=False,
                                     balance=False,
                                     osm_roads=road_flag)

    start = time.time()

    forest = xgb.XGBClassifier(
        colsample_bytree=0.7553707061597048,
        gamma=5,
        gpu_id=0,
        learning_rate=0.2049732654267658,
        max_depth=8,
        min_child_weight=1,
        max_delta_step=9.075685204314162,
        n_estimators=1500,
        n_jobs=4,
        objective=obj,  # binary:hinge if binary classification
        predictor='gpu_predictor',
        tree_method='gpu_hist')

    if selector_flag:
        print("Feature importances running...")
        selector = fselector.Fselector(forest, mode="elastic", thold=0.25)
        transformer = selector.select(X, y)

        print("Transforming data...")
        X = transformer.transform(X)
        X_test = transformer.transform(X_test)

    print("Fitting data...")
    forest.fit(X, y)

    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))

    y_pred = forest.predict(X_test)

    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Kappa: {kappa}')
    print(classification_report(y_test, y_pred))
    print(confusion_matrix(y_test, y_pred))

    dump(forest, '../sensing_data/models/boosted.joblib')
    print("Saved model to disk")

    # Testing trash
    X, y, shape = data.load_prediction(ratio=1,
                                       normalize=False,
                                       osm_roads=road_flag)

    start_pred = time.time()
    # batch test
    X_h = X[:len(X) // 2]
    X_h1 = X[len(X) // 2:]

    forest.get_booster().set_param('predictor', 'cpu_predictor')

    print("Predict 0%...")
    y_pred = forest.predict_proba(X_h)
    print("Predict 50%...")
    y_pred2 = forest.predict_proba(X_h1)
    print("Predict 100%...")

    y_pred_proba = np.concatenate((y_pred, y_pred2))
    y_pred_classes = np.array(
        [np.argmax(yi, axis=-1) + 1 for yi in tqdm(y_pred_proba)])
    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))

    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))

    y_pred_classes_reshaped = y_pred_classes.reshape(shape)

    viz.createGeotiff(OUT_RASTER, y_pred_classes_reshaped, REF_FILE,
                      gdal.GDT_Byte)

    print("Creating uncertainty matrix...")
    start_matrix = time.time()

    y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 3))

    viz.createGeotiff(OUT_PROBA_RASTER + "estrutura.tiff",
                      y_pred_proba_reshaped[:, :,
                                            0], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
    #                   y_pred_proba_reshaped[:, :, 1], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "restante.tiff",
                      y_pred_proba_reshaped[:, :,
                                            1], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
                      y_pred_proba_reshaped[:, :,
                                            2], REF_FILE, gdal.GDT_Float32)

    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))

    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))
Code example #11
def main(argv):
    parser = argparse.ArgumentParser(description='Trains a xgboost model.')
    parser.add_argument("--roads",
                        type=str_2_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="Activate OSM roads")
    parser.add_argument("--fselect",
                        type=str_2_bool,
                        nargs='?',
                        const=True,
                        default=False,
                        help="Activate feature selection")

    args = parser.parse_args()

    road_flag = args.roads
    selector_flag = args.fselect

    if road_flag:
        print("Using roads...")

    if selector_flag:
        print("Using feature selection...")

    obj = 'binary:hinge'

    real_start = time.time()

    split_struct = False
    osm_roads = False

    train_size = int(19386625 * 0.2)
    # train_size = int(1607*1015*0.2)

    X_train, y_train, X_test, y_test, _, _, _ = data.load(
        train_size,
        map_classes=False,
        normalize=False,
        osm_roads=osm_roads,
        split_struct=split_struct,
        gt_raster='cos_new_gt_2015t.tiff')

    start = time.time()

    #XGB_binary_building = {'colsample_bytree': 0.7343021353976351, 'gamma': 0, 'learning_rate': 0.16313076998849083, 'max_delta_step': 8.62355770678575, 'max_depth': 8, 'min_child_weight': 3, 'n_estimators': 1500, 'predictor': 'cpu_predictor', 'tree_method': 'hist'}

    forest = xgb.XGBClassifier(
        colsample_bytree=0.7343021353976351,
        gamma=0,
        gpu_id=0,
        learning_rate=0.16313076998849083,
        max_depth=8,
        min_child_weight=3,
        max_delta_step=8.62355770678575,
        n_estimators=1500,
        n_jobs=-1,
        #objective=obj,  # binary:hinge if binary classification
        predictor='cpu_predictor',
        tree_method='hist')

    if selector_flag:
        print("Feature importances running...")
        # svm cant handle full training data
        x_train_feature, _, y_train_feature, _ = train_test_split(
            X_test, y_test, test_size=0, train_size=100_000)

        selector = fselector.Fselector(forest, mode="importances", thold=0.80)
        transformer = selector.select(x_train_feature, y_train_feature)

        features = transformer.get_support()
        # feature_names = data.get_features()
        # feature_names = feature_names[features]
        print(features)
        print("Transforming data...")
        print("Before: ", X_train.shape)
        X_train = transformer.transform(X_train)
        X_test = transformer.transform(X_test)
        print("After: ", X_train.shape)

    print("Fitting data...")
    forest.fit(X_train, y_train)

    end = time.time()
    elapsed = end - start
    print("Training time: " + str(timedelta(seconds=elapsed)))

    yt_pred = forest.predict(X_train)

    yt_pred[yt_pred > 0.5] = 1
    yt_pred[yt_pred <= 0.5] = 0

    kappa = cohen_kappa_score(y_train, yt_pred)
    print(f'Train Kappa: {kappa}')
    print(classification_report(y_train, yt_pred))

    y_pred = forest.predict(X_test)

    y_pred[y_pred > 0.5] = 1
    y_pred[y_pred <= 0.5] = 0

    kappa = cohen_kappa_score(y_test, y_pred)
    print(f'Validation Kappa: {kappa}')
    print(classification_report(y_test, y_pred))

    dump(forest, '../sensing_data/models/boosted_test_group1.joblib')
    print("Saved model to disk")

    # Testing trash
    X, y, shape = data.load_prediction(ratio=1,
                                       map_classes=False,
                                       normalize=False,
                                       osm_roads=osm_roads,
                                       split_struct=split_struct,
                                       gt_raster='cos_new_gt_2015t.tif')

    start_pred = time.time()
    # batch test
    X_h = X[:len(X) // 2]
    X_h1 = X[len(X) // 2:]

    forest.get_booster().set_param('predictor', 'cpu_predictor')

    print("Predict 0%...")
    y_pred = forest.predict(X_h)
    print("Predict 50%...")
    y_pred2 = forest.predict(X_h1)
    print("Predict 100%...")

    y_pred_classes = np.concatenate((y_pred, y_pred2))
    y_pred_classes[y_pred_classes > 0.5] = 1
    y_pred_classes[y_pred_classes <= 0.5] = 0

    print("Predict time: " + str(timedelta(seconds=time.time() - start_pred)))

    kappa = cohen_kappa_score(y, y_pred_classes)
    print(f'Kappa: {kappa}')
    print(classification_report(y, y_pred_classes))

    y_pred_classes_reshaped = y_pred_classes.reshape(shape)

    viz.createGeotiff(OUT_RASTER, y_pred_classes_reshaped, REF_FILE,
                      gdal.GDT_Byte)

    # NOTE: the block below is unreachable; it also references y_pred_proba,
    # which this version never computes, and its channel indices do not match
    # the 4-channel reshape.
    return

    print("Creating uncertainty matrix...")
    start_matrix = time.time()

    y_pred_proba_reshaped = y_pred_proba.reshape((shape[0], shape[1], 4))

    viz.createGeotiff(OUT_PROBA_RASTER + "estrutura_urbana.tiff",
                      y_pred_proba_reshaped[:, :,
                                            0], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "estrada.tiff",
                      y_pred_proba_reshaped[:, :,
                                            1], REF_FILE, gdal.GDT_Float32)
    # viz.createGeotiff(OUT_PROBA_RASTER + "outras.tiff",
    #                     y_pred_proba_reshaped[:, :, 2], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "natural.tiff",
                      y_pred_proba_reshaped[:, :,
                                            3], REF_FILE, gdal.GDT_Float32)
    viz.createGeotiff(OUT_PROBA_RASTER + "agua.tiff",
                      y_pred_proba_reshaped[:, :,
                                            4], REF_FILE, gdal.GDT_Float32)

    end = time.time()
    elapsed = end - start_matrix
    print("Matrix creation time: " + str(timedelta(seconds=elapsed)))

    end = time.time()
    elapsed = end - real_start
    print("Total run time: " + str(timedelta(seconds=elapsed)))