def create_vrt_bands(img_path, output_vrt, bands):
    """Build one single-band VRT file per requested band of an image.

    Each VRT is produced by invoking the ``gdalbuildvrt`` command line tool
    with ``-b <band>``.

    Args:
        img_path: Path of the source raster.
        output_vrt: Path whose directory (as returned by
            ``dl_utils.basedir``) receives the per-band VRT files.
        bands: Iterable of band numbers, or ``None`` to use every band of
            the image (1..RasterCount).

    Returns:
        List with the path of the VRT requested for each band, in the same
        order as ``bands``.
    """
    if bands is None:
        # The dataset is only needed to discover how many bands exist.
        image_ds = gdal.Open(img_path, gdal.GA_ReadOnly)
        bands = range(1, image_ds.RasterCount + 1)

    vrt_dir = dl_utils.basedir(output_vrt)
    vrt_bands = []

    for band in bands:
        vrt_filepath = dl_utils.new_filepath(
            img_path, suffix=str(band), ext='vrt', directory=vrt_dir)

        command = ["gdalbuildvrt", "-b", str(band), vrt_filepath, img_path]

        # The tool's output is discarded. DEVNULL is used instead of PIPE
        # because nobody reads the pipes: a chatty child process could fill
        # the OS pipe buffer and block forever.
        # NOTE(review): the exit status is ignored, so a failed gdalbuildvrt
        # still yields a path in the returned list.
        subprocess.call(command, stdout=subprocess.DEVNULL,
                        stderr=subprocess.DEVNULL)

        vrt_bands.append(vrt_filepath)

    return vrt_bands
def prepare_chunks(image_file, band, chunk_x_size, in_nodata):
    """Describe the vertical strips used to read one band of an image.

    The raster is cut along the x axis into windows of ``chunk_x_size``
    columns spanning the full image height; the last window is narrowed so
    that it ends at the right edge of the raster.

    Args:
        image_file: Path of the raster to be split.
        band: Band number stored in every chunk description.
        chunk_x_size: Width, in pixels, of each window.
        in_nodata: Nodata value stored in every chunk description.

    Returns:
        List of dicts with the keys ``id``, ``image_file``, ``band``,
        ``xoff``, ``yoff``, ``win_xsize``, ``win_ysize`` and ``nodata``.
    """
    dataset = gdal.Open(image_file, gdal.GA_ReadOnly)
    raster_width = dataset.RasterXSize
    raster_height = dataset.RasterYSize

    chunk_list = []
    for x_start in range(0, raster_width, chunk_x_size):
        # Only the final strip can overshoot the raster, so clamp its width.
        strip_width = min(chunk_x_size, raster_width - x_start)

        strip_suffix = 'b{}_x{}'.format(band, x_start)
        strip_id = dl_utils.new_filepath(
            image_file, suffix=strip_suffix, ext='', directory='')

        chunk = {
            'id': strip_id,
            'image_file': image_file,
            'band': band,
            'xoff': x_start,
            'yoff': 0,
            'win_xsize': strip_width,
            'win_ysize': raster_height,
            'nodata': in_nodata,
        }
        chunk_list.append(chunk)

    return chunk_list
def exec(model_dir, chips_dir, eval_size):
    """Evaluate the model stored in ``model_dir`` on the evaluation split.

    The training parameters saved as ``train_params.dat`` inside
    ``model_dir`` are reloaded and used both to seed TensorFlow and to
    rebuild the estimator.

    Args:
        model_dir: Directory holding the trained model and its parameters.
        chips_dir: Directory with the chips data; when ``None`` the value
            saved in the training parameters is used.
        eval_size: Size of the evaluation split; when ``<= 0`` the value
            saved in the training parameters is used.
    """
    tf.logging.set_verbosity(tf.logging.INFO)
    # Kept for parity with the original flow; the value is not read here.
    start_time = time.time()

    saved_params_file = dl_utils.new_filepath(
        'train_params.dat', directory=model_dir)
    params = dl_utils.load_object(saved_params_file)

    tf.set_random_seed(params['seed'])

    # Missing arguments fall back to what was recorded at training time.
    if eval_size <= 0:
        eval_size = params['eval_size']
    if chips_dir is None:
        chips_dir = params['chips_dir']

    data_file, expect_file, meta_file = dl_utils.chips_data_files(chips_dir)
    split = dl_utils.train_test_split(
        data_file, expect_file, meta_file, eval_size)
    # Only the evaluation half of the split is consumed below.
    _, eval_data, _, eval_expect, _ = split

    print("Evaluating the model stored into " + model_dir)

    estimator = tf.estimator.Estimator(
        model_fn=md.description,
        params=params,
        model_dir=model_dir,
    )
    do_evaluation(estimator, eval_data, eval_expect, 'EVALUATING', params)
def standardize(images, band, stats, output_dir, convert_int16, bands, chunk_x_size):
    """Standardize one band of each image and write it to a 'stand' copy.

    For every valid pixel the value becomes ``(value - median) / std``.
    Pixels equal to ``stats['nodata']`` are set to ``output_nodata``.
    The raster is processed in vertical strips of ``chunk_x_size`` columns.

    Args:
        images: Iterable of input raster paths.
        band: Number of the band to standardize (read from the input and
            written to the same band number of the output).
        stats: Mapping with the keys ``'nodata'``, ``'median'`` and
            ``'std'`` for this band.
        output_dir: Directory that receives the standardized rasters.
        convert_int16: When true, values are clamped to [-3.2760, 3.2760],
            nodata pixels are set to -3.2767, and everything is scaled by
            10000 and stored as int16.
        bands: Collection whose length gives the band count of a newly
            created output file.
        chunk_x_size: Width, in pixels, of each processing strip.
    """
    for image_path in images:
        output_image_path = dl_utils.new_filepath(
            image_path, suffix='stand', directory=output_dir)

        print("Standardizing band " + str(band) + ' ' + image_path + " => " + output_image_path)

        if not Path(output_image_path).is_file():
            data_type = gdal.GDT_Int16 if convert_int16 else gdal.GDT_Float32
            output_ds = dl_utils.create_output_file(
                image_path, output_image_path, len(bands), data_type)
        else:
            # The output already exists (e.g. another band was written
            # before), so it is reopened for update instead of recreated.
            output_ds = gdal.Open(output_image_path, gdal.GA_Update)

        input_ds = gdal.Open(image_path, gdal.GA_ReadOnly)
        x_size = input_ds.RasterXSize
        y_size = input_ds.RasterYSize

        # Band handles do not change between strips; fetch them once.
        output_band_ds = output_ds.GetRasterBand(band)
        input_band_ds = input_ds.GetRasterBand(band)

        for xoff in range(0, x_size, chunk_x_size):
            # Clamp the last strip without overwriting chunk_x_size, so the
            # next image is still processed with the requested strip width.
            win_xsize = min(chunk_x_size, x_size - xoff)

            band_data = input_band_ds.ReadAsArray(xoff, 0, win_xsize, y_size)
            # Lower-case dtype names are the portable NumPy spelling.
            band_data = band_data.astype('float32')

            valid_pixels = (band_data != stats['nodata'])
            invalid_pixels = np.logical_not(valid_pixels)

            band_data[valid_pixels] = (band_data[valid_pixels] - stats['median']) / stats['std']
            # NOTE(review): output_nodata is not defined in this function;
            # presumably a module-level constant - confirm.
            band_data[invalid_pixels] = output_nodata

            if convert_int16:
                positive_outliers = (band_data >= 3.2760)
                negative_outliers = (band_data <= -3.2760)

                band_data[positive_outliers] = 3.2760
                band_data[negative_outliers] = -3.2760
                # After the x10000 scaling below this becomes -32767, which
                # lies outside the clamped data range of +/-32760.
                band_data[invalid_pixels] = -3.2767

                band_data = band_data * 10000
                band_data = band_data.astype('int16')

            output_band_ds.WriteArray(band_data, xoff, 0)

        # Push pending blocks to disk before moving on to the next image.
        output_band_ds.FlushCache()
def calc_freq_histogram(images, band, in_nodata, output_dir, chunk_x_size):
    """Compute the value-frequency histogram of one band across images.

    Each image is split into chunks (``prepare_chunks``) that are processed
    in parallel by ``unique_values``. The per-chunk results are merged into
    a per-image histogram, which is exported to a ``b<band>_byimgs`` CSV
    and then merged into the overall histogram. The overall histogram is
    exported to a ``band<band>`` / ``all`` CSV.

    ``merge_unique_values`` is relied on to update its first argument in
    place, since its return value is ignored.

    Args:
        images: Iterable of raster paths.
        band: Band number to analyse.
        in_nodata: Nodata value forwarded to ``prepare_chunks``.
        output_dir: Directory that receives the CSV files.
        chunk_x_size: Chunk width forwarded to ``prepare_chunks``.

    Returns:
        The merged histogram of all images, or ``None`` when ``images`` is
        empty.
    """
    input_images = []
    freq_histogram = None

    # Using the pool as a context manager guarantees that the worker
    # processes are terminated even if reading or exporting raises.
    with multiprocessing.Pool() as pool:
        for image_path in images:
            chunks = prepare_chunks(image_path, band, chunk_x_size, in_nodata)
            chunks_result = pool.map(unique_values, chunks)

            # The first chunk seeds the per-image histogram; the rest are
            # folded into it.
            freq_histogram_aux = chunks_result[0]
            for chunk_histogram in chunks_result[1:]:
                chunk_uniq = list(chunk_histogram.keys())
                chunk_count = list(chunk_histogram.values())
                merge_unique_values(freq_histogram_aux, chunk_uniq, chunk_count)

            input_images.append(image_path)

            csv_suffix = 'b' + str(band) + '_byimgs'
            csv_freq_file = dl_utils.new_filepath(
                image_path, suffix=csv_suffix, ext='csv', directory=output_dir)
            export_csv(csv_freq_file, image_path, freq_histogram_aux)

            if freq_histogram is None:
                freq_histogram = freq_histogram_aux
            else:
                band_uniq_vals = list(freq_histogram_aux.keys())
                band_count_vals = list(freq_histogram_aux.values())
                merge_unique_values(freq_histogram, band_uniq_vals, band_count_vals)

    csv_freq_file = dl_utils.new_filepath(
        'band' + str(band), suffix='all', ext='csv', directory=output_dir)
    export_csv(csv_freq_file, input_images, freq_histogram)

    return freq_histogram
# Training flow: load the chips, persist the run parameters, then train and
# evaluate the estimator once per epoch.
# NOTE(review): args, chips_dir, eval_size, batch_size and output_dir are
# used here but assigned outside the visible code - confirm their origin.
epochs = args.epochs
seed = args.seed
# The whole argument namespace is what gets saved and handed to the model.
params = vars(args)

tf.set_random_seed(seed)
tf.logging.set_verbosity(tf.logging.INFO)

dat_path, exp_path, mtd_path = dl_utils.chips_data_files(chips_dir)
train_data, test_data, train_expect, test_expect, chips_info = dl_utils.train_test_split(dat_path, exp_path, mtd_path, eval_size)

# size * itemsize is the byte count of each array; 0.000001 converts to MB.
print("Memory size: %d Mb" % ( ((train_data.size * train_data.itemsize) + (test_data.size * test_data.itemsize))*0.000001 ))
print("Train data shape: " + str(train_data.shape))
print("Train label shape: " + str(train_expect.shape))
print("Train params: " + str(params))

dl_utils.mkdirp(output_dir)

# Parameters and chips metadata are stored next to the model files.
param_path = dl_utils.new_filepath('train_params.dat', directory=output_dir)
chips_info_path = dl_utils.new_filepath('chips_info.dat', directory=output_dir)

dl_utils.save_object(param_path, params)
dl_utils.save_object(chips_info_path, chips_info)

estimator = tf.estimator.Estimator(model_fn=md.description, params=params, model_dir=output_dir)
# NOTE(review): logging_hook is created but never passed to train(), which
# receives hooks=[] below - confirm whether it was meant to be used.
logging_hook = tf.train.LoggingTensorHook(tensors={'loss': 'cost/loss'}, every_n_iter=batch_size*4)

for i in range(0, epochs):
    # One pass over the shuffled training data per loop iteration.
    train_input = tf.estimator.inputs.numpy_input_fn(x={"data": train_data}, y=train_expect, batch_size=batch_size, num_epochs=1, shuffle=True)
    train_results = estimator.train(input_fn=train_input, steps=None, hooks=[])

    # Evaluate on the held-out data after each training pass.
    test_input = tf.estimator.inputs.numpy_input_fn(x={"data": test_data}, y=test_expect, batch_size=batch_size, num_epochs=1, shuffle=False)
    test_results = estimator.evaluate(input_fn=test_input)
def exec(images, model_dir, output_dir, memory_percentage=40):
    """Classify images with a trained model and write the predictions.

    The parameters (``train_params.dat``) and chips metadata
    (``chips_info.dat``) saved in ``model_dir`` are reloaded. For each
    image, chips are read position by position and cached in memory; the
    cache is classified and written to the output raster whenever memory
    usage exceeds ``memory_percentage`` or the last position is reached.

    Args:
        images: Iterable of input raster paths.
        model_dir: Directory holding the trained model and its metadata.
        output_dir: Directory that receives the ``pred`` GeoTIFF files
            (created if needed through ``dl_utils.mkdirp``).
        memory_percentage: Threshold compared against
            ``dl_utils.memory_percentage()`` that triggers classification
            of the cached chips.
    """
    tf.logging.set_verbosity(tf.logging.INFO)

    dl_utils.mkdirp(output_dir)

    param_path = dl_utils.new_filepath('train_params.dat', directory=model_dir)
    params = dl_utils.load_object(param_path)

    chips_info_path = dl_utils.new_filepath('chips_info.dat', directory=model_dir)
    chips_info = dl_utils.load_object(chips_info_path)

    for in_image in images:
        in_image_ds = gdal.Open(in_image)
        out_image = dl_utils.new_filepath(
            in_image, suffix='pred', ext='tif', directory=output_dir)
        out_image_ds = dl_utils.create_output_file(in_image, out_image)
        out_band = out_image_ds.GetRasterBand(1)

        estimator = tf.estimator.Estimator(
            model_fn=md.description, params=params, model_dir=model_dir)

        print(chips_info)

        _, dat_xsize, _, dat_nbands = chips_info['dat_shape']
        _, exp_xsize, _, _ = chips_info['exp_shape']
        # Half the difference between input and expected chip widths.
        pad_size = int((dat_xsize - exp_xsize) / 2)

        input_positions = dl_utils.get_predict_positions(
            in_image_ds.RasterXSize, in_image_ds.RasterYSize, exp_xsize, pad_size)

        cache_chip_data = []
        cache_out_position = []
        last_index = len(input_positions) - 1

        for i, input_position in enumerate(input_positions):
            is_last = (i == last_index)

            try:
                chip_data, out_position = dl_utils.get_predict_data(
                    in_image_ds, input_position, pad_size)
            except IOError as error:
                print(error)
                print('Ignoring this data block')
                # An unreadable block is skipped. When it is the final one
                # we must still fall through, otherwise chips cached from
                # earlier positions would never be classified or written.
                if not is_last:
                    continue
            else:
                cache_chip_data.append(chip_data)
                cache_out_position.append(out_position)

                print("Reading image " + in_image + ": memory percentage " +
                      str(dl_utils.memory_percentage()) + "%")

                # Keep caching until memory runs high or the image ends.
                if dl_utils.memory_percentage() <= memory_percentage and not is_last:
                    continue

            if not cache_chip_data:
                # Nothing pending (e.g. the last block failed right after
                # a flush), so there is nothing to classify.
                continue

            input_data = np.stack(cache_chip_data)

            # Release the cached chips before prediction allocates memory.
            del cache_chip_data
            cache_chip_data = []

            # Keep only the bands the model was trained with.
            input_data = input_data[:, :, :, 0:dat_nbands]

            print("Classifying image " + in_image + ": progress " +
                  str(float(i) / len(input_positions) * 100) + "%")
            predict_input_fn = tf.estimator.inputs.numpy_input_fn(
                x={"data": input_data}, batch_size=params['batch_size'], shuffle=False)
            predict_results = estimator.predict(input_fn=predict_input_fn)

            print("Writing classification result in " + out_image)
            # Predictions come back in input order, so they line up with
            # the cached output positions.
            for chip_predict, out_position in zip(predict_results, cache_out_position):
                out_predict = dl_utils.discretize_values(chip_predict, 1, 0)

                out_x0 = out_position[0]
                out_xy = out_position[1]

                out_band.WriteArray(out_predict[:, :, 0], out_x0, out_xy)

            out_band.FlushCache()

            del input_data
            del predict_results
            cache_out_position = []
            gc.collect()