def get_y_vector(binary_file_path: str,
                 smallest_window_size: tuple,
                 percentage_threshold: float = 0.5,
                 cached: bool = False) -> tuple:
    """Build per-window labels and a full-size label mask from a mask raster.

    The raster is opened with GDAL and reduced with a minimum along its first
    axis (presumably the band axis -- TODO confirm). It is then walked window
    by window with a ``CellGenerator``. A window is labelled 1 when it
    contains no zero value at all, or when the fraction of non-zero pixels
    exceeds ``percentage_threshold``; otherwise it is labelled 0.

    Args:
        binary_file_path: Path of the mask raster to open.
        smallest_window_size: Window size handed to ``CellGenerator``.
        percentage_threshold: Minimum non-zero fraction for a positive label.
        cached: When true, try the cache first and store the result after
            computing it.

    Returns:
        ``(y_train, real_mask)``: the flattened per-window labels and a
        ``uint8`` array of the reduced raster's shape (plus a trailing axis of
        length 1) holding each window's label over its pixel range.
    """
    import os

    # The key is derived from the inputs so that different masks, window
    # sizes or thresholds never share an entry. Only the file stem is used,
    # which keeps path separators out of the key.
    mask_name = os.path.splitext(os.path.basename(binary_file_path))[0]
    labels_key = "y_train-{}-window{}-threshold{}".format(
        mask_name, smallest_window_size, percentage_threshold)
    mask_key = labels_key + "-mask"

    if cached:
        y_train = load_cache(labels_key)
        real_mask = load_cache(mask_key)
        # Both parts are needed so a cache hit returns the same tuple as a
        # fresh computation.
        if y_train is not None and real_mask is not None:
            return y_train, real_mask

    dataset = gdal.Open(binary_file_path, gdal.GA_ReadOnly)
    array = dataset.ReadAsArray()
    array = np.min(array, 0)
    array = array[:, :, np.newaxis]

    binary_sat_image = SatelliteImage(dataset, array, MASK_BANDS)
    generator = CellGenerator(image=binary_sat_image,
                              size=smallest_window_size)

    # Mask which covers the whole image (exploded back up to the real dims)
    real_mask = np.zeros(array.shape, dtype=np.uint8)
    # Y matrix in dims of the blocks
    y_matrix = np.zeros(generator.shape())

    for window in generator:
        unique, counts = np.unique(window.raw, return_counts=True)
        if unique[0] == 0:
            # np.unique sorts its output, so counts[0] is the zero count.
            zeros = counts[0]
            non_zeros = np.sum(counts[1:])
            fraction = non_zeros / (zeros + non_zeros)
            y = 1 if fraction > percentage_threshold else 0
        else:
            # Smallest value is not zero: label the window positive.
            y = 1

        y_matrix[window.x, window.y] = y
        real_mask[window.x_range, window.y_range, 0] = y

    y_train = y_matrix.flatten()

    if cached:
        cache(y_train, labels_key)
        cache(real_mask, mask_key)

    return y_train, real_mask
def test_padding():
    """Check the shapes of cells and their (100, 100) super cells."""
    cells = CellGenerator(load_image(), (25, 25), length=(2, 180))

    expected_cell_shape = (25, 25, 4)
    expected_super_shape = (100, 100, 4)
    for cell in cells:
        assert cell.shape == expected_cell_shape
        assert cell.super_cell((100, 100)).shape == expected_super_shape
def extract_features(features: FeatureSet,
                     generator: CellGenerator,
                     load_cached=True,
                     image_name="",
                     cpu_cnt=None):
    """Compute (or load from cache) every feature and stack the results.

    Args:
        features: Feature set whose ``items`` mapping is iterated.
        generator: Cell generator for the image; only its ``shape()``,
            ``image``, ``x_size`` and ``y_size`` are read here.
        load_cached: When false, cached matrices are never read, even for
            features whose ``cached`` flag is set.
        image_name: Image identifier used in the cache key.
        cpu_cnt: Forwarded to ``compute_feature``.

    Returns:
        The feature matrices joined along axis 2 in iteration order, the
        single matrix unchanged when there is only one feature, or ``None``
        when the feature set is empty.
    """
    start = time.time()
    shape = generator.shape()

    print("\n--- Calculating Feature vector: {} ---\n".format(
        (shape[0], shape[1])))

    feature_matrices = []
    for _, feature in iteritems(features.items):
        cache_key = "feature-{feature}-window{window}-image-{image_name}".format(
            image_name=image_name,
            window=(generator.x_size, generator.y_size),
            feature=str(feature),
        )

        feature_matrix = None
        # Honour the caller's switch as well as the per-feature flag.
        if load_cached and feature.cached:
            feature_matrix = load_cache(cache_key)

        if feature_matrix is None:
            feature_matrix = compute_feature(feature, generator, cpu_cnt)
            cache(feature_matrix, cache_key)

        feature_matrices.append(feature_matrix)

        # Dirty fix. Would be better to re-use the windows every time so that
        # the windows do not have to be recalculated
        # (generator can only be iterated over once)
        generator = CellGenerator(generator.image,
                                  (generator.x_size, generator.y_size))

    if not feature_matrices:
        shared_feature_matrix = None
    elif len(feature_matrices) == 1:
        # Returned as-is so a lone matrix keeps whatever rank it has.
        shared_feature_matrix = feature_matrices[0]
    else:
        # One concatenate instead of re-copying the result per feature.
        shared_feature_matrix = np.concatenate(feature_matrices, axis=2)

    end = time.time()
    print(
        "Elapsed time extract multiprocessing: {} minutes, start: {}, end: {}".
        format((end - start) / 60, start, end))

    return shared_feature_matrix
def test_extract_features():
    """Run extract_features with one Pantex feature and check the output."""
    cells = CellGenerator(load_image(), (25, 25), length=(5, 10))

    pantex = Pantex(windows=((25, 25), (50, 50), (100, 100)))
    feature_set = FeatureSet()
    feature_set.add(pantex)

    matrix = extract_features(feature_set, cells)
    assert matrix.any()
def get_x_matrix(sat_image: SatelliteImage,
                 image_name,
                 feature_set,
                 window_size=(25, 25),
                 cached=True):
    """Return the 2-D sample matrix X for an image, using the cache if asked.

    Features come from ``extract_features`` on a fresh ``CellGenerator``. A
    3-D result ``[M x N x Z]`` is reshaped to ``(M * N, Z)``; a 2-D result is
    flattened into a single column, as scikit-learn always needs 2 dims. The
    resulting matrix is written to the cache before it is returned.
    """
    cache_key = "X-{0}-{1}-{2}".format(
        image_name, str(window_size), feature_set.string_presentation())

    if cached:
        cached_matrix = load_cache(cache_key)
        if cached_matrix is not None:
            print("Loaded cached X matrix: {}".format(cache_key))
            return cached_matrix
        print("X matrix not cached: {}".format(cache_key))

    started = time.time()
    cells = CellGenerator(image=sat_image, size=window_size)
    calculated_features = extract_features(feature_set,
                                           cells,
                                           load_cached=cached,
                                           image_name=image_name)
    elapsed = time.time() - started
    print("Calculating multiprocessing im:{} took {} seconds block size: {}, ".
          format(image_name, elapsed, window_size))

    ndims = len(calculated_features.shape)
    if ndims == 3:
        rows, cols, nfeatures = calculated_features.shape
        X = calculated_features.reshape((rows * cols, nfeatures))
    elif ndims == 2:
        # Single feature per block: one column with one row per block.
        X = calculated_features.ravel().reshape(-1, 1)

    cache(X, cache_key)
    return X
def plot_overlap(y_test, y_pred, image_name, image_full_path, test_sat_image,
                 mask_full_path, main_window_size, current_time, results_path):
    """Plot predicted windows over the RGB image and compute Jaccard indices.

    Two figures are saved under ``results_path`` and shown: the RGB image
    with the non-zero predicted windows overlaid, and the window-scale
    prediction mask on its own.

    Args:
        y_test: True label per window, in generator iteration order.
        y_pred: Predicted label per window, in generator iteration order.
        image_name: Image identifier used in the output file names.
        image_full_path: Path of the image loaded for the RGB background.
        test_sat_image: Image handed to ``CellGenerator``.
        mask_full_path: Not used by this function.
        main_window_size: Window size handed to ``CellGenerator``.
        current_time: Timestamp used in the output file names.
        results_path: Directory the figures are written to.

    Returns:
        ``(jaccard_index_main_window_scale, jaccard_index_full_scale)``.
    """
    generator = CellGenerator(image=test_sat_image, size=main_window_size)

    print("{} == {}".format(generator.x_length * generator.y_length,
                            y_pred.shape))

    # NOTE(review): test_image_loaded is not a parameter of this function, so
    # it must exist at module level -- confirm, or derive the shape from
    # test_sat_image instead.
    full_shape = (test_image_loaded.shape[0], test_image_loaded.shape[1])
    full_scale_pred_mask = np.zeros(full_shape)
    full_scale_truth_mask = np.zeros(full_shape)

    # Single pass over the windows. The generator is no longer exhausted
    # beforehand just to count them, which held every window in memory.
    for i, window in enumerate(generator):
        full_scale_pred_mask[window.x_range, window.y_range] = y_pred[i]
        full_scale_truth_mask[window.x_range, window.y_range] = y_test[i]

    result_mask = np.reshape(y_pred, generator.shape())
    truth_mask = np.reshape(y_test, generator.shape())

    ds, img, bands = load_from_file(image_full_path, WORLDVIEW3)
    img = normalize_image(img, bands)
    rgb_img = get_rgb_bands(img, bands)

    plt.figure()
    plt.axis('off')
    plt.imshow(rgb_img)

    # Hide the zero-valued pixels so the image stays visible beneath them.
    show_mask = np.ma.masked_where(full_scale_pred_mask == 0,
                                   full_scale_pred_mask)
    plt.imshow(show_mask, cmap='jet', interpolation='none', alpha=1.0)
    plt.savefig("{}/classification_jaccard_results_{}_{}.png".format(
        results_path, image_name, current_time))
    plt.show()

    plt.figure()
    plt.axis('off')
    plt.imshow(result_mask, cmap='jet', interpolation='none', alpha=1.0)
    plt.savefig("{}/classification_jaccard_mask_results_{}_{}.png".format(
        results_path, image_name, current_time))
    plt.show()

    jaccard_index_main_window_scale = jaccard_index_binary_masks(
        truth_mask, result_mask)
    jaccard_index_full_scale = jaccard_index_binary_masks(
        full_scale_truth_mask, full_scale_pred_mask)

    return jaccard_index_main_window_scale, jaccard_index_full_scale
# Save the RGB rendering of the image next to the heat-map outputs.
# NOTE(review): image_file, image_name, results_path, mask_full_path,
# main_window_size and X are not defined in this run of statements; they are
# presumably set earlier in the file -- confirm.
ds, img, bands = load_from_file(image_file, WORLDVIEW3)
img = normalize_image(img, bands)
rgb_img = get_rgb_bands(img, bands)

plt.figure()
plt.imshow(rgb_img)
plt.savefig(results_path + "/lacunarity_heatmap_image_{image_name}.png".format(
    image_name=image_name
))

# Open the mask raster only to build a generator with the block-grid shape.
dataset = gdal.Open(mask_full_path, gdal.GA_ReadOnly)
array = dataset.ReadAsArray()
array = np.min(array, 0)
array = array[:, :, np.newaxis]
print(array.shape)

binary_sat_image = SatelliteImage(dataset, array, MASK_BANDS)
generator = CellGenerator(image=binary_sat_image, size=main_window_size)

X = X.reshape(generator.shape())
print(X.shape)

# Trim a border of 1/20th of the row count from every side of the grid.
red_size = int(X.shape[0] / 20)
X = X[red_size:X.shape[0]-red_size, red_size:X.shape[1]-red_size]

# Border mask over the trimmed grid: True on the outer ring, False inside.
# Built directly with the builtin bool dtype, because the np.bool alias was
# removed in NumPy 1.24.
X_mask = np.ones(X.shape, dtype=bool)
X_mask[red_size:X.shape[0]-red_size, red_size:X.shape[1]-red_size] = False

print(np.min(X), np.max(X))
print(np.unique(X, return_counts=True))
def test_generator():
    """Check that every generated cell reports a truthy shape."""
    cells = CellGenerator(load_image(), (25, 25), length=(2, 5))
    for cell in cells:
        assert cell.shape
def plot_overlap(y_pred, groups, im_num, image_name, mask_full_path,
                 main_window_size, current_time, results_path):
    """Plot one image's predicted windows and return the Jaccard index.

    The predictions for this image are selected with ``groups == im_num``.
    Every window predicted as 1 is painted with 255 in a full-size mask. The
    mask is drawn over the RGB image and on its own, both figures are saved
    under ``results_path``, and the mask is compared with the ground truth
    (mask raster values > 0).
    """
    mask_dataset = gdal.Open(mask_full_path, gdal.GA_ReadOnly)
    mask_array = np.min(mask_dataset.ReadAsArray(), 0)[:, :, np.newaxis]
    truth_mask = np.where(mask_array > 0, 1, 0)

    mask_image = SatelliteImage(mask_dataset, mask_array, MASK_BANDS)
    generator = CellGenerator(image=mask_image, size=main_window_size)

    result_mask = np.zeros(mask_array.shape, dtype=np.uint8)
    y_matrix = np.zeros(generator.shape())

    y_pred_im = y_pred[groups == im_num]
    print("unique y_pred", np.unique(y_pred_im, return_counts=True))
    print(y_pred_im.shape)
    print(y_pred_im)
    print("Gen shape", generator.x_length, generator.y_length)

    for index, window in enumerate(generator):
        if y_pred_im[index] == 1:
            y = 255
        else:
            y = 0
        if y != 0:
            print(index, y)

        y_matrix[window.x, window.y] = y
        result_mask[window.x_range, window.y_range, 0] = y

    # NOTE(review): image_file is not a parameter of this function, so it
    # must exist at module level -- confirm.
    ds, img, bands = load_from_file(image_file, WORLDVIEW3)
    img = normalize_image(img, bands)
    rgb_img = get_rgb_bands(img, bands)
    grayscale = get_grayscale_image(img, bands)

    plt.figure()
    plt.axis('off')
    plt.imshow(rgb_img)

    print("Counts:", np.unique(result_mask, return_counts=True))

    binary_mask = result_mask
    # Hide the zero-valued pixels so the image stays visible beneath them.
    overlay = np.ma.masked_where(binary_mask == 0, binary_mask)
    plt.imshow(overlay[:, :, 0], cmap='jet', interpolation='none', alpha=1.0)
    overlay_path = "{}/classification_results_{}_{}.png".format(
        results_path, image_name, current_time)
    plt.savefig(overlay_path)
    plt.show()

    plt.figure()
    plt.axis('off')
    plt.imshow(binary_mask[:, :, 0], cmap='jet', interpolation='none',
               alpha=1.0)
    mask_path = "{}/classification_mask_results_{}_{}.png".format(
        results_path, image_name, current_time)
    plt.savefig(mask_path)
    plt.show()

    print('Min {} Max {}'.format(binary_mask.min(), binary_mask.max()))
    print('Len > 0: {}'.format(len(binary_mask[binary_mask > 0])))
    print('Len == 0: {}'.format(len(binary_mask[binary_mask == 0])))

    jaccard_index = jaccard_index_binary_masks(truth_mask[:, :, 0],
                                               binary_mask[:, :, 0])
    print("Jaccard index: {}".format(jaccard_index))
    return jaccard_index
dataset = gdal.Open(out_file, gdal.GA_ReadOnly) # dataset = dataset[0, :, :] array = dataset.ReadAsArray() print(array.shape) array = np.min(array, 0) array = array[:, :, np.newaxis] truth_mask = np.where(array > 0, 1, 0) # unique, counts = np.unique(array, return_counts=True) # median = np.median(unique) # array = np.where(array > 0, 1, 0) # binary_sat_image = SatelliteImage.load_from_file(binary_file_path, bands=mask_bands) binary_sat_image = SatelliteImage(dataset, array, MASK_BANDS) generator = CellGenerator(image=binary_sat_image, size=smallest_window_size) result_mask = np.zeros(array.shape, dtype=np.uint8) y_matrix = np.zeros(generator.shape) for window in generator: # for name, feature in iteritems(features.items): y = 0 unique, counts = np.unique(window.raw, return_counts=True) # total = np.sum(counts) # above_n = np.sum(counts[unique > median]) # below_n = total - above_n # percentage_above = above_n / total # if percentage_above > percentage_threshold: # y = 1 if unique[0] == 0:
def plot_overlap(y_pred, image_name, image_full_path, mask_full_path,
                 main_window_size, current_time, results_path):
    """Plot predicted windows over the RGB image and return the Jaccard index.

    Every window predicted as 1 is painted with 255 in a full-size mask;
    windows without a prediction (index past the end of ``y_pred``) stay 0.
    The mask is drawn over the RGB image and on its own, both figures are
    saved under ``results_path``, and the mask is compared with the ground
    truth (mask raster values > 0).

    Args:
        y_pred: Predicted label per window, in generator iteration order.
        image_name: Image identifier used in the output file names.
        image_full_path: Path of the image loaded for the RGB background.
        mask_full_path: Path of the ground-truth mask raster.
        main_window_size: Window size handed to ``CellGenerator``; indexed as
            a pair to get the window area.
        current_time: Timestamp used in the output file names.
        results_path: Directory the figures are written to.

    Returns:
        The value returned by ``jaccard_index_binary_masks``.
    """
    dataset = gdal.Open(mask_full_path, gdal.GA_ReadOnly)
    array = dataset.ReadAsArray()
    array = np.min(array, 0)
    array = array[:, :, np.newaxis]
    truth_mask = np.where(array > 0, 1, 0)

    binary_sat_image = SatelliteImage(dataset, array, MASK_BANDS)
    generator = CellGenerator(image=binary_sat_image, size=main_window_size)

    result_mask = np.zeros(array.shape, dtype=np.uint8)
    y_matrix = np.zeros(generator.shape())

    print("unique y_pred", np.unique(y_pred, return_counts=True))
    print(y_pred.shape)
    print(y_pred)
    print("Gen shape", generator.x_length, generator.y_length,
          generator.x_length * generator.y_length)
    print("result mask shape", result_mask.shape)
    print("{} == {}".format(generator.x_length * generator.y_length,
                            y_pred.shape))

    # Pixel area of one full window, used for the sanity print below. It was
    # previously hard-coded as 30 * 30. Windows cut off at the image border,
    # if any, cover fewer pixels, so the two printed numbers may still differ.
    window_area = main_window_size[0] * main_window_size[1]
    n_predictions = y_pred.shape[0]

    i = 0
    y_expected = 0
    for window in generator:
        y = 255 if i < n_predictions and y_pred[i] == 1 else 0

        y_matrix[window.x, window.y] = y
        result_mask[window.x_range, window.y_range, 0] = y
        i += 1
        if y > 0:
            y_expected += window_area

    print("{} == {}".format(y_expected, len(result_mask[result_mask > 0])))
    print("Total iterations", i)
    print("Y_matrix counts", np.unique(y_matrix, return_counts=True))
    print("Counts:", np.unique(result_mask, return_counts=True))
    print("result_mask[255s]", len(result_mask[result_mask == 255]))
    print("result_mask[0s]", len(result_mask[result_mask == 0]))

    ds, img, bands = load_from_file(image_full_path, WORLDVIEW3)
    img = normalize_image(img, bands)
    rgb_img = get_rgb_bands(img, bands)

    plt.figure()
    plt.axis('off')
    plt.imshow(rgb_img)

    binary_mask = result_mask
    # Hide the zero-valued pixels so the image stays visible beneath them.
    show_mask = np.ma.masked_where(binary_mask == 0, binary_mask)
    plt.imshow(show_mask[:, :, 0], cmap='jet', interpolation='none',
               alpha=1.0)
    plt.savefig("{}/classification_jaccard_results_{}_{}.png".format(
        results_path, image_name, current_time))
    plt.show()

    plt.figure()
    plt.axis('off')
    plt.imshow(binary_mask[:, :, 0], cmap='jet', interpolation='none',
               alpha=1.0)
    plt.savefig("{}/classification_jaccard_mask_results_{}_{}.png".format(
        results_path, image_name, current_time))
    plt.show()

    print('Min {} Max {}'.format(binary_mask.min(), binary_mask.max()))
    print('Len > 0: {}'.format(len(binary_mask[binary_mask > 0])))
    print('Len == 0: {}'.format(len(binary_mask[binary_mask == 0])))

    jaccard_index = jaccard_index_binary_masks(truth_mask[:, :, 0],
                                               binary_mask[:, :, 0])
    print("Jaccard index: {}".format(jaccard_index))
    return jaccard_index
def compute_feature(feature, generator, cpu_cnt=None):
    """Compute one feature for every window scale using a process pool.

    For each scale in ``feature.windows`` the feature prepares its input with
    ``feature.initialize(generator, scale)``. The prepared data is split into
    chunks, each chunk is processed by ``compute_chunk`` in a worker process,
    and the chunk results are written into a ``(rows, cols, feature_length)``
    matrix.

    Args:
        feature: Feature object; must provide ``windows`` and ``initialize``
            and may provide ``chunk_size(cpu_cnt, shape)``.
        generator: Cell generator for the image. It is rebuilt after every
            scale because it can only be iterated over once.
        cpu_cnt: Number of worker processes; defaults to ``cpu_count()``.

    Returns:
        The per-scale matrices joined along axis 2, the single matrix when
        there is only one scale, or ``None`` when ``feature.windows`` is
        empty.

    Raises:
        ValueError: If the feature has no ``initialize`` attribute.
    """
    print("\n--- Calculating feature: {} ---\n".format(feature))

    shape = generator.shape()
    if cpu_cnt is None:
        cpu_cnt = cpu_count()

    scale_matrices = []
    # Calculate for different scales separately.
    for scale in feature.windows:
        # Timed per scale so the message reports this scale's preparation
        # only, not the time spent on the previous scales as well.
        start = time.time()

        # Prepare data for multiprocessing of individual features
        # Every feature needs 'initialize' option to work
        if not hasattr(feature, 'initialize'):
            raise ValueError("Initialize not implemented")
        data = feature.initialize(generator, scale)

        end = time.time()
        print("Preparing data cells took {} seconds".format((end - start)))

        # Get chunk size if feature has that function
        if hasattr(feature, 'chunk_size'):
            chunk_size = feature.chunk_size(cpu_cnt, shape)
        else:
            chunk_size = shape[0]

        windows_chunked = [
            data[i:i + chunk_size] for i in range(0, len(data), chunk_size)
        ]
        total_chunks = len(windows_chunked)

        print("\nTotal chunks to compute: {}, chunk_size: {}".format(
            total_chunks, chunk_size))

        compute_chunk_f = partial(compute_chunk, feature=feature)
        pool = Pool(cpu_cnt, maxtasksperchild=1)
        try:
            processing_results = pool.map(compute_chunk_f,
                                          windows_chunked,
                                          chunksize=1)
        finally:
            # Always release the worker processes, even when a chunk fails.
            pool.close()
            pool.join()

        # Load individual results of processing back into one matrix
        feature_length = processing_results[0][1].shape[1]
        feature_matrix = np.zeros((shape[0], shape[1], feature_length))
        for coords, chunk_matrix in processing_results:
            load_results_into_matrix(feature_matrix, coords, chunk_matrix)

        scale_matrices.append(feature_matrix)

        # Dirty fix. Would be better to re-use the windows every time so that
        # the windows do not have to be recalculated
        # (generator can only be iterated over once)
        generator = CellGenerator(generator.image,
                                  (generator.x_size, generator.y_size))

    if not scale_matrices:
        return None
    if len(scale_matrices) == 1:
        return scale_matrices[0]
    # One concatenate instead of re-copying the result for every scale.
    return np.concatenate(scale_matrices, axis=2)