def convert(self, in_file, out_file):
    omit = ['SHAPE_AREA', 'SHAPE_LEN']
    f_in = os.path.join(self.process_path, self.output_folder, in_file + ".shp")
    f_out = os.path.join(self.process_path, self.output_folder, out_file + ".json")
    with fiona.open(f_in) as source:
        # Use the recipe from the Shapely documentation:
        # http://toblerity.org/shapely/manual.html
        project = functools.partial(pyproj.transform,
                                    pyproj.Proj(**source.crs),
                                    pyproj.Proj(init='epsg:4326'))
        features = []
        for f in source:
            shape = shapely.geometry.shape(f['geometry'])
            projected_shape = shapely.ops.transform(project, shape)
            # Remove the properties we don't want
            props = f['properties']  # props is a reference
            for k in omit:
                if k in props:
                    del props[k]
            feature = geojson.Feature(id=f['id'],
                                      geometry=projected_shape,
                                      properties=props)
            features.append(feature)
    fc = geojson.FeatureCollection(features)
    with open(f_out, 'w') as f:
        f.write(geojson.dumps(fc))
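# Note: pyproj.transform and Proj(init=...) above are the legacy pyproj 1.x API,
# which newer pyproj releases have removed. A minimal sketch of the same
# reprojection with the pyproj >= 2.1 Transformer API (an assumption, not part of
# the original module; it reuses the names source and shape from convert):
#
#     transformer = pyproj.Transformer.from_crs(source.crs, "EPSG:4326", always_xy=True)
#     projected_shape = shapely.ops.transform(transformer.transform, shape)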
def shapefile_to_geojson(filename, transform=None):
    """Load the shapefile as GeoJSON, optionally transforming the coordinates.

    transform maps a list of (x, y) coordinates to a list of (x, y) coordinates
    in a new coordinate system; no transformation is applied if it is None.
    """
    shape_file = shapefile.Reader(filename)
    fields = shape_file.fields[1:]
    field_names = [field[0] for field in fields]
    features = []

    def transform_poly(poly):
        # A single ring is a sequence of (x, y) pairs; transform it directly.
        if all(len(item) == 2 and
               all(isinstance(coord, numbers.Number) for coord in item)
               for item in poly):
            return transform(poly)
        # Otherwise recurse into the nested rings/parts.
        return list(map(transform_poly, poly))

    for shape_record in shape_file.shapeRecords():
        properties = dict(zip(field_names, shape_record.record))
        geometry = shape_record.shape.__geo_interface__
        if transform is not None:
            geometry['coordinates'] = transform_poly(geometry['coordinates'])
        features.append({
            'type': "Feature",
            'geometry': geometry,
            'properties': properties
        })
    return {"type": "FeatureCollection", "features": features}
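# A minimal usage sketch for the transform hook above (the source EPSG code and
# shapefile name are placeholders, and pyproj is assumed to be available):
from pyproj import Transformer

_web_mercator_to_wgs84 = Transformer.from_crs("EPSG:3857", "EPSG:4326", always_xy=True)

def ring_to_wgs84(coords):
    """Map a list of (x, y) pairs to a list of (lon, lat) pairs."""
    return [_web_mercator_to_wgs84.transform(x, y) for x, y in coords]

# fc = shapefile_to_geojson("parcels", transform=ring_to_wgs84)  # hypothetical file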
def load_feature(image_id_csv, image_feature_h5):
    image_df = pd.read_csv(image_id_csv)
    image_list = image_df['ImageId'].tolist()
    features = []
    with tb.open_file(image_feature_h5, 'r') as f:
        for image_id in image_list:
            im = np.array(f.get_node('/' + image_id))
            features.append(im)
    features = np.array(features)
    return features
def segment_to_features(segment_mask, image):
    '''Extract features from polygons generated by segmentation.'''
    poly_ids = np.unique(segment_mask)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        features = []
        for poly_id in poly_ids:
            poly_pixels = image[segment_mask == poly_id]
            poly_features = get_poly_features(poly_pixels)
            features.append(poly_features)
    return features, poly_ids
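# get_poly_features is not defined in this snippet (it is also used by
# poly_to_features and train_image_to_feature below). A minimal sketch, assuming
# it reduces the pixels of one segment to per-band summary statistics; this is an
# illustration, not the original implementation:
import numpy as np

def get_poly_features(poly_pixels):
    """Hypothetical helper: mean, std, min, and max per band for one segment."""
    pixels = np.asarray(poly_pixels, dtype=float).reshape(len(poly_pixels), -1)
    return np.concatenate([pixels.mean(axis=0),
                           pixels.std(axis=0),
                           pixels.min(axis=0),
                           pixels.max(axis=0)])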
def geojson_from_mask(mask, transform, mode='polygon', min_aspect_ratio=1.618,
                      min_area=None, width_factor=0.5, thickness=0.001):
    polys = geometries_from_mask(mask, transform, mode, min_aspect_ratio,
                                 min_area, width_factor, thickness)
    features = []
    for poly in polys:
        features.append({
            'type': 'Feature',
            'properties': {},
            'geometry': poly
        })
    return geojson.dumps(FeatureCollection(features))
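# geometries_from_mask is assumed to be defined elsewhere in this module. A
# hypothetical call, using an affine transform read from a raster with rasterio:
#
#     import rasterio
#     with rasterio.open('mask.tif') as src:      # placeholder path
#         binary_mask = src.read(1) > 0
#         gj = geojson_from_mask(binary_mask, src.transform, mode='polygon')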
def poly_to_features(polygons, image, class_type):
    '''Extract features for a training image.

    Polygons are extracted from the training WKT and converted to masks.
    Features are statistical metrics of each polygon.
    '''
    image_shape = image.shape[:2]
    poly_mask = poly_to_mask(polygons, image_shape, class_type)
    poly_ids = np.unique(poly_mask)
    poly_ids = poly_ids[poly_ids != 0]
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        features = []
        for poly_id in poly_ids:
            poly_pixels = image[poly_mask == poly_id]
            poly_features = get_poly_features(poly_pixels)
            features.append(poly_features)
    return features, poly_ids, poly_mask
def main():
    lidar = 'data/merged_dem.vrt'
    df = geopandas.read_file('data/test_reprojection.geojson')
    funcs = dict(relief=relief,
                 avg_slope=avg_slope,
                 planar_slope=planar_slope,
                 local_height=local_height,
                 local_relief=local_relief)
    features = []
    columns = sorted(funcs.keys())
    for _, point in df.iterrows():
        row = [funcs[key](point.geometry, lidar) for key in columns]
        row.append(point['flooded'])
        features.append(row)
    output = pd.DataFrame(features, columns=columns + ['flooded'])
    output.to_csv('features_and_class.csv')

    pred_features = generate_prediction_points(lidar)
    pred_features.to_file('prediction_features.geojson', driver='GeoJSON')
def create_geojson(FILENAME, final_filename):
    # List of GeoJSON feature objects (later this becomes a FeatureCollection).
    features = []
    if os.path.isfile(FILENAME):
        print('File that is being opened: ', FILENAME)
        dataset = rasterio.open(os.path.abspath(FILENAME))
        # Read the dataset's valid data mask as an ndarray.
        mask = dataset.dataset_mask()
        # Extract feature shapes and values from the array.
        for geom, val in rasterio.features.shapes(mask, transform=dataset.transform):
            # val is the mask value for this shape:
            # 0 means no data, 255 means valid data (the drone footage tiles we want).
            if val == 255.0:
                # Reproject the shape from the dataset's own coordinate
                # reference system to CRS84 (EPSG:4326).
                geom = rasterio.warp.transform_geom(dataset.crs, 'EPSG:4326',
                                                    geom, precision=30)
                # Store the GeoJSON shape in the features list.
                # The probability value could also be stored in properties later.
                features.append(
                    Feature(geometry=geom, properties={'name': FILENAME}))
        # All features become a feature collection.
        feature_collection = FeatureCollection(features)
        # The feature collection is written to a GeoJSON file.
        with open(final_filename, 'w') as f:
            dump(feature_collection, f)
        return feature_collection
    else:
        return None
def list_available_features(data_dir):
    """List all available images in a given directory and its subdirectories.

    Parameters
    ----------
    data_dir : str
        Path to the directory where images are stored.

    Returns
    -------
    features : list of tuple
        Available features as a list of tuples (label, path).
    """
    features = []
    for directory, _, files in os.walk(data_dir):
        files = [f for f in files if f.endswith('.tif')]
        for f in files:
            path = os.path.join(directory, f)
            label = f.replace('.tif', '')
            features.append((label, path))
    return features
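# A minimal usage sketch (the directory name is a placeholder):
#
#     for label, path in list_available_features('data/images'):
#         print(label, '->', path)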
def train_image_to_feature(image_id):
    # Original image TIFF.
    image = image_to_array(image_id)
    # Segmented training image mask.
    image_segment_mask = createMask(image.shape)
    # Training image mask by classes.
    image_class_mask = np.max(
        [image_to_train(image_id, c, 'M') for c in CLASSES], axis=0)

    # For each segment, set its class as the one with the most pixels.
    segment_ids = np.unique(image_segment_mask)
    labels = []
    features = []
    for segment_id in segment_ids:
        labels.append(
            most_common(image_class_mask[image_segment_mask == segment_id]))
        # Features for this segment.
        segment_pixels = image[image_segment_mask == segment_id]
        features.append(get_poly_features(segment_pixels))
    return features, labels, segment_ids
def get_geojson_from_img(in_fname, out_fname):
    # From https://rasterio.readthedocs.io/en/latest/
    # Create a GeoJSON file from an input raster filename.
    with rasterio.open(in_fname) as dataset:
        # Read the dataset's valid data mask as an ndarray.
        mask = dataset.dataset_mask()
        # Extract feature shapes and values from the array; the mask is expected
        # to contain exactly one shape.
        count = 0
        features = []
        geojson_geom = None
        for geom, val in rasterio.features.shapes(mask, transform=dataset.transform):
            coordinates = geom['coordinates']
            geojson_geom = {"type": "Polygon", "coordinates": coordinates}
            assert count == 0, "expected a single shape in the dataset mask"
            count += 1
            polygon = Polygon(coordinates)
            features.append(Feature(geometry=polygon, properties={"": ""}))
        feature_collection = FeatureCollection(features)
        with open(out_fname, 'w') as f:
            dump(feature_collection, f)
        return geojson_geom
stats = []
for band in array:
    stats.append({
        'min': band.min(),
        'mean': band.mean(),
        'median': np.median(band),
        'max': band.max()})
pprint(stats)

for k, v in LU_CLASS.items():
    print('Creating geojson for {}'.format(v))
    features = []
    array_cla = np.where(array == k, array, 128)
    # Extract feature shapes and values from the array.
    for geom, val in rasterio.features.shapes(array_cla[0], mask,
                                              transform=src.transform):
        # Transform shapes from the dataset's own coordinate
        # reference system to CRS84 (EPSG:4326).
        geom = rasterio.warp.transform_geom(src.crs, 'EPSG:4326', geom)
        features.append(geojson.Feature(
            geometry=geojson.MultiPolygon([geom['coordinates']])))
    with open('{data}/geojson/LU_{cla}.geojson'.format(data=DATA_FOLDER, cla=v),
              'w', encoding='utf8') as fp:
        geojson.dump(geojson.FeatureCollection(features), fp,
                     sort_keys=True, ensure_ascii=False)
    print('Finish creating {}'.format(v))
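# LU_CLASS, DATA_FOLDER, array, mask and src are assumed to be defined earlier in
# the surrounding script (src being an open rasterio dataset, array its band
# stack). A hypothetical land-use class mapping, for illustration only:
#
#     LU_CLASS = {1: 'urban', 2: 'agriculture', 3: 'forest', 4: 'water'}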
def build_semantic_segmentation_training_data(window_radius,
                                              samples_per_response_per_site,
                                              feature_file_list,
                                              response_file_list,
                                              response_vector_flag=True,
                                              boundary_file_list=[],
                                              boundary_file_vector_flag=True,
                                              boundary_bad_value=0,
                                              internal_window_radius=None,
                                              center_random_offset_fraction=0.0,
                                              response_repeats=1,
                                              savename=None,
                                              nodata_maximum_fraction=0.5,
                                              response_minimum_fraction=0.0,
                                              fill_in_feature_data=True,
                                              global_scale_flag=None,
                                              local_scale_flag=None,
                                              nodata_value=-9999,
                                              random_seed=13,
                                              n_folds=10,
                                              verbose=False,
                                              ignore_projections=False):
    """Main externally called function; transforms a list of feature/response
    rasters into a set of training data at a specified window size.

    Arguments:
    window_radius - determines the subset image size, which results as 2*window_radius
    samples_per_response_per_site - either an integer (used for all sites) or a list of
      integers (one per site) that designates the maximum number of samples to be pulled
      per response from that location.  If the number of responses is less than the
      samples per site, then the number of responses available is used
    feature_file_list - file list of the feature rasters
    response_file_list - file list of the response rasters

    Keyword Arguments:
    response_vector_flag - boolean
      A boolean indication of whether the response type is a vector or a raster (True for vector).
    boundary_file_list - list
      An optional list of boundary files for each feature/response file.
    boundary_file_vector_flag - boolean
      A boolean indication of whether the boundary file type is a vector or a raster (True for vector).
    internal_window_radius - int
      An inner image subset used to score the algorithm, and within which a response must lie
      to be included in training data.
    center_random_offset_fraction - float
      The fraction to randomly shuffle data from around the response center.
    response_repeats - int
      The number of times to re-capture each response value from different offset fractions.
    savename - str
      The basename to save scaling and munged data; if None then nothing is saved.
    nodata_maximum_fraction - float
      The maximum fraction of nodata_values to allow in each training sample.
    response_minimum_fraction - float
      The minimum response fraction that must be in each training sample.
    fill_in_feature_data - boolean
      A flag to fill in missing data with a nearest-neighbor interpolation.
    global_scale_flag - str
      A flag to apply global scaling (i.e., scaling at the level of input rasters).
    local_scale_flag - str
      A flag to apply local scaling (i.e., scaling at the individual image level).
      Options are:
        mean - mean center each image
        mean_std - mean center, and standard deviation normalize each image
    nodata_value - float
      The value to ignore from the feature or response dataset.
    random_seed - int
      A random seed to set (for reproducibility); set to None to not set a seed.
    n_folds - int
      The number of folds to set up for data training.
    verbose - boolean
      A flag indicating printout verbosity; set to True to get print outputs, False to have no printing.
    ignore_projections - boolean
      A flag to ignore projection differences between feature and response sets - use only
      if you are sure the projections are really the same.

    Return:
    features - 4d numpy array
      Array of data features, arranged as n,y,x,p, where n is the number of samples,
      y is the data y dimension (2*window_radius), x is the data x dimension (2*window_radius),
      and p is the number of features.
    responses - 4d numpy array
      Array of data responses, arranged as n,y,x,p, where n is the number of samples,
      y is the data y dimension (2*window_radius), x is the data x dimension (2*window_radius),
      and p is the number of responses.  Each slice in the response dimension is a binary
      array of that response class value.
    training_fold - numpy array
      An array indicating which sample belongs to which data fold, from 0 to n_folds-1.
    """
    if (random_seed is not None):
        np.random.seed(random_seed)

    check_data_matches(feature_file_list, response_file_list,
                       response_vector_flag, boundary_file_list,
                       boundary_file_vector_flag, ignore_projections)

    if (isinstance(samples_per_response_per_site, list)):
        if (len(samples_per_response_per_site) != len(feature_file_list)):
            raise Exception(
                'samples_per_response_per_site must equal feature_file_list length, or be an integer.'
            )

    if internal_window_radius is None:
        internal_window_radius = window_radius

    feature_scaling = get_feature_scaling(feature_file_list,
                                          global_scale_flag,
                                          nodata_value=nodata_value)
    if (savename is not None):
        np.savez(os.path.join(
            os.path.dirname(savename),
            os.path.basename(savename).split('.')[0] + '_global_feature_scaling'),
            feature_scaling=feature_scaling)

    features = []
    responses = []
    repeat_index = []

    n_features = np.nan

    for _i in range(0, len(feature_file_list)):

        # open requisite datasets
        dataset = gdal.Open(feature_file_list[_i], gdal.GA_ReadOnly)
        if (np.isnan(n_features)):
            n_features = dataset.RasterCount

        feature = np.zeros((dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount))
        for n in range(0, dataset.RasterCount):
            feature[:, :, n] = dataset.GetRasterBand(n + 1).ReadAsArray()

        if (response_vector_flag):
            response = rasterize_vector(response_file_list[_i],
                                        dataset.GetGeoTransform(),
                                        [feature.shape[0], feature.shape[1]])
        else:
            response = gdal.Open(response_file_list[_i]).ReadAsArray().astype(float)

        if (len(boundary_file_list) > 0):
            if (boundary_file_list[_i] is not None):
                if (boundary_file_vector_flag):
                    mask = rasterize_vector(boundary_file_list[_i],
                                            dataset.GetGeoTransform(),
                                            [feature.shape[0], feature.shape[1]])
                else:
                    mask = gdal.Open(boundary_file_list[_i]).ReadAsArray().astype(float)
                feature[mask == boundary_bad_value, :] = nodata_value
                response[mask == boundary_bad_value] = nodata_value

        if (verbose):
            print(feature.shape)

        # ensure nodata values are consistent
        if (dataset.GetRasterBand(1).GetNoDataValue() is not None):
            feature[feature == dataset.GetRasterBand(1).GetNoDataValue()] = nodata_value
        feature[np.isnan(feature)] = nodata_value
        feature[np.isinf(feature)] = nodata_value
        response[feature[:, :, 0] == nodata_value] = nodata_value
        feature[response == nodata_value, :] = nodata_value

        for n in range(0, feature.shape[2]):
            gd = feature[:, :, n] != nodata_value
            feature[gd, n] = feature[gd, n] - feature_scaling[n, 0]
            feature[gd, n] = feature[gd, n] / feature_scaling[n, 1]

        # find unique response values
        un_response = np.unique(response[response != nodata_value])

        if (isinstance(samples_per_response_per_site, list)):
            lsps = samples_per_response_per_site[_i]
        else:
            lsps = samples_per_response_per_site

        for ur in un_response:
            lsps = min(np.sum(response == ur), lsps)

        # loop through each unique response
        for ur in un_response:
            coords = np.where(response == ur)
            if (verbose):
                print((len(coords[0]), 'response locations potentially available'))
            perm = np.random.permutation(len(coords[0]))
            coords = [coords[0][perm], coords[1][perm]]

            for repeat in range(0, response_repeats):
                if (center_random_offset_fraction != 0):
                    coords = [
                        coords[0] + np.random.randint(
                            -rint(center_random_offset_fraction * window_radius),
                            rint(center_random_offset_fraction * window_radius),
                            len(coords[0])),
                        coords[1] + np.random.randint(
                            -rint(center_random_offset_fraction * window_radius),
                            rint(center_random_offset_fraction * window_radius),
                            len(coords[1]))
                    ]

                # grab (up to) the specified number of values corresponding to the response of interest
                pos_len = 0
                n = 0
                while (pos_len < lsps and n < len(coords[0])):
                    d = feature[coords[0][n] - window_radius:coords[0][n] + window_radius,
                                coords[1][n] - window_radius:coords[1][n] + window_radius].copy()
                    if (np.sum(d == nodata_value) <= d.size * nodata_maximum_fraction):
                        if (d.shape[0] == window_radius * 2 and d.shape[1] == window_radius * 2):
                            r = response[coords[0][n] - window_radius:coords[0][n] + window_radius,
                                         coords[1][n] - window_radius:coords[1][n] + window_radius].copy()
                            if (np.sum(r == ur) > r.size * response_minimum_fraction):
                                responses.append(r)
                                d = scale_image(d, local_scale_flag, nodata_value=nodata_value)
                                if (fill_in_feature_data):
                                    if (np.sum(d == nodata_value) > 0):
                                        d = fill_nearest_neighbor(d)
                                features.append(d)
                                repeat_index.append(repeat)
                                pos_len += 1
                            else:
                                if (verbose):
                                    print('skip from min thresh (' + str(np.sum(r == ur)) + ',' +
                                          str(r.size * response_minimum_fraction) + ')')
                        else:
                            if (verbose):
                                print('skip for bad shape')

                    n += 1
                    if (n % 100 == 0 and verbose):
                        print((pos_len, n, len(features)))

    # stack images up
    features = np.stack(features)
    responses = np.stack(responses)
    repeat_index = np.stack(repeat_index)

    # randomly permute data to reshuffle everything
    perm = np.random.permutation(features.shape[0])
    features = features[perm, :]
    responses = responses[perm, :]
    repeat_index = repeat_index[perm]

    fold_assignments = np.zeros(responses.shape[0])
    for repeat in range(0, response_repeats):
        lfa = np.zeros(np.sum(repeat_index == repeat))
        for f in range(0, n_folds):
            lfa[rint(float(f) / float(n_folds) * len(fold_assignments)):
                rint(float(f + 1) / float(n_folds) * len(fold_assignments))] = f
        fold_assignments[repeat_index == repeat] = lfa
    del repeat_index

    # reshape images for the CNN
    features = features.reshape(
        (features.shape[0], features.shape[1], features.shape[2], n_features))
    responses = responses.reshape(
        (responses.shape[0], responses.shape[1], responses.shape[2], 1))

    if (verbose):
        print(('feature shape', features.shape))
    if (verbose):
        print(('response shape', responses.shape))

    if (savename is not None):
        np.savez(savename,
                 features=features,
                 responses=responses,
                 fold_assignments=fold_assignments)

    return features, responses, fold_assignments
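# A minimal usage sketch, with hypothetical file names; the helpers
# check_data_matches, get_feature_scaling, rasterize_vector, scale_image,
# fill_nearest_neighbor and rint are assumed to come from the same module:
#
#     features, responses, folds = build_semantic_segmentation_training_data(
#         window_radius=64,
#         samples_per_response_per_site=500,
#         feature_file_list=['site1_features.tif'],
#         response_file_list=['site1_responses.shp'],
#         savename='munged/site1.npz',
#         verbose=True)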