Example #1
    def convert(self, in_file, out_file):
        omit = ['SHAPE_AREA', 'SHAPE_LEN']
        f_in = os.path.join(self.process_path, self.output_folder, in_file + ".shp")
        f_out = os.path.join(self.process_path, self.output_folder, out_file + ".json")
        with fiona.open(f_in) as source:
            # Use the recipe from the Shapely documentation:
            # http://toblerity.org/shapely/manual.html
            project = functools.partial(pyproj.transform,
                                        pyproj.Proj(**source.crs),
                                        pyproj.Proj(init='epsg:4326'))

            features = []
            for f in source:
                shape = shapely.geometry.shape(f['geometry'])
                projected_shape = shapely.ops.transform(project, shape)

                # Remove the properties we don't want
                props = f['properties']  # props is a reference
                for k in omit:
                    if k in props:
                        del props[k]

                feature = geojson.Feature(id=f['id'],
                                          geometry=projected_shape,
                                          properties=props)
                features.append(feature)

        fc = geojson.FeatureCollection(features)

        with open(f_out, 'w') as f:
            f.write(geojson.dumps(fc))
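
The pyproj.transform / Proj(init=...) pattern above is deprecated as of pyproj 2.
A minimal sketch of the same reprojection step with the newer Transformer API
(the sample point and source CRS are made up):

import pyproj
import shapely.geometry
import shapely.ops

def to_wgs84(geom, src_crs):
    # always_xy=True keeps (lon, lat) axis order, matching the old behavior
    transformer = pyproj.Transformer.from_crs(src_crs, "EPSG:4326",
                                              always_xy=True)
    return shapely.ops.transform(transformer.transform, geom)

print(to_wgs84(shapely.geometry.Point(500000, 4649776), "EPSG:32633"))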
Example #2
def shapefile_to_geojson(filename, transform=None):
    """Read a shapefile and convert it to a GeoJSON FeatureCollection,
    optionally transforming the coordinates with transform, which maps a list
    of (x, y) coordinates to a list of (x, y) coordinates in a new coordinate
    system. No transformation is applied if transform is None."""
    shape_file = shapefile.Reader(filename)
    fields = shape_file.fields[1:]
    field_names = [field[0] for field in fields]
    features = []

    def transform_poly(poly):
        # A flat list of (x, y) pairs is transformed directly; nested
        # structures (polygon rings, multipolygons) recurse one level down.
        if all(
                len(point) == 2 and all(
                    isinstance(coord, numbers.Number) for coord in point)
                for point in poly):
            return transform(poly)
        return list(map(transform_poly, poly))

    for shape_record in shape_file.shapeRecords():
        properties = dict(zip(field_names, shape_record.record))
        geometry = shape_record.shape.__geo_interface__
        if transform is not None:
            geometry['coordinates'] = transform_poly(geometry['coordinates'])

        features.append({
            'type': "Feature",
            'geometry': geometry,
            'properties': properties
        })
    return {"type": "FeatureCollection", "features": features}
Example #3
def load_feature(image_id_csv, image_feature_h5):
    image_df = pd.read_csv(image_id_csv)
    image_list = image_df['ImageId'].tolist()

    features = []
    with tb.open_file(image_feature_h5, 'r') as f:
        for image_id in image_list:
            im = np.array(f.get_node('/' + image_id))
            features.append(im)
    features = np.array(features)

    return features
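
A hypothetical sketch of the inputs load_feature() expects: a CSV with an
ImageId column, and an HDF5 file with one array node per image id at the root.

import numpy as np
import pandas as pd
import tables as tb

pd.DataFrame({'ImageId': ['img_001']}).to_csv('image_ids.csv', index=False)
with tb.open_file('image_features.h5', 'w') as f:
    f.create_array('/', 'img_001', np.zeros((64, 64, 3), dtype=np.float32))

features = load_feature('image_ids.csv', 'image_features.h5')
print(features.shape)  # (1, 64, 64, 3)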
Example #4
def segment_to_features(segment_mask, image):
    '''
    Extract features from polygons generated by segmentation.
    '''
    poly_ids = np.unique(segment_mask)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        features = []
        for poly_id in poly_ids:
            poly_pixels = image[segment_mask == poly_id]
            poly_features = get_poly_features(poly_pixels)
            features.append(poly_features)
    return features, poly_ids
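
A toy usage sketch, assuming the get_poly_features() helper the function
relies on is available: split a random RGB image into two segments and
extract per-segment features.

import numpy as np

image = np.random.rand(4, 4, 3)
segment_mask = np.zeros((4, 4), dtype=int)
segment_mask[2:, :] = 1  # bottom half becomes segment 1
features, poly_ids = segment_to_features(segment_mask, image)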
Example #5
def geojson_from_mask(mask,
                      transform,
                      mode='polygon',
                      min_aspect_ratio=1.618,
                      min_area=None,
                      width_factor=0.5,
                      thickness=0.001):
    polys = geometries_from_mask(mask, transform, mode, min_aspect_ratio,
                                 min_area, width_factor, thickness)
    features = []
    for poly in polys:
        features.append({
            'type': 'Feature',
            'properties': {},
            'geometry': poly
        })
    return geojson.dumps(FeatureCollection(features))
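
A hypothetical usage sketch, assuming the geometries_from_mask() helper used
above is available: vectorize a small binary mask whose pixels are one unit
wide, anchored at the origin via rasterio's affine helper.

import numpy as np
from rasterio.transform import from_origin

mask = np.zeros((8, 8), dtype=np.uint8)
mask[2:6, 2:6] = 1
print(geojson_from_mask(mask, from_origin(0.0, 8.0, 1.0, 1.0)))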
Example #6
def poly_to_features(polygons, image, class_type):
    '''
    Extract features for a training image.
    Polygons are extracted from the training WKT, then converted to masks.
    Features are statistical metrics of each polygon.
    '''
    image_shape = image.shape[:2]
    poly_mask = poly_to_mask(polygons, image_shape, class_type)
    poly_ids = np.unique(poly_mask)
    poly_ids = poly_ids[poly_ids != 0]
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        features = []
        for poly_id in poly_ids:
            poly_pixels = image[poly_mask == poly_id]
            poly_features = get_poly_features(poly_pixels)
            features.append(poly_features)
    return features, poly_ids, poly_mask
Example #7
def main():
    lidar = 'data/merged_dem.vrt'
    df = geopandas.read_file('data/test_reprojection.geojson')
    funcs = dict(relief=relief,
                 avg_slope=avg_slope,
                 planar_slope=planar_slope,
                 local_height=local_height,
                 local_relief=local_relief)

    features = []
    columns = sorted(funcs.keys())
    for _, point in df.iterrows():
        row = [funcs[key](point.geometry, lidar) for key in columns]
        row.append(point['flooded'])
        features.append(row)

    output = pd.DataFrame(features, columns=columns + ['flooded'])
    output.to_csv('features_and_class.csv')

    pred_features = generate_prediction_points(lidar)
    pred_features.to_file('prediction_features.geojson', driver='GeoJSON')
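
The metric functions (relief, avg_slope, ...) are not shown here. A
hypothetical sketch of one with the signature main() assumes, computing the
elevation range inside a fixed-radius buffer around each point:

import rasterio
import rasterio.mask

def relief(geometry, lidar_path, radius=50.0):
    with rasterio.open(lidar_path) as src:
        data, _ = rasterio.mask.mask(src, [geometry.buffer(radius)],
                                     crop=True, filled=False)
        return float(data.max() - data.min())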
Example #8
def create_geojson(FILENAME, final_filename):
    # list of GeoJSON feature objects (later this becomes a FeatureCollection)
    features = []

    if os.path.isfile(FILENAME):
        print('File that is being opened: ', FILENAME)
        dataset = rasterio.open(os.path.abspath(FILENAME))

        # Read the dataset's valid data mask as a ndarray.
        mask = dataset.dataset_mask()

        # Extract feature shapes and values from the array.
        for geom, val in rasterio.features.shapes(mask,
                                                  transform=dataset.transform):
            # val is the value of the raster feature corresponding to the shape:
            # 0 means no shape; 255 means a shape (drone footage, i.e. the tiles we want)
            if val == 255.0:

                # Transform shapes from the dataset's own coordinate
                # reference system to CRS84 (EPSG:4326)
                geom = rasterio.warp.transform_geom(dataset.crs,
                                                    'EPSG:4326',
                                                    geom,
                                                    precision=30)

                # store GeoJSON shapes to the features list.
                # (the probability value may later go in properties -- TBD)
                features.append(
                    Feature(geometry=geom, properties={'name': FILENAME}))

        # all features become a feature collection
        feature_collection = FeatureCollection(features)

        # Feature collection goes into a geojson file
        with open(final_filename, 'w') as f:
            dump(feature_collection, f)

        return feature_collection
    else:
        return None
Example #9
def list_available_features(data_dir):
    """List all available images in a given directory and its
    subdirectories.

    Parameters
    ----------
    data_dir : str
        Path to the directory where images are stored.

    Returns
    -------
    features : list of tuple
        Available features as a list of tuples (label, path).
    """
    features = []
    for directory, _, files in os.walk(data_dir):
        files = [f for f in files if f.endswith('.tif')]
        for f in files:
            path = os.path.join(directory, f)
            label = f.replace('.tif', '')
            features.append((label, path))
    return features
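
A minimal usage sketch with a hypothetical directory layout: collect a
(label, path) pair for every .tif file under data/.

for label, path in list_available_features('data'):
    print(label, path)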
Example #10
def train_image_to_feature(image_id):
    # original image tif
    image = image_to_array(image_id)
    # Segmented training image mask
    image_segment_mask = createMask(image.shape)
    # Training image mask by classes
    image_class_mask = np.max(
        [image_to_train(image_id, c, 'M') for c in CLASSES], axis=0)

    # for each segment, set its class as the one with the most pixels
    segment_ids = np.unique(image_segment_mask)
    labels = []
    features = []
    for segment_id in segment_ids:
        labels.append(
            most_common(image_class_mask[image_segment_mask == segment_id]))
        # Features
        segment_pixels = image[image_segment_mask == segment_id]
        features.append(get_poly_features(segment_pixels))

    return features, labels, segment_ids
Example #11
def get_geojson_from_img(in_fname, out_fname):
    # From https://rasterio.readthedocs.io/en/latest/
    # Create a geojson file from an input filename
    with rasterio.open(in_fname) as dataset:
        # Read the dataset's valid data mask as a ndarray
        mask = dataset.dataset_mask()
        # Extract feature shapes and values from the array
        count = 0
        for geom, val in rasterio.features.shapes(mask,
                                                  transform=dataset.transform):
            coordinates = geom['coordinates']
            geojson_geom = {"type": "Polygon", "coordinates": coordinates}
            # the mask is expected to contain exactly one shape; fail loudly
            # if a second one shows up
            assert count == 0
            count += 1

        polygon = Polygon(coordinates)
        features = []
        features.append(Feature(geometry=polygon, properties={"": ""}))
        feature_collection = FeatureCollection(features)

        with open(out_fname, 'w') as f:
            dump(feature_collection, f)

        return geojson_geom
Example #12
    stats = []

    for band in array:
        stats.append({
            'min': band.min(),
            'mean': band.mean(),
            'median': np.median(band),
            'max': band.max()})

    pprint(stats)

    for k, v in LU_CLASS.items():
        print('Creating geojson for {}'.format(v))
        features = []

        array_cla = np.where(array == k, array, 128)

        # Extract feature shapes and values from the array.
        for geom, val in rasterio.features.shapes(array_cla[0], mask,
                                                  transform=src.transform):
            # Transform shapes from the dataset's own coordinate
            # reference system to CRS84 (EPSG:4326).
            geom = rasterio.warp.transform_geom(src.crs, 'EPSG:4326', geom)

            features.append(geojson.Feature(
                geometry=geojson.MultiPolygon([geom['coordinates']])))

        out_path = '{data}/geojson/LU_{cla}.geojson'.format(data=DATA_FOLDER, cla=v)
        with open(out_path, 'w', encoding='utf8') as fp:
            geojson.dump(geojson.FeatureCollection(features), fp,
                         sort_keys=True, ensure_ascii=False)

        print('Finish creating {}'.format(v))
def build_semantic_segmentation_training_data(
        window_radius,
        samples_per_response_per_site,
        feature_file_list,
        response_file_list,
        response_vector_flag=True,
        boundary_file_list=[],
        boundary_file_vector_flag=True,
        boundary_bad_value=0,
        internal_window_radius=None,
        center_random_offset_fraction=0.0,
        response_repeats=1,
        savename=None,
        nodata_maximum_fraction=0.5,
        response_minimum_fraction=0.0,
        fill_in_feature_data=True,
        global_scale_flag=None,
        local_scale_flag=None,
        nodata_value=-9999,
        random_seed=13,
        n_folds=10,
        verbose=False,
        ignore_projections=False):
    """ Main externally called function, transforms a list of feature/response rasters into 
    a set of training data at a specified window size

    Arguments:
    window_radius - determines the subset image size; each window is 2*window_radius pixels on a side
    samples_per_response_per_site - either an integer (used for all sites) or a list of integers (one per site)
                       that designates the maximum number of samples to be pulled per response
                       from that location.  If the number of responses is less than the samples
                       per site, then the number of responses available is used
    feature_file_list - file list of the feature rasters
    response_file_list - file list of the response rasters

    Keyword Arguments:
    response_vector_flag  - boolean
      A boolean indication of whether the response type is a vector or a raster (True for vector).
    boundary_file_list - list
      An optional list of boundary files for each feature/response file.
    boundary_file_vector_flag - boolean
      A boolean indication of whether the boundary file type is a vector or a raster (True for vector).
    internal_window_radius - int
      An inner image subset used to score the algorithm, and within which a response must lie to 
      be included in training data
    center_random_offset_fraction - float
      The fraction to randomly shuffle data from around response center.
    response_repeats - int
      The number of times to re-capture each response value from different offset fractions.
    savename - str
      The basename to save scaling and munged data; if None, nothing is saved.
    nodata_maximum_fraction - float
      The maximum fraction of nodata_values to allow in each training sample.
    response_minimum_fraction - float
      The minimum response fraction that must be in each training sample.
    fill_in_feature_data - boolean
      A flag to fill in missing data with a nearest neighbor interpolation.
    global_scale_flag - str
      A flag to apply global scaling (i.e., scaling at the level of input rasters).
    local_scale_flag - str
      A flag to apply local scaling (i.e., scaling at the individual image level).
      Options are:
        mean - mean center each image
        mean_std - mean center, and standard deviation normalize each image
    nodata_value - float
      The value to ignore from the feature or response dataset.
    random_seed - int
      A random seed to set (for reproducibility); set to None to not set a seed.
    n_folds - int
      The number of folds to set up for data training.
    verbose - boolean
      A flag indicating printout verbosity, set to True to get print outputs, False to have no printing.
    ignore_projections - boolean
      A flag to ignore projection differences between feature and response sets - use only if you 
      are sure the projections are really the same.

    Return: 
    features - 4d numpy array 
      Array of data features, arranged as n,y,x,p, where n is the number of samples, y is the 
      data y dimension (2*window_radius), x is the data x dimension (2*window_radius), 
      and p is the number of features.
    responses - 4d numpy array
      Array of data responses, arranged as n,y,x,p, where n is the number of samples, y is the
      data y dimension (2*window_radius), x is the data x dimension (2*window_radius),
      and p is the number of responses.  Each slice in the response dimension is a binary array
      of that response class value.
    fold_assignments - numpy array
      An array indicating which sample belongs to which data fold, from 0 to n_folds-1.
    """

    if (random_seed is not None):
        np.random.seed(random_seed)

    check_data_matches(feature_file_list, response_file_list,
                       response_vector_flag, boundary_file_list,
                       boundary_file_vector_flag, ignore_projections)

    if (isinstance(samples_per_response_per_site, list)):
        if (len(samples_per_response_per_site) != len(feature_file_list)):
            raise Exception(
                'samples_per_response_per_site must equal feature_file_list length, or be an integer.'
            )

    if internal_window_radius is None:
        internal_window_radius = window_radius

    feature_scaling = get_feature_scaling(feature_file_list,
                                          global_scale_flag,
                                          nodata_value=nodata_value)
    if (savename is not None):
        np.savez(os.path.join(
            os.path.dirname(savename),
            os.path.basename(savename).split('.')[0] +
            '_global_feature_scaling'),
                 feature_scaling=feature_scaling)

    features = []
    responses = []
    repeat_index = []

    n_features = np.nan

    for _i in range(0, len(feature_file_list)):

        # open requisite datasets
        dataset = gdal.Open(feature_file_list[_i], gdal.GA_ReadOnly)
        if (np.isnan(n_features)):
            n_features = dataset.RasterCount
        feature = np.zeros(
            (dataset.RasterYSize, dataset.RasterXSize, dataset.RasterCount))
        for n in range(0, dataset.RasterCount):
            feature[:, :, n] = dataset.GetRasterBand(n + 1).ReadAsArray()

        if (response_vector_flag):
            response = rasterize_vector(response_file_list[_i],
                                        dataset.GetGeoTransform(),
                                        [feature.shape[0], feature.shape[1]])
        else:
            response = gdal.Open(
                response_file_list[_i]).ReadAsArray().astype(float)

        if (len(boundary_file_list) > 0):
            if (boundary_file_list[_i] is not None):
                if (response_vector_flag):
                    mask = rasterize_vector(
                        boundary_file_list[_i], dataset.GetGeoTransform(),
                        [feature.shape[0], feature.shape[1]])
                else:
                    mask = gdal.Open(
                        boundary_file_list[_i]).ReadAsArray().astype(float)
                feature[mask == boundary_bad_value, :] = nodata_value
                response[mask == boundary_bad_value] = nodata_value

        if (verbose): print(feature.shape)
        # ensure nodata values are consistent
        if (not dataset.GetRasterBand(1).GetNoDataValue() is None):
            feature[feature == dataset.GetRasterBand(
                1).GetNoDataValue()] = nodata_value
        feature[np.isnan(feature)] = nodata_value
        feature[np.isinf(feature)] = nodata_value
        response[feature[:, :, 0] == nodata_value] = nodata_value
        feature[response == nodata_value, :] = nodata_value

        for n in range(0, feature.shape[2]):
            gd = feature[:, :, n] != nodata_value
            feature[gd, n] = feature[gd, n] - feature_scaling[n, 0]
            feature[gd, n] = feature[gd, n] / feature_scaling[n, 1]

        # find unique response values
        un_response = np.unique(response[response != nodata_value])

        if (isinstance(samples_per_response_per_site, list)):
            lsps = samples_per_response_per_site[_i]
        else:
            lsps = samples_per_response_per_site

        # cap the per-site sample count at the rarest response count
        for ur in un_response:
            lsps = min(np.sum(response == ur), lsps)

        # loop through each unique response
        for ur in un_response:
            coords = np.where(response == ur)
            if (verbose):
                print((len(coords[0]),
                       'response locations potentially available'))
            perm = np.random.permutation(len(coords[0]))
            coords = [coords[0][perm], coords[1][perm]]

            for repeat in range(0, response_repeats):
                if (center_random_offset_fraction != 0):
                    coords = [
                        coords[0] + np.random.randint(
                            -rint(
                                center_random_offset_fraction * window_radius),
                            rint(center_random_offset_fraction *
                                 window_radius), len(coords[0])),
                        coords[1] + np.random.randint(
                            -rint(
                                center_random_offset_fraction * window_radius),
                            rint(center_random_offset_fraction *
                                 window_radius), len(coords[1]))
                    ]

                # grab up to the specified number of values corresponding to the response of interest
                pos_len = 0
                n = 0
                while (pos_len < lsps and n < len(coords[0])):
                    d = feature[coords[0][n] - window_radius:coords[0][n] +
                                window_radius,
                                coords[1][n] - window_radius:coords[1][n] +
                                window_radius].copy()
                    if ((np.sum(d == nodata_value) <=
                         d.size * nodata_maximum_fraction)):
                        if (d.shape[0] == window_radius * 2
                                and d.shape[1] == window_radius * 2):
                            r = response[coords[0][n] -
                                         window_radius:coords[0][n] +
                                         window_radius, coords[1][n] -
                                         window_radius:coords[1][n] +
                                         window_radius].copy()
                            if (np.sum(r == ur) >
                                    r.size * response_minimum_fraction):
                                responses.append(r)
                                d = scale_image(d,
                                                local_scale_flag,
                                                nodata_value=nodata_value)
                                if (fill_in_feature_data):
                                    if (np.sum(d == nodata_value) > 0):
                                        d = fill_nearest_neighbor(d)

                                features.append(d)
                                repeat_index.append(repeat)
                                pos_len += 1
                            else:
                                if (verbose):
                                    print('skip from min thresh (' +
                                          str(np.sum(r == ur)) + ',' +
                                          str(r.size *
                                              response_minimum_fraction) + ')')
                        else:
                            if (verbose): print('skip for bad shape')

                    n += 1
                    if (n % 100 == 0 and verbose):
                        print((pos_len, n, len(features)))

    # stack images up
    features = np.stack(features)
    responses = np.stack(responses)
    repeat_index = np.stack(repeat_index)

    # randomly permute data to reshuffle everything
    perm = np.random.permutation(features.shape[0])
    features = features[perm, :]
    responses = responses[perm, :]
    repeat_index = repeat_index[perm]
    fold_assignments = np.zeros(responses.shape[0])

    for repeat in range(0, response_repeats):
        lfa = np.zeros(np.sum(repeat_index == repeat))
        for f in range(0, n_folds):
            lfa[rint(float(f) / float(n_folds) * len(fold_assignments)):rint(
                float(f + 1) / float(n_folds) * len(fold_assignments))] = f
        fold_assignments[repeat_index == repeat] = lfa
    del repeat_index

    # reshape images for the CNN
    features = features.reshape(
        (features.shape[0], features.shape[1], features.shape[2], n_features))
    responses = responses.reshape(
        (responses.shape[0], responses.shape[1], responses.shape[2], 1))

    if (verbose): print(('feature shape', features.shape))
    if (verbose): print(('response shape', responses.shape))

    if (savename is not None):
        np.savez(savename,
                 features=features,
                 responses=responses,
                 fold_assignments=fold_assignments)

    return features, responses, fold_assignments
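
A minimal usage sketch with hypothetical file paths: pull up to 200 samples
per response class from one site as 128x128 windows (2*window_radius), with
the responses supplied as a vector file.

features, responses, fold_assignments = build_semantic_segmentation_training_data(
    window_radius=64,
    samples_per_response_per_site=200,
    feature_file_list=['site1_ortho.tif'],
    response_file_list=['site1_labels.shp'],
    response_vector_flag=True,
    savename='site1_training.npz',
    verbose=True)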