def test_nested_contained(self):
    '''Nested case: a geometry surrounded by other geometries, all of which
    sit inside one larger geometry, should collapse to a single row.'''
    data_dir = os.getcwd() + '/testing/test_data/merge_fully_contained/'
    shapefile = data_dir + '/nested.shp'

    merged = merge_fully_contained(fm.load_shapefile(shapefile))

    # Everything is contained in the outer geometry, so one row remains
    assert len(merged) == 1
def test_regular_contained(self):
    '''Donut-hole case: the enclosed geometry should be merged into the
    geometry around it, leaving a single row.'''
    data_dir = os.getcwd() + '/testing/test_data/merge_fully_contained/'
    loaded = fm.load_shapefile(data_dir + '/regular.shp')

    # run the merge and check that only one geometry is left
    merged = merge_fully_contained(loaded)
    assert len(merged) == 1
def test_cols_to_add(self):
    '''Check that columns sum correctly when listed in cols_to_add.

    Loads the donut-hole fixture, merges with cols_to_add=['value'] and
    checks the 'value' cell of the row labelled 0 in the merged dataframe.
    '''
    direc_path = os.getcwd() + '/testing/test_data/merge_fully_contained/'
    file_path = direc_path + '/regular.shp'
    df = fm.load_shapefile(file_path)

    # Cast to float before merging so the column holds numbers
    df['value'] = df['value'].astype(float)

    # merge
    df = merge_fully_contained(df, cols_to_add=['value'])

    # Read the actual cell. A bare `[0, 'value'] == 2` compares a list
    # literal with an int, which is always False and never looks at df.
    # NOTE(review): assumes the merged row keeps index label 0 -- confirm.
    assert df.at[0, 'value'] == 2
def image_classification(shp_path, img_path, num_regions, num_colors=False,
                         out_path=False):
    '''Generate a dissolved boundary file of larger geometries according to
    how the geometry is colored in a corresponding image of the same
    geographic region. The image should be georeferenced to the boundary.
    Also, the image should be cropped to the extents of the geometry. It is
    usually better to do this by hand because the autocropping algorithm
    sometimes stops because of a single pixel difference.

    If there exists one noncontiguous larger shape, then the number of
    regions should be one greater because the algorithm will split
    noncontiguous regions.

    We limit the number of samples to be 500 per geometry for speed purposes.

    Arguments:
        shp_path: path to the shapefile the algorithm will be performed on

        img_path: path to the image we use for the classification. This
            should already be cropped to the boundaries of the geometry.
            This can be performed with the function cropped_border_image

        num_regions: the number of regions that should remain at the end of
            the algorithm.

        num_colors: The number of colors to reduce the image to. Sometimes
            helps if classification regions in the image are different
            shades of the same color within a region. Default is no
            reduction

        out_path: path to save final dataframe if applicable. Default will
            not save

    Output:
        df_classified: dataframe with geometries classified into regions
        df: the original dataframe with color and region assignments
    '''
    # Load image and shapefile
    # NOTE(review): assumes PIL's `Image` is imported at module level --
    # confirm against the file's import block.
    img = Image.open(img_path)
    if num_colors:
        img = si.reduce_colors(img, num_colors)
    img_arr = np.asarray(img)

    df = fm.load_shapefile(shp_path)

    # create a color series and region series in the dataframe
    df['color'] = pd.Series(dtype=object)
    df['region'] = pd.Series(dtype=object)

    # Get the boundaries of the geodataframe
    # NOTE(review): cascaded_union is deprecated in recent Shapely releases
    # in favour of unary_union -- consider switching once confirmed.
    bounds = shp.ops.cascaded_union(list(df['geometry'])).bounds
    shp_xlen = bounds[2] - bounds[0]
    shp_ylen = bounds[3] - bounds[1]
    shp_xmin = bounds[0]
    shp_ymin = bounds[1]

    # Assign each polygon its most common color
    for ix, row in df.iterrows():
        poly = row['geometry']
        # Write back through df.at: `row` is a copy, so assigning to it
        # would not change the dataframe
        df.at[ix, 'color'] = si.most_common_color(poly, img_arr, shp_xmin,
                                                  shp_xlen, shp_ymin,
                                                  shp_ylen, 500)

    # Assign each polygon with a certain color a region index
    for ix, color in enumerate(df['color'].unique()):
        df.loc[df['color'] == color, 'region'] = ix

    # Get different region ids
    regions = list(df['region'].unique())

    # Create the classification dataframe
    df_classified = pd.DataFrame(columns=['region', 'geometry'])

    # Create classification geometries for each region
    for ix, region in enumerate(regions):
        df_region = df[df['region'] == region]
        polys = list(df_region['geometry'])
        df_classified.at[ix, 'geometry'] = shp.ops.cascaded_union(polys)
        df_classified.at[ix, 'region'] = region

    # Convert classified dataframe into a geodataframe
    df_classified = gpd.GeoDataFrame(df_classified, geometry='geometry')

    # Split noncontiguous regions and merge fully contained regions
    df_classified = sm.split_noncontiguous(df_classified)
    df_classified = sm.merge_fully_contained(df_classified)

    # Merge regions until we have the correct number
    df_classified = sm.merge_to_right_number(df_classified, num_regions)

    # save file if necessary
    if out_path:
        fm.save_shapefile(df_classified, out_path)

    return df_classified, df
def clean_manual_classification(in_path, classification_col, out_path=False):
    '''Generate a dissolved boundary file of larger geometries after being
    given a geodataframe with smaller geometries assigned to a value
    designated by the classification column.

    Will auto-assign unassigned geometries using the greedy shared perimeters
    method. Will also split non-contiguous geometries and merge fully
    contained geometries.

    Usually used when a user has manually classified census blocks into
    precincts and needs to clean up their work.

    Arguments:
        in_path: path to the shapefile containing the smaller geometries

        classification_col: name of column in df that identifies which
            larger "group" each smaller geometry belongs to.

        out_path: path to save final dataframe file if applicable. Default
            is false and will not save

    Output:
        df: dataframe of dissolved geometries with the 'neighbors' column
            dropped and the index reset
    '''
    df = fm.load_shapefile(in_path)

    # Give every unassigned block a unique dummy name so it survives the
    # dissolve as its own geometry. isnull() catches both None and NaN,
    # whereas a `None in ...unique()` test misses NaN-valued entries.
    unassigned = df[classification_col].isnull()
    for i in df.index[unassigned]:
        df.at[i, classification_col] = 'foobar' + str(i)

    # Dissolve the boundaries given the group assignments for each small geom
    df = sm.dissolve(df, classification_col)

    # Split noncontiguous geometries after the dissolve
    df = sm.split_noncontiguous(df)

    # Merge geometries fully contained in other geometries
    df = sm.merge_fully_contained(df)

    # Merge the remaining dummy-named geometries so that only real groups
    # are left
    is_dummy = df[classification_col].str.slice(0, 6) == 'foobar'
    ixs_to_merge = df[is_dummy].index.to_list()
    df = sm.merge_geometries(df, ixs_to_merge)

    # drop neighbor column and reset the indexes
    df = df.drop(columns=['neighbors'])
    df = df.reset_index(drop=True)

    # save file if necessary
    if out_path:
        fm.save_shapefile(df, out_path)

    return df