def test_geometry_merge(self):
    '''Merged geometries must match the known-correct shapefile.

    Runs the merge without keeping any columns, then compares the union
    of the merged geometries with the union of the geometries stored in
    the reference file.
    '''
    cwd = os.getcwd()

    # Reference shapefile holding the expected geometry
    reference_dir = cwd + "/testing/test_data/merge_shapefiles/"
    reference_path = reference_dir + "input_full.shp"

    # Debug directory the merged output is read back from
    out_direc = cwd + "/testing/debug/merge_shapefiles/"
    merged_path = out_direc + "merge.shp"

    # Merge while keeping no attribute columns
    apply_merge([])

    # Read back the produced file and the reference file
    merged = fm.load_shapefile(merged_path)
    reference = fm.load_shapefile(reference_path)

    # Collapse each file to a single geometry and compare them
    merged_union = shp.ops.cascaded_union(list(merged['geometry']))
    reference_union = shp.ops.cascaded_union(list(reference['geometry']))
    assert reference_union.equals(merged_union)

    # Only reached when the comparison passed: drop the debug folder
    shutil.rmtree(out_direc)
def test_simple_grid():
    '''Dissolve a 3 x 3 grid and expect three matching geometries.

    The dissolved output is compared geometry-by-geometry against a
    known-correct dissolved shapefile; exactly three pairs must be equal.
    '''
    data_dir = "/testing/test_data/dissolve/"

    # Known-correct dissolved shapefile
    expected_path = os.getcwd() + data_dir + "dissolved_simple_correct.shp"
    expected = fm.load_shapefile(expected_path)

    # Dissolve the raw input on 'attribute'
    raw_path = os.getcwd() + data_dir + "test_dissolve_simple.shp"
    dissolved = sm.dissolve(fm.load_shapefile(raw_path), 'attribute')

    # Count every (expected, dissolved) pair with equal geometries.
    # All-pairs comparison is acceptable because the inputs are tiny.
    matches = sum(
        1
        for _, expected_row in expected.iterrows()
        for _, dissolved_row in dissolved.iterrows()
        if expected_row['geometry'].equals(dissolved_row['geometry'])
    )

    assert matches == 3
def frame(input_path, correct_path):
    ''' Check a generated bounding frame against a known-correct one.

    A bounding frame is generated for the input shapefile and the polygon
    enclosed by its first interior ring is compared with the polygon
    enclosed by the first interior ring of the correct frame.

    Arguments:
        input_path: path to shapefile that we will be creating a bounding
            frame around

        correct_path: path to correct bounding frame shapefile
    '''
    # Load the correct bounding frame shapefile. The original loaded it
    # twice and kept only this second result, so only this load remains.
    correct = gpd.read_file(correct_path)

    # Load testing shapefile and create bounding frame shapefile
    df = fm.load_shapefile(input_path)
    created = sm.generate_bounding_frame(df)

    # Polygon enclosed by the correct frame's first interior ring
    correct_frame = correct.at[correct.index.values[0], 'geometry']
    correct_interior = Polygon(correct_frame.interiors[0])

    # Polygon enclosed by the created frame's first interior ring. The
    # created dataframe is indexed with its OWN first label; reusing the
    # correct frame's label only worked when both happened to share it.
    created_frame = created.at[created.index.values[0], 'geometry']
    created_interior = Polygon(created_frame.interiors[0])

    # Check equality between the two interiors
    assert correct_interior.equals(created_interior)
def compare_shapefile_difference(shp_paths1, shp_paths2, verbose=False):
    ''' Compare pairs of shapefiles and report how much of the first is
    missing from the second.

    For every pair the result is the area of the first shapefile that is
    NOT covered by the second, divided by the total area of the first. A
    ratio of 0.90 therefore means that 90 percent of the first shapefile
    is not contained in the second shapefile.

    Paths are paired by position: the first path in shp_paths1 is compared
    to the first path in shp_paths2, the second to the second, and so on.

    This is useful for comparing shapefiles received from local
    jurisdictions.

    Arguments:
        shp_paths1: LIST of paths to shapefiles to be compared

        shp_paths2: LIST of paths to shapefiles to compare against

        verbose: whether to print each difference ratio as it is
            calculated

    Output:
        LIST of difference ratios, one per pair, in input order. Returns
        False if the two lists do not have the same length.

    Raises:
        ZeroDivisionError: if the union of a first shapefile has zero area
    '''
    # Lists must pair up one-to-one
    if len(shp_paths1) != len(shp_paths2):
        return False

    # Difference ratios relative to the first shapefile of each pair
    out = []

    for path1, path2 in zip(shp_paths1, shp_paths2):
        # Load in shapefiles
        shp1 = fm.load_shapefile(path1)
        shp2 = fm.load_shapefile(path2)

        # Get full geometries (unary_union supersedes the deprecated
        # cascaded_union)
        poly1 = shp.ops.unary_union(list(shp1['geometry']))
        poly2 = shp.ops.unary_union(list(shp2['geometry']))

        # Part of the first shape missing from the second, as a fraction
        # of the first shape's area
        ratio = poly1.difference(poly2).area / poly1.area
        out.append(ratio)

        if verbose:
            name1 = path1.split('/')[-1]
            name2 = path2.split('/')[-1]
            print('Difference Between ' + name1 + ' and ' + name2 + ': ' +
                  str(ratio))

    return out
def remove_geometries(path_delete, save_path, path_reference, thresh):
    ''' Drop shapes that do not overlap a reference shapefile enough.

    Each geometry in the path_delete shapefile is intersected with the
    union of all geometries in the reference shapefile. Shapes whose
    intersection area divided by their own area falls below thresh are
    removed.

    Arguments:
        path_delete: path to the shapefile being edited (shapes without
            enough intersection are deleted from it)

        save_path: path to save the edited shapefile. If falsy, nothing
            is saved

        path_reference: path to the shapefile the intersection is taken
            against, using the union of all of its geometries

        thresh: fraction required to keep a shape. With thresh 0.9 any
            shape with an intersection ratio greater than or equal to 0.9
            remains and anything below is deleted

    Output:
        edited dataframe with shapes removed
    '''
    candidates = fm.load_shapefile(path_delete)
    reference = fm.load_shapefile(path_reference)

    # Single geometry covering everything in the reference shapefile
    reference_union = shp.ops.cascaded_union(list(reference['geometry']))

    def _overlap_fraction(geom):
        # Share of this shape's own area that lies inside the reference
        return geom.intersection(reference_union).area / geom.area

    # Temporary column used only for filtering
    candidates['ratio'] = candidates['geometry'].apply(_overlap_fraction)

    # Keep shapes at or above the threshold, then discard the helper column
    kept = candidates[candidates.ratio >= thresh]
    kept = kept.drop(columns=['ratio'])

    if save_path:
        fm.save_shapefile(kept, save_path)

    return kept
def dissolve_by_attribute(in_path, dissolve_attribute, out_path=False):
    '''Dissolve a shapefile's boundaries on an attribute.

    Loads the shapefile, dissolves it on the given attribute, runs the
    contiguity/containment check on the result, and optionally saves it.

    Arguments:
        in_path: full path to input shapefile to be dissolved

        dissolve_attribute: attribute to dissolve boundaries by

        out_path: full path to save the created shapefile. If falsy,
            nothing is saved

    Output:
        the dissolved dataframe
    '''
    dissolved = sm.dissolve(fm.load_shapefile(in_path), dissolve_attribute)

    # Return value is ignored here; per the original note this call is
    # made to print potential errors
    sc.check_contiguity_and_contained(dissolved, dissolve_attribute)

    if out_path:
        fm.save_shapefile(dissolved, out_path)

    return dissolved
def apply_crs_test(filename, crs='epsg:4269', default=True):
    ''' Run transform_crs on a copy of a test file and check the result.

    Arguments:
        filename: name of testing shapefile

        crs: coordinate reference system to convert to

        default: not read by this function
    '''
    # Work on a fresh copy of the test file
    working_path = intitialize_test_files(filename)

    # transform_crs takes a list of paths and edits the files in place
    transform_crs([working_path], crs)

    # The re-loaded file must report the requested crs
    reprojected = fm.load_shapefile(working_path)
    assert reprojected.crs == {'init': crs}

    # NOTE: a disabled variant of this check branched on `default` and,
    # for the non-default case, compared the crs against an explicit
    # mercator/WGS84 proj dictionary.

    # Clean up testing folders (only reached when the assertion passed)
    clean_test_files()
def intitialize_test_files(filename):
    ''' Copy a test shapefile into a freshly created debug directory.

    Argument:
        filename: file name of testing file with extension

    Output:
        path to the copied shapefile that the test may edit
    '''
    cwd = os.getcwd()

    # Pristine input data for this test
    source_path = cwd + "/testing/test_data/transform_crs/" + filename

    # Recreate the debug directory from scratch
    direc_path = cwd + "/testing/debug/transform_crs"
    if os.path.exists(direc_path):
        shutil.rmtree(direc_path)
    os.mkdir(direc_path)

    # Write a copy of the shapefile into the debug directory
    test_path = direc_path + '/' + filename
    fm.load_shapefile(source_path).to_file(test_path)

    return test_path
def load_dfs(target_name, source_name):
    ''' Load the target and source test shapefiles for distribute_values.

    Arguments:
        target_name: file name of the target shapefile in the test data

        source_name: file name of the source shapefile in the test data

    Output:
        target and source df
    '''
    test_data_dir = os.getcwd() + "/testing/test_data/distribute_values/"

    # Target is loaded first, then source
    target_df = fm.load_shapefile(test_data_dir + target_name)
    source_df = fm.load_shapefile(test_data_dir + source_name)

    return target_df, source_df
def test_grid(self):
    ''' Shared perimeters on a 2x2 box grid.

    Every box must share a perimeter of length 1 with each of the two
    boxes listed for it below.
    '''
    data_dir = os.getcwd() + '/testing/test_data/calculate_shared_perimeters/'
    grid = fm.load_shapefile(data_dir + 'grid.shp')

    # perform calculate shared perimeters
    grid = calculate_shared_perimeters(grid)

    # index -> neighbors expected to share a perimeter of 1
    # (0 bottom left, 1 bottom right, 2 top left, 3 top right)
    expected_neighbors = {
        0: (1, 2),
        1: (0, 3),
        2: (0, 3),
        3: (1, 2),
    }

    for ix, neighbor_ixs in expected_neighbors.items():
        shared = grid.at[ix, 'neighbors']
        for neighbor_ix in neighbor_ixs:
            assert shared[neighbor_ix] == 1
def load_dfs(large_name, small_name):
    ''' Load the large and small test shapefiles for distribute_label.

    Arguments:
        large_name: file name of the large shapefile in the test data

        small_name: file name of the small shapefile in the test data

    Output:
        large and small df
    '''
    test_data_dir = os.getcwd() + "/testing/test_data/distribute_label/"

    # Large is loaded first, then small
    large_df = fm.load_shapefile(test_data_dir + large_name)
    small_df = fm.load_shapefile(test_data_dir + small_name)

    return large_df, small_df
def test_disaggregate_by_attribute():
    ''' Disaggregate the test shapefile and run the three output checks.

    The file is split on 'attribute' into a fresh debug directory, after
    which the folder_name, file_name and shapes helpers are run. The
    debug directory is deleted only if all three return a truthy value.
    '''
    cwd = os.getcwd()

    # Inputs
    shp_path = (cwd + "/testing/test_data/disaggregate_file/"
                + "test_disaggregate_file.shp")
    disaggregate_attr = 'attribute'
    prefix = 'prefix_'
    suffix = '_suffix'

    # Start from an empty directory to dump data into
    direc_path = cwd + "/testing/debug/disaggregate_file"
    if os.path.exists(direc_path):
        shutil.rmtree(direc_path)
    os.mkdir(direc_path)

    # Perform function
    disaggregate_file(shp_path, disaggregate_attr, direc_path, prefix,
                      suffix)

    # Unique attribute values present in the test file
    df_test = fm.load_shapefile(shp_path)
    attr = list(set(df_test[disaggregate_attr]))

    # Run every check before deciding on cleanup
    folders_ok = folder_name(direc_path, attr, prefix, suffix)
    files_ok = file_name(direc_path, attr, prefix, suffix)
    shapes_ok = shapes(df_test, disaggregate_attr, direc_path, attr, prefix,
                       suffix)

    # Delete folder in debugging if all tests are passed
    if folders_ok and files_ok and shapes_ok:
        shutil.rmtree(direc_path)
def disaggregate_file(shp_path, disaggregate_attr, direc_path, prefix='',
                      suffix=''):
    ''' Take a larger shapefile and disaggregate it into smaller shapefiles
    according to an attribute. The directory and shapefile name will be
    prefix + disaggregate_attribute value + suffix.

    NOTE: direc_path SHOULD NOT END WITH '/'

    Example: Use to disaggregate a statewide census block file into county
    census block files

    If available, load in shp_path with a pickle file rather than the
    actual shapefile. Loading in statewide census files takes a while

    Arguments:
        shp_path: path to shapefile to disaggregate

        disaggregate_attr: attribute to disaggregate on

        direc_path: path to an existing directory in which a subdirectory
            of smaller shapefiles is created for each unique value. An
            existing subdirectory with the same name is deleted first.

        prefix: string to put in front of the name of smaller shapefiles

        suffix: string to put behind the name of smaller shapefiles

    Output:
        None. One subdirectory containing one shapefile is written per
        unique attribute value.
    '''
    # load shapefile
    df = fm.load_shapefile(shp_path)

    # Get unique values of the attribute
    attributes = set(df[disaggregate_attr])

    # For each value create subdirectory, create smaller shapefile, and save
    for attr in attributes:
        # str() keeps the naming identical for string values while also
        # allowing non-string values (e.g. integer codes), which would
        # otherwise raise TypeError on concatenation
        name = prefix + str(attr) + suffix
        subdirec = direc_path + '/' + name
        shp_name = name + '.shp'

        # create subdirectory, replacing any previous output
        if os.path.exists(subdirec):
            shutil.rmtree(subdirec)
        os.mkdir(subdirec)

        # create shapefile holding only the rows with this value
        df_attr = df[df[disaggregate_attr] == attr]
        df_attr = gpd.GeoDataFrame(df_attr, geometry='geometry')
        fm.save_shapefile(df_attr, subdirec + '/' + shp_name)
def perform_merge(ixs_to_merge, filename, cols_to_add=None):
    '''Load a merge_geometries test shapefile and merge the given indexes.

    Arguments:
        ixs_to_merge: passed through to merge_geometries as the indexes
            to merge

        filename: name of the test shapefile WITHOUT the '.shp' extension

        cols_to_add: passed through to merge_geometries. Defaults to a
            new empty list on every call

    Output:
        whatever merge_geometries returns
    '''
    # A fresh list per call: a literal [] default would be one shared
    # object that any in-place change would leak into later calls
    if cols_to_add is None:
        cols_to_add = []

    # load
    direc_path = os.getcwd() + '/testing/test_data/merge_geometries/'
    file_path = direc_path + filename + '.shp'
    df = fm.load_shapefile(file_path)

    # merge
    return merge_geometries(df, ixs_to_merge, cols_to_add)
def perform_merge(num):
    '''Load the right_number test shapefile and merge it down.

    Arguments:
        num: number of geometries that should remain, passed through to
            merge_to_right_number

    Output:
        whatever merge_to_right_number returns
    '''
    data_dir = os.getcwd() + '/testing/test_data/merge_to_right_number/'
    loaded = fm.load_shapefile(data_dir + 'right_number.shp')

    return merge_to_right_number(loaded, num)
def test_regular_contained(self):
    ''' Donut hole type case: the merge must leave a single row'''
    data_dir = os.getcwd() + '/testing/test_data/merge_fully_contained/'
    loaded = fm.load_shapefile(data_dir + '/regular.shp')

    # merge
    merged = merge_fully_contained(loaded)

    assert len(merged) == 1
def test_contained():
    ''' 3 x 3 grid that surrounds a single element.

    The second element of the check's result must be non-empty.
    '''
    data_dir = "/testing/test_data/noncontiguous_and_contained/"
    grid_path = os.getcwd() + data_dir + "test_contained.shp"
    grid = fm.load_shapefile(grid_path)

    result = sc.check_contiguity_and_contained(grid, 'attribute')

    # presumably index 1 holds the contained elements; verify against
    # sc.check_contiguity_and_contained
    flagged = result[1]
    assert len(flagged)
def test_nested_contained(self):
    ''' Geometry surrounded by other geometries that are in turn
    contained by a larger geometry: the merge must leave a single row'''
    data_dir = os.getcwd() + '/testing/test_data/merge_fully_contained/'
    loaded = fm.load_shapefile(data_dir + '/nested.shp')

    merged = merge_fully_contained(loaded)

    assert len(merged) == 1
def test_cols_to_add(self):
    ''' Columns listed in cols_to_add must be summed by the merge'''
    data_dir = os.getcwd() + '/testing/test_data/merge_fully_contained/'
    loaded = fm.load_shapefile(data_dir + '/regular.shp')

    # Make the column numeric before merging
    loaded['value'] = loaded['value'].astype(float)

    # merge
    merged = merge_fully_contained(loaded, cols_to_add=['value'])

    assert merged.at[0, 'value'] == 2
def test_four_pieces(self):
    '''A geometry with four noncontiguous pieces must split into four
    rows'''
    data_dir = os.getcwd() + '/testing/test_data/split_noncontiguous/'
    loaded = fm.load_shapefile(data_dir + '/four_pieces.shp')

    # Split
    pieces = split_noncontiguous(loaded)

    # Check
    assert len(pieces) == 4
def shapes(df_test, disaggregate_attr, direc_path, attr, prefix, suffix):
    ''' Check that the shapefile written for each attribute value covers
    the same geometry as the matching rows of the original dataframe.

    Arguments:
        df_test: dataframe that was disaggregated

        disaggregate_attr: attribute the dataframe was disaggregated on

        direc_path: directory holding one subdirectory per value

        attr: iterable of attribute values to check

        prefix: string in front of each subdirectory/shapefile name

        suffix: string behind each subdirectory/shapefile name

    Output:
        True when every comparison passed (an assertion fails otherwise)
    '''
    for value in attr:
        # Expected geometry: union of the original rows with this value
        expected_rows = df_test[df_test[disaggregate_attr] == value]
        expected_union = shp.ops.cascaded_union(
            list(expected_rows['geometry']))

        # Written geometry: <direc>/<name>/<name>.shp
        folder = direc_path + '/' + prefix + value + suffix
        written_path = folder + '/' + prefix + value + suffix + '.shp'
        written = fm.load_shapefile(written_path)
        written_union = shp.ops.cascaded_union(list(written['geometry']))

        assert expected_union.equals(written_union)

    return True
def test_gap(self):
    ''' Two shapes that share no border must have empty neighbor lists'''
    data_dir = os.getcwd() + '/testing/test_data/real_rook_contiguity/'
    loaded = fm.load_shapefile(data_dir + 'gap.shp')

    # perform real rook contiguity
    result = real_rook_contiguity(loaded)

    # Neither shape may list any neighbor
    for ix in (0, 1):
        assert [] == result.at[ix, 'neighbors']
def test_retain_cols(self):
    '''Columns passed to split_noncontiguous keep their values on every
    resulting piece'''
    data_dir = os.getcwd() + '/testing/test_data/split_noncontiguous/'
    loaded = fm.load_shapefile(data_dir + '/two_pieces.shp')

    # Split while retaining both value columns
    pieces = split_noncontiguous(loaded, ['value1', 'value2'])

    # Both pieces must carry the same retained values
    for ix in (0, 1):
        assert pieces.at[ix, 'value1'] == '1'
        assert pieces.at[ix, 'value2'] == '2'
def test_keep_columns_default(self):
    '''With the 'all' setting the merged file must contain exactly the
    expected set of columns'''
    expected_cols = {'col1', 'col2', 'col3', 'geometry'}

    apply_merge('all')

    # Debug directory the merged output is read back from
    out_direc = os.getcwd() + "/testing/debug/merge_shapefiles/"
    merged = fm.load_shapefile(out_direc + "merge.shp")

    # Column order is irrelevant, only membership is compared
    assert set(merged.columns) == expected_cols

    # Only reached when the check passed: remove folder in debug
    shutil.rmtree(out_direc)
def transform_crs(shp_paths, crs='epsg:4269'):
    ''' Update the coordinate reference system for a set of shapefiles.

    Arguments:
        shp_paths: LIST of paths to shapefiles to be edited

        crs: the coordinate reference system to convert to. Defaults to
            'epsg:4269'

    Output:
        None, but each file is saved back to its original path
    '''
    for shp_path in shp_paths:
        # load -> apply crs -> overwrite the same file
        loaded = fm.load_shapefile(shp_path)
        updated = fm.set_CRS(loaded, crs)
        fm.save_shapefile(updated, shp_path)
def test_small_border(self):
    ''' Rook contiguity on a 2x2 grid where one diagonal pair shares a
    small border.

    Indexes: 0 bottom left, 1 bottom right, 2 top left, 3 top right.
    The assertions require 1 and 2 to be mutual neighbors while 0 and 3
    are not neighbors of each other.

    NOTE(review): the earlier description placed the small border between
    the top right and bottom left shapes, which disagrees with these
    assertions - confirm against small_border.shp.
    '''
    data_dir = os.getcwd() + '/testing/test_data/real_rook_contiguity/'
    loaded = fm.load_shapefile(data_dir + 'small_border.shp')

    # perform real rook contiguity
    result = real_rook_contiguity(loaded)

    # index -> indexes that must appear in its neighbor list; every
    # other index (including itself) must be absent
    expected_neighbors = {
        0: {1, 2},
        1: {0, 2, 3},
        2: {0, 1, 3},
        3: {1, 2},
    }

    for ix, should_touch in expected_neighbors.items():
        neighbors = result.at[ix, 'neighbors']
        for other in range(4):
            if other in should_touch:
                assert other in neighbors
            else:
                assert other not in neighbors
def merge_shapefiles(paths_to_merge, out_path=False, keep_cols='all'):
    ''' Combine multiple shapefiles into a single shapefile.

    Arguments:
        paths_to_merge: LIST of path strings of shapefiles to merge

        out_path: path to save new shapefile. If falsy, nothing is saved

        keep_cols: default -> 'all' means to keep all, otherwise this
            input takes a LIST of which columns/attributes to keep. The
            columns not listed are passed to fm.save_shapefile as
            columns to exclude; the returned dataframe is not reduced.

    Output:
        GeoDataFrame holding the rows of every input, re-indexed from 0
        and with columns sorted
    '''
    # Load everything first and concatenate once. DataFrame.append was
    # removed in pandas 2.0 and re-copied the growing frame on each call.
    frames = [fm.load_shapefile(path) for path in paths_to_merge]
    if frames:
        df_final = pd.concat(frames, ignore_index=True, sort=True)
    else:
        # pd.concat rejects an empty list; keep the empty-frame start
        # value the loop-based version had for this case
        df_final = pd.DataFrame()

    # reduce to only columns/attributes we are keeping
    if keep_cols == 'all':
        exclude_cols = []
    else:
        exclude_cols = list(set(df_final.columns) - set(keep_cols))

    # Save final shapefile
    df_final = gpd.GeoDataFrame(df_final, geometry='geometry')
    if out_path:
        fm.save_shapefile(df_final, out_path, exclude_cols)

    return df_final
def test_grid_dict(self):
    ''' Rook contiguity on a 2x2 grid with struct_type='dict'.

    Indexes: 0 bottom left, 1 bottom right, 2 top left, 3 top right.
    The keys of each neighbors dict must be exactly the two boxes listed
    for it below, out of the four indexes.
    '''
    data_dir = os.getcwd() + '/testing/test_data/real_rook_contiguity/'
    loaded = fm.load_shapefile(data_dir + 'grid.shp')

    # perform real rook contiguity
    result = real_rook_contiguity(loaded, struct_type='dict')

    # index -> indexes that must appear among its neighbor keys; every
    # other index (including itself) must be absent
    expected_neighbors = {
        0: {1, 2},
        1: {0, 3},
        2: {0, 3},
        3: {1, 2},
    }

    for ix, should_touch in expected_neighbors.items():
        neighbor_keys = list(result.at[ix, 'neighbors'].keys())
        for other in range(4):
            if other in should_touch:
                assert other in neighbor_keys
            else:
                assert other not in neighbor_keys
def test_multiple_boundaries(self):
    ''' Shared perimeters when geometries intersect in multiple
    locations.

    Indexes: 0 top piece, 1 middle piece, 2 bottom piece.
    '''
    data_dir = os.getcwd() + '/testing/test_data/calculate_shared_perimeters/'
    loaded = fm.load_shapefile(data_dir + 'multiple_intersections.shp')

    # perform calculate shared perimeters
    result = calculate_shared_perimeters(loaded)

    # index -> {neighbor index: expected shared perimeter}
    expected_perimeters = {
        0: {1: 3, 2: 2},
        1: {0: 3, 2: 1},
        2: {1: 1, 0: 2},
    }

    for ix, per_neighbor in expected_perimeters.items():
        shared = result.at[ix, 'neighbors']
        for neighbor_ix, length in per_neighbor.items():
            assert shared[neighbor_ix] == length
def create_bounding_frame(in_path, out_path=False):
    ''' Create a bounding box around the extents of a shapefile.

    This will be used to overlay on top of a georeferenced image in GIS to
    allow for automated cropping in the algorithm that converts precinct
    images to shapefiles. Will usually use a census block shapefile to
    generate this bounding frame.

    Arguments:
        in_path: full path to input shapefile to create bounding frame for

        out_path: full path to save bounding frame shapefile. If falsy,
            nothing is saved

    Output:
        dataframe containing the generated bounding frame
    '''
    # Generate bounding frame and save
    df = fm.load_shapefile(in_path)
    bounding_frame_df = sm.generate_bounding_frame(df)

    if out_path:
        fm.save_shapefile(bounding_frame_df, out_path)

    # Return the generated frame, not the unmodified input, so the result
    # is available without saving to disk and matches the other wrappers
    # that return the dataframe they produced
    return bounding_frame_df