Esempio n. 1
0
    def test_geometry_merge(self):
        '''Merged output must cover the same area as the known-correct file.

        The merge is run through apply_merge([]) (per the original note, no
        attribute columns are kept), then the union of the merged geometries
        is compared against the union of the reference geometries.
        '''
        cwd = os.getcwd()

        # Known-correct shapefile the merge result is compared against
        reference_path = (cwd + "/testing/test_data/merge_shapefiles/" +
                          "input_full.shp")

        # Debug directory and file the merge is expected to write to
        debug_dir = cwd + "/testing/debug/merge_shapefiles/"
        merged_path = debug_dir + "merge.shp"

        # Run the merge
        apply_merge([])

        # Read back the merged output and the reference
        merged = fm.load_shapefile(merged_path)
        reference = fm.load_shapefile(reference_path)

        # Collapse each file into a single geometry and compare the two
        merged_union = shp.ops.cascaded_union(list(merged['geometry']))
        reference_union = shp.ops.cascaded_union(list(reference['geometry']))
        assert reference_union.equals(merged_union)

        # Only reached when the assertion passed: clear the debug output
        shutil.rmtree(debug_dir)
def test_simple_grid():
    '''Dissolve the simple grid fixture and compare with the expected result.

    The fixture is described as a 3 x 3 grid that should dissolve into three
    columns. The test passes when exactly three (expected, dissolved)
    geometry pairs compare equal.
    '''
    data_dir = os.getcwd() + "/testing/test_data/dissolve/"

    # Known-correct dissolved shapefile
    expected = fm.load_shapefile(data_dir + "dissolved_simple_correct.shp")

    # Dissolve the raw input on the 'attribute' column
    raw = fm.load_shapefile(data_dir + "test_dissolve_simple.shp")
    dissolved = sm.dissolve(raw, 'attribute')

    # Count equal pairs; the quadratic scan is fine for a handful of rows
    matches = sum(
        1
        for _, expected_row in expected.iterrows()
        for _, dissolved_row in dissolved.iterrows()
        if expected_row['geometry'].equals(dissolved_row['geometry'])
    )

    assert matches == 3
Esempio n. 3
0
def frame(input_path, correct_path):
    '''Check that the generated bounding frame matches a reference frame.

    A bounding frame is generated from the shapefile at input_path and the
    polygon formed by its first interior ring is compared with the polygon
    formed by the first interior ring of the reference frame.

    Arguments:
        input_path:
            path to shapefile that a bounding frame will be created around

        correct_path:
            path to the correct bounding frame shapefile

    Raises:
        AssertionError if the two interiors are not equal
    '''
    # Load the reference frame once. The original loaded it twice and only
    # the gpd.read_file result was ever used.
    correct = gpd.read_file(correct_path)

    # Load the input shapefile and generate its bounding frame
    df = fm.load_shapefile(input_path)
    created = sm.generate_bounding_frame(df)

    # Polygon enclosed by the reference frame's first interior ring
    correct_ix = correct.index.values[0]
    correct_frame = correct.at[correct_ix, 'geometry']
    correct_interior = Polygon(correct_frame.interiors[0])

    # Polygon enclosed by the generated frame's first interior ring. Use the
    # generated frame's OWN first index label; the reference frame's label is
    # not guaranteed to exist in `created`.
    created_ix = created.index.values[0]
    created_frame = created.at[created_ix, 'geometry']
    created_interior = Polygon(created_frame.interiors[0])

    # The two interiors must describe the same region
    assert correct_interior.equals(created_interior)
Esempio n. 4
0
def compare_shapefile_difference(shp_paths1, shp_paths2, verbose=False):
    '''
    Compare shapefiles pairwise and report how much of the first shapefile
    of each pair lies outside the second, as a ratio of the first
    shapefile's area.

    A result of 0.90 means that 90 percent of the first shapefile's area is
    NOT contained in the second shapefile. The first path in shp_paths1 is
    compared to the first path in shp_paths2, the second to the second, and
    so on.

    This is useful for comparing shapefiles received from local jurisdictions.

    Arguments:
        shp_paths1:
            LIST of paths to shapefiles to be compared

        shp_paths2:
            LIST of paths to shapefiles to compare against

        verbose:
            whether to print each difference ratio as it is calculated

    Output:
        LIST of difference ratios as described above, one per shapefile
        pair. Returns False if the two lists do not have the same length.
    '''
    # Pairs are matched by position, so the lists must line up
    if len(shp_paths1) != len(shp_paths2):
        return False

    # Difference ratio (relative to the first shapefile) for each pair
    out = []

    for path1, path2 in zip(shp_paths1, shp_paths2):
        # Load in shapefiles
        shp1 = fm.load_shapefile(path1)
        shp2 = fm.load_shapefile(path2)

        # Collapse each shapefile into a single geometry
        poly1 = shp.ops.cascaded_union(list(shp1['geometry']))
        poly2 = shp.ops.cascaded_union(list(shp2['geometry']))

        # Fraction of the first shapefile's area that falls outside the
        # second. A first shapefile with zero area has nothing outside the
        # second, so report 0.0 rather than dividing by zero.
        area1 = poly1.area
        ratio = poly1.difference(poly2).area / area1 if area1 else 0.0
        out.append(ratio)

        if verbose:
            name1 = path1.split('/')[-1]
            name2 = path2.split('/')[-1]
            print('Difference Between ' + name1 + ' and ' + name2 + ': ' +
                  str(ratio))

    return out
def remove_geometries(path_delete, save_path, path_reference, thresh):
    '''Drop geometries whose overlap with a reference falls below a threshold.

    For every geometry in the path_delete shapefile, the area of its
    intersection with the union of all reference geometries is divided by
    the geometry's own area. Geometries whose ratio is below thresh are
    removed.

    Arguments:
        path_delete:
            path to the shapefile being edited (shapes without enough
            intersection are deleted)

        save_path:
            path to save the edited shapefile. If falsy, nothing is saved

        path_reference:
            path to the shapefile the intersection is taken against.
            Intersections are taken with respect to the union of all of
            its geometries

        thresh:
            fraction required to keep a shape. With thresh 0.9 any shape
            with an intersection ratio greater than or equal to 0.9
            remains and anything below is deleted

    Output:
        edited dataframe with shapes removed
    '''
    candidates = fm.load_shapefile(path_delete)
    reference = fm.load_shapefile(path_reference)

    # Single geometry covering every reference shape
    reference_union = shp.ops.cascaded_union(list(reference['geometry']))

    def overlap_fraction(geom):
        # Share of this geometry's area that lies inside the reference
        return geom.intersection(reference_union).area / geom.area

    # Temporary column used only for filtering; dropped again right after
    candidates['ratio'] = candidates['geometry'].apply(overlap_fraction)
    kept = candidates[candidates.ratio >= thresh].drop(columns=['ratio'])

    if save_path:
        fm.save_shapefile(kept, save_path)

    return kept
Esempio n. 6
0
def dissolve_by_attribute(in_path, dissolve_attribute, out_path=False):
    '''Dissolve a shapefile's boundaries according to an attribute.

    The shapefile is loaded, dissolved on dissolve_attribute, passed through
    the contiguity/containment check, and optionally saved.

    Arguments:
        in_path:
            full path to input shapefile to be dissolved

        dissolve_attribute:
            attribute to dissolve boundaries by

        out_path:
            full path to save the created shapefile. If falsy, nothing is
            saved

    Output:
        the dissolved dataframe
    '''
    dissolved = sm.dissolve(fm.load_shapefile(in_path), dissolve_attribute)

    # Result is ignored here; presumably the check reports problems itself
    sc.check_contiguity_and_contained(dissolved, dissolve_attribute)

    if not out_path:
        return dissolved

    fm.save_shapefile(dissolved, out_path)
    return dissolved
def apply_crs_test(filename, crs='epsg:4269', default=True):
    '''Run transform_crs on a copy of a test shapefile and check the result.

    Arguments:
        filename: name of testing shapefile
        crs: coordinate reference system to convert to
        default: currently unused by this helper

    Raises:
        AssertionError if the reloaded file's crs is not {'init': crs}
    '''
    # Work on a fresh copy of the fixture inside the debug directory
    working_path = intitialize_test_files(filename)

    # transform_crs takes a list of paths and edits the files in place
    transform_crs([working_path], crs)

    # Reload the edited file and verify its coordinate reference system
    reloaded = fm.load_shapefile(working_path)
    assert reloaded.crs == {'init': crs}

    # Only reached when the assertion passed: remove the testing folders
    clean_test_files()
def intitialize_test_files(filename):
    '''Copy a transform_crs fixture into a fresh debug directory.

    Argument:
        filename: file name of testing file with extension

    Output:
        path to the copied shapefile, which the test is free to edit
    '''
    cwd = os.getcwd()

    # Fixture to copy from
    source_path = cwd + "/testing/test_data/transform_crs/" + filename

    # Start from an empty debug directory every time
    debug_dir = cwd + "/testing/debug/transform_crs"
    if os.path.exists(debug_dir):
        shutil.rmtree(debug_dir)
    os.mkdir(debug_dir)

    # Write a copy of the fixture to the debug directory
    working_path = debug_dir + '/' + filename
    fm.load_shapefile(source_path).to_file(working_path)

    return working_path
Esempio n. 9
0
def load_dfs(target_name, source_name):
    '''Load the target and source dataframes from the distribute_values data.

    Arguments:
        target_name: file name of the target shapefile in the test data
        source_name: file name of the source shapefile in the test data

    Output:
        target and source df
    '''
    base = os.getcwd() + "/testing/test_data/distribute_values/"

    # Build both paths before touching the disk
    target_path, source_path = base + target_name, base + source_name

    return fm.load_shapefile(target_path), fm.load_shapefile(source_path)
	def test_grid(self):
		''' Shared perimeters on the 2x2 box grid fixture.

		Every box is expected to share a perimeter of 1 with each of its
		two side neighbors. '''
		data_dir = os.getcwd() + '/testing/test_data/calculate_shared_perimeters/'
		grid = fm.load_shapefile(data_dir + 'grid.shp')

		# perform calculate shared perimeters
		grid = calculate_shared_perimeters(grid)

		# (row, neighbors that must share a perimeter of 1), in the order
		# bottom left, bottom right, top left, top right
		expected = (
			(0, (1, 2)),
			(1, (0, 3)),
			(2, (0, 3)),
			(3, (1, 2)),
		)

		for row, neighbor_ixs in expected:
			shared = grid.at[row, 'neighbors']
			for neighbor in neighbor_ixs:
				assert shared[neighbor] == 1
def load_dfs(large_name, small_name):
    '''Load the large and small dataframes from the distribute_label data.

    Arguments:
        large_name: file name of the large shapefile in the test data
        small_name: file name of the small shapefile in the test data

    Output:
        large and small df
    '''
    base = os.getcwd() + "/testing/test_data/distribute_label/"

    # Build both paths before touching the disk
    large_path, small_path = base + large_name, base + small_name

    return fm.load_shapefile(large_path), fm.load_shapefile(small_path)
Esempio n. 12
0
def test_disaggregate_by_attribute():
    '''Disaggregate the fixture shapefile and run the three output checks.

    The debug directory is removed only when the folder, file name and shape
    checks all return a truthy value, so failed runs leave their output
    behind for inspection.
    '''
    cwd = os.getcwd()

    # Inputs
    source_path = (cwd + "/testing/test_data/disaggregate_file/" +
                   "test_disaggregate_file.shp")
    attr_column = 'attribute'
    name_prefix = 'prefix_'
    name_suffix = '_suffix'

    # Start from an empty output directory
    out_dir = cwd + "/testing/debug/disaggregate_file"
    if os.path.exists(out_dir):
        shutil.rmtree(out_dir)
    os.mkdir(out_dir)

    # Function under test
    disaggregate_file(source_path, attr_column, out_dir, name_prefix,
                      name_suffix)

    # Unique attribute values found in the source file
    source_df = fm.load_shapefile(source_path)
    values = list(set(source_df[attr_column]))

    # Run all three checks before deciding whether to clean up
    folders_ok = folder_name(out_dir, values, name_prefix, name_suffix)
    files_ok = file_name(out_dir, values, name_prefix, name_suffix)
    shapes_ok = shapes(source_df, attr_column, out_dir, values, name_prefix,
                       name_suffix)

    if folders_ok and files_ok and shapes_ok:
        shutil.rmtree(out_dir)
Esempio n. 13
0
def disaggregate_file(shp_path,
                      disaggregate_attr,
                      direc_path,
                      prefix='',
                      suffix=''):
    '''
    Take a larger shapefile and disaggregate it into smaller shapefiles
    according to an attribute. The directory and shapefile name will be
    prefix + disaggregate_attribute value + suffix.

    NOTE: direc_path SHOULD NOT END WITH '/'

    Example: Use to disaggregate statewide census block file to county census
    block files

    If available load in shp_path with a pickle file rather than the actual
    shapefile. Loading in statewide census files takes a while

    Arguments:
        shp_path:
            path to shapefile to disaggregate

        disaggregate_attr:
            attribute to disaggregate on. Values do not have to be strings;
            they are converted with str() when building names

        direc_path:
            path to directory to create subdirectory of smaller
            shapefiles for each unique value. Any existing subdirectory
            with the same name is deleted first

        prefix:
            string to put in front of the name of smaller shapefiles

        suffix:
            string to put behind the name of smaller shapefiles
    '''

    # load shapefile
    df = fm.load_shapefile(shp_path)

    # Unique values of the attribute
    attributes = set(df[disaggregate_attr])

    # For each value create subdirectory, create smaller shapefile, and save
    for attr in attributes:

        # name of subdirectory and new shapefile. str() keeps string values
        # unchanged and lets numeric codes be used without a TypeError
        name = prefix + str(attr) + suffix
        subdirec = direc_path + '/' + name
        shp_name = name + '.shp'

        # recreate the subdirectory from scratch
        if os.path.exists(subdirec):
            shutil.rmtree(subdirec)
        os.mkdir(subdirec)

        # create shapefile holding only the rows with this value
        df_attr = df[df[disaggregate_attr] == attr]
        df_attr = gpd.GeoDataFrame(df_attr, geometry='geometry')
        fm.save_shapefile(df_attr, subdirec + '/' + shp_name)
def perform_merge(ixs_to_merge, filename, cols_to_add=None):
    '''Load a merge_geometries test shapefile and merge the given geometries.

    Arguments:
        ixs_to_merge: passed through to merge_geometries as the indexes to
            merge
        filename: name of the test shapefile WITHOUT the '.shp' extension
        cols_to_add: passed through to merge_geometries. Defaults to a new
            empty list on every call

    Output:
        whatever merge_geometries returns
    '''
    # A fresh list per call avoids sharing one mutable default between calls
    if cols_to_add is None:
        cols_to_add = []

    # load
    direc_path = os.getcwd() + '/testing/test_data/merge_geometries/'
    file_path = direc_path + filename + '.shp'
    df = fm.load_shapefile(file_path)

    # merge
    return merge_geometries(df, ixs_to_merge, cols_to_add)
Esempio n. 15
0
def perform_merge(num):
    '''Load the right_number fixture and merge it down to "num" geometries.

    Output:
        whatever merge_to_right_number returns for the fixture and num
    '''
    fixture_path = (os.getcwd() + '/testing/test_data/merge_to_right_number/' +
                    'right_number.shp')
    fixture = fm.load_shapefile(fixture_path)

    return merge_to_right_number(fixture, num)
	def test_regular_contained(self):
		''' Donut hole type case: after merging fully contained geometries
		a single row must remain'''
		data_dir = os.getcwd() + '/testing/test_data/merge_fully_contained/'
		fixture = fm.load_shapefile(data_dir + '/regular.shp')

		# merge
		merged = merge_fully_contained(fixture)

		assert len(merged) == 1
Esempio n. 17
0
def test_contained():
    ''' Fixture is described as a 3 x 3 grid that surrounds a single element.

    The second item returned by the contiguity/containment check must be
    non-empty for this fixture.'''
    data_dir = os.getcwd() + "/testing/test_data/noncontiguous_and_contained/"
    fixture = fm.load_shapefile(data_dir + "test_contained.shp")

    check_result = sc.check_contiguity_and_contained(fixture, 'attribute')

    assert len(check_result[1])
	def test_nested_contained(self):
		''' Geometry surrounded by other geometries which are in turn
		contained by a larger geometry: a single row must remain'''
		data_dir = os.getcwd() + '/testing/test_data/merge_fully_contained/'
		fixture = fm.load_shapefile(data_dir + '/nested.shp')

		merged = merge_fully_contained(fixture)

		assert len(merged) == 1
	def test_cols_to_add(self):
		''' Columns listed in cols_to_add are summed during the merge'''
		data_dir = os.getcwd() + '/testing/test_data/merge_fully_contained/'
		fixture = fm.load_shapefile(data_dir + '/regular.shp')

		# make sure the column is numeric before it is passed to the merge
		fixture['value'] = fixture['value'].astype(float)

		# merge
		merged = merge_fully_contained(fixture, cols_to_add=['value'])

		assert merged.at[0, 'value'] == 2
	def test_four_pieces(self):
		'''A geometry with four noncontiguous pieces splits into four rows'''
		data_dir = os.getcwd() + '/testing/test_data/split_noncontiguous/'
		fixture = fm.load_shapefile(data_dir + '/four_pieces.shp')

		# Split
		pieces = split_noncontiguous(fixture)

		# One row per piece
		assert len(pieces) == 4
Esempio n. 21
0
def shapes(df_test, disaggregate_attr, direc_path, attr, prefix, suffix):
    ''' Check that the correct shapefile was created for each attribute value.

    For every value in attr, the union of the matching rows of df_test must
    equal the union of the geometries in the shapefile saved for that value.

    Output:
        True when every comparison passed (a failure raises AssertionError)
    '''
    for value in attr:
        # Expected geometry: all source rows carrying this value
        expected_rows = df_test[df_test[disaggregate_attr] == value]
        expected_poly = shp.ops.cascaded_union(list(expected_rows['geometry']))

        # Saved geometry: <direc_path>/<stem>/<stem>.shp
        stem = prefix + value + suffix
        saved_path = direc_path + '/' + stem + '/' + stem + '.shp'
        saved = fm.load_shapefile(saved_path)
        saved_poly = shp.ops.cascaded_union(list(saved['geometry']))

        assert expected_poly.equals(saved_poly)

    return True
	def test_gap(self):
		''' Two shapes that share no border must have no neighbors '''
		data_dir = os.getcwd() + '/testing/test_data/real_rook_contiguity/'
		fixture = fm.load_shapefile(data_dir + 'gap.shp')

		# perform real rook contiguity
		result = real_rook_contiguity(fixture)

		# Both neighbor lists must be empty
		for row in (0, 1):
			assert [] == result.at[row, 'neighbors']
	def test_retain_cols(self):
		'''Columns passed as retained keep their values on every piece'''
		data_dir = os.getcwd() + '/testing/test_data/split_noncontiguous/'
		fixture = fm.load_shapefile(data_dir + '/two_pieces.shp')

		# Split
		pieces = split_noncontiguous(fixture, ['value1', 'value2'])

		# Both resulting rows must carry the original column values
		expected = (('value1', '1'), ('value2', '2'))
		for row in (0, 1):
			for column, value in expected:
				assert pieces.at[row, column] == value
Esempio n. 24
0
    def test_keep_columns_default(self):
        '''After apply_merge('all') the output has exactly the expected
        columns'''
        expected_columns = {'col1', 'col2', 'col3', 'geometry'}

        apply_merge('all')

        # Location the merge is expected to write its output to
        debug_dir = os.getcwd() + "/testing/debug/merge_shapefiles/"
        merged = fm.load_shapefile(debug_dir + "merge.shp")

        # Column order is irrelevant, only the set of names matters
        assert set(merged.columns) == expected_columns

        # Only reached when the assertion passed: clear the debug output
        shutil.rmtree(debug_dir)
def transform_crs(shp_paths, crs='epsg:4269'):
    '''
    Update the coordinate reference system for a set of shapefiles

    Arguments:
        shp_paths:
            LIST of paths to shapefiles to be edited

        crs:
            the coordinate reference system to apply. Default is above

    Output:
        None, but each file is overwritten at its original path
    '''
    for shp_path in shp_paths:
        # load, apply the crs through fm.set_CRS, and save back in place
        updated = fm.set_CRS(fm.load_shapefile(shp_path), crs)
        fm.save_shapefile(updated, shp_path)
	def test_small_border(self):
		''' Rook contiguity on the 2x2 grid fixture that has a small border
		between two diagonal shapes (rows 1 and 2 are expected to be
		neighbors here)'''
		data_dir = os.getcwd() + '/testing/test_data/real_rook_contiguity/'
		fixture = fm.load_shapefile(data_dir + 'small_border.shp')

		# perform real rook contiguity
		result = real_rook_contiguity(fixture)

		# (row, exact set of expected neighbors), in the order bottom left,
		# bottom right, top left, top right
		expected = (
			(0, {1, 2}),
			(1, {0, 2, 3}),
			(2, {0, 1, 3}),
			(3, {1, 2}),
		)

		# Every index 0..3 must be present exactly when it is expected
		for row, expected_neighbors in expected:
			neighbors = result.at[row, 'neighbors']
			for candidate in range(4):
				assert (candidate in neighbors) == (candidate in expected_neighbors)
Esempio n. 27
0
def merge_shapefiles(paths_to_merge, out_path=False, keep_cols='all'):
    '''
    Combine multiple shapefiles into a single shapefile

    Arguments:
        paths_to_merge:
            LIST of path strings of shapefiles to merge

        out_path:
            path to save new shapefile. If falsy, nothing is saved

        keep_cols:
            default -> 'all' means to keep all, otherwise this input
            takes a LIST of which columns/attributes to keep. Columns not
            in the list are excluded when the file is saved

    Output:
        GeoDataFrame holding the rows of every input shapefile. Columns are
        only excluded from the saved file, not from the returned frame
    '''
    # Load every shapefile first and concatenate once. DataFrame.append was
    # removed in pandas 2.0, and appending inside a loop copies all
    # accumulated rows on every iteration.
    frames = [fm.load_shapefile(path) for path in paths_to_merge]

    if frames:
        df_final = pd.concat(frames, ignore_index=True, sort=True)
    else:
        # pd.concat rejects an empty list; fall back to the empty frame the
        # loop-based version started from
        df_final = pd.DataFrame()

    # Work out which columns/attributes are dropped on save
    if isinstance(keep_cols, str) and keep_cols == 'all':
        exclude_cols = []
    else:
        exclude_cols = list(set(df_final.columns) - set(keep_cols))

    # Make sure the result is a GeoDataFrame keyed on the geometry column
    df_final = gpd.GeoDataFrame(df_final, geometry='geometry')

    # Save final shapefile
    if out_path:
        fm.save_shapefile(df_final, out_path, exclude_cols)

    return df_final
	def test_grid_dict(self):
		''' Rook contiguity on the 2x2 grid fixture when the function is
		asked for a dict structure'''
		data_dir = os.getcwd() + '/testing/test_data/real_rook_contiguity/'
		fixture = fm.load_shapefile(data_dir + 'grid.shp')

		# perform real rook contiguity
		result = real_rook_contiguity(fixture, struct_type='dict')

		# (row, exact set of expected neighbor keys), in the order bottom
		# left, bottom right, top left, top right
		expected = (
			(0, {1, 2}),
			(1, {0, 3}),
			(2, {0, 3}),
			(3, {1, 2}),
		)

		# Every index 0..3 must be a key exactly when it is expected
		for row, expected_neighbors in expected:
			neighbor_keys = list(result.at[row, 'neighbors'].keys())
			for candidate in range(4):
				assert (candidate in neighbor_keys) == (candidate in expected_neighbors)
	def test_multiple_boundaries(self):
		''' Shared perimeters when geometries intersect in multiple
		locations'''
		data_dir = os.getcwd() + '/testing/test_data/calculate_shared_perimeters/'
		fixture = fm.load_shapefile(data_dir + 'multiple_intersections.shp')

		# perform calculate shared perimeters
		result = calculate_shared_perimeters(fixture)

		# (row, ((neighbor, expected shared perimeter), ...)), in the order
		# top piece, middle piece, bottom piece
		expected = (
			(0, ((1, 3), (2, 2))),
			(1, ((0, 3), (2, 1))),
			(2, ((1, 1), (0, 2))),
		)

		for row, shared_lengths in expected:
			shared = result.at[row, 'neighbors']
			for neighbor, length in shared_lengths:
				assert shared[neighbor] == length
Esempio n. 30
0
def create_bounding_frame(in_path, out_path=False):
    '''
    Create a bounding frame around the extents of a shapefile.

    This will be used to overlay on top of a georeferenced image in GIS to
    allow for automated cropping in the algorithm that converts precinct
    images to shapefiles. Will usually use a census block shapefile to
    generate this bounding frame

    Arguments:
        in_path:
            full path to input shapefile to create bounding frame for

        out_path:
            full path to save bounding frame shapefile. If falsy, nothing
            is saved

    Output:
        the bounding frame dataframe generated from the input shapefile
    '''
    # Generate bounding frame
    df = fm.load_shapefile(in_path)
    bounding_frame_df = sm.generate_bounding_frame(df)

    # Save
    if out_path:
        fm.save_shapefile(bounding_frame_df, out_path)

    # Return the generated frame, not the untouched input
    return bounding_frame_df