def select_closest_flowline(self, similarity_cutoff=0.6):
    """Pick the flowline nearest to the source feature.

    Works on ``self.flowlines_data`` (the output of hydrolink_flowlines) and
    keeps every evaluation field of the chosen flowline.  Name similarity
    plays no part in the choice; it is only counted so the result documents
    how many flowlines met ``similarity_cutoff``.

    Nothing happens unless ``self.status == 1``.  Otherwise the method sets:

    * ``self.total_count_flowlines`` -- number of candidate rows.
    * ``self.name_match_in_buffer`` -- number of rows whose
      'flowline name similarity' is at least ``similarity_cutoff``.
    * ``self.hydrolink_flowline`` -- dict holding the single closest row.

    When several rows share the minimum 'meters from flowline',
    ``self.message`` is set and ``self.error_handling()`` is called instead
    of choosing one of them.
    """
    if self.status != 1:
        return

    # Output labels for the raw flowline attribute columns.
    column_labels = {
        "lengthkm": "nhdplusv2 flowline length km",
        "reachcode": "nhdplusv2 flowline reachcode",
        "gnis_name": "nhdplusv2 flowline gnis name",
        "comid": "nhdplusv2 comid",
        "terminalflag": "nhdplusv2 terminal flag",
        "permanent_identifier": "nhdplusv2 flowline permanent identifier"
    }
    candidates = utils.df_for_selection(self.flowlines_data).rename(
        columns=column_labels)

    self.total_count_flowlines = len(candidates)
    meets_cutoff = candidates['flowline name similarity'] >= similarity_cutoff
    self.name_match_in_buffer = len(candidates.loc[meets_cutoff])

    # keep='all' returns every row tied for the minimum distance, so more
    # than one row here means the closest flowline is ambiguous.
    nearest = candidates.nsmallest(1, 'meters from flowline', keep='all')
    if len(nearest) > 1:
        self.message = f'multiple flowlines with same snap distance for id: {self.source_id}. Use name_match method.'
        self.error_handling()
        return

    self.hydrolink_flowline = nearest.to_dict('records')[0]
def select_closest_flowline_w_name_match(self, similarity_cutoff=0.6):
    """Select closest flowline with matching water name.

    HydroLink data to the closest NHD feature whose name similarity meets
    ``similarity_cutoff``.  Candidates are narrowed in this order:

    1. flowlines with an exact name match ('flowline name similarity' of
       1.0);
    2. otherwise, flowlines whose similarity is at least
       ``similarity_cutoff``;
    3. otherwise, every flowline in ``self.flowlines_data``.

    A single candidate in tier 1 or 2 is taken as-is; otherwise the
    candidate with the smallest 'meters from flowline' is chosen.  If
    several candidates tie on that distance, ``self.message`` is set and
    ``self.error_handling()`` is called instead of choosing one.

    Does nothing unless ``self.status == 1``.  On success the chosen row is
    stored as a dict in ``self.hydrolink_flowline``;
    ``self.total_count_flowlines`` always receives the candidate row count.

    Requires output from hydrolink_flowlines.
    """
    if self.status != 1:
        return

    df = utils.df_for_selection(self.flowlines_data)
    df = df.rename(
        columns={
            "lengthkm": "nhdhr flowline length km",
            "reachcode": "nhdhr flowline reachcode",
            "gnis_name": "nhdhr flowline gnis name",
            "permanent_identifier": "nhdhr flowline permanent identifier"
        })
    self.total_count_flowlines = df.shape[0]

    df_exact = df.loc[df['flowline name similarity'] == 1.0]
    df_similarity = df.loc[
        df['flowline name similarity'] >= similarity_cutoff]

    if df_exact.shape[0] == 1:
        # Only 1 flowline has an exact matching name.  Take the record from
        # the exact-match frame: reading it from the full frame returned
        # the first candidate row whether or not it was the match.
        self.hydrolink_flowline = df_exact.to_dict('records')[0]
        return

    if df_exact.shape[0] > 1:
        # more than 1 flowline has exact matching name, grab the closest
        candidates = df_exact
    elif df_similarity.shape[0] == 1:
        # only one flowline has a name meeting the similarity cutoff
        self.hydrolink_flowline = df_similarity.to_dict('records')[0]
        return
    elif df_similarity.shape[0] > 1:
        # several flowlines meet the similarity cutoff, grab the closest
        candidates = df_similarity
    else:
        # no flowlines with name match, select closest of all flowlines
        candidates = df

    # keep='all' returns every row tied for the minimum distance, so more
    # than one row here means the choice is ambiguous.
    closest = candidates.nsmallest(1, 'meters from flowline', keep='all')
    if closest.shape[0] > 1:
        self.message = f'multiple flowlines with same snap distance for id: {self.source_id}.'
        self.error_handling()
    else:
        # NOTE(review): an empty frame would reach this lookup and raise
        # IndexError; presumably status == 1 implies at least one
        # flowline -- confirm against hydrolink_flowlines.
        self.hydrolink_flowline = closest.to_dict('records')[0]