def get_admin1_id(self, place: Loc):
    """Look up the Admin1 (state/province) id for place.admin1_name and store it on the place."""
    target = place.admin1_name
    if len(target) == 0:
        return

    # Queries run in order from most restrictive to least restrictive
    exact = Query(where="name = ? AND country = ? AND f_code = ? ",
                  args=(target, place.country_iso, 'ADM1'),
                  result=Result.STRONG_MATCH)
    wildcard = Query(where="name LIKE ? AND country = ? AND f_code = ?",
                     args=(target, place.country_iso, 'ADM1'),
                     result=Result.WILDCARD_MATCH)
    # Last resort: exact name, any country
    any_country = Query(where="name = ? AND f_code = ?",
                        args=(target, 'ADM1'),
                        result=Result.SOUNDEX_MATCH)

    rows, _ = self.db.process_query_list(from_tbl='main.admin',
                                         query_list=[exact, wildcard, any_country])
    if len(rows) > 0:
        place.admin1_id = rows[0][Entry.ADM1]
        # Backfill the country ISO from the matched row when it is unknown
        if place.country_iso == '':
            place.country_iso = rows[0][Entry.ISO]
def run_test3() -> int:
    """Score the current test case from TestScoring.test_values[TestScoring.test_idx].

    Each test row is (input entry, expected result, feature).  Parses the input
    and the expected result into Loc objects, backfills any missing country
    names from the geo database, computes the match score, prints a summary
    line, and returns the score.

    NOTE(review): annotated ``-> int`` but ``match_score`` appears to return a
    float (printed with ``:.1f``) — confirm the intended return type.
    """
    # Original code also assigned an unused local `title` from element [0];
    # removed.  The old docstring documented parameters this function never had.
    inp = TestScoring.test_values[TestScoring.test_idx][0]
    res = TestScoring.test_values[TestScoring.test_idx][1]
    feat = TestScoring.test_values[TestScoring.test_idx][2]

    in_place = Loc.Loc()
    in_place.original_entry = inp
    in_place.parse_place(inp, geo_files=TestScoring.geodata.geo_files)
    if in_place.country_name == '' and in_place.country_iso != '':
        in_place.country_name = TestScoring.geodata.geo_files.geodb.get_country_name(
            in_place.country_iso)

    res_place = Loc.Loc()
    res_place.original_entry = res
    res_place.parse_place(place_name=res, geo_files=TestScoring.geodata.geo_files)
    res_place.feature = feat
    if res_place.country_name == '' and res_place.country_iso != '':
        res_place.country_name = TestScoring.geodata.geo_files.geodb.get_country_name(
            res_place.country_iso)

    score = TestScoring.scoring.match_score(in_place, res_place)
    print(
        f'{score:.1f} [{in_place.original_entry.title()}] [{res_place.get_five_part_title()}]'
    )
    return score
def country_is_valid(self, place: Loc) -> bool:
    """Return True when the place has a country that is in the supported-country list.

    On failure, sets place.result_type (and for unsupported countries also
    place.place_type and place.target) so the caller can report the reason.
    """
    # Guard: no country at all
    if place.country_iso == '':
        place.result_type = GeoKeys.Result.NO_COUNTRY
        return False

    # Guard: country present but not in the supported list
    if place.country_iso not in self.geo_files.supported_countries_dct:
        self.logger.debug(f'[{place.country_iso}] not supported')
        place.result_type = GeoKeys.Result.NOT_SUPPORTED
        place.place_type = Loc.PlaceType.COUNTRY
        place.target = place.country_name
        return False

    return True
def get_admin2_name(self, place: Loc) -> str:
    """Return the Admin2 (county) name for place.admin2_id, or '' if not found.

    Also caches the resolved name in place.admin2_name.
    """
    target = place.admin2_id
    if len(target) == 0:
        return ''

    # Most specific first: constrain by admin1, then fall back to country only
    queries = [
        Query(where="admin2_id = ? AND country = ? AND admin1_id = ?",
              args=(target, place.country_iso, place.admin1_id),
              result=Result.STRONG_MATCH),
        Query(where="admin2_id = ? AND country = ?",
              args=(target, place.country_iso),
              result=Result.PARTIAL_MATCH)
    ]
    rows, _ = self.db.process_query_list(from_tbl='main.admin', query_list=queries)
    if len(rows) == 0:
        return ''
    place.admin2_name = rows[0][Entry.NAME]
    return place.admin2_name
def load_handler(self):
    """
    User pressed LOAD button to load an Ancestry file.

    Switches the app display from the initialization widgets to the Review
    widgets, opens the file-type-specific ancestry handler (GEDCOM or Gramps),
    opens the diagnostic output/input files, and starts scanning place entries.
    """
    self.w.original_entry.set_text("")
    self.w.remove_initialization_widgets()  # Remove old widgets
    self.w.create_review_widgets()  # Switch display from Initial widgets to main review widgets
    self.load_data()

    ged_path = self.cfg.get("gedcom_path")  # Get saved config setting for file

    # Load appropriate handler based on file type (chosen by extension substring)
    if ged_path is not None:
        if '.ged' in ged_path:
            self.out_suffix = "import.ged"
            # NOTE(review): out_suffix passed to the handler is the module-level
            # `temp_suffix`, not self.out_suffix set just above — confirm intended.
            self.ancestry_file_handler = Gedcom.Gedcom(
                in_path=ged_path, out_suffix=temp_suffix, cache_d=self.cache_dir,
                progress=None, geodata=self.geodata)  # Routines to open and parse GEDCOM file
        elif '.gramps' in ged_path:
            self.out_suffix = "import.gramps"
            # self.out_suffix = "csv"
            self.ancestry_file_handler = GrampsXml.GrampsXml(
                in_path=ged_path, out_suffix=temp_suffix, cache_d=self.cache_dir,
                progress=None, geodata=self.geodata)  # Routines to open and parse Gramps file
        else:
            self.out_suffix = 'unk.new.ged'
            # NOTE(review): showwarning's first positional arg is the dialog
            # *title*; this passes the message there — confirm intended.
            messagebox.showwarning(
                f'UNKNOWN File type. Not .gramps and not .ged. \n\n{ged_path}')

    # Diagnostic log files next to the input file.
    # NOTE(review): if ged_path is None this raises TypeError — presumably the
    # config always has a path by the time LOAD is pressed; verify.
    self.out_diag_file = open(ged_path + '.output.txt', 'w')
    self.in_diag_file = open(ged_path + '.input.txt', 'w')

    if self.ancestry_file_handler.error:
        TKHelper.fatal_error(f"File {ged_path} not found.")

    self.w.root.update()
    self.place: Loc.Loc = Loc.Loc()  # Create an object to store info for the current Place

    # Add filename to Title
    path_parts = os.path.split(ged_path)  # Extract filename from full path
    self.w.title.set_text(f'GEO FINDER - {path_parts[1]}')

    # Read file, find each place entry and handle it.
    self.w.user_entry.set_text("Scanning to previous position...")
    self.handle_place_entry()
def get_country_iso(self, place: Loc) -> str:
    """Return the ISO code for place.country_name, or '' if not found.

    When exactly one row matches, also normalizes place.country_name to the
    database's canonical name for that country.
    """
    target, modified = GeoKeys.country_normalize(place.country_name)
    if len(target) == 0:
        return ''

    # Only the exact-name ADM0 lookup is enabled; the wildcard and soundex
    # fallbacks were deliberately disabled in the original code.
    queries = [
        Query(where="name = ? AND f_code = ? ",
              args=(target, 'ADM0'),
              result=Result.STRONG_MATCH)
    ]
    rows, _ = self.db.process_query_list(from_tbl='main.admin', query_list=queries)

    if len(rows) == 0:
        return ''
    iso = rows[0][Entry.ISO]
    # A single unambiguous hit lets us also canonicalize the country name
    if len(rows) == 1:
        place.country_name = rows[0][Entry.NAME]
    return iso
def select_admin1(self, place: Loc):
    """Search the admin table for the Admin1 (state/province) named in place.admin1_name.

    Results land in place.georow_list / place.result_type.
    """
    target = place.admin1_name
    if len(target) == 0:
        return
    sdx = get_soundex(target)

    # Ordered from most exact to least exact: exact name, wildcard, soundex
    queries = [
        Query(where="name = ? AND country = ? AND f_code = ? ",
              args=(target, place.country_iso, 'ADM1'),
              result=Result.STRONG_MATCH),
        Query(where="name LIKE ? AND country = ? AND f_code = ?",
              args=(target, place.country_iso, 'ADM1'),
              result=Result.WILDCARD_MATCH),
        Query(where="sdx = ? AND country = ? AND f_code=?",
              args=(sdx, place.country_iso, 'ADM1'),
              result=Result.SOUNDEX_MATCH),
    ]
    place.georow_list, place.result_type = self.db.process_query_list(
        from_tbl='main.admin', query_list=queries)
def lookup_geoid(self, place: Loc) -> None:
    """Search for a place by its geonames GEOID (held in place.target).

    Tries the main geodata table first, then the admin table.  Since a GEOID
    is unique, a main-table hit is truncated to one row and marked
    STRONG_MATCH.  Each returned row gets a score appended, and tokens of the
    matched name are stripped from place.prefix.
    """
    result_place: Loc = Loc.Loc()

    query_list = [
        Query(where="geoid = ? ", args=(place.target, ),
              result=Result.STRONG_MATCH)
    ]
    place.georow_list, place.result_type = self.db.process_query_list(
        from_tbl='main.geodata', query_list=query_list)
    if len(place.georow_list) == 0:
        # Not in the main geodata table - fall back to the admin table
        place.georow_list, place.result_type = self.db.process_query_list(
            from_tbl='main.admin', query_list=query_list)
    else:
        # GEOID is unique - keep only the first row
        place.georow_list = place.georow_list[:1]
        place.result_type = GeoKeys.Result.STRONG_MATCH

    # Add search quality score to each entry
    for idx, rw in enumerate(place.georow_list):
        self.copy_georow_to_place(row=rw, place=result_place)
        update = list(rw)
        update.append(1)  # Extend list row so a score slot exists
        result_place.prefix = ''
        res_nm = result_place.format_full_nm(None)
        score = 0.0  # GEOID lookup is exact, so the score is always best (0)

        # Remove items in prefix that are in result.
        # BUGFIX: the tokens are place-name fragments used as a regex pattern;
        # escape them so names containing regex metacharacters
        # (e.g. '(', '.', '*') cannot corrupt the substitution or raise.
        tk_list = res_nm.split(",")
        for item in tk_list:
            place.prefix = re.sub(
                re.escape(item.strip(' ').lower()), '', place.prefix)

        update[GeoKeys.Entry.SCORE] = int(score * 100)
        place.georow_list[idx] = tuple(update)
def lookup_admin_dbid(self, place: Loc) -> None:
    """Look up an admin-table row by its database id (place.target).

    Results land in place.georow_list / place.result_type.
    """
    queries = [Query(where="id = ? ",
                     args=(place.target, ),
                     result=Result.STRONG_MATCH)]
    place.georow_list, place.result_type = self.db.process_query_list(
        from_tbl='main.admin', query_list=queries)
def get_admin2_id(self, place: Loc):
    """Look up the Admin2 (county) id for place.admin2_name and store it in place.admin2_id."""
    target = place.admin2_name
    if len(target) == 0:
        return

    county_pattern = self.create_county_wildcard(target)
    queries = []
    if len(place.admin1_id) > 0:
        # Admin1 is known - constrain every query by admin1_id
        queries.append(Query(
            where="name = ? AND country = ? AND admin1_id=? AND f_code=?",
            args=(target, place.country_iso, place.admin1_id, 'ADM2'),
            result=Result.STRONG_MATCH))
        queries.append(Query(
            where="name LIKE ? AND country = ? and admin1_id = ? AND f_code=?",
            args=(target, place.country_iso, place.admin1_id, 'ADM2'),
            result=Result.WILDCARD_MATCH))
        queries.append(Query(
            where="name LIKE ? AND country = ? and admin1_id = ? AND f_code=?",
            args=(county_pattern, place.country_iso, place.admin1_id, 'ADM2'),
            result=Result.WILDCARD_MATCH))
    else:
        # No Admin1 - search within the country alone
        queries.append(Query(
            where="name = ? AND country = ? AND f_code=?",
            args=(target, place.country_iso, 'ADM2'),
            result=Result.STRONG_MATCH))
        queries.append(Query(
            where="name LIKE ? AND country = ? AND f_code=?",
            args=(target, place.country_iso, 'ADM2'),
            result=Result.WILDCARD_MATCH))
        queries.append(Query(
            where="name LIKE ? AND country = ? AND f_code=?",
            args=(county_pattern, place.country_iso, 'ADM2'),
            result=Result.WILDCARD_MATCH))

    rows, _ = self.db.process_query_list(from_tbl='main.admin',
                                         query_list=queries)
    if len(rows) > 0:
        place.admin2_id = rows[0][Entry.ADM2]
def run_key_test(self, title: str, entry: str):
    """Parse entry, find its first geodata match, and return its CSV key id."""
    print(f'*****TEST: {title}')
    place = Loc.Loc()
    place.parse_place(entry, TestCSV.geodata.geo_files)
    self.geodata.find_first_match(place.original_entry, place)

    place.original_entry = place.format_full_nm(None)
    TestCSV.csv.set_CSV_place_type(place)
    place.id = TestCSV.csv.get_csv_key(place)
    TestCSV.logger.debug(f'type={place.place_type}')
    place.set_place_type_text()
    return place.id
def update_rowlist_prefix(self, place: Loc.Loc):
    """
    Set all the prefix values in the georow_list.

    For each result row, builds a prefix from the first (and possibly second)
    token of the user's original entry when those tokens do not already appear
    in the formatted result name, then writes the prefix into the row.

    :param place: place whose georow_list rows get their PREFIX field updated
    """
    temp_place = Loc.Loc()
    tokens = place.original_entry.split(',')
    for idx, rw in enumerate(place.georow_list):
        update = list(rw)
        # Put unused fields into prefix
        self.geo_files.geodb.copy_georow_to_place(rw, temp_place)
        temp_place.prefix = ''
        # Normalized full name of this result row - used to test whether an
        # input token is already represented in the result
        nm = GeoKeys.search_normalize(
            temp_place.format_full_nm(self.geo_files.output_replace_dct),
            place.country_iso)
        # self.logger.debug(f'NAME ={nm}')
        place.prefix = ''
        # Only the first two tokens of the input are candidates for the prefix
        for num, fld in enumerate(tokens[:2]):
            item = GeoKeys.search_normalize(fld, place.country_iso)
            add_item = False
            # self.logger.debug(f'item={item} ')
            if num == 0 and item not in nm:
                add_item = True
            if num == 1 and item not in nm and len(tokens) == 2:
                # We only add the second token if there are only 2 tokens
                add_item = True
            if '*' in item:
                # Don't add as prefix if item is a wildcard search
                add_item = False
            if add_item:
                if len(place.prefix) > 0:
                    place.prefix += ' '
                place.prefix += item.title()

        if len(place.prefix) > 0:
            place.prefix_commas = ', '
        update[GeoKeys.Entry.PREFIX] = place.prefix
        # self.logger.debug(f'PREFIX={place.prefix} ')
        place.georow_list[idx] = tuple(update)
def select_country(self, place: Loc):
    """Search the admin table for the country (ADM0) matching place.country_iso.

    Results land in place.georow_list / place.result_type.
    """
    target = place.country_iso
    if len(target) == 0:
        return
    sdx = get_soundex(target)

    # Exact ISO match first, then soundex of the ISO code as a fallback
    queries = [
        Query(where="country = ? AND f_code = ? ",
              args=(place.country_iso, 'ADM0'),
              result=Result.STRONG_MATCH),
        Query(where="sdx = ? AND f_code=?",
              args=(sdx, 'ADM0'),
              result=Result.SOUNDEX_MATCH),
    ]
    place.georow_list, place.result_type = self.db.process_query_list(
        from_tbl='main.admin', query_list=queries)
def load_handler(self):
    """Rebuild the display list: for each saved replacement entry, resolve its
    GEOID in the geo database and insert the formatted result into the tree."""
    self.clear_display_list(self.tree)
    place = Loc.Loc()
    for item in sorted(self.dict):
        # Each replacement is encoded as name@geoid[@prefix]
        replacement = self.dict[item]
        rep_token = replacement.split('@')
        if len(rep_token) < 2:
            self.logger.debug(f'blank item=[{item}] rep=[{replacement}]')
            continue

        # get lat long
        place.target = rep_token[GEOID_TOKEN]
        self.geodb.lookup_geoid(place=place)
        if len(place.georow_list) > 0:
            # Copy geo row to Place
            self.geodb.copy_georow_to_place(row=place.georow_list[0],
                                            place=place)
            if 'oston' in item:
                self.logger.debug(
                    f'{item} id={rep_token[GEOID_TOKEN]} state={place.admin1_id}'
                )
        elif len(place.target) == 0:
            # Empty GEOID marks an entry the user chose to delete
            place.clear()
            place.city1 = f'<DELETE>'
        else:
            # GEOID present but not found in the database
            place.clear()
            place.city1 = f'Database error for {replacement}'

        place.place_type = Loc.PlaceType.CITY
        # Get prefix if there was one
        if len(rep_token) > 2:
            place.prefix = rep_token[PREFIX_TOKEN]

        nm = place.format_full_nm(self.output_replace_dct)
        line = (f'[{place.prefix}]{place.prefix_commas}{nm}'
                if len(place.prefix) > 0 else f'{nm}')
        self.list_insert(self.tree, item, line)
def complete_csv(self):
    """Create enclosure records for every node in the admin tables, then write
    the whole table out as a CSV file (if a csv_path is configured)."""
    # Add location enclosures. Create if not there already. Then add as reference.
    self.logger.debug(
        '\n\n******** DONE - CREATE CSV ENCLOSURES *********')
    place = Loc.Loc()
    # There are separate dictionaries for each hierarchy (prefix, city, county, country).
    # We need to go through prefix table, then city, etc (e.g. reversed order)
    # Create Enclosure records
    for idx, tbl in reversed(list(enumerate(self.admin_table))):
        self.logger.debug(f'===TABLE {idx}===')
        for key in tbl:
            self.retrieve_csv_place(self.admin_table, self.geodata, place,
                                    key, idx)
            self.logger.debug(f'** CSV {key} {place.original_entry}')
            # Create enclosure for each node at this level
            self.create_enclosed_by(place)

    if self.csv_path is not None:
        # NOTE(review): opened without a context manager; the file is closed at
        # the end of this method, but an exception in between would leak it.
        self.csvfile = open(self.csv_path, "w", encoding='utf-8')
        self.logger.debug(f'CSV file {self.csv_path}')
        self.csvfile.write(
            'Place,Title,Name,Type,latitude,longitude,enclosed_by\n')

    # List CSV - output_row writes each record (to the CSV file when open)
    self.logger.debug('*** OUTPUT TABLE ***')
    for idx, tbl in enumerate(self.admin_table):
        for key in tbl:
            # TODO
            row = tbl[key]
            #self.logger.debug(f'IDX={idx} {key} : {row}')
            self.output_row(row)

    if self.csv_path is not None:
        self.csvfile.close()
def advanced_search(self, place: Loc):
    """ Advanced search - support parameters for ISO and Feature class """
    target = place.target
    if len(target) == 0:
        return
    pattern = self.create_wildcard(target)
    feature_pattern = self.create_wildcard(place.feature)
    self.logger.debug(
        f'Advanced Search. Targ=[{pattern}] feature=[{feature_pattern}]'
        f' iso=[{place.country_iso}] ')

    # Only filter on feature code when one was supplied
    if len(place.feature) > 0:
        queries = [
            Query(where="name LIKE ? AND country LIKE ? AND f_code LIKE ?",
                  args=(pattern, place.country_iso, feature_pattern),
                  result=Result.PARTIAL_MATCH)
        ]
    else:
        queries = [
            Query(where="name LIKE ? AND country LIKE ?",
                  args=(pattern, place.country_iso),
                  result=Result.PARTIAL_MATCH)
        ]

    # Search the main geodata table first, then append any admin-table hits
    place.georow_list, place.result_type = self.db.process_query_list(
        from_tbl='main.geodata', query_list=queries)
    admin_rows, place.result_type = self.db.process_query_list(
        from_tbl='main.admin', query_list=queries)
    place.georow_list.extend(admin_rows)
def setUp(self) -> None:
    """Reset the shared input/output Loc objects and advance to the next test case."""
    TestScoring.in_place = Loc.Loc()
    TestScoring.out_place = Loc.Loc()
    TestScoring.test_idx += 1
def lookup_place(self, place: Loc.Loc) -> []:
    """ Lookup a place in our geoname.org dictionary and update place with Geo_result with lat, long, District, etc
    The dictionary geo_result entry contains: Lat, Long, districtID (County or State or Province ID)
    There can be multiple entries if a city name isnt unique in a country

    Dispatches to the type-specific select_* routine, then appends a match
    quality score to every returned row and downgrades STRONG_MATCH results
    that have a leftover prefix or a poor best score.
    """
    result_place: Loc = Loc.Loc()
    self.start = time.time()
    place.result_type = Result.STRONG_MATCH
    #place.admin2_name, modified = GeoKeys.admin2_normalize(place.admin2_name, place.country_iso)

    # Backfill country name from the ISO code when only the code is known
    if place.country_iso != '' and place.country_name == '':
        place.country_name = self.get_country_name(place.country_iso)

    # Lookup Place based on Place Type
    if place.place_type == Loc.PlaceType.ADMIN1:
        self.select_admin1(place)
    elif place.place_type == Loc.PlaceType.ADMIN2:
        # Admin2 search needs the enclosing admin1 id resolved first
        if place.admin1_id == '':
            self.get_admin1_id(place=place)
        self.select_admin2(place)
        #if len(place.georow_list) == 0:
        #    # Try search with some text replacements
        #    place.admin2_name, modified = GeoKeys.admin2_normalize(place.admin2_name, place.country_iso)
        #    if modified:
        #        self.select_admin2(place)
    elif place.place_type == Loc.PlaceType.COUNTRY:
        self.select_country(place)
    elif place.place_type == Loc.PlaceType.ADVANCED_SEARCH:
        self.advanced_search(place)
    else:
        # Lookup as City
        if place.admin2_id == '':
            self.get_admin2_id(place=place)
        self.select_city(place)

    # nm = place.original_entry
    # self.logger.debug(f'Search results for {place.target} pref[{place.prefix}]')

    min_score = 9999
    # Add search quality score to each entry
    for idx, rw in enumerate(place.georow_list):
        self.copy_georow_to_place(row=rw, place=result_place)
        # A pending user prefix makes the result display reserve prefix space
        if len(place.prefix) > 0 and result_place.prefix == '':
            result_place.prefix = ' '
            result_place.prefix_commas = ','
        else:
            result_place.prefix = ''
        score = self.match.match_score(inp_place=place,
                                       res_place=result_place)
        if score < min_score:
            min_score = score

        # Convert row tuple to list and extend so we can assign score
        update = list(rw)
        update.append(1)
        update[GeoKeys.Entry.SCORE] = score
        place.georow_list[idx] = tuple(update)  # Convert back from list to tuple

        # Remove items in prefix that are in result
        # NOTE(review): `item` is used as an unescaped regex pattern; names
        # containing regex metacharacters could misbehave - consider re.escape.
        tk_list = result_place.original_entry.split(",")
        if place.place_type != Loc.PlaceType.ADVANCED_SEARCH:
            for item in tk_list:
                place.prefix = re.sub(
                    item.strip(' ').lower(), '', place.prefix)

    # A leftover prefix or a weak best score downgrades a STRONG match
    if place.result_type == Result.STRONG_MATCH and len(place.prefix) > 0:
        place.result_type = Result.PARTIAL_MATCH
    if place.result_type == Result.STRONG_MATCH and min_score > 10:
        place.result_type = Result.PARTIAL_MATCH
def copy_georow_to_place(self, row, place: Loc):
    """Copy data from a DB georow into the given Place.

    Fills in country ISO/name, lat/lon, feature and geoid, sets the
    admin1/admin2/city fields appropriate for the row's feature code, and
    resolves the admin1/admin2 display names.
    """
    # self.logger.debug(row)
    place.admin1_id = ''
    place.admin2_id = ''
    place.city1 = ''
    place.country_iso = str(row[Entry.ISO])
    place.country_name = str(self.get_country_name(row[Entry.ISO]))
    place.lat = row[Entry.LAT]
    place.lon = row[Entry.LON]
    place.feature = str(row[Entry.FEAT])
    place.geoid = str(row[Entry.ID])

    # NOTE(review): these assignments set self.place_type, not
    # place.place_type - looks unintended but is preserved as-is; confirm
    # whether any caller reads self.place_type before changing it.
    if place.feature == 'ADM0':
        self.place_type = Loc.PlaceType.COUNTRY
    elif place.feature == 'ADM1':
        place.admin1_id = row[Entry.ADM1]
        self.place_type = Loc.PlaceType.ADMIN1
    elif place.feature == 'ADM2':
        place.admin1_id = row[Entry.ADM1]
        place.admin2_id = row[Entry.ADM2]
        self.place_type = Loc.PlaceType.ADMIN2
    else:
        place.admin1_id = row[Entry.ADM1]
        place.admin2_id = row[Entry.ADM2]
        place.city1 = row[Entry.NAME]
        self.place_type = Loc.PlaceType.CITY

    # BUGFIX: check for None BEFORE converting with str().  The original
    # converted first, so a None name became the string 'None' and the
    # subsequent `is None` checks were dead code.
    admin1_name = self.get_admin1_name(place)
    place.admin1_name = '' if admin1_name is None else str(admin1_name)
    admin2_name = self.get_admin2_name(place)
    place.admin2_name = '' if admin2_name is None else str(admin2_name)
    place.city1 = '' if place.city1 is None else str(place.city1)
def __init__(self, directory: str, progress_bar):
    """Load the cached configuration dictionaries (features, countries,
    languages, output replacements) from the cache sub-directory and set up
    the AlternateNames reader.

    :param directory: base data directory; cache files live in its cache sub-directory
    :param progress_bar: progress bar widget passed through to AlternateNames
    """
    self.logger = logging.getLogger(__name__)
    self.geodb = None
    self.required_db_version = 2
    self.db_upgrade_text = 'Adding support for non-English output'
    self.directory: str = directory
    self.progress_bar = progress_bar
    self.line_num = 0
    self.cache_changed: bool = False
    sub_dir = GeoKeys.get_cache_directory(self.directory)
    self.country = None

    # Read in dictionary listing Geoname features we should include
    self.feature_code_list_cd = CachedDictionary.CachedDictionary(
        sub_dir, "feature_list.pkl")
    self.feature_code_list_cd.read()
    self.feature_code_list_dct: Dict[str, str] = self.feature_code_list_cd.dict
    if len(self.feature_code_list_dct) < 3:
        # Cache is empty (or nearly so) - seed it with the default feature set
        self.logger.warning('Feature list is empty. Setting defaults')
        self.feature_code_list_dct.clear()
        feature_list = UtilFeatureFrame.default
        for feat in feature_list:
            self.feature_code_list_dct[feat] = ''
        self.feature_code_list_cd.write()

    # Read in dictionary listing countries (ISO2) we should include
    self.supported_countries_cd = CachedDictionary.CachedDictionary(
        sub_dir, "country_list.pkl")
    self.supported_countries_cd.read()
    self.supported_countries_dct: Dict[
        str, str] = self.supported_countries_cd.dict

    # Read in dictionary listing languages (ISO2) we should include
    self.languages_list_cd = CachedDictionary.CachedDictionary(
        sub_dir, "languages_list.pkl")
    self.languages_list_cd.read()
    self.languages_list_dct: Dict[str, str] = self.languages_list_cd.dict
    self.lang_list = []
    for item in self.languages_list_dct:
        self.lang_list.append(item)

    # Read in dictionary listing output text replacements
    self.output_replace_cd = CachedDictionary.CachedDictionary(
        sub_dir, "output_list.pkl")
    self.output_replace_cd.read()
    self.output_replace_dct: Dict[str, str] = self.output_replace_cd.dict
    self.output_replace_list = []
    for item in self.output_replace_dct:
        self.output_replace_list.append(item)
        self.logger.debug(f'Output replace [{item}]')

    self.entry_place = Loc.Loc()

    # Support for Geonames AlternateNames file.  Adds alternate names for entries
    self.alternate_names = AlternateNames.AlternateNames(
        directory_name=self.directory, geo_files=self,
        progress_bar=self.progress_bar, filename='alternateNamesV2.txt',
        lang_list=self.lang_list)
def setUp(self) -> None:
    """Create a fresh Loc instance for each test."""
    self.place = Loc.Loc()
def setUpClass(cls):
    """One-time fixture: configure logging, load the geoname test database from
    ~/geoname_test, then parse each test-data row and create its CSV node.

    :raises ValueError: when the geodata support files or geoname files are missing
    """
    TestCSV.logger = logging.getLogger(__name__)
    fmt = "%(levelname)s %(name)s.%(funcName)s %(lineno)d: %(message)s"
    logging.basicConfig(level=logging.DEBUG, format=fmt)

    # Load test data
    directory = os.path.join(str(Path.home()), "geoname_test")
    csv_path = os.path.join(directory, "test")
    TestCSV.geodata = Geodata.Geodata(directory_name=directory,
                                      progress_bar=None)
    error: bool = TestCSV.geodata.read()
    if error:
        TestCSV.logger.error("Missing geodata support Files.")
        raise ValueError('Cannot open database')

    # Read in Geoname Gazeteer file - city names, lat/long, etc.
    error = TestCSV.geodata.read_geonames()
    if error:
        TestCSV.logger.info("Missing geoname Files.")
        TestCSV.logger.info(
            'Requires ca.txt, gb.txt, de.txt from geonames.org in folder username/geoname_test'
        )
        raise ValueError('Cannot open database')

    TestCSV.csv = GrampsCsv.GrampsCsv(in_path=csv_path,
                                      geodata=TestCSV.geodata)
    TestCSV.ancestry = GrampsXml.GrampsXml(
        in_path=csv_path, out_suffix='', cache_d=None, progress=None,
        geodata=TestCSV.geodata)  # Routines

    # Set up CSV Data.  Each row is (name, feature, place id, iso); all but
    # one case are currently disabled.
    csv_data = [
        #('Portugal', 'ADM0', 'P0001', 'po'),
        #('Scotland,United Kingdom', 'ADM1','P0002','gb'),
        #('Kent,England,United Kingdom', 'ADM2','P0003','gb'),
        #('Canterbury,Kent,England,United Kingdom', "PPL", 'P0004', 'gb'),
        #('St Eustace,Canterbury,Kent,England,United Kingdom', "PPL", 'P0005', 'gb'),
        #('Dover, ,England,United Kingdom', "PPL", 'P0006', 'gb'),
        ('12 Privet Drive, Dover, ,England,United Kingdom', "PPL", 'P0006', 'gb'),
        #('Edinburgh, ,Scotland,United Kingdom', "PPL", 'P0007', 'gb'),
        #("St James's Palace, ,England,United Kingdom", "PPL", 'P0008', 'gb'),
    ]
    place = Loc.Loc()
    for row in csv_data:
        place.clear()
        place.original_entry = row[RowEntry.NAME]
        place.feature = row[RowEntry.FEAT]
        place.parse_place(place_name=place.original_entry,
                          geo_files=TestCSV.geodata.geo_files)
        # Lookup record
        TestCSV.geodata.find_first_match(place.original_entry, place)
        place.id = row[RowEntry.PLACE_ID]
        TestCSV.csv.set_CSV_place_type(place)
        #TestCSV.geodata.set_place_type_text(place)
        #place.name = TestCSV.ancestry.get_csv_name(place).title()
        TestCSV.csv.create_csv_node(place)

    # Finalize: build enclosures and write the CSV output
    TestCSV.csv.complete_csv()
def select_city(self, place: Loc):
    """
    Search for entry - try the most exact match first, then less exact matches

    Builds a query list whose shape depends on what is known (country,
    admin1), runs it against the main geodata table, and stores the results
    in place.georow_list / place.result_type.
    """
    lookup_target = place.target
    if len(lookup_target) == 0:
        return
    #pattern = self.create_wildcard(lookup_target)
    #quick_pattern = self.create_quick_wildcard(lookup_target)
    sdx = get_soundex(lookup_target)
    #self.logger.debug(f'CITY lkp targ=[{lookup_target}] adm1 id=[{place.admin1_id}]'
    #                  f' adm2 id=[{place.admin2_id}] iso=[{place.country_iso}] patt =[{pattern}] sdx={sdx} pref={place.prefix}')

    query_list = []

    if len(place.country_iso) == 0:
        # No country present - try lookup by name.
        query_list.append(
            Query(where="name = ?",
                  args=(lookup_target, ),
                  result=Result.PARTIAL_MATCH))
        # lookup by wildcard name
        # NOTE(review): no wildcard characters are added to the target here, so
        # LIKE only matches wildcards the user typed themselves - confirm intended.
        query_list.append(
            Query(where="name LIKE ?",
                  args=(lookup_target, ),  # quick_pattern,),
                  result=Result.WILDCARD_MATCH))
        # lookup by soundex
        query_list.append(
            Query(where="sdx = ?",
                  args=(sdx, ),
                  result=Result.SOUNDEX_MATCH))
        place.georow_list, place.result_type = self.db.process_query_list(
            from_tbl='main.geodata', query_list=query_list)
        # self.logger.debug(place.georow_list)
        return

    # Build query list - try each query in order until a match is found
    # Start with the most exact match depending on the data provided.
    if len(place.admin1_name) > 0:
        # lookup by name, ADMIN1, country
        query_list.append(
            Query(where="name = ? AND country = ? AND admin1_id = ?",
                  args=(lookup_target, place.country_iso, place.admin1_id),
                  result=Result.STRONG_MATCH))
        # lookup by wildcard name, ADMIN1, country
        query_list.append(
            Query(where="name LIKE ? AND country = ? AND admin1_id = ?",
                  args=(lookup_target, place.country_iso, place.admin1_id),
                  result=Result.WILDCARD_MATCH))
    else:
        # lookup by wildcard name, country
        query_list.append(
            Query(where="name LIKE ? AND country = ?",
                  args=(lookup_target, place.country_iso),
                  result=Result.WILDCARD_MATCH))

    # Lookup by name, country
    query_list.append(
        Query(where="name = ? AND country = ?",
              args=(lookup_target, place.country_iso),
              result=Result.PARTIAL_MATCH))

    if len(place.admin1_name) > 0:
        # lookup by Soundex name, country and admin1
        query_list.append(
            Query(where="sdx = ? AND admin1_id = ? AND country = ?",
                  args=(sdx, place.admin1_id, place.country_iso),
                  result=Result.SOUNDEX_MATCH))
    else:
        # lookup by Soundex name, country
        query_list.append(
            Query(where="sdx = ? AND country = ?",
                  args=(sdx, place.country_iso),
                  result=Result.SOUNDEX_MATCH))

    # Try each query in list
    place.georow_list, place.result_type = self.db.process_query_list(
        from_tbl='main.geodata', query_list=query_list)
def select_admin2(self, place: Loc):
    """Search for Admin2 entry.

    Runs progressively less exact admin-table queries; if nothing matches,
    retries the admin2 name as a city, keeping a mismatched admin1 name as
    a display prefix.
    """
    lookup_target = place.admin2_name
    if len(lookup_target) == 0:
        return
    place.target = lookup_target
    # sdx = get_soundex(lookup_target)

    # Try Admin query until we find a match - each query gets less exact
    query_list = [
        Query(
            where="name = ? AND country = ? AND admin1_id = ? AND f_code=?",
            args=(lookup_target, place.country_iso, place.admin1_id, 'ADM2'),
            result=Result.STRONG_MATCH),
        Query(where="name = ? AND country = ? AND f_code=?",
              args=(lookup_target, place.country_iso, 'ADM2'),
              result=Result.PARTIAL_MATCH),
        Query(where="name LIKE ? AND country = ? AND f_code=?",
              args=(self.create_wildcard(lookup_target), place.country_iso,
                    'ADM2'),
              result=Result.PARTIAL_MATCH),
        Query(where="name = ? AND f_code=?",
              args=(lookup_target, 'ADM2'),
              result=Result.PARTIAL_MATCH),
        Query(where="name LIKE ? AND country = ? AND f_code=?",
              args=(lookup_target, place.country_iso, 'ADM2'),
              result=Result.WILDCARD_MATCH)
    ]
    # self.logger.debug(f'Admin2 lookup=[{lookup_target}] country=[{place.country_iso}]')
    place.georow_list, place.result_type = self.db.process_query_list(
        from_tbl='main.admin', query_list=query_list)

    if place.result_type == GeoKeys.Result.WILDCARD_MATCH:
        # Found as Admin2 without shire - drop 'shire' from the original entry
        place.original_entry = re.sub('shire', '', place.original_entry)

    if len(place.georow_list) == 0:
        # Try city rather than County match.
        save_admin2 = place.admin2_name
        place.city1 = place.admin2_name
        place.admin2_name = ''
        # self.logger.debug(f'Try admin2 as city: [{place.target}]')
        self.select_city(place)

        if len(place.georow_list) == 0:
            # not found.  restore admin
            place.admin2_name = save_admin2
            place.city1 = ''
        else:
            # Found match as a City
            place.place_type = Loc.PlaceType.CITY
            match_adm1 = self.get_admin1_name_direct(
                lookup_target=place.georow_list[0][Entry.ADM1],
                iso=place.country_iso)
            # self.logger.debug(f'pl_iso [{place.country_iso}] pl_adm1 {place.admin1_name} match_adm1=[{match_adm1}] ')
            if place.admin1_name != match_adm1:
                # Admin1 of the matched city differs from the parsed admin1 -
                # keep the parsed admin1 as a display prefix instead
                place.prefix = place.admin1_name.title()
                place.admin1_name = ''
    return