예제 #1
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def get_admin1_id(self, place: Loc):
        """
        Find the Admin1 ID for place.admin1_name and store it in place.admin1_id.

        Three queries against 'main.admin' are handed to process_query_list,
        ordered from most to least exact.  The first returned row supplies
        place.admin1_id and, if place.country_iso is still blank, the country ISO.
        Nothing is changed when admin1_name is empty or no row comes back.

        :param place: Loc providing admin1_name and country_iso; updated in place
        """
        admin1_name = place.admin1_name
        if len(admin1_name) == 0:
            return

        iso = place.country_iso

        # Exact name within the country
        exact_in_country = Query(where="name = ? AND country = ? AND f_code = ? ",
                                 args=(admin1_name, iso, 'ADM1'),
                                 result=Result.STRONG_MATCH)
        # LIKE comparison within the country
        like_in_country = Query(where="name LIKE ? AND country = ?  AND f_code = ?",
                                args=(admin1_name, iso, 'ADM1'),
                                result=Result.WILDCARD_MATCH)
        # Exact name in any country
        exact_any_country = Query(where="name = ?  AND f_code = ?",
                                  args=(admin1_name, 'ADM1'),
                                  result=Result.SOUNDEX_MATCH)

        rows, _ = self.db.process_query_list(
            from_tbl='main.admin',
            query_list=[exact_in_country, like_in_country, exact_any_country])

        if len(rows) == 0:
            return

        best = rows[0]
        place.admin1_id = best[Entry.ADM1]
        # Fill in Country ISO when the caller did not supply one
        if place.country_iso == '':
            place.country_iso = best[Entry.ISO]
예제 #2
0
    def run_test3() -> int:
        """
        Score the TestScoring.test_values row selected by TestScoring.test_idx.

        The row is read as (input place text, result place text, result feature).
        Both texts are parsed into Loc objects, a blank country name is filled in
        from the ISO code, and the pair is passed to TestScoring.scoring.match_score.
        The score and both place names are printed.

        :return: the score returned by match_score
        """
        row = TestScoring.test_values[TestScoring.test_idx]
        inp, res, feat = row[0], row[1], row[2]
        geo_files = TestScoring.geodata.geo_files

        def parse(text):
            # Build a Loc from text and make sure it has a country name when it has an ISO code
            loc = Loc.Loc()
            loc.original_entry = text
            loc.parse_place(place_name=text, geo_files=geo_files)
            if loc.country_name == '' and loc.country_iso != '':
                loc.country_name = geo_files.geodb.get_country_name(loc.country_iso)
            return loc

        in_place = parse(inp)
        res_place = parse(res)
        res_place.feature = feat

        score = TestScoring.scoring.match_score(in_place, res_place)
        print(
            f'{score:.1f} [{in_place.original_entry.title()}] [{res_place.get_five_part_title()}]'
        )
        return score
예제 #3
0
    def country_is_valid(self, place: Loc) -> bool:
        """
        Check that place has a country and that the country is in the supported list.

        On failure place.result_type is set to NO_COUNTRY (blank ISO) or
        NOT_SUPPORTED (ISO not in self.geo_files.supported_countries_dct).  In the
        NOT_SUPPORTED case place.place_type and place.target are also set.

        :param place: Loc to validate; updated in place on failure
        :return: True when the country ISO is present and supported
        """
        iso = place.country_iso

        if iso == '':
            place.result_type = GeoKeys.Result.NO_COUNTRY
            return False

        if iso in self.geo_files.supported_countries_dct:
            return True

        # Country is present but is not one we have data for
        self.logger.debug(f'[{iso}] not supported')
        place.result_type = GeoKeys.Result.NOT_SUPPORTED
        place.place_type = Loc.PlaceType.COUNTRY
        place.target = place.country_name
        return False
예제 #4
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def get_admin2_name(self, place: Loc) -> str:
        """
        Look up the Admin2 name for place.admin2_id in 'main.admin'.

        The first query also requires the Admin1 ID to match; the second only
        requires the country.  When a row is returned its name is stored in
        place.admin2_name.

        :param place: Loc providing admin2_id, admin1_id and country_iso
        :return: the Admin2 name, or '' when admin2_id is empty or no row is returned
        """
        admin2_id = place.admin2_id
        if len(admin2_id) == 0:
            return ''

        iso = place.country_iso
        candidates = [
            Query(where="admin2_id = ? AND country = ? AND admin1_id = ?",
                  args=(admin2_id, iso, place.admin1_id),
                  result=Result.STRONG_MATCH),
            Query(where="admin2_id = ? AND country = ?",
                  args=(admin2_id, iso),
                  result=Result.PARTIAL_MATCH),
        ]

        rows, _ = self.db.process_query_list(from_tbl='main.admin',
                                             query_list=candidates)
        if len(rows) == 0:
            return ''

        place.admin2_name = rows[0][Entry.NAME]
        return place.admin2_name
예제 #5
0
    def load_handler(self):
        """
        User pressed LOAD button to load an Ancestry file. Switch app display to the Review Widgets.

        Steps, in order:
          * clear the original-entry widget and replace the initialization widgets with the review widgets
          * call self.load_data()
          * read the "gedcom_path" config setting and create a Gedcom or GrampsXml handler for it
          * open the '<path>.output.txt' and '<path>.input.txt' diagnostic files
          * show the file name in the title and call self.handle_place_entry()

        If no path is configured, or the path contains neither '.ged' nor '.gramps', a warning is
        shown and the method returns without creating a handler or opening the diagnostic files.
        """
        self.w.original_entry.set_text("")
        self.w.remove_initialization_widgets()  # Remove old widgets
        self.w.create_review_widgets()  # Switch display from Initial widgets to main review widgets

        self.load_data()

        ged_path = self.cfg.get("gedcom_path")  # Get saved config setting for file

        # Load appropriate handler based on file type (substring test on the path)
        if ged_path is not None and '.ged' in ged_path:
            self.out_suffix = "import.ged"
            self.ancestry_file_handler = Gedcom.Gedcom(
                in_path=ged_path,
                out_suffix=temp_suffix,
                cache_d=self.cache_dir,
                progress=None,
                geodata=self.geodata)  # Routines to open and parse GEDCOM file
        elif ged_path is not None and '.gramps' in ged_path:
            self.out_suffix = "import.gramps"
            self.ancestry_file_handler = GrampsXml.GrampsXml(
                in_path=ged_path,
                out_suffix=temp_suffix,
                cache_d=self.cache_dir,
                progress=None,
                geodata=self.geodata)  # Routines to open and parse Gramps file
        else:
            # Missing path or unrecognised file type: nothing below can work without a handler
            self.out_suffix = 'unk.new.ged'
            # NOTE(review): the text is passed as the first positional argument; if this is
            # tkinter.messagebox that slot is the dialog title rather than the message - confirm.
            messagebox.showwarning(
                f'UNKNOWN File type. Not .gramps and not .ged. \n\n{ged_path}')
            return

        # Diagnostic files stay open on self for use by later processing
        self.out_diag_file = open(ged_path + '.output.txt', 'w')
        self.in_diag_file = open(ged_path + '.input.txt', 'w')

        if self.ancestry_file_handler.error:
            TKHelper.fatal_error(f"File {ged_path} not found.")

        self.w.root.update()

        self.place: Loc.Loc = Loc.Loc()  # Create an object to store info for the current Place

        # Add filename to Title
        path_parts = os.path.split(ged_path)  # Extract filename from full path
        self.w.title.set_text(f'GEO FINDER - {path_parts[1]}')

        # Read file, find each place entry and handle it.
        self.w.user_entry.set_text("Scanning to previous position...")
        self.handle_place_entry()
예제 #6
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def get_country_iso(self, place: Loc) -> str:
        """
        Return the ISO code for place.country_name.

        The name is first passed through GeoKeys.country_normalize and then looked
        up in 'main.admin' as an ADM0 entry.  When exactly one row is returned,
        place.country_name is replaced by that row's name.

        :param place: Loc providing country_name; country_name may be updated
        :return: ISO code from the first returned row, or '' if the normalized
                 name is empty or no row is returned
        """
        country, _ = GeoKeys.country_normalize(place.country_name)
        if len(country) == 0:
            return ''

        # Only the exact-name query is active; the wildcard and soundex
        # fallbacks were disabled in the original code.
        exact_name = Query(where="name = ? AND f_code = ? ",
                           args=(country, 'ADM0'),
                           result=Result.STRONG_MATCH)

        rows, _ = self.db.process_query_list(from_tbl='main.admin',
                                             query_list=[exact_name])
        if len(rows) == 0:
            return ''

        first = rows[0]
        iso = first[Entry.ISO]
        if len(rows) == 1:
            # Unambiguous hit - adopt the database spelling of the country name
            place.country_name = first[Entry.NAME]
        return iso
예제 #7
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def select_admin1(self, place: Loc):
        """
        Search 'main.admin' for ADM1 rows matching place.admin1_name.

        Queries are listed from most to least exact: exact name, LIKE on the name,
        then soundex, all restricted to place.country_iso.  The rows and result
        code returned by process_query_list are stored in place.georow_list and
        place.result_type.  Nothing is changed when admin1_name is empty.

        :param place: Loc providing admin1_name and country_iso; updated in place
        """
        admin1_name = place.admin1_name
        if len(admin1_name) == 0:
            return

        iso = place.country_iso
        # (where clause, lookup key, result code) for each attempt
        attempts = (
            ("name = ? AND country = ? AND f_code = ? ",
             admin1_name, Result.STRONG_MATCH),
            ("name LIKE ? AND country = ?  AND f_code = ?",
             admin1_name, Result.WILDCARD_MATCH),
            ("sdx = ? AND country = ? AND f_code=?",
             get_soundex(admin1_name), Result.SOUNDEX_MATCH),
        )
        query_list = [
            Query(where=clause, args=(key, iso, 'ADM1'), result=code)
            for clause, key, code in attempts
        ]

        place.georow_list, place.result_type = self.db.process_query_list(
            from_tbl='main.admin', query_list=query_list)
예제 #8
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def lookup_geoid(self, place: Loc) -> None:
        """
        Search for the GEOID held in place.target.

        'main.geodata' is searched first.  If it returns rows, only the first is kept
        and the result is marked STRONG_MATCH; otherwise 'main.admin' is searched with
        the same query.  Each row kept in place.georow_list is extended with a score
        of 0, and any text in place.prefix that also appears in the row's formatted
        name is removed.

        :param place: Loc providing target and prefix; georow_list, result_type and
                      prefix are updated in place
        """
        result_place = Loc.Loc()

        query_list = [
            Query(where="geoid = ? ",
                  args=(place.target, ),
                  result=Result.STRONG_MATCH)
        ]
        place.georow_list, place.result_type = self.db.process_query_list(
            from_tbl='main.geodata', query_list=query_list)
        if len(place.georow_list) == 0:
            # Not in the main table - try the admin table
            place.georow_list, place.result_type = self.db.process_query_list(
                from_tbl='main.admin', query_list=query_list)
        else:
            place.georow_list = place.georow_list[:1]
            place.result_type = GeoKeys.Result.STRONG_MATCH

        # Add search quality score to each entry
        for idx, rw in enumerate(place.georow_list):
            self.copy_georow_to_place(row=rw, place=result_place)
            result_place.prefix = ''
            res_nm = result_place.format_full_nm(None)

            # Remove items in prefix that are in result.  Plain text replacement is
            # used so that characters such as '.', '(' or '*' in a place name are
            # taken literally instead of being interpreted as a regular expression.
            for item in res_nm.split(","):
                place.prefix = place.prefix.replace(item.strip(' ').lower(), '')

            update = list(rw)
            update.append(1)  # Extend list row so the score slot exists
            update[GeoKeys.Entry.SCORE] = 0  # A GEOID lookup always scores 0
            place.georow_list[idx] = tuple(update)
예제 #9
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
 def lookup_admin_dbid(self, place: Loc) -> None:
     """
     Search 'main.admin' for the row whose id equals place.target.

     The rows and result code returned by process_query_list are stored in
     place.georow_list and place.result_type.

     :param place: Loc providing target; updated in place
     """
     by_id = Query(where="id = ? ",
                   args=(place.target, ),
                   result=Result.STRONG_MATCH)
     rows, result_type = self.db.process_query_list(from_tbl='main.admin',
                                                    query_list=[by_id])
     place.georow_list = rows
     place.result_type = result_type
예제 #10
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def get_admin2_id(self, place: Loc):
        """
        Find the Admin2 ID for place.admin2_name and store it in place.admin2_id.

        Three ADM2 queries against 'main.admin' are built, from most to least exact:
        exact name, LIKE on the name, and LIKE on the pattern produced by
        create_county_wildcard.  When place.admin1_id is known every query is also
        restricted to that Admin1 ID.  Nothing is changed when admin2_name is empty
        or no row is returned.

        :param place: Loc providing admin2_name, admin1_id and country_iso
        """
        admin2_name = place.admin2_name
        if len(admin2_name) == 0:
            return

        iso = place.country_iso

        if len(place.admin1_id) > 0:
            # Admin1 is known - restrict every query to it
            admin1_id = place.admin1_id
            query_list = [
                Query(where="name = ? AND country = ? AND admin1_id=? AND f_code=?",
                      args=(admin2_name, iso, admin1_id, 'ADM2'),
                      result=Result.STRONG_MATCH),
                Query(where="name LIKE ? AND country = ? and admin1_id = ? AND f_code=?",
                      args=(admin2_name, iso, admin1_id, 'ADM2'),
                      result=Result.WILDCARD_MATCH),
                Query(where="name LIKE ? AND country = ? and admin1_id = ? AND f_code=?",
                      args=(self.create_county_wildcard(admin2_name), iso,
                            admin1_id, 'ADM2'),
                      result=Result.WILDCARD_MATCH),
            ]
        else:
            query_list = [
                Query(where="name = ? AND country = ? AND f_code=?",
                      args=(admin2_name, iso, 'ADM2'),
                      result=Result.STRONG_MATCH),
                Query(where="name LIKE ? AND country = ? AND f_code=?",
                      args=(admin2_name, iso, 'ADM2'),
                      result=Result.WILDCARD_MATCH),
                Query(where="name LIKE ? AND country = ? AND f_code=?",
                      args=(self.create_county_wildcard(admin2_name), iso,
                            'ADM2'),
                      result=Result.WILDCARD_MATCH),
            ]

        rows, _ = self.db.process_query_list(from_tbl='main.admin',
                                             query_list=query_list)
        if len(rows) > 0:
            place.admin2_id = rows[0][Entry.ADM2]
예제 #11
0
    def run_key_test(self, title: str, entry: str):
        """
        Parse entry, look it up, and return the CSV key generated for it.

        :param title: label printed before the test runs
        :param entry: place text to parse and look up
        :return: the value stored in place.id by TestCSV.csv.get_csv_key
        """
        print(f"*****TEST: {title}")

        loc = Loc.Loc()
        loc.parse_place(entry, TestCSV.geodata.geo_files)
        self.geodata.find_first_match(loc.original_entry, loc)

        # Replace the raw entry with the formatted name of the match
        loc.original_entry = loc.format_full_nm(None)
        TestCSV.csv.set_CSV_place_type(loc)
        loc.id = TestCSV.csv.get_csv_key(loc)

        TestCSV.logger.debug(f'type={loc.place_type}')

        loc.set_place_type_text()
        return loc.id
예제 #12
0
    def update_rowlist_prefix(self, place: Loc.Loc):
        """
        Set the PREFIX value of every row in place.georow_list.

        For each row, the first two comma-separated tokens of place.original_entry
        are compared with the row's normalized full name.  A token becomes part of
        the prefix when it is not contained in that name and is not a wildcard
        (contains '*').  The second token is only considered when the entry has
        exactly two tokens.  place.prefix and place.prefix_commas are left holding
        the values computed for the last row.

        :param place: Loc providing original_entry, country_iso and georow_list
        """
        scratch = Loc.Loc()
        tokens = place.original_entry.split(',')
        two_part_entry = len(tokens) == 2
        geodb = self.geo_files.geodb

        for idx, row in enumerate(place.georow_list):
            # Normalized full name of this row, built without any prefix
            geodb.copy_georow_to_place(row, scratch)
            scratch.prefix = ''
            row_name = GeoKeys.search_normalize(
                scratch.format_full_nm(self.geo_files.output_replace_dct),
                place.country_iso)

            # Collect the leading tokens that the row name does not account for
            unused = []
            for position, field in enumerate(tokens[:2]):
                item = GeoKeys.search_normalize(field, place.country_iso)
                if '*' in item or item in row_name:
                    # Wildcard searches and already-matched text never become prefix
                    continue
                if position == 0 or two_part_entry:
                    unused.append(item.title())

            place.prefix = ' '.join(unused)
            if len(place.prefix) > 0:
                place.prefix_commas = ', '

            updated = list(row)
            updated[GeoKeys.Entry.PREFIX] = place.prefix
            place.georow_list[idx] = tuple(updated)
예제 #13
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def select_country(self, place: Loc):
        """
        Search 'main.admin' for the ADM0 (country) rows matching place.country_iso.

        The first query matches the ISO code in the country column; the second
        matches the soundex of the ISO code.  The rows and result code returned by
        process_query_list are stored in place.georow_list and place.result_type.
        Nothing is changed when country_iso is empty.

        :param place: Loc providing country_iso; updated in place
        """
        iso = place.country_iso
        if len(iso) == 0:
            return

        soundex_key = get_soundex(iso)

        by_iso = Query(where="country = ? AND f_code = ? ",
                       args=(iso, 'ADM0'),
                       result=Result.STRONG_MATCH)
        by_soundex = Query(where="sdx = ?  AND f_code=?",
                           args=(soundex_key, 'ADM0'),
                           result=Result.SOUNDEX_MATCH)

        place.georow_list, place.result_type = self.db.process_query_list(
            from_tbl='main.admin', query_list=[by_iso, by_soundex])
예제 #14
0
    def load_handler(self):
        """
        Load the replacement dictionary into the display list.

        Each value in self.dict is split on '@'.  Values with fewer than two parts
        are logged and skipped.  Otherwise the GEOID part is looked up, the first
        matching row is copied into a Loc, and the formatted name (preceded by the
        prefix in brackets, when there is one) is inserted into self.tree under
        the dictionary key.  Entries that cannot be found are shown as '<DELETE>'
        (blank GEOID) or as a database error.
        """
        self.clear_display_list(self.tree)
        place = Loc.Loc()

        for item in sorted(self.dict):
            replacement = self.dict[item]
            rep_token = replacement.split('@')
            if len(rep_token) < 2:
                self.logger.debug(f'blank item=[{item}] rep=[{replacement}]')
                continue

            geoid = rep_token[GEOID_TOKEN]
            place.target = geoid
            self.geodb.lookup_geoid(place=place)

            if len(place.georow_list) == 0:
                # Nothing found: decide which message to show before clear() is called
                target_was_blank = len(place.target) == 0
                place.clear()
                if target_was_blank:
                    place.city1 = '<DELETE>'
                else:
                    place.city1 = f'Database error for {replacement}'
                place.place_type = Loc.PlaceType.CITY
            else:
                # Copy geo row to Place
                self.geodb.copy_georow_to_place(row=place.georow_list[0],
                                                place=place)
                if 'oston' in item:
                    self.logger.debug(
                        f'{item} id={geoid} state={place.admin1_id}'
                    )

            # Get prefix if there was one
            if len(rep_token) > 2:
                place.prefix = rep_token[PREFIX_TOKEN]

            nm = place.format_full_nm(self.output_replace_dct)
            has_prefix = len(place.prefix) > 0
            line = f'[{place.prefix}]{place.prefix_commas}{nm}' if has_prefix else f'{nm}'

            self.list_insert(self.tree, item, line)
예제 #15
0
    def complete_csv(self):
        """
        Create the enclosure records for every CSV node and write out all rows.

        self.admin_table holds one table per hierarchy level.  The tables are walked
        in reverse order and create_enclosed_by is called for every key.  If
        self.csv_path is set, the CSV file is opened and the header line written.
        Every row of every table is then passed to self.output_row.  The file is
        closed even if writing a row raises.
        """
        # Add location enclosures.  Create if not there already.  Then add as reference.
        self.logger.debug(
            '\n\n******** DONE - CREATE CSV ENCLOSURES *********')
        place = Loc.Loc()

        # There are separate dictionaries for each hierarchy (prefix, city, county, country).
        # We need to go through prefix table, then city, etc (e.g. reversed order)
        # Create Enclosure records
        for idx, tbl in reversed(list(enumerate(self.admin_table))):
            self.logger.debug(f'===TABLE {idx}===')
            for key in tbl:
                self.retrieve_csv_place(self.admin_table, self.geodata, place,
                                        key, idx)
                self.logger.debug(f'** CSV {key} {place.original_entry}')

                # Create enclosure for each node at this level
                self.create_enclosed_by(place)

        csv_opened = self.csv_path is not None
        if csv_opened:
            # Kept on self (not a local 'with' target) so other methods can reach it
            self.csvfile = open(self.csv_path, "w", encoding='utf-8')

        try:
            if csv_opened:
                self.logger.debug(f'CSV file {self.csv_path}')
                self.csvfile.write(
                    'Place,Title,Name,Type,latitude,longitude,enclosed_by\n')

            # List CSV
            self.logger.debug('*** OUTPUT TABLE ***')
            for tbl in self.admin_table:
                for key in tbl:
                    self.output_row(tbl[key])
        finally:
            # Close the file even when output_row raises, so it is never left open
            if csv_opened:
                self.csvfile.close()
예제 #16
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def advanced_search(self, place: Loc):
        """
        Advanced search - support parameters for ISO and Feature class.

        Wildcard patterns are built from place.target and place.feature.  One LIKE
        query (with an extra f_code condition when a feature was given) is run
        against 'main.geodata' and then against 'main.admin'; the admin rows are
        appended to the main rows in place.georow_list.  Nothing is changed when
        place.target is empty.

        :param place: Loc providing target, feature and country_iso; updated in place
        """
        target = place.target
        if len(target) == 0:
            return

        pattern = self.create_wildcard(target)
        feature_pattern = self.create_wildcard(place.feature)
        self.logger.debug(
            f'Advanced Search. Targ=[{pattern}] feature=[{feature_pattern}]'
            f'  iso=[{place.country_iso}] ')

        # Only filter on feature code when the caller supplied one
        where = "name LIKE ? AND country LIKE ?"
        args = (pattern, place.country_iso)
        if len(place.feature) > 0:
            where = "name LIKE ? AND country LIKE ? AND f_code LIKE ?"
            args = (pattern, place.country_iso, feature_pattern)

        query_list = [
            Query(where=where, args=args, result=Result.PARTIAL_MATCH)
        ]

        # Search main DB
        place.georow_list, place.result_type = self.db.process_query_list(
            from_tbl='main.geodata', query_list=query_list)

        # Search admin DB
        # NOTE(review): the admin result code replaces the one from the main table,
        # even when the main table returned rows - confirm this is intended.
        admin_rows, place.result_type = self.db.process_query_list(
            from_tbl='main.admin', query_list=query_list)
        place.georow_list.extend(admin_rows)
예제 #17
0
 def setUp(self) -> None:
     """Reset the shared input/output places and advance to the next test row."""
     suite = TestScoring
     suite.in_place = Loc.Loc()
     suite.out_place = Loc.Loc()
     suite.test_idx += 1
예제 #18
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def lookup_place(self, place: Loc.Loc) -> None:
        """
        Lookup a place in our geoname.org dictionary and score every row that is found.

        The search routine is chosen by place.place_type (admin1, admin2, country,
        advanced search, otherwise city) and fills place.georow_list.  Each row is
        then scored with self.match.match_score and extended with that score.
        There can be multiple rows if a name isn't unique.

        place.result_type starts as STRONG_MATCH (the search routines may replace it)
        and is downgraded to PARTIAL_MATCH when a STRONG_MATCH is left with unused
        prefix text or when the lowest row score is above 10.

        :param place: Loc describing what to look up; georow_list, result_type,
                      prefix and possibly the country/admin fields are updated in place
        """
        result_place = Loc.Loc()
        self.start = time.time()
        place.result_type = Result.STRONG_MATCH

        if place.country_iso != '' and place.country_name == '':
            place.country_name = self.get_country_name(place.country_iso)

        # Lookup Place based on Place Type
        if place.place_type == Loc.PlaceType.ADMIN1:
            self.select_admin1(place)
        elif place.place_type == Loc.PlaceType.ADMIN2:
            if place.admin1_id == '':
                self.get_admin1_id(place=place)
            self.select_admin2(place)
        elif place.place_type == Loc.PlaceType.COUNTRY:
            self.select_country(place)
        elif place.place_type == Loc.PlaceType.ADVANCED_SEARCH:
            self.advanced_search(place)
        else:
            # Lookup as City
            if place.admin2_id == '':
                self.get_admin2_id(place=place)
            self.select_city(place)

        # Starts above any real score; stays at this value when there are no rows
        min_score = 9999

        # Add search quality score to each entry
        for idx, rw in enumerate(place.georow_list):
            self.copy_georow_to_place(row=rw, place=result_place)

            if len(place.prefix) > 0 and result_place.prefix == '':
                # Input has a prefix but the row has none: use a blank placeholder
                result_place.prefix = ' '
                result_place.prefix_commas = ','
            else:
                result_place.prefix = ''

            score = self.match.match_score(inp_place=place,
                                           res_place=result_place)
            if score < min_score:
                min_score = score

            # Convert row tuple to list and extend so we can assign score
            update = list(rw)
            update.append(1)
            update[GeoKeys.Entry.SCORE] = score
            place.georow_list[idx] = tuple(update)  # Convert back from list to tuple

            # Remove items in prefix that are in result.  Plain text replacement is
            # used so that characters such as '.', '(' or '*' in a place name are
            # taken literally instead of being interpreted as a regular expression.
            if place.place_type != Loc.PlaceType.ADVANCED_SEARCH:
                for item in result_place.original_entry.split(","):
                    place.prefix = place.prefix.replace(
                        item.strip(' ').lower(), '')

        if place.result_type == Result.STRONG_MATCH and len(place.prefix) > 0:
            place.result_type = Result.PARTIAL_MATCH

        if place.result_type == Result.STRONG_MATCH and min_score > 10:
            place.result_type = Result.PARTIAL_MATCH
예제 #19
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def copy_georow_to_place(self, row, place: Loc):
        """
        Copy data from a DB row into place.

        Country, latitude/longitude, feature code and geoid are always copied.
        The admin IDs and city name are reset and then filled according to the
        row's feature code: ADM0 fills none, ADM1 fills admin1_id, ADM2 fills
        admin1_id and admin2_id, anything else also fills city1 from the row name.
        The admin names are then looked up from those IDs.  A None admin name or
        city name is stored as '' rather than as the text 'None'.

        :param row: DB row indexable by the Entry constants
        :param place: Loc to fill in
        """
        place.admin1_id = ''
        place.admin2_id = ''
        place.city1 = ''

        place.country_iso = str(row[Entry.ISO])
        place.country_name = str(self.get_country_name(row[Entry.ISO]))
        place.lat = row[Entry.LAT]
        place.lon = row[Entry.LON]
        place.feature = str(row[Entry.FEAT])
        place.geoid = str(row[Entry.ID])

        # NOTE(review): place_type is stored on self, not on place, in every branch
        # below.  That looks unintended but callers may rely on it - confirm before changing.
        if place.feature == 'ADM0':
            self.place_type = Loc.PlaceType.COUNTRY
        elif place.feature == 'ADM1':
            place.admin1_id = row[Entry.ADM1]
            self.place_type = Loc.PlaceType.ADMIN1
        elif place.feature == 'ADM2':
            place.admin1_id = row[Entry.ADM1]
            place.admin2_id = row[Entry.ADM2]
            self.place_type = Loc.PlaceType.ADMIN2
        else:
            place.admin1_id = row[Entry.ADM1]
            place.admin2_id = row[Entry.ADM2]
            place.city1 = row[Entry.NAME]
            self.place_type = Loc.PlaceType.CITY

        # Test for None before converting: str(None) is the text 'None', so a
        # check made after str() can never succeed.
        admin1_name = self.get_admin1_name(place)
        place.admin1_name = '' if admin1_name is None else str(admin1_name)

        admin2_name = self.get_admin2_name(place)
        place.admin2_name = '' if admin2_name is None else str(admin2_name)

        place.city1 = '' if place.city1 is None else str(place.city1)
예제 #20
0
    def __init__(self, directory: str, progress_bar):
        """
        Set up state and read the cached settings dictionaries.

        Four CachedDictionary files are read from the cache directory: the feature
        list, the supported country list, the language list and the output text
        replacements.  If the feature list has fewer than three entries it is
        replaced by UtilFeatureFrame.default and written back.  An AlternateNames
        helper is created last.

        :param directory: base directory; also used to locate the cache directory
        :param progress_bar: progress object stored and passed on to AlternateNames
        """
        self.logger = logging.getLogger(__name__)
        self.geodb = None
        self.required_db_version = 2
        self.db_upgrade_text = 'Adding support for non-English output'
        self.directory: str = directory
        self.progress_bar = progress_bar
        self.line_num = 0
        self.cache_changed: bool = False
        cache_dir = GeoKeys.get_cache_directory(self.directory)
        self.country = None

        def read_cache(filename):
            # Create a CachedDictionary for filename in the cache directory and read it
            cached = CachedDictionary.CachedDictionary(cache_dir, filename)
            cached.read()
            return cached

        # Geoname features we should include
        self.feature_code_list_cd = read_cache("feature_list.pkl")
        self.feature_code_list_dct: Dict[str, str] = self.feature_code_list_cd.dict
        if len(self.feature_code_list_dct) < 3:
            self.logger.warning('Feature list is empty. Setting defaults')
            self.feature_code_list_dct.clear()
            self.feature_code_list_dct.update(
                dict.fromkeys(UtilFeatureFrame.default, ''))
            self.feature_code_list_cd.write()

        # Countries (ISO2) we should include
        self.supported_countries_cd = read_cache("country_list.pkl")
        self.supported_countries_dct: Dict[str, str] = self.supported_countries_cd.dict

        # Languages (ISO2) we should include
        self.languages_list_cd = read_cache("languages_list.pkl")
        self.languages_list_dct: Dict[str, str] = self.languages_list_cd.dict
        self.lang_list = list(self.languages_list_dct)

        # Output text replacements
        self.output_replace_cd = read_cache("output_list.pkl")
        self.output_replace_dct: Dict[str, str] = self.output_replace_cd.dict
        self.output_replace_list = list(self.output_replace_dct)
        for item in self.output_replace_list:
            self.logger.debug(f'Output replace [{item}]')

        self.entry_place = Loc.Loc()

        # Support for Geonames AlternateNames file.  Adds alternate names for entries
        self.alternate_names = AlternateNames.AlternateNames(
            directory_name=self.directory,
            geo_files=self,
            progress_bar=self.progress_bar,
            filename='alternateNamesV2.txt',
            lang_list=self.lang_list)
예제 #21
0
 def setUp(self) -> None:
     """Give each test a fresh, empty Loc in self.place."""
     fresh_place = Loc.Loc()
     self.place = fresh_place
예제 #22
0
    def setUpClass(cls):
        """
        Load the geoname test data and build the CSV nodes used by the tests.

        Data is read from the 'geoname_test' folder in the user's home directory.
        ValueError is raised when the geodata support files or the geoname files
        cannot be read.  Each entry in the local csv_data list is parsed, looked up
        and turned into a CSV node, then complete_csv() is called.
        """
        TestCSV.logger = logging.getLogger(__name__)
        logging.basicConfig(
            level=logging.DEBUG,
            format="%(levelname)s %(name)s.%(funcName)s %(lineno)d: %(message)s")

        # Load test data
        test_dir = os.path.join(str(Path.home()), "geoname_test")
        csv_path = os.path.join(test_dir, "test")
        TestCSV.geodata = Geodata.Geodata(directory_name=test_dir,
                                          progress_bar=None)

        if TestCSV.geodata.read():
            TestCSV.logger.error("Missing geodata support Files.")
            raise ValueError('Cannot open database')

        # Read in Geoname Gazeteer file - city names, lat/long, etc.
        if TestCSV.geodata.read_geonames():
            TestCSV.logger.info("Missing geoname Files.")
            TestCSV.logger.info(
                'Requires ca.txt, gb.txt, de.txt from geonames.org in folder username/geoname_test'
            )
            raise ValueError('Cannot open database')

        TestCSV.csv = GrampsCsv.GrampsCsv(in_path=csv_path,
                                          geodata=TestCSV.geodata)

        TestCSV.ancestry = GrampsXml.GrampsXml(in_path=csv_path,
                                               out_suffix='',
                                               cache_d=None,
                                               progress=None,
                                               geodata=TestCSV.geodata)

        # Set up CSV Data.  Only the uncommented row is active.
        csv_data = [
            #('Portugal', 'ADM0', 'P0001', 'po'),
            #('Scotland,United Kingdom', 'ADM1','P0002','gb'),
            #('Kent,England,United Kingdom', 'ADM2','P0003','gb'),
            #('Canterbury,Kent,England,United Kingdom', "PPL", 'P0004', 'gb'),
            #('St Eustace,Canterbury,Kent,England,United Kingdom', "PPL", 'P0005', 'gb'),
            #('Dover, ,England,United Kingdom', "PPL", 'P0006', 'gb'),
            ('12 Privet Drive, Dover, ,England,United Kingdom', "PPL", 'P0006',
             'gb'),
            #('Edinburgh, ,Scotland,United Kingdom', "PPL", 'P0007', 'gb'),
            #("St James's Palace, ,England,United Kingdom", "PPL", 'P0008', 'gb'),
        ]

        # One Loc is reused for every row and cleared at the start of each pass
        loc = Loc.Loc()
        geo_files = TestCSV.geodata.geo_files

        for entry in csv_data:
            loc.clear()
            loc.original_entry = entry[RowEntry.NAME]
            loc.feature = entry[RowEntry.FEAT]
            loc.parse_place(place_name=loc.original_entry, geo_files=geo_files)

            # Lookup record
            TestCSV.geodata.find_first_match(loc.original_entry, loc)
            loc.id = entry[RowEntry.PLACE_ID]
            TestCSV.csv.set_CSV_place_type(loc)

            TestCSV.csv.create_csv_node(loc)

        TestCSV.csv.complete_csv()
예제 #23
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def select_city(self, place: Loc):
        """
        Look up place.target in the main.geodata table.

        A list of queries is assembled, ordered from the most specific to the
        least specific, and handed to self.db.process_query_list.  The rows
        and result type that call returns are stored on place.georow_list and
        place.result_type.  When place.target is empty the method returns
        immediately and place is left untouched.

        :param place: Loc whose target, country_iso, admin1_name and admin1_id
            drive the search; it is updated in place with the outcome
        """
        name = place.target
        if not len(name):
            return

        soundex_key = get_soundex(name)
        iso = place.country_iso

        if len(iso) == 0:
            # No country supplied: search on the name alone
            # (exact, then LIKE, then soundex).
            queries = [
                Query(where="name = ?",
                      args=(name, ),
                      result=Result.PARTIAL_MATCH),
                Query(where="name LIKE ?",
                      args=(name, ),
                      result=Result.WILDCARD_MATCH),
                Query(where="sdx = ?",
                      args=(soundex_key, ),
                      result=Result.SOUNDEX_MATCH),
            ]
        elif len(place.admin1_name) > 0:
            # Country and admin1 supplied: admin1-restricted lookups first,
            # then name + country, finally soundex within the admin1.
            adm1_id = place.admin1_id
            queries = [
                Query(where="name = ? AND country = ? AND admin1_id = ?",
                      args=(name, iso, adm1_id),
                      result=Result.STRONG_MATCH),
                Query(where="name LIKE ? AND country = ? AND admin1_id = ?",
                      args=(name, iso, adm1_id),
                      result=Result.WILDCARD_MATCH),
                Query(where="name = ? AND country = ?",
                      args=(name, iso),
                      result=Result.PARTIAL_MATCH),
                Query(where="sdx = ? AND admin1_id = ? AND country = ?",
                      args=(soundex_key, adm1_id, iso),
                      result=Result.SOUNDEX_MATCH),
            ]
        else:
            # Country only: LIKE on name, exact name, then soundex,
            # each restricted to the country.
            queries = [
                Query(where="name LIKE ? AND country = ?",
                      args=(name, iso),
                      result=Result.WILDCARD_MATCH),
                Query(where="name = ? AND country = ?",
                      args=(name, iso),
                      result=Result.PARTIAL_MATCH),
                Query(where="sdx = ? AND country = ?",
                      args=(soundex_key, iso),
                      result=Result.SOUNDEX_MATCH),
            ]

        # Hand the ordered list to the DB layer and record what it found.
        place.georow_list, place.result_type = self.db.process_query_list(
            from_tbl='main.geodata', query_list=queries)
예제 #24
0
파일: GeoDB.py 프로젝트: prculley/GeoFinder
    def select_admin2(self, place: Loc):
        """
        Look up place.admin2_name as an ADM2 entry in the main.admin table.

        The name is copied to place.target and a list of progressively looser
        queries is passed to self.db.process_query_list; the rows and result
        type it returns are stored on place.georow_list / place.result_type.

        If no rows come back, the admin2 name is retried as a city through
        select_city.  On a city hit the place is marked as a CITY and, when
        the admin1 name of the hit differs from place.admin1_name, the old
        admin1 name is moved to place.prefix.  On a miss the admin2 name is
        restored.  Nothing happens when place.admin2_name is empty.

        :param place: Loc to search for; updated in place
        """
        county = place.admin2_name
        if not len(county):
            return
        place.target = county

        iso = place.country_iso
        adm2_code = 'ADM2'

        # Ordered list of ADM2 queries, starting with the one that also
        # constrains admin1_id.
        queries = [
            Query(
                where="name = ? AND country = ? AND admin1_id = ? AND f_code=?",
                args=(county, iso, place.admin1_id, adm2_code),
                result=Result.STRONG_MATCH),
            Query(where="name = ? AND country = ? AND f_code=?",
                  args=(county, iso, adm2_code),
                  result=Result.PARTIAL_MATCH),
            Query(where="name LIKE ? AND country = ? AND f_code=?",
                  args=(self.create_wildcard(county), iso, adm2_code),
                  result=Result.PARTIAL_MATCH),
            Query(where="name = ?  AND f_code=?",
                  args=(county, adm2_code),
                  result=Result.PARTIAL_MATCH),
            Query(where="name LIKE ? AND country = ? AND f_code=?",
                  args=(county, iso, adm2_code),
                  result=Result.WILDCARD_MATCH),
        ]

        place.georow_list, place.result_type = self.db.process_query_list(
            from_tbl='main.admin', query_list=queries)

        if place.result_type == GeoKeys.Result.WILDCARD_MATCH:
            # Author's note: matched as Admin2 without "shire" - remove every
            # 'shire' substring from the original entry.
            place.original_entry = re.sub('shire', '', place.original_entry)

        if len(place.georow_list) == 0:
            # No county-level row: retry the same name as a city.
            original_admin2 = place.admin2_name
            place.city1 = original_admin2
            place.admin2_name = ''

            self.select_city(place)

            if len(place.georow_list) > 0:
                # The name matched a city.
                place.place_type = Loc.PlaceType.CITY
                city_adm1 = self.get_admin1_name_direct(
                    lookup_target=place.georow_list[0][Entry.ADM1],
                    iso=place.country_iso)
                if place.admin1_name != city_adm1:
                    # The supplied admin1 disagrees with the matched row:
                    # keep it as a prefix and clear the admin1 name.
                    place.prefix = place.admin1_name.title()
                    place.admin1_name = ''
                return
            else:
                # Still nothing found - put the admin2 name back.
                place.admin2_name = original_admin2
                place.city1 = ''