Beispiel #1
0
    def update_global_replacement_list(self, key, geoid, prefix):
        """
        Record a replacement for a place name in the global replacement list.

        The replacement entry is built from geoid and prefix, and stored under
        the normalized form of key (commas are kept during normalization).
        The list is written out only on calls where update_counter % 10 == 1.
        """
        replacement_entry = ReplacementDictionary.build_replacement_entry(geoid, prefix)
        normalized_key = Normalize.normalize(text=key, remove_commas=False)
        self.global_replace.set(normalized_key, replacement_entry)

        # Only flush on every tenth counter value to limit how often we write
        if self.update_counter % 10 != 1:
            return
        self.global_replace.write()
Beispiel #2
0
    def process_place_entries(self):
        """
        Handle PLACE entries in users file.  Replace it, skip it, or have user correct it.

        Entries are read one at a time with ancestry_file_handler.get_next_place(),
        normalized (commas kept), and then handled by the first rule that applies:

        1. get_replacement() returns a GEOID for the entry (global replace list):
           - result_type STRONG_MATCH: the updated place is written out.
           - result_type DELETE: nothing is written for this entry.
           - anything else: logged as an error, find_matches() is re-run and the
             loop is left so the user can review the entry.
        2. The entry is in the skiplist: it is written out as-is.
        3. Otherwise find_matches() is run:
           - STRONG_MATCH: added to the global replacement list and written out.
           - other successful match, or no match: written as-is when shutdown has
             been requested, otherwise the loop is left for user review.

        The loop only ends via `break`; display_result() is then called with
        self.place so the user can review the entry.
        """
        self.w.original_entry.text = ""

        # Status text depends on whether the user already asked to shut down
        if self.w.prog.shutdown_requested:
            self.periodic_update("Shutting down...")
        else:
            self.periodic_update("Scanning")
        self.clear_result_list(self.place)

        while True:
            # Keep reading place entries until we need User review or reach End Of File
            self.update_counter += 1  # Counter is used to periodically update
            # Update statistics
            self.update_statistics()

            # Find the next PLACE entry in  file
            # Process it and keep looping until we need user review
            self.place.clear()
            town_entry, eof, rec_id = self.ancestry_file_handler.get_next_place(
            )
            # Store the raw (un-normalized) entry and record id on the place
            self.place.updated_entry = town_entry
            self.place.id = rec_id
            # NOTE(review): normalization runs before the eof check below -
            # confirm get_next_place() returns a usable string at end of file
            town_entry = Normalize.normalize(text=town_entry,
                                             remove_commas=False)

            # NOTE(review): the loop is not left here on eof; presumably
            # end_of_file_shutdown() terminates processing - confirm
            if eof:
                self.end_of_file_shutdown()

            # See if we already have a fix (Global Replace) or Skip (ignore).
            # Otherwise see if we can find it or have user handle it
            replacement_geoid = self.get_replacement(self.global_replace,
                                                     town_entry, self.place)

            if replacement_geoid is not None:
                # There is already a global change that we can apply to this entry.
                self.matched_count += 1

                if self.place.result_type == GeoUtil.Result.STRONG_MATCH:
                    # REPLACE - Output the updated place to ancestry file
                    self.write_updated_place(self.place, town_entry)

                    # Display status to user
                    if self.w.prog.shutdown_requested:
                        self.periodic_update("Creating Import...")
                    else:
                        self.periodic_update("Applying change")
                elif self.place.result_type == GeoUtil.Result.DELETE:
                    # DELETE - Don't write out this place
                    continue
                else:
                    # ERROR - We previously found an update, but the GEOID for replacement can no longer be found
                    self.logger.warning(
                        f'***ERROR looking up GEOID=[{replacement_geoid}] for [{town_entry}] '
                    )
                    self.place.event_year = int(
                        self.ancestry_file_handler.event_year
                    )  # Set place date to event date (geo names change over time)
                    # Show the error to the user and pre-fill the entry for correction
                    self.w.original_entry.text = f'** DATABASE ERROR FOR GEOID=[{replacement_geoid}] for [{town_entry}]'
                    self.w.user_entry.text = f'{town_entry}'
                    self.geodata.find_matches(town_entry, self.place,
                                              self.w.prog.shutdown_requested)
                    # Leave the loop so the user can review this entry
                    break
                # Replacement was applied (STRONG_MATCH path) - go on to the next entry
                continue
            elif self.skiplist.get(town_entry) is not None:
                # SKIP - User marked place as SKIP - Write out as-is and go to next error
                self.skip_count += 1
                self.periodic_update("Skipping")
                self.ancestry_file_handler.write_asis(town_entry)
                continue
            else:
                # FOUND a PLACE entry that we don't already have a global replace or skip for
                # See if it is in the place database
                self.place.event_year = int(
                    self.ancestry_file_handler.event_year
                )  # Set place date to event date (geo names change over time)
                self.geodata.find_matches(town_entry, self.place,
                                          self.w.prog.shutdown_requested)

                if self.place.result_type in GeoUtil.successful_match:
                    # SUCCESSFUL MATCH - result type is one of GeoUtil.successful_match
                    if self.place.result_type == GeoUtil.Result.STRONG_MATCH:
                        # FOUND STRONG MATCH - no user verification needed
                        self.matched_count += 1

                        # Write out line without user verification
                        if self.w.prog.shutdown_requested:
                            self.periodic_update("Creating Import...")
                        else:
                            self.periodic_update("Scanning")

                        # Add to global replace list
                        self.update_global_replacement_list(
                            key=town_entry,
                            geoid=self.place.geoid,
                            prefix=self.place.prefix)
                        self.write_updated_place(self.place, town_entry)
                        self.logger.debug(
                            f'Found Strong Match for {town_entry} Setting DICT'
                        )
                        continue
                    else:
                        # Successful match that is not a STRONG_MATCH
                        # (WEAK MATCH OR MULTIPLE MATCHES)
                        if self.w.prog.shutdown_requested:
                            # User requested shutdown - so no user interaction.  Write this item out as-is
                            self.review_count += 1
                            self.periodic_update("Creating Import...")
                            self.w.original_entry.text = " "
                            self.ancestry_file_handler.write_asis(town_entry)
                            continue
                        else:
                            # USER REVIEW - Have user review the match
                            self.logger.debug(
                                f'user review for {town_entry} res= [{self.place.result_type}] '
                            )

                            self.w.status.configure(style="Good.TLabel")
                            self.w.original_entry.text = self.place.original_entry  # Display place
                            self.w.user_entry.text = self.place.updated_entry  # Display place
                            # Break out of loop and have user review the match
                            break
                else:
                    # NO MATCH FOUND
                    if self.w.prog.shutdown_requested:
                        # User requested shutdown.  Write this item out as-is
                        self.review_count += 1
                        self.periodic_update("Creating Import...")
                        self.w.original_entry.text = " "
                        self.ancestry_file_handler.write_asis(town_entry)
                        continue
                    else:
                        # USER REVIEW - Have user review entry
                        # self.logger.debug(f'User2 review for {town_entry}. status ={self.place.status}')
                        self.w.status.configure(style="Good.TLabel")
                        # Both fields show the original entry here (no match to suggest)
                        self.w.original_entry.text = self.place.original_entry  # Display place
                        self.w.user_entry.text = self.place.original_entry  # Display place
                        # Break out of loop and have user review the item
                        break

        # Have user review the result
        self.display_result(self.place)
Beispiel #3
0
    def load_data_files(self) -> bool:
        """
        Prepare the cached dictionaries and the geo database used by GeoFinder.

        Reads the skip list, global replacement list, feature-code list,
        supported-country list and language list from the cache directory,
        re-keys the global replacement list with normalized keys, then builds
        the Geodata object and opens it.
        #Returns:
            Error - the result of opening the geo database (True if error occurred)
        """
        cache_dir = self.cache_dir

        # Skip list and global replacement list
        self.skiplist = CachedDictionary.CachedDictionary(cache_dir, "skiplist.pkl")
        self.skiplist.read()
        self.global_replace = CachedDictionary.CachedDictionary(cache_dir, "global_replace.pkl")
        self.global_replace.read()

        # Re-key every global_replace item under its normalized key.
        # Iterate over a snapshot of the keys because the dictionary is modified in the loop.
        for old_key in tuple(self.global_replace.dict):
            entry = self.global_replace.dict.pop(old_key)
            normalized_key = Normalize.normalize(text=old_key, remove_commas=False)
            self.global_replace.dict[normalized_key] = entry

        # Geoname feature codes we should include
        self.feature_code_list_cd = CachedDictionary.CachedDictionary(cache_dir, "feature_list.pkl")
        self.feature_code_list_cd.read()
        feature_code_list_dct: Dict[str, str] = self.feature_code_list_cd.dict
        if len(feature_code_list_dct) < 3:
            # Fewer than three features: reset to the default feature set and save it
            self.logger.warning('Feature list is empty.')
            feature_code_list_dct.clear()
            feature_code_list_dct.update(dict.fromkeys(UtilFeatureFrame.default, ''))
            self.feature_code_list_cd.write()

        # Countries (ISO2) we should include
        self.supported_countries_cd = CachedDictionary.CachedDictionary(cache_dir, "country_list.pkl")
        self.supported_countries_cd.read()
        supported_countries_dct: Dict[str, str] = self.supported_countries_cd.dict

        # Languages (ISO2) we should include
        self.languages_list_cd = CachedDictionary.CachedDictionary(cache_dir, "languages_list.pkl")
        self.languages_list_cd.read()
        languages_list_dct: Dict[str, str] = self.languages_list_cd.dict

        # Build the geo data object from the dictionaries loaded above
        self.geodata = Geodata(
            directory_name=self.directory,
            progress_bar=self.w.prog,
            enable_spell_checker=self.enable_spell_checker,
            show_message=True,
            exit_on_error=True,
            languages_list_dct=languages_list_dct,
            feature_code_list_dct=feature_code_list_dct,
            supported_countries_dct=supported_countries_dct,
        )

        # If the list of supported countries is unusually short, display note to user
        country_count = self.display_country_note()
        self.logger.info(f'{country_count} countries will be loaded')

        # Open Geoname Gazeteer DB - city names, lat/long, etc.
        open_failed = self.geodata.open()
        if open_failed:
            TKHelper.fatal_error(MISSING_FILES)

        # Refresh the GUI and mark progress as complete
        self.w.root.update()
        self.w.prog.update_progress(100, " ")
        return open_failed