Esempio n. 1
0
    def __init__(self):
        """
        Create an empty location record.

        Every attribute is set to a neutral default: empty strings for text
        fields, NaN for the coordinates, COUNTRY for the place type, NO_MATCH
        for the lookup result and 100.0 for the score.
        """
        # Helpers
        self.logger = logging.getLogger(__name__)
        self.norm = Normalize.Normalize()
        self.geo_db = None

        # Entry text
        self.original_entry: str = ''
        self.updated_entry = ''
        self.prefix: str = ''  # Prefix (entries prepended before geoname location)

        # Coordinates - both start as NaN
        self.lat: float = float('NaN')  # Latitude
        self.lon: float = float('NaN')  # Longitude

        # Hierarchy: country -> admin1 -> admin2 -> city
        self.country_iso: str = ''  # Country ISO code
        self.country_name: str = ''
        self.admin1_name: str = ''  # Admin1 (State/province/etc)
        self.admin1_id: str = ''  # Admin1 Geoname ID
        self.admin2_name: str = ''  # Admin2 (county)
        self.admin2_id = ''  # Admin2 Geoname ID
        self.city: str = ''  # City or entity name
        self.enclosed_by = ''  # The entity that encloses this.  E.g United States encloses Texas

        # Geoname classification
        self.geoid: str = ''  # Geoname GEOID
        self.feature: str = ''  # Geoname feature code
        self.place_type: int = PlaceType.COUNTRY  # Is this a Country, Admin1, Admin2 or city?

        # Lookup result info
        self.status: str = ''
        self.status_detail: str = ''
        self.result_type: int = GeoUtil.Result.NO_MATCH  # Result type of lookup
        self.result_type_text: str = ''  # Text version of result type
        self.georow_list: List = []  # List of items that matched this location
        self.score = 100.0
        self.event_year: int = 0
Esempio n. 2
0
 def __init__(self, geodb):
     """
     Set up the search state around a geoname database object.

     # Args:
         geodb: stored unchanged as self.geodb
                (presumably the GeoDB wrapper that owns this object - confirm with caller)
     """
     self.logger = logging.getLogger(__name__)
     self.geodb = geodb

     # Collaborators used while searching
     self.norm = Normalize.Normalize()
     self.match = MatchScore.MatchScore()
     self.place = Loc.Loc()

     # Search options
     self.detailed_debug = True
     self.use_wildcards = True
     # Comma separated column names (presumably used in SELECT statements - confirm)
     self.select_str = 'name, country, admin1_id, admin2_id, lat, lon, feature, geoid, sdx'
     self.place_type = ''

     # Counters and cache, all starting empty
     self.start = 0
     self.total_lookups = 0
     self.cache = {}
Esempio n. 3
0
    def __init__(self):
        """
        Initialise the scorer with zeroed weights, then apply the default weighting.

        The weight attributes are created first so they exist before
        set_weighting() is called with the defaults below.
        """
        self.logger = logging.getLogger(__name__)

        # Diagnostic text for scoring
        self.score_diags = ''

        # Placeholder weights - set_weighting() is called with real values below
        self.token_weight = []
        self.prefix_weight = 0.0
        self.feature_weight = 0.0
        self.input_weight = 0.0

        # Token weights are for each input term match - adm2, adm1, country
        self.set_weighting(
            token_weight=[.2, .3, .5],
            prefix_weight=6.0,
            feature_weight=0.15,
        )

        # Weighting for each part of score
        self.wildcard_penalty = 8.0
        self.norm = Normalize.Normalize()
Esempio n. 4
0
    def __init__(self, db_path, show_message: bool, exit_on_error: bool,
                 set_speed_pragmas: bool, db_limit: int):
        """
        Open the geoname database at db_path and prepare it for lookups.

        Whether the file exists is recorded before the connection is opened.
        Only a file that already existed is sanity tested with a sample query.
        If that test fails the user may be offered the choice to delete the
        file, and the constructor then exits or raises.

        # Args:
            db_path: full path to database file
            show_message: If True, show messagebox to user on error
            exit_on_error: If True, exit if significant error occurs
            set_speed_pragmas: If True, set DB pragmas for maximum performance
            db_limit: SQL LIMIT parameter
        # Raises:
            ValueError('Cannot open database'): the DB object reported an error after opening
            ValueError('Database empty or corrupt'): sanity test failed and exit_on_error is False
            SystemExit: sanity test failed and exit_on_error is True
        """
        self.logger = logging.getLogger(__name__)
        self.db_path = db_path
        self.show_message = show_message
        self.exit_on_error = exit_on_error

        # Query limit and lookup statistics
        self.max_query_results = 50
        self.total_time = 0
        self.total_lookups = 0
        self.slow_lookup = 0

        self.match = MatchScore.MatchScore()
        self.norm = Normalize.Normalize()

        # Must be checked before DB.DB() is constructed
        # (presumably because opening can create the file - confirm against DB.DB)
        db_existed = os.path.exists(db_path)

        self.db = DB.DB(db_filename=db_path,
                        show_message=show_message,
                        exit_on_error=exit_on_error)
        if self.db.err != '':
            self.logger.error(f"Error! cannot open database {db_path}.")
            raise ValueError('Cannot open database')

        if db_existed:
            # Sanity test: run a sample query against main.geodata
            sanity_err = self.db.test_database('name',
                                               'main.geodata',
                                               where='name = ? AND country = ?',
                                               args=('ba', 'fr'))

            if sanity_err:
                self.logger.warning(f'DB error for {db_path}')

                # The question is only asked when message boxes are enabled
                delete_confirmed = show_message and messagebox.askyesno(
                    'Error',
                    f'Geoname database is empty or corrupt:\n\n {db_path} \n\nDo you want to delete it and rebuild?'
                )
                if delete_confirmed:
                    messagebox.showinfo('', 'Deleting Geoname database')
                    self.db.conn.close()
                    os.remove(db_path)

                # A failed sanity test always ends construction
                if exit_on_error:
                    sys.exit()
                else:
                    raise ValueError('Database empty or corrupt')

        if set_speed_pragmas:
            self.db.set_speed_pragmas()

        self.place_type = ''
        self.db_limit = db_limit
        self.db.order_string = ''
        self.db.limit_string = f'LIMIT {self.db_limit}'

        # Search helper is handed this object as its geodb
        self.s: GeoSearch.GeoSearch = GeoSearch.GeoSearch(geodb=self)
Esempio n. 5
0
    def __init__(self,
                 directory: str,
                 display_progress,
                 show_message: bool,
                 exit_on_error: bool,
                 languages_list_dct: {},
                 feature_code_list_dct: {},
                 supported_countries_dct: {},
                 volume=''):
        """
        Read in datafiles needed for geodata, filter them and create a sql db.

        The three filter arguments are collections of codes. The language
        filter is iterated here to build self.lang_list; the other two are
        only stored. Filter examples:
            languages_list_dct={'fr','de'}
            feature_code_list_dct={'PPL', 'ADM1', 'CSTL'}
            supported_countries_dct = {'us','gb','at'}
        # Args:
            directory: base directory
            display_progress: None or Handler called with percent_done:int, msg:str
            show_message: True to show message boxes to user on errors
            exit_on_error:  True to exit on serious errors
            languages_list_dct: collection of the ISO-2 languages to load from alternateNames
            feature_code_list_dct: collection of the Geonames.org feature codes to load
            supported_countries_dct: collection of the ISO-2 countries to load
            volume: disk volume to use - e.g. C: for Windows or /Volumes/xyz for OSX, /media/xyz for linux
        # Raises:
            SystemExit: the cache directory is missing and exit_on_error is True
        """
        self.logger = logging.getLogger(__name__)
        self.geodb: [GeoDB.GeoDB, None] = None
        self.show_message = show_message
        self.geoid_main_dict = {}  # Key is GEOID, Value is DB ID for entry
        self.geoid_admin_dict = {}  # Key is GEOID, Value is DB ID for entry
        # TODO fix volume handling
        self.volume = volume
        self.collate = 'COLLATE NOCASE'

        self.exit_on_error = exit_on_error
        self.required_db_version = 4
        # Message to user upgrading from earlier DB version
        self.db_upgrade_text = 'Renamed column to Feature'
        self.directory: str = directory
        self.progress_bar = display_progress
        self.line_num = 0
        self.cache_changed: bool = False
        # Resolved before any chdir below, as in the original statement order
        sub_dir = GeoUtil.get_cache_directory(self.directory)
        self.country = None
        self.languages_list_dct = languages_list_dct
        self.feature_code_list_dct = feature_code_list_dct
        self.supported_countries_dct = supported_countries_dct
        self.norm = Normalize.Normalize()

        # List form of the language filter; passed to AlternateNames below
        self.lang_list = list(self.languages_list_dct)

        if volume != '':
            os.chdir(volume)
        if not os.path.exists(sub_dir):
            self.logger.warning(f'Directory {sub_dir} NOT FOUND')
            if self.show_message:
                messagebox.showwarning(
                    'Folder not found',
                    f'Directory\n\n {sub_dir}\n\n NOT FOUND')
            if exit_on_error:
                sys.exit()
            # Otherwise construction continues even though the directory is missing

        # Read in Text Replacement dictionary pickle - this has output text replacements
        self.output_replace_cd = CachedDictionary.CachedDictionary(
            sub_dir, "output_list.pkl")
        self.output_replace_cd.read()
        self.output_replace_dct: Dict[str, str] = self.output_replace_cd.dict
        # Keys of the replacement dictionary, in iteration order
        self.output_replace_list = list(self.output_replace_dct)

        self.entry_place = Loc.Loc()

        # Support for Geonames AlternateNames file.  Adds alternate names for entries
        self.alternate_names = AlternateNames.AlternateNames(
            directory=self.directory,
            geo_build=self,
            progress_bar=self.progress_bar,
            prefix="Step 3 of 4) ",
            filename='alternateNamesV2.txt',
            lang_list=self.lang_list)