Esempio n. 1
0
class RGeocoder(object):
    """
    The main reverse geocoder class.

    Holds a K-D tree built over the coordinates of the known locations and
    a parallel list (``self.locations``) with one record per coordinate.
    """
    def __init__(self, mode=2, verbose=True, stream=None):
        """ Class Instantiation
        Args:
        mode (int): Library supports the following two modes:
                    - 1 = Single-threaded K-D Tree
                    - 2 = Multi-threaded K-D Tree (Default)
        verbose (bool): For verbose output, set to True
        stream (io.StringIO): An in-memory stream of a custom data source
        """
        self.mode = mode
        self.verbose = verbose
        if stream:
            coordinates, self.locations = self.load(stream)
        else:
            coordinates, self.locations = self.extract(rel_path(RG_FILE))

        if mode == 1:  # Single-process
            self.tree = KDTree(coordinates)
        else:  # Multi-process
            self.tree = KDTree_MP.cKDTree_MP(coordinates)

    def query(self, coordinates):
        """
        Function to query the K-D tree to find the nearest city
        Args:
        coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)]
        Returns:
        list: One entry of ``self.locations`` per input coordinate, in order.
        """
        if self.mode == 1:
            _, indices = self.tree.query(coordinates, k=1)
        else:
            _, indices = self.tree.pquery(coordinates, k=1)
        return [self.locations[index] for index in indices]

    def load(self, stream):
        """
        Function that loads a custom data source
        Args:
        stream (io.StringIO): An in-memory stream of a custom data source.
                              The format of the stream must be a comma-separated file
                              with header containing the columns defined in RG_COLUMNS.
        Returns:
        tuple: (list of (lat, lon) pairs as read from the stream, list of row dicts)
        Raises:
        csv.Error: If the header of the stream is not exactly RG_COLUMNS.
        """
        stream_reader = csv.DictReader(stream, delimiter=',')
        header = stream_reader.fieldnames

        if header != RG_COLUMNS:
            raise csv.Error(
                'Input must be a comma-separated file with header containing '
                'the following columns - %s. For more help, visit: '
                % (','.join(RG_COLUMNS))
                + 'https://github.com/thampiman/reverse-geocoder')

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in stream_reader:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)

        return geo_coords, locations

    def extract(self, local_filename):
        """
        Function loads the already extracted GeoNames cities file or downloads and extracts it if
        it doesn't exist locally
        Args:
        local_filename (str): Path to local RG_FILE
        Returns:
        tuple: (list of (lat, lon) pairs, list of row dicts)
        """
        if os.path.exists(local_filename):
            if self.verbose:
                print('Loading formatted geocoded file...')
            # Materialise the rows so the file can be closed straight away.
            with open(local_filename, 'rt') as cached_file:
                rows = list(csv.DictReader(cached_file))
        else:
            gn_cities_US_url = GN_URL + GN_CITIES_US + '.zip'
            gn_admin1_url = GN_URL + GN_ADMIN1
            gn_admin2_url = GN_URL + GN_ADMIN2

            cities_US_zipfilename = GN_CITIES_US + '.zip'
            cities_US_filename = GN_CITIES_US + '.txt'

            if not os.path.exists(cities_US_zipfilename):
                if self.verbose:
                    print('Downloading files from Geoname...')
                # Only the import is guarded, so an ImportError raised while
                # downloading is not mistaken for "running on Python 2".
                try:  # Python 3
                    from urllib.request import urlretrieve
                except ImportError:  # Python 2
                    from urllib import urlretrieve
                urlretrieve(gn_cities_US_url, cities_US_zipfilename)
                urlretrieve(gn_admin1_url, GN_ADMIN1)
                urlretrieve(gn_admin2_url, GN_ADMIN2)

            if self.verbose:
                print('Extracting cities_US...')
            with zipfile.ZipFile(cities_US_zipfilename) as archive:
                payload = archive.read(cities_US_filename)
            with open(cities_US_filename, 'wb') as extracted:
                extracted.write(payload)

            if self.verbose:
                print('Loading admin1 codes...')
            with open(GN_ADMIN1, 'rt') as admin1_file:
                admin1_map = {
                    row[ADMIN_COLUMNS['concatCodes']]:
                        row[ADMIN_COLUMNS['asciiName']]
                    for row in csv.reader(admin1_file, delimiter='\t')
                }

            if self.verbose:
                print('Loading admin2 codes...')
            with open(GN_ADMIN2, 'rt') as admin2_file:
                admin2_map = {
                    row[ADMIN_COLUMNS['concatCodes']]:
                        row[ADMIN_COLUMNS['asciiName']]
                    for row in csv.reader(admin2_file, delimiter='\t')
                }

            if self.verbose:
                print('Creating formatted geocoded file...')
            rows = []
            with open(cities_US_filename, 'rt') as cities_file:
                for row in csv.reader(cities_file, delimiter='\t',
                                      quoting=csv.QUOTE_NONE):
                    cc = row[GN_COLUMNS['countryCode']]
                    admin1_c = row[GN_COLUMNS['admin1Code']]
                    admin2_c = row[GN_COLUMNS['admin2Code']]

                    cc_admin1 = cc + '.' + admin1_c
                    cc_admin2 = cc + '.' + admin1_c + '.' + admin2_c

                    rows.append({
                        'lat': row[GN_COLUMNS['latitude']],
                        'lon': row[GN_COLUMNS['longitude']],
                        'name': row[GN_COLUMNS['asciiName']],
                        # Unknown admin codes map to an empty name.
                        'admin1': admin1_map.get(cc_admin1, ''),
                        'admin2': admin2_map.get(cc_admin2, ''),
                        'cc': cc
                    })

            # The output file is only created once every row is ready, so a
            # failure above does not leave an empty file that a later run
            # would treat as a valid cache.
            with open(local_filename, 'wt') as out_file:
                writer = csv.DictWriter(out_file, fieldnames=RG_COLUMNS)
                writer.writeheader()
                writer.writerows(rows)

            if self.verbose:
                print('Removing extracted cities_US to save space...')
            os.remove(cities_US_filename)

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in rows:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)
        return geo_coords, locations
Esempio n. 2
0
class RGeocoder:
    """
    Reverse geocoder: finds, for each queried coordinate, the nearest entry
    of the location table loaded by ``extract``.
    """
    def __init__(self, mode=2):
        """
        Args:
        mode (int): 1 builds a ``KDTree``; any other value builds a
                    ``KDTree_MP.cKDTree_MP`` (default).
        """
        coordinates, self.locations = self.extract(rel_path(RG_FILE))
        self.mode = mode
        if mode == 1:  # Single-process
            self.tree = KDTree(coordinates)
        else:  # Multi-process
            self.tree = KDTree_MP.cKDTree_MP(coordinates)

    def query(self, coordinates):
        """
        Return the nearest known location for each coordinate.
        Args:
        coordinates (list): Coordinates to look up, passed to the tree as-is.
        Returns:
        list: One entry of ``self.locations`` per input coordinate, in order.
        Raises:
        ValueError: Propagated unchanged from the underlying tree query.
        """
        # NOTE(review): extract() returns the output of geodetic_in_ecef(), so
        # the tree is built from converted coordinates, while `coordinates` is
        # passed through unconverted here. Confirm that callers convert their
        # input the same way, otherwise nearest-neighbour results are wrong.
        if self.mode == 1:
            _, indices = self.tree.query(coordinates, k=1)
        else:
            _, indices = self.tree.pquery(coordinates, k=1)
        return [self.locations[index] for index in indices]

    def extract(self, local_filename):
        """
        Load the formatted location file, building it from the GeoNames
        downloads first if it does not exist locally.
        Args:
        local_filename (str): Path to the formatted location CSV.
        Returns:
        tuple: (result of ``geodetic_in_ecef`` on the (lat, lon) pairs,
                list of row dicts)
        """
        if os.path.exists(local_filename):
            print('Loading formatted geocoded file...')
            # Materialise the rows so the file can be closed straight away.
            with open(local_filename, 'rt') as cached_file:
                rows = list(csv.DictReader(cached_file))
        else:
            gn_cities1000_url = GN_URL + GN_CITIES1000 + '.zip'
            gn_admin1_url = GN_URL + GN_ADMIN1
            gn_admin2_url = GN_URL + GN_ADMIN2

            cities1000_zipfilename = GN_CITIES1000 + '.zip'
            cities1000_filename = GN_CITIES1000 + '.txt'

            if not os.path.exists(cities1000_zipfilename):
                print('Downloading files from Geoname...')
                # Only the import is guarded, so an ImportError raised while
                # downloading is not mistaken for "running on Python 2".
                try:  # Python 3
                    from urllib.request import urlretrieve
                except ImportError:  # Python 2
                    from urllib import urlretrieve
                urlretrieve(gn_cities1000_url, cities1000_zipfilename)
                urlretrieve(gn_admin1_url, GN_ADMIN1)
                urlretrieve(gn_admin2_url, GN_ADMIN2)

            print('Extracting cities1000...')
            with zipfile.ZipFile(cities1000_zipfilename) as archive:
                payload = archive.read(cities1000_filename)
            with open(cities1000_filename, 'wb') as extracted:
                extracted.write(payload)

            print('Loading admin1 codes...')
            with open(GN_ADMIN1, 'rt') as admin1_file:
                admin1_map = {
                    row[ADMIN_COLUMNS['concatCodes']]:
                        row[ADMIN_COLUMNS['asciiName']]
                    for row in csv.reader(admin1_file, delimiter='\t')
                }

            print('Loading admin2 codes...')
            with open(GN_ADMIN2, 'rt') as admin2_file:
                admin2_map = {
                    row[ADMIN_COLUMNS['concatCodes']]:
                        row[ADMIN_COLUMNS['asciiName']]
                    for row in csv.reader(admin2_file, delimiter='\t')
                }

            print('Creating formatted geocoded file...')
            rows = []
            with open(cities1000_filename, 'rt') as cities_file:
                for row in csv.reader(cities_file, delimiter='\t',
                                      quoting=csv.QUOTE_NONE):
                    cc = row[GN_COLUMNS['countryCode']]
                    admin1_c = row[GN_COLUMNS['admin1Code']]
                    admin2_c = row[GN_COLUMNS['admin2Code']]

                    cc_admin1 = cc + '.' + admin1_c
                    cc_admin2 = cc + '.' + admin1_c + '.' + admin2_c

                    rows.append({
                        'lat': row[GN_COLUMNS['latitude']],
                        'lon': row[GN_COLUMNS['longitude']],
                        'name': row[GN_COLUMNS['asciiName']],
                        # Unknown admin codes map to an empty name.
                        'admin1': admin1_map.get(cc_admin1, ''),
                        'admin2': admin2_map.get(cc_admin2, ''),
                        'cc': cc
                    })

            # The output file is only created once every row is ready, so a
            # failure above does not leave an empty file that a later run
            # would treat as a valid cache.
            with open(local_filename, 'wt') as out_file:
                writer = csv.DictWriter(out_file, fieldnames=RG_COLUMNS)
                writer.writeheader()
                writer.writerows(rows)

            print('Removing extracted cities1000 to save space...')
            os.remove(cities1000_filename)

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in rows:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)
        ecef_coords = geodetic_in_ecef(geo_coords)
        return ecef_coords, locations
Esempio n. 3
0
class OSM_RG:
    """
    Reverse geocoder that looks up the nearest known location in a K-D tree
    built from a geocoded cities CSV file.
    """
    def __init__(self, mode=1, precision_mode=2):
        """
        Args:
            mode (int): 1 builds a ``KDTree`` (default); any other value
                builds a ``KDTree_MP.cKDTree_MP``.
            precision_mode (int): Selects the data file: 0 -> RG_FILE_1000,
                1 -> RG_FILE_5000, anything else -> RG_FILE_15000.
        """
        self.mode = mode

        if precision_mode == 0:
            loc_path = RG_FILE_1000
        elif precision_mode == 1:
            loc_path = RG_FILE_5000
        else:
            loc_path = RG_FILE_15000

        # Fall back to the path resolved by rel_path() when the file is not
        # found at the path as given.
        if not os.path.exists(loc_path):
            loc_path = rel_path(loc_path)

        coordinates, self.locations = self.extract(loc_path)

        if mode == 1:  # Single-process
            self.tree = KDTree(coordinates)
        else:  # Multi-process
            self.tree = KDTree_MP.cKDTree_MP(coordinates)

    def query(self, coordinates):
        """
        Find closest match to this list of coordinates

        Returns:
            list: One entry of ``self.locations`` per input coordinate.

        Raises:
            ValueError: Propagated unchanged from the underlying tree query.
        """
        if self.mode == 1:
            _, indices = self.tree.query(coordinates, k=1)
        else:
            _, indices = self.tree.pquery(coordinates, k=1)
        return [self.locations[index] for index in indices]

    def extract(self, local_filename):
        """
        Extract geocode data from zip

        Loads ``local_filename`` if it exists. Otherwise, when the name
        contains "rg_cities", the file is built first (download, reverse
        geocoding, ``parse_address``) and then loaded.

        Returns:
            tuple: (list of (lat, lon) float pairs, list of dicts restricted
            to COLUMNS_OF_INTEREST)

        Raises:
            Exception: If the file does not exist and cannot be built.
        """
        if os.path.exists(local_filename):
            df = pd.read_csv(local_filename)
        elif "rg_cities" in local_filename:
            self._build_geocoded_file(local_filename)
            # Re-read the file that parse_address() was given, so a first run
            # yields the same frame as later runs that load it from disk.
            # NOTE(review): assumes parse_address() updates local_filename in
            # place (the "city" column used below is not a GeoNames column).
            df = pd.read_csv(local_filename)
        else:
            raise Exception("Geocoded file not found", local_filename)

        df.dropna(subset=COLUMNS_OF_INTEREST, inplace=True)
        df.drop_duplicates(subset=["city"], inplace=True)
        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for _, row in df.iterrows():
            geo_coords.append((float(row["lat"]), float(row["lon"])))
            locations.append(
                {k: v for k, v in row.items() if k in COLUMNS_OF_INTEREST})
        return geo_coords, locations

    def _build_geocoded_file(self, local_filename):
        """Download the cities dump, reverse geocode it, write local_filename."""
        # "<dir>/rg_cities1000.csv" -> "cities1000"; os.path handles the
        # platform's separators, unlike slicing on "/".
        stem = os.path.splitext(os.path.basename(local_filename))[0]
        url_filename = stem[3:]  # drop the 3-character "rg_" prefix
        gn_cities_url = GN_URL + url_filename + ".zip"

        cities_zipfilename = url_filename + ".zip"
        cities_filename = url_filename + ".txt"

        if not os.path.exists(cities_zipfilename):
            import urllib.request
            urllib.request.urlretrieve(gn_cities_url, cities_zipfilename)

        # The archive must be closed before it is removed further down;
        # deleting a file that is still open fails on Windows.
        with zipfile.ZipFile(cities_zipfilename) as archive:
            payload = archive.read(cities_filename)
        with open(cities_filename, "wb") as extracted:
            extracted.write(payload)

        df = pd.read_csv(cities_filename, delimiter="\t",
                         names=sorted(GN_COLUMNS, key=GN_COLUMNS.get))
        df.drop([x for x in GN_COLUMNS if x not in GN_COLUMNS_OF_INTEREST],
                axis=1, inplace=True)

        geolocator = osm_geolocator()
        address_list = []
        with click.progressbar(length=len(df),
                               label="reversing geodata") as bar:
            for _, row in df.iterrows():
                # Pause between requests (kept from the original code).
                time.sleep(1)
                bar.update(1)
                address_list.append(
                    self._reverse(geolocator, row["latitude"],
                                  row["longitude"]))

        df["address"] = address_list
        df.rename(columns={"latitude": "lat",
                           "longitude": "lon",
                           "geoNameId": "geo_id"},
                  inplace=True)
        df.to_csv(local_filename, index=False)

        os.remove(cities_filename)
        os.remove(cities_zipfilename)

        parse_address(local_filename)

    @staticmethod
    def _reverse(geolocator, lat, lon):
        """Return the "address" of a reverse lookup, or None if it has none."""
        while True:
            try:
                location = geolocator.reverse(
                    (lat, lon), timeout=10, language="en")
            except Exception:
                # Best effort, as before: a failed request is retried after
                # a pause until it goes through.
                time.sleep(5)
                continue
            break
        # A lookup that succeeds but carries no address is final. Retrying it
        # (as the old loop did) can never succeed and would spin forever.
        if location is None:
            return None
        return location.raw.get("address")
Esempio n. 4
0
class RGeocoder:
    """
    Reverse geocoder: finds, for each queried coordinate, the nearest entry
    of the location table loaded by ``extract``.
    """
    def __init__(self, mode=2):
        """
        Args:
        mode (int): 1 builds a ``KDTree``; any other value builds a
                    ``KDTree_MP.cKDTree_MP`` (default).
        """
        coordinates, self.locations = self.extract(rel_path(RG_FILE))
        self.mode = mode
        if mode == 1:  # Single-process
            self.tree = KDTree(coordinates)
        else:  # Multi-process
            self.tree = KDTree_MP.cKDTree_MP(coordinates)

    def query(self, coordinates):
        """
        Return the nearest known location for each coordinate.
        Args:
        coordinates (list): Coordinates to look up, passed to the tree as-is.
        Returns:
        list: One entry of ``self.locations`` per input coordinate, in order.
        Raises:
        ValueError: Propagated unchanged from the underlying tree query.
        """
        if self.mode == 1:
            _, indices = self.tree.query(coordinates, k=1)
        else:
            _, indices = self.tree.pquery(coordinates, k=1)
        return [self.locations[index] for index in indices]

    def extract(self, local_filename):
        """
        Load the formatted location file, building it from the GeoNames
        downloads first if it does not exist locally.
        Args:
        local_filename (str): Path to the formatted location CSV.
        Returns:
        tuple: (list of (lat, lon) pairs as read from the data,
                list of row dicts)
        """
        if os.path.exists(local_filename):
            print('Loading formatted geocoded file...')
            # Materialise the rows so the file can be closed straight away.
            with open(local_filename, 'rt') as cached_file:
                rows = list(csv.DictReader(cached_file))
        else:
            gn_cities1000_url = GN_URL + GN_CITIES1000 + '.zip'
            gn_admin1_url = GN_URL + GN_ADMIN1
            gn_admin2_url = GN_URL + GN_ADMIN2

            cities1000_zipfilename = GN_CITIES1000 + '.zip'
            cities1000_filename = GN_CITIES1000 + '.txt'

            if not os.path.exists(cities1000_zipfilename):
                print('Downloading files from Geoname...')
                # Only the import is guarded, so an ImportError raised while
                # downloading is not mistaken for "running on Python 2".
                try:  # Python 3
                    from urllib.request import urlretrieve
                except ImportError:  # Python 2
                    from urllib import urlretrieve
                urlretrieve(gn_cities1000_url, cities1000_zipfilename)
                urlretrieve(gn_admin1_url, GN_ADMIN1)
                urlretrieve(gn_admin2_url, GN_ADMIN2)

            print('Extracting cities1000...')
            with zipfile.ZipFile(cities1000_zipfilename) as archive:
                payload = archive.read(cities1000_filename)
            with open(cities1000_filename, 'wb') as extracted:
                extracted.write(payload)

            print('Loading admin1 codes...')
            with open(GN_ADMIN1, 'rt') as admin1_file:
                admin1_map = {
                    row[ADMIN_COLUMNS['concatCodes']]:
                        row[ADMIN_COLUMNS['asciiName']]
                    for row in csv.reader(admin1_file, delimiter='\t')
                }

            print('Loading admin2 codes...')
            with open(GN_ADMIN2, 'rt') as admin2_file:
                admin2_map = {
                    row[ADMIN_COLUMNS['concatCodes']]:
                        row[ADMIN_COLUMNS['asciiName']]
                    for row in csv.reader(admin2_file, delimiter='\t')
                }

            print('Creating formatted geocoded file...')
            rows = []
            with open(cities1000_filename, 'rt') as cities_file:
                for row in csv.reader(cities_file, delimiter='\t',
                                      quoting=csv.QUOTE_NONE):
                    cc = row[GN_COLUMNS['countryCode']]
                    admin1_c = row[GN_COLUMNS['admin1Code']]
                    admin2_c = row[GN_COLUMNS['admin2Code']]

                    cc_admin1 = cc + '.' + admin1_c
                    cc_admin2 = cc + '.' + admin1_c + '.' + admin2_c

                    rows.append({
                        'lat': row[GN_COLUMNS['latitude']],
                        'lon': row[GN_COLUMNS['longitude']],
                        'name': row[GN_COLUMNS['asciiName']],
                        # Unknown admin codes map to an empty name.
                        'admin1': admin1_map.get(cc_admin1, ''),
                        'admin2': admin2_map.get(cc_admin2, ''),
                        'cc': cc
                    })

            # The output file is only created once every row is ready, so a
            # failure above does not leave an empty file that a later run
            # would treat as a valid cache.
            with open(local_filename, 'wt') as out_file:
                writer = csv.DictWriter(out_file, fieldnames=RG_COLUMNS)
                writer.writeheader()
                writer.writerows(rows)

            print('Removing extracted cities1000 to save space...')
            os.remove(cities1000_filename)

        # Load all the coordinates and locations.
        # The former geodetic_in_ecef(geo_coords) call was dropped: its result
        # was never used, and query() hands coordinates to the tree
        # unconverted, so the tree is built from these pairs as read.
        geo_coords, locations = [], []
        for row in rows:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)
        return geo_coords, locations
Esempio n. 5
0
class RGeocoderImpl:
    """
    The main reverse geocoder class.

    Holds a K-D tree built over the coordinates of the known locations and
    a parallel list (``self.locations``) with one record per coordinate.
    """
    def __init__(self, mode=2, verbose=True, stream=None, stream_columns=None):
        """ Class Instantiation
        Args:
        mode (int): Library supports the following two modes:
                    - 1 = Single-threaded K-D Tree
                    - 2 = Multi-threaded K-D Tree (Default)
        verbose (bool): For verbose output, set to True
        stream (io.StringIO): An in-memory stream of a custom data source
        stream_columns (list): Optional expected header of ``stream``; when
                    given, the stream header must match it exactly
        """
        self.mode = mode
        self.verbose = verbose
        if stream:
            coordinates, self.locations = self.load(stream, stream_columns)
        else:
            coordinates, self.locations = self.extract(rel_path(RG_FILE))

        if mode == 1:  # Single-process
            self.tree = KDTree(coordinates)
        else:  # Multi-process
            self.tree = KDTree_MP.cKDTree_MP(coordinates)

    @classmethod
    def from_data(cls, data: str):
        """Create an instance from comma-separated text held in a string."""
        return cls(stream=io.StringIO(data))

    @classmethod
    def from_files(cls, location_files: list):
        """ Loading files data into a stream and creating new instance.
        Arguments:
            location_files {list} -- list of files with lat, lon and additional info on the coord
        Returns:
            [RGeocoderImpl]
        """
        data_stream = RGeocoderDataLoader.load_files_stream(location_files)
        return cls(stream=data_stream)

    def query(self, coordinates):
        """
        Function to query the K-D tree to find the nearest city
        Args:
        coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)]
        Returns:
        list: One entry of ``self.locations`` per input coordinate, in order.
        """
        if self.mode == 1:
            _, indices = self.tree.query(coordinates, k=1)
        else:
            _, indices = self.tree.pquery(coordinates, k=1)
        return [self.locations[index] for index in indices]

    def query_dist(self, coordinates):
        """
        Function to query the K-D tree to find the nearest city together
        with the distance reported by the tree
        Args:
        coordinates (list): List of tuple coordinates, i.e. [(latitude, longitude)]
        Returns:
        list: One ``(distance, location)`` tuple per input coordinate.
        """
        if self.mode == 1:
            dists, indices = self.tree.query(coordinates, k=1)
        else:
            dists, indices = self.tree.pquery(coordinates, k=1)
            # in pquery dists returns a list of arrays so get the first element instead of returning array
            dists = [dist[0] for dist in dists]
        return [(dists[n], self.locations[index])
                for (n, index) in enumerate(indices)]

    def load(self, stream, stream_columns):
        """
        Function that loads a custom data source
        Args:
        stream (io.StringIO): An in-memory stream of a custom data source.
                              The format of the stream must be a comma-separated file.
        stream_columns (list): Expected header, or None/empty to skip the check.
        Returns:
        tuple: (list of (lat, lon) pairs as read from the stream, list of row dicts)
        Raises:
        csv.Error: If ``stream_columns`` is given and the header differs from it.
        """
        # Honour the verbose flag, like extract() and do_extract() do.
        if self.verbose:
            print('Loading geocoded stream ...')
        stream_reader = csv.DictReader(stream, delimiter=',')
        header = stream_reader.fieldnames

        if stream_columns and header != stream_columns:
            # fieldnames is None for an empty stream; `or []` keeps the join
            # from raising TypeError in place of the intended csv.Error.
            raise csv.Error(
                'Input must be a comma-separated file with header containing '
                'the following columns - %s.\nFound header - %s.\nFor more help, visit: '
                % (','.join(stream_columns), ','.join(header or []))
                + 'https://github.com/thampiman/reverse-geocoder')

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in stream_reader:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)

        return geo_coords, locations

    def extract(self, local_filename):
        """
        Function loads the already extracted GeoNames cities file or downloads and extracts it if
        it doesn't exist locally
        Args:
        local_filename (str): Path to local RG_FILE
        Returns:
        tuple: (list of (lat, lon) pairs, list of row dicts)
        """
        if os.path.exists(local_filename):
            if self.verbose:
                print('Loading formatted geocoded file ...')
            # Materialise the rows so the file can be closed straight away.
            with open(local_filename, 'rt') as cached_file:
                rows = list(csv.DictReader(cached_file))
        else:
            rows = self.do_extract(GN_CITIES1000, local_filename)

        # Load all the coordinates and locations
        geo_coords, locations = [], []
        for row in rows:
            geo_coords.append((row['lat'], row['lon']))
            locations.append(row)
        return geo_coords, locations

    def do_extract(self, geoname_file, local_filename):
        """
        Download the GeoNames dump and build the formatted location file
        Args:
        geoname_file (str): Base name of the GeoNames cities dump (no extension)
        local_filename (str): Path of the formatted CSV to write
        Returns:
        list: The row dicts that were written to ``local_filename``
        """
        gn_cities_url = GN_URL + geoname_file + '.zip'
        gn_admin1_url = GN_URL + GN_ADMIN1
        gn_admin2_url = GN_URL + GN_ADMIN2

        cities_zipfilename = geoname_file + '.zip'
        cities_filename = geoname_file + '.txt'

        if not os.path.exists(cities_zipfilename):
            if self.verbose:
                print('Downloading files from Geoname...')

            import urllib.request
            urllib.request.urlretrieve(gn_cities_url, cities_zipfilename)
            urllib.request.urlretrieve(gn_admin1_url, GN_ADMIN1)
            urllib.request.urlretrieve(gn_admin2_url, GN_ADMIN2)

        if self.verbose:
            print('Extracting %s...' % geoname_file)
        with zipfile.ZipFile(cities_zipfilename) as archive:
            payload = archive.read(cities_filename)
        with open(cities_filename, 'wb') as extracted:
            extracted.write(payload)

        # GeoNames dump files are UTF-8; decode them explicitly instead of
        # relying on the locale's default encoding.
        if self.verbose:
            print('Loading admin1 codes...')
        with open(GN_ADMIN1, 'rt', encoding='utf-8') as admin1_file:
            admin1_map = {
                row[ADMIN_COLUMNS['concatCodes']]:
                    row[ADMIN_COLUMNS['asciiName']]
                for row in csv.reader(admin1_file, delimiter='\t')
            }

        if self.verbose:
            print('Loading admin2 codes...')
        with open(GN_ADMIN2, 'rt', encoding='utf-8') as admin2_file:
            admin2_map = {
                row[ADMIN_COLUMNS['concatCodes']]:
                    row[ADMIN_COLUMNS['asciiName']]
                for row in csv.reader(admin2_file, delimiter='\t')
            }

        if self.verbose:
            print('Creating formatted geocoded file...')
        rows = []
        with open(cities_filename, 'rt', encoding='utf-8') as cities_file:
            for row in csv.reader(cities_file, delimiter='\t',
                                  quoting=csv.QUOTE_NONE):
                cc = row[GN_COLUMNS['countryCode']]
                admin1_c = row[GN_COLUMNS['admin1Code']]
                admin2_c = row[GN_COLUMNS['admin2Code']]

                cc_admin1 = cc + '.' + admin1_c
                cc_admin2 = cc + '.' + admin1_c + '.' + admin2_c

                rows.append({
                    'lat': row[GN_COLUMNS['latitude']],
                    'lon': row[GN_COLUMNS['longitude']],
                    'name': row[GN_COLUMNS['asciiName']],
                    # Unknown admin codes map to an empty name.
                    'admin1': admin1_map.get(cc_admin1, ''),
                    'admin2': admin2_map.get(cc_admin2, ''),
                    'cc': cc
                })

        # The output file is only created once every row is ready, so a
        # failure above does not leave an empty file that a later run would
        # treat as a valid cache. newline='' lets the csv module control
        # line endings itself.
        with open(local_filename, 'wt', newline='') as out_file:
            writer = csv.DictWriter(out_file, fieldnames=RG_COLUMNS)
            writer.writeheader()
            writer.writerows(rows)

        if self.verbose:
            print('Removing extracted %s to save space...' % geoname_file)
        os.remove(cities_filename)

        return rows