def __init__(self,
             database_file=os.path.join(get_data_dir(),
                                        'address_to_coords.csv')):
    """Create the geocoders and load the cached address table, if present.

    Args:
        database_file: Path of the ';'-separated CSV cache. ``None`` selects
            the relative path ``'address_to_coords.csv'``.

    When the file does not exist, ``self.database`` starts as an empty
    DataFrame and ``self.database_file`` still records the chosen path.
    """
    # Resolve the fallback before touching the filesystem:
    # os.path.exists(None) raises TypeError, so a None check placed after
    # the existence test can never be reached.
    if database_file is None:
        database_file = 'address_to_coords.csv'
    self.database_file = database_file

    self.nominatim = Nominatim(user_agent='supplier_map')
    self.ban = BANFrance()

    if os.path.exists(self.database_file):
        self.database = pd.read_csv(self.database_file,
                                    encoding='iso-8859-1',
                                    sep=';')
    else:
        self.database = pd.DataFrame()
Ejemplo n.º 2
0
def geocode_with_Ban(	address,
						time_out	= dft_timeout,
						max_retries	= 3,
					) :
	"""Geocode ``address`` with the BANFrance geocoder.

	Args:
		address: Query string handed to ``BANFrance.geocode``.
		time_out: Timeout forwarded to every geocoding attempt.
		max_retries: How many additional attempts are made after a
			``GeocoderTimedOut``. Negative values are treated as 0.

	Returns:
		Whatever ``BANFrance.geocode`` returns, or ``None`` when every
		attempt timed out or any other error occurred.
	"""
	geocoder_ban = BANFrance(user_agent="solidata_app_to_BAN")
	log.debug("- geocode_with_Ban - ")
	# Bounded loop instead of unbounded recursion: a service that keeps
	# timing out must not exhaust the call stack, and each retry keeps the
	# caller's timeout instead of silently reverting to the default.
	attempts = max(max_retries, 0) + 1
	for attempt in range(attempts):
		try:
			loc = geocoder_ban.geocode(
				query=address,
				timeout=time_out,
			)
		except GeocoderTimedOut:
			log.debug("- geocode_with_Ban - timeout on attempt %s/%s ", attempt + 1, attempts)
			continue
		except Exception as err:
			# Best effort: the caller gets None, but the failure is
			# recorded instead of being dropped silently.
			log.debug("- geocode_with_Ban - failed : %s ", err)
			return None
		log.debug("- loc : \n%s ", loc)
		return loc
	return None
class AddressToCoords:
    """Resolve addresses to (latitude, longitude) pairs with a cached table.

    ``self.database`` is a DataFrame whose rows are added with the columns
    'Address', 'lat' and 'lon'. Lookups are answered from it when possible;
    otherwise the BAN and Nominatim geocoders are queried and the result is
    appended. ``save_database`` writes the table to ``self.database_file``.
    """

    # The cache file is read and written with the same settings so that
    # accented addresses survive a save/load round trip.
    _CSV_ENCODING = 'iso-8859-1'
    _CSV_SEPARATOR = ';'
    _DEFAULT_DATABASE_FILE = 'address_to_coords.csv'

    def __init__(self,
                 database_file=os.path.join(get_data_dir(),
                                            'address_to_coords.csv')):
        """Create the geocoders and load the cached table, if present.

        Args:
            database_file: Path of the CSV cache. ``None`` selects the
                relative path ``'address_to_coords.csv'``.
        """
        # Resolve the fallback first: os.path.exists(None) raises TypeError,
        # so a None check placed after the existence test is unreachable.
        if database_file is None:
            database_file = self._DEFAULT_DATABASE_FILE
        self.database_file = database_file

        self.nominatim = Nominatim(user_agent='supplier_map')
        self.ban = BANFrance()

        if os.path.exists(self.database_file):
            self.database = pd.read_csv(self.database_file,
                                        encoding=self._CSV_ENCODING,
                                        sep=self._CSV_SEPARATOR)
        else:
            self.database = pd.DataFrame()

    def get_online_coordinates(self, address, country='France'):
        """Geocode ``address`` online and append the result to the cache.

        BAN is tried first when ``country`` is 'France'; Nominatim is tried
        when BAN was skipped, timed out or returned nothing.

        Returns:
            ``(latitude, longitude)``, or ``(None, None)`` when neither
            geocoder produced a location.
        """
        time.sleep(1.1)  # Respect the API condition
        location = None
        if country == 'France':
            try:
                location = self.ban.geocode(address)
            except GeocoderTimedOut:
                location = None
            print(f'Adress {address} sought with BAN')
        if location is None:
            try:
                location = self.nominatim.geocode(address)
            except GeocoderTimedOut:
                location = None
            print(f'Adress {address} sought with Nominatim')
        if location is None:
            print(f'Adress not found for : {address}')
            return None, None

        new_row = pd.DataFrame(
            data=[(address, location.latitude, location.longitude)],
            columns=['Address', 'lat', 'lon'])
        # ignore_index keeps row labels unique; without it every appended
        # row is labelled 0 and collides with the rows already present.
        self.database = pd.concat([self.database, new_row],
                                  axis=0,
                                  ignore_index=True)
        return location.latitude, location.longitude

    def save_database(self):
        """Write the cache, rounded to 4 decimals, to ``self.database_file``."""
        # Write with the encoding used for reading. Characters that the
        # encoding cannot represent are replaced rather than aborting the
        # save and losing the whole session's results.
        with open(self.database_file, 'w',
                  encoding=self._CSV_ENCODING,
                  errors='replace',
                  newline='') as handle:
            self.database.round(4).to_csv(handle,
                                          index=False,
                                          sep=self._CSV_SEPARATOR)

    def get_coordinates(self, address, country='France'):
        """Return ``(lat, lon)`` for ``address``, preferring the cache.

        Falls back to ``get_online_coordinates`` when the address is not in
        the cache.
        """
        if 'Address' in self.database:
            # Select by boolean mask rather than by index label, so the
            # result never depends on the labels being unique.
            matches = self.database.loc[self.database['Address'] == address,
                                        ['lat', 'lon']]
            if not matches.empty:
                return matches['lat'].values[0], matches['lon'].values[0]
        return self.get_online_coordinates(address, country)
Ejemplo n.º 4
0
 def make_geocoder(cls, **kwargs):
     """Return a BANFrance geocoder created with ``timeout=10``.

     Any keyword arguments are forwarded to the BANFrance constructor.
     """
     geocoder = BANFrance(timeout=10, **kwargs)
     return geocoder
Ejemplo n.º 5
0
 async def test_user_agent_custom(self):
     """The ``user_agent`` argument ends up in the 'User-Agent' header."""
     geocoder = BANFrance(user_agent='my_user_agent/1.0')
     user_agent_header = geocoder.headers['User-Agent']
     assert user_agent_header == 'my_user_agent/1.0'
Ejemplo n.º 6
0
 def setUpClass(cls):
     """Store ``delta`` (0.04) and a BANFrance geocoder on the class."""
     delta = 0.04
     geocoder = BANFrance(timeout=10)
     cls.delta, cls.geocoder = delta, geocoder
Ejemplo n.º 7
0
 def test_user_agent_custom(self):
     """The ``user_agent`` argument ends up in the 'User-Agent' header."""
     headers = BANFrance(user_agent='my_user_agent/1.0').headers
     self.assertEqual(headers['User-Agent'], 'my_user_agent/1.0')
Ejemplo n.º 8
0
    def __geolocation(self):
        """Fill the address, altitude and coordinate entries of ``self.__dict_id``.

        The seven entries ('adresse1', 'adresse2', 'ville', 'code_postal',
        'altitude', 'longitude', 'latitude') are reset to None and then read
        from ``self.__request_json``. When longitude or latitude is still
        missing, the collected address is geocoded with BANFrance and a
        successful result supplies both coordinates.
        """
        def _lookup(node, *path):
            """Follow ``path`` through nested containers; None if a step is missing."""
            for key in path:
                try:
                    node = node[key]
                except (KeyError, IndexError, TypeError):
                    # Missing key, short list, or a null/scalar where a
                    # container was expected.
                    return None
            return node

        fields = self.__dict_id
        for key in ('adresse1', 'adresse2', 'ville', 'code_postal',
                    'altitude', 'longitude', 'latitude'):
            fields[key] = None

        request = self.__request_json
        # NOTE(review): every field, including the altitude read from
        # 'informations', is only extracted when 'localisation' is present;
        # that gating is kept as-is — confirm it is intended.
        if 'localisation' in request:
            localisation = request['localisation']

            # longitude, latitude: geoJson stores them in that order
            coordinates_path = ('geolocalisation', 'geoJson', 'coordinates')
            fields['longitude'] = _lookup(localisation, *coordinates_path, 0)
            fields['latitude'] = _lookup(localisation, *coordinates_path, 1)

            # adresse1, adresse2
            adresse = _lookup(localisation, 'adresse')
            fields['adresse1'] = _lookup(adresse, 'adresse1')
            fields['adresse2'] = _lookup(adresse, 'adresse2')

            # code_postal, ville: only taken when both the commune and the
            # postal code are present
            commune = _lookup(adresse, 'commune')
            postal_code = _lookup(adresse, 'codePostal')
            if commune is not None and postal_code is not None:
                fields['code_postal'] = postal_code
                fields['ville'] = _lookup(commune, 'nom')

            # altitude: 'informations' first, then 'localisation'
            altitude = _lookup(request, 'informations', 'structureGestion',
                               'geolocalisation', 'altitude')
            if altitude is None:
                altitude = _lookup(localisation, 'geolocalisation', 'altitude')
            fields['altitude'] = altitude

        if fields['longitude'] is None or fields['latitude'] is None:
            address_parts = (fields[key] for key in
                             ('adresse1', 'adresse2', 'code_postal', 'ville'))
            # Each known part is prefixed with a single space.
            address_to_geolocalize = "".join(
                f" {part}" for part in address_parts if part is not None)
            try:
                location = None
                # Nothing to look up without any address part; skipping the
                # call avoids the rate limiter's retries and error waits.
                if address_to_geolocalize.strip():
                    geolocator = BANFrance(
                        domain='api-adresse.data.gouv.fr', timeout=10)
                    geocode = RateLimiter(
                        geolocator.geocode, min_delay_seconds=2, max_retries=4, error_wait_seconds=10.0, swallow_exceptions=True, return_value_on_exception=None)
                    location = geocode(address_to_geolocalize)
                if location is not None:
                    fields['latitude'] = location.latitude
                    fields['longitude'] = location.longitude
                    FileLogger.log(
                        logging.DEBUG, f"{address_to_geolocalize} resolved with latitute {location.latitude} and longitute {location.longitude}")
                else:
                    FileLogger.log(
                        logging.DEBUG, f"{address_to_geolocalize} not resolved !!!!")
            except (GeocoderTimedOut, GeocoderUnavailable, GeocoderQuotaExceeded) as e:
                FileLogger.log(logging.ERROR, "Error: geocode failed on input %s with message %s" %
                               (address_to_geolocalize, str(e)))
##################################################################
# IMPORTS
##################################################################
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
tqdm().pandas()

from geopy.geocoders import BANFrance
from geopy.exc import GeocoderTimedOut
geolocator = BANFrance()

import os

import requests
from bs4 import BeautifulSoup
import re
from unidecode import unidecode

from datetime import datetime as dt

##################################################################

def clean(data_folder='data',
          guy_hoquet_path,
          laforet_path,
          orpi_path):

    # Utility functions
    def print_shape(df):
        print(f'Number of rows    = {df.shape[0]:,}')