Exemple #1
0
    def geocode_siae(self, siae):
        """
        Geocode `siae` in place with `get_geocoding_data` and return it.

        The SIAE must have a non-empty `address_on_one_line` (enforced by an
        assertion). When no geocoding data comes back, a message is written to
        `self.stderr` and the SIAE is returned unchanged. Otherwise the score,
        city and coordinates are always updated, while `address_line_1` is
        only replaced when the score reaches `API_BAN_RELIABLE_MIN_SCORE`.
        """
        assert siae.address_on_one_line

        result = get_geocoding_data(siae.address_on_one_line, post_code=siae.post_code)

        if not result:
            self.stderr.write(f"No geocoding data found for siae.external_id={siae.external_id}")
            return siae

        siae.geocoding_score = result["score"]

        if siae.geocoding_score >= API_BAN_RELIABLE_MIN_SCORE:
            # Reliable result: prefer the BAN API spelling (accents, proper case)
            # over the all-caps source data.
            siae.address_line_1 = result["address_line_1"]
        else:
            # Unreliable result: keep the existing address, even though it is
            # probably wrong or incomplete.
            self.stderr.write(f"Geocoding not reliable for siae.external_id={siae.external_id}")

        # The city can be trusted in both cases because the post code is part of
        # the query, e.g. ST MAURICE DE REMENS => Saint-Maurice-de-Rémens.
        siae.city = result["city"]
        siae.coords = result["coords"]

        return siae
Exemple #2
0
 def set_coords(self, address, post_code=None):
     """
     Geocode `address` and store the resulting coordinates and score.

     `post_code` is forwarded to `get_geocoding_data`. When no geocoding
     data is returned, an error is logged and the instance is left as is.
     """
     result = get_geocoding_data(address, post_code=post_code)
     if result:
         self.coords = result["coords"]
         self.geocoding_score = result["score"]
     else:
         logger.error("No geocoding data could be found for `%s - %s`", address, post_code)
Exemple #3
0
 def set_coords_and_address(self, address, post_code=None):
     """
     Geocode `address` and overwrite both the coordinates and the address.

     On success the coordinates, score, first address line, post code and
     city are taken from the geocoding result and `address_line_2` is
     emptied. When no geocoding data is returned, an error is logged and
     the instance is left as is.
     """
     result = get_geocoding_data(address, post_code=post_code)
     if result:
         self.coords = result["coords"]
         self.geocoding_score = result["score"]
         self.address_line_1 = result["address_line_1"]
         # The geocoding result carries a single address line.
         self.address_line_2 = ""
         self.post_code = result["post_code"]
         self.city = result["city"]
     else:
         logger.error("No geocoding data could be found for `%s - %s`", address, post_code)
Exemple #4
0
    def clean_siret(self):
        """
        Clean the `siret` field and gather the organization data linked to it.

        Spaces are removed from the submitted value before it is passed to
        `validate_siret`. The establishment is then fetched with
        `etablissement_get_or_error` and its address is geocoded; everything
        is stored in `self.org_data` (geocoding values are `None` when no
        geocoding data is available).

        Returns:
            The SIRET without spaces.

        Raises:
            forms.ValidationError: when the establishment lookup reports an
                error or when the establishment is closed.
        """
        # `max_length` is skipped so that users may type any number of spaces.
        siret = self.cleaned_data["siret"].replace(" ", "")
        validate_siret(siret)

        # Name and address come from API entreprise.
        etablissement, error = etablissement_get_or_error(siret)
        if error:
            raise forms.ValidationError(error)
        if etablissement.is_closed:
            raise forms.ValidationError("La base Sirene indique que l'établissement est fermé.")

        # A second API call fetches the geocoding data. `address_line_2` is left
        # out on purpose because it tends to return no results with the BAN API.
        address_parts = (
            etablissement.address_line_1,
            etablissement.post_code,
            etablissement.city,
            etablissement.department,
        )
        address_on_one_line = ", ".join(part for part in address_parts if part)
        geocoding_data = get_geocoding_data(address_on_one_line, post_code=etablissement.post_code)
        if not geocoding_data:
            # Fall back to an empty mapping so the lookups below yield `None`.
            geocoding_data = {}

        org_data = {
            "siret": siret,
            "is_head_office": etablissement.is_head_office,
            "name": etablissement.name,
            "address_line_1": etablissement.address_line_1,
            "address_line_2": etablissement.address_line_2,
            "post_code": etablissement.post_code,
            "city": etablissement.city,
            "department": etablissement.department,
        }
        org_data["longitude"] = geocoding_data.get("longitude")
        org_data["latitude"] = geocoding_data.get("latitude")
        org_data["geocoding_score"] = geocoding_data.get("score")
        self.org_data = org_data

        return siret
Exemple #5
0
def geocode_siae(siae):
    """
    Geocode `siae` in place with `get_geocoding_data` and return it.

    The SIAE is returned unchanged when its `geocoding_address` is `None` or
    when no geocoding data comes back. Otherwise the score, city and
    coordinates are always updated, while `address_line_1` is only replaced
    when the score reaches `AddressMixin.API_BAN_RELIABLE_MIN_SCORE`.
    """
    if siae.geocoding_address is None:
        return siae

    result = get_geocoding_data(siae.geocoding_address, post_code=siae.post_code)
    if not result:
        return siae

    siae.geocoding_score = result["score"]

    # Reliable result: prefer the BAN API spelling (accents, proper case) over
    # the all-caps source data. Otherwise the existing address is kept, even
    # though it is probably wrong or incomplete.
    if siae.geocoding_score >= AddressMixin.API_BAN_RELIABLE_MIN_SCORE:
        siae.address_line_1 = result["address_line_1"]

    # The city can be trusted in both cases because the post code is part of
    # the query, e.g. ST MAURICE DE REMENS => Saint-Maurice-de-Rémens.
    siae.city = result["city"]
    siae.coords = result["coords"]

    return siae
Exemple #6
0
    def handle(self, dry_run=False, **options):
        """
        Create one `Siae` per data row of the `;`-delimited `CSV_FILE`.

        Every row is parsed and sanity-checked with assertions. Rows whose SIRET
        was already seen (tracked in `SEEN_SIRET`) are skipped. Unless `dry_run`
        is set, a `Siae` is built, geocoded with `get_geocoding_data` when it
        has an address, and saved.

        Args:
            dry_run: when true, rows are parsed and checked but nothing is
                geocoded or saved.
            **options: command options; only `verbosity` is read here.
        """

        self.set_logger(options.get("verbosity"))

        with open(CSV_FILE) as csvfile:

            # Count lines in CSV (header included) to report progress.
            reader = csv.reader(csvfile, delimiter=";")
            row_count = sum(1 for _ in reader)
            last_progress = 0
            # Reset the iterator to iterate through the reader again.
            csvfile.seek(0)

            for i, row in enumerate(reader):

                if i == 0:
                    # Skip CSV header.
                    continue

                progress = int((100 * i) / row_count)
                if progress > last_progress + 5:
                    self.stdout.write(f"Creating SIAEs… {progress}%")
                    last_progress = progress

                self.logger.debug("-" * 80)

                siret = row[7]
                self.logger.debug(siret)
                assert len(siret) == 14

                naf = row[5]
                self.logger.debug(naf)
                assert len(naf) == 5

                kind = row[0]
                self.logger.debug(kind)
                assert kind in KINDS

                # Max length of `name` is 50 chars in the source file, some are truncated.
                # Also `name` is in upper case.
                name = row[8].strip()
                # Replace multiple spaces by a single space.
                name = " ".join(name.split())
                self.logger.debug(name)

                email = row[14].strip()
                self.logger.debug(email)
                assert " " not in email

                street_num = row[9].strip().replace(" ", "")
                street_name = row[10].strip().lower()
                # Replace multiple spaces by a single space.
                street_name = " ".join(street_name.split())
                address_line_1 = f"{street_num} {street_name}"
                # Replace multiple spaces by a single space.
                address_line_1 = " ".join(address_line_1.split())
                address_line_2 = ""
                if " - " in address_line_1:
                    # Cut on the first separator only, so that everything after it
                    # ends up in `address_line_2`. A plain `split(" - ")` would
                    # silently drop any part after the second one.
                    address_line_1, _, address_line_2 = address_line_1.partition(" - ")
                self.logger.debug(address_line_1)
                self.logger.debug(address_line_2)

                # Fields are identical, we can use one or another.
                post_code = row[3].strip()
                post_code2 = row[11].strip()
                self.logger.debug(post_code)
                assert post_code == post_code2

                # Fields are identical, we can use one or another.
                city = row[4].strip()
                city_name = row[12].strip()
                self.logger.debug(city)
                assert city_name == city

                department = row[1]
                if department[0] == "0":
                    # Drop the leading zero.
                    department = department[1:]
                if department in ["59L", "59V"]:
                    department = "59"
                if department not in ["2A", "2B"] and not post_code.startswith(department):
                    # Fix wrong departments using the post code.
                    department = post_code[:len(department)]
                self.logger.debug(department)
                assert department in DEPARTMENTS

                siae_info = f"{siret} {name} - {address_line_1} - {post_code} {city}."

                phone = row[13].strip().replace(" ", "")
                if phone and len(phone) != 10:
                    # A malformed phone number is reported and dropped, the row is kept.
                    self.stderr.write(f"Wrong phone `{phone}`. {siae_info}.")
                    phone = ""
                self.logger.debug(phone)

                if siret in SEEN_SIRET:
                    # First come, first served.
                    self.stderr.write(f"Siret already seen. Skipping {siae_info}.")
                    continue
                SEEN_SIRET.add(siret)

                if not dry_run:

                    siae = Siae()
                    siae.siret = siret
                    siae.naf = naf
                    siae.kind = kind
                    siae.source = Siae.SOURCE_ASP
                    siae.name = name
                    siae.phone = phone
                    siae.email = email
                    siae.address_line_1 = address_line_1
                    siae.address_line_2 = address_line_2
                    siae.post_code = post_code
                    siae.city = city
                    siae.department = department

                    if siae.address_on_one_line:

                        geocoding_data = get_geocoding_data(siae.address_on_one_line, post_code=siae.post_code)

                        if not geocoding_data:
                            # Still save the SIAE, just without score and coordinates.
                            self.stderr.write(f"No geocoding data found for {siae_info}")
                            siae.save()
                            continue

                        siae.geocoding_score = geocoding_data["score"]
                        # If the score reaches API_BAN_RELIABLE_MIN_SCORE, coords are reliable:
                        # use data returned by the BAN API because it's better written using accents etc.
                        # while the source data is in all caps etc.
                        # Otherwise keep the old address (which is probably wrong or incomplete).
                        if siae.geocoding_score >= API_BAN_RELIABLE_MIN_SCORE:
                            siae.address_line_1 = geocoding_data["address_line_1"]
                        else:
                            self.stderr.write(f"Geocoding not reliable for {siae_info}")
                        # City is always good due to `postcode` passed in query.
                        # ST MAURICE DE REMENS => Saint-Maurice-de-Rémens
                        siae.city = geocoding_data["city"]

                        self.logger.debug("-" * 40)
                        self.logger.debug(siae.address_line_1)
                        self.logger.debug(siae.city)

                        siae.coords = geocoding_data["coords"]

                    siae.save()

        self.stdout.write("-" * 80)
        self.stdout.write("Done.")
Exemple #7
0
def format_address(obj):
    """
    Convert the address held by `obj` into the address "structure" expected
    by ASP ER exports.

    Most of the work is delegated to the geo.api.gouv.fr API:
    - extraction of the lane number and of its extension
    - a geocoding score / threshold used to improve an existing DB address
    - validation of a real-world address

    Employee records ("Fiches salarié") contain 2 addresses of this kind.

    See the validation of ASP addresses for the expected/valid fields.

    Output fields:
    - number (opt.): number in the lane
    - std_extension (opt.): one of the ASP ref lane extensions (see LaneExtension)
    - non_std_extension (opt.): set when another extension is detected
    - lane: name of the lane
    - lane_type: one of the ASP ref lane types (see LaneType)
    - additional_address: further details on the address (if available)
    - city: name of the city
    - post_code: postal code
    - insee_code: INSEE code of the city (Itou)

    The INSEE code can be checked against the ASP ref for further validation.

    Returns a (result, error) tuple:
    - OK => (result_dict, None)
    - KO => (None, error_message)
    """
    if not obj:
        return None, ERROR_HEXA_CONVERSION

    # Both the post code and the first address line are required for an extraction.
    if not (obj.post_code and obj.address_line_1):
        return None, ERROR_INCOMPLETE_ADDRESS_DATA

    # The geo API provides the lane and the lane number.
    address = get_geocoding_data(obj.address_line_1, post_code=obj.post_code)
    if not address:
        return None, ERROR_GEOCODING_API

    # Default values: the second address line is only kept when it matches
    # ADDITIONAL_ADDRESS_RE.
    extra_details = unidecode(obj.address_line_2)
    if not re.match(ADDITIONAL_ADDRESS_RE, extra_details):
        extra_details = ""
    result = {
        "number": "",
        "non_std_extension": "",
        "additional_address": extra_details,
    }

    # Street extension processing (bis, ter ...): the extension is part of the
    # street number field returned by the geo API.
    raw_number = address.get("number")
    if raw_number:
        # API change: the extension can now be "stuck" to the lane number,
        # which was not the case before (there was a space in between).
        # The regex splits both parts again, to fix ASP error 3323.
        ((number, extension),) = re.findall(LANE_NUMBER_RE, raw_number)

        if number:
            result["number"] = number

        if extension:
            # Only the first character of the extension is looked up.
            initial = extension[0]
            known_extension = LaneExtension.with_similar_name_or_value(initial)
            if known_extension:
                result["std_extension"] = known_extension.name or ""
            else:
                result["non_std_extension"] = initial.upper()

    # The lane comes from the "lane" field, with "address" as a fallback.
    lane = address.get("lane") or address.get("address")
    if not lane:
        return None, ERROR_UNKNOWN_ADDRESS_LANE

    lane = unidecode(lane)
    result["lane"] = lane

    # Lane type processing (Avenue, Rue, Boulevard ...): the first word of the
    # lane is the candidate lane type.
    lane_type, *rest = lane.split(maxsplit=1)

    # 1. The API field is similar to a known lane type name,
    #    example: got "Av" for name "AV" (Avenue).
    lt = LaneType.with_similar_name(lane_type)
    if not lt:
        # 2. The API field is similar to an existing value,
        #    example: got "allee" for "Allée".
        lt = LaneType.with_similar_value(lane_type)
    if not lt:
        # 3. The geo API sometimes misspells the lane type, so an aliases table
        #    is used as a last chance, example: got "R" or "r" instead of "Rue".
        lt = find_lane_type_aliases(lane)

    if not lt:
        return None, f"Impossible de trouver le type de voie : {lane_type} pour l'adresse : {address}"

    result["lane_type"] = lt.name
    # When the split produced a remainder, the lane type is stripped from the
    # lane name for a better result.
    result["lane"] = rest[0] if rest else lane_type

    # INSEE code: must be double-checked against the ASP ref file.
    result["insee_code"] = address.get("insee_code")
    result["post_code"] = address.get("post_code")
    result["city"] = address.get("city")

    return result, None
Exemple #8
0
    def handle(self, dry_run=False, **options):
        """
        Create one GEIQ `Siae` per data row of the `;`-delimited `CSV_FILE`.

        Every row is parsed and sanity-checked with assertions. Rows whose SIRET
        was already seen (tracked in `SEEN_SIRET`) are skipped. The department
        is derived from the post code. Unless `dry_run` is set, a `Siae` is
        built, geocoded with `get_geocoding_data` (trying several increasingly
        loose queries) when it has an address, and saved.

        Args:
            dry_run: when true, rows are parsed and checked but nothing is
                geocoded or saved.
            **options: command options; only `verbosity` is read here.
        """

        self.set_logger(options.get("verbosity"))

        with open(CSV_FILE) as csvfile:

            reader = csv.reader(csvfile, delimiter=";")

            for i, row in enumerate(reader):

                if i == 0:
                    # Skip CSV header.
                    continue

                self.logger.debug("-" * 80)

                name = row[0].strip()
                # Replace multiple spaces by a single space.
                name = " ".join(name.split())
                self.logger.debug(name)

                address_line_1 = row[1].strip()
                address_line_1 = " ".join(address_line_1.split())
                self.logger.debug(address_line_1)

                address_line_2 = row[2].strip()
                address_line_2 = " ".join(address_line_2.split())
                self.logger.debug(address_line_2)

                city = row[3].strip()
                city = " ".join(city.split())
                self.logger.debug(city)

                post_code = row[4].strip()
                post_code = " ".join(post_code.split())
                self.logger.debug(post_code)

                email = row[5].strip()
                self.logger.debug(email)
                assert " " not in email

                phone = row[6].strip().replace(" ", "")
                assert len(phone) == 10
                self.logger.debug(phone)

                naf = row[7]
                self.logger.debug(naf)
                if naf:
                    assert len(naf) == 5

                siret = row[8].strip()
                self.logger.debug(siret)
                assert len(siret) == 14

                if siret in SEEN_SIRET:
                    self.stderr.write("Siret already seen. Skipping.")
                    continue
                SEEN_SIRET.add(siret)

                # Reset for every row: a post code starting with "20" but not with
                # 200/201/202 matches none of the branches below. Without this reset
                # `department` would be unbound on the first row, or would silently
                # keep the value of the previous row. With it, the assertion below
                # fails explicitly.
                department = None
                if post_code.startswith("20"):
                    if post_code.startswith(("200", "201")):
                        department = "2A"
                    elif post_code.startswith("202"):
                        department = "2B"
                elif post_code.startswith(("97", "98")):
                    department = post_code[:3]
                else:
                    department = post_code[:2]
                self.logger.debug(department)
                assert department in DEPARTMENTS

                siae_info = f"{siret} {name} - {address_line_1} - {post_code} {city}."

                self.logger.debug(siae_info)

                if not dry_run:

                    siae = Siae()
                    siae.siret = siret
                    siae.naf = naf
                    siae.kind = Siae.KIND_GEIQ
                    siae.source = Siae.SOURCE_GEIQ
                    siae.name = name
                    siae.phone = phone
                    siae.email = email
                    siae.address_line_1 = address_line_1
                    siae.address_line_2 = address_line_2
                    siae.post_code = post_code
                    siae.city = city
                    siae.department = department

                    if siae.address_on_one_line:

                        # Queries tried in order until one reaches
                        # API_BAN_RELIABLE_MIN_SCORE: full address with the post code,
                        # with the first two digits of the post code followed by "000",
                        # without any post code, then each address line on its own.
                        # The result of the last query tried is the one kept, even
                        # when it is empty or unreliable.
                        attempts = (
                            (siae.address_on_one_line, {"post_code": siae.post_code}),
                            (siae.address_on_one_line, {"post_code": f"{siae.post_code[:2]}000"}),
                            (siae.address_on_one_line, {}),
                            (siae.address_line_1, {}),
                            (siae.address_line_2, {}),
                        )
                        geocoding_data = None
                        for query, extra_kwargs in attempts:
                            geocoding_data = get_geocoding_data(query, **extra_kwargs)
                            if geocoding_data and geocoding_data["score"] >= API_BAN_RELIABLE_MIN_SCORE:
                                break

                        if not geocoding_data:
                            # Still save the SIAE, just without score and coordinates.
                            self.stderr.write(f"No geocoding data found for {siae_info}")
                            siae.save()
                            continue

                        siae.geocoding_score = geocoding_data["score"]
                        # If the score reaches API_BAN_RELIABLE_MIN_SCORE, coords are reliable:
                        # use data returned by the BAN API because it's better written using accents etc.
                        # while the source data is in all caps etc.
                        # Otherwise keep the old address (which is probably wrong or incomplete).
                        if siae.geocoding_score >= API_BAN_RELIABLE_MIN_SCORE:
                            siae.address_line_1 = geocoding_data["address_line_1"]
                            siae.city = geocoding_data["city"]
                        else:
                            self.stderr.write(f"Geocoding not reliable for {siae_info}\n{siae.address_on_one_line}")

                        self.logger.debug("-" * 40)
                        self.logger.debug(siae.address_line_1)
                        self.logger.debug(siae.city)

                        siae.coords = geocoding_data["coords"]

                    siae.save()

        self.stdout.write("-" * 80)
        self.stdout.write("Done.")
Exemple #9
0
    def handle(self, dry_run=False, **options):
        """
        Create one authorized `PrescriberOrganization` per data row of the
        `,`-delimited `CSV_FILE`.

        Every row is parsed and partially sanity-checked with assertions.
        Unless `dry_run` is set, an organization is built, geocoded with
        `get_geocoding_data` and saved. An organization whose address cannot
        be geocoded is reported on stderr and saved without coordinates.

        Args:
            dry_run: when true, rows are parsed and checked but nothing is
                geocoded or saved.
            **options: command options; only `verbosity` is read here.
        """

        self.set_logger(options.get("verbosity"))

        with open(CSV_FILE) as csvfile:

            # Count lines in CSV (header included) to report progress.
            reader = csv.reader(csvfile, delimiter=",")
            row_count = sum(1 for _ in reader)
            last_progress = 0
            # Reset the iterator to iterate through the reader again.
            csvfile.seek(0)

            for i, row in enumerate(reader):

                if i == 0:
                    # Skip CSV header.
                    continue

                progress = int((100 * i) / row_count)
                if progress > last_progress + 5:
                    self.stdout.write(f"Creating prescriber organizations… {progress}%")
                    last_progress = progress

                self.logger.debug("-" * 80)

                city = row[7].strip()
                self.logger.debug(city)

                department = row[0].strip()
                assert department in DEPARTMENTS
                self.logger.debug(department)

                # Generic names are suffixed with the city or the department.
                name = row[1].strip()
                if name == "MISSION LOCALE":
                    name = f"{name} - {city}"
                elif name in [
                    "CAP EMPLOI",
                    "DIRECTION TERRITORIALE DE LA PROTECTION JUDICIAIRE DE LA JEUNESSE",
                    "POLE EMPLOI",
                    "SERVICE PENITENTIAIRE D'INSERTION ET DE PROBATION",
                ]:
                    name = f"{name} - {department}"
                self.logger.debug(name)

                phone = row[10].strip()
                if phone:
                    assert len(phone) == 10
                    self.logger.debug(phone)

                email = row[9].strip()
                self.logger.debug(email)

                website = row[11].strip()
                self.logger.debug(website)

                post_code = row[6].strip()
                self.logger.debug(post_code)

                address_line_1 = row[2].strip()
                self.logger.debug(address_line_1)

                # The second address line gathers every non-empty extra address field.
                complement = row[3].strip()
                complement_2 = row[4].strip()
                bp_cs = row[5].strip()
                cedex = row[8].strip()
                address_line_2 = " - ".join(item for item in (complement, complement_2, bp_cs, cedex) if item)
                self.logger.debug(address_line_2)

                if not dry_run:

                    prescriber_organization = PrescriberOrganization()

                    prescriber_organization.is_authorized = True
                    prescriber_organization.name = name
                    prescriber_organization.phone = phone
                    prescriber_organization.email = email
                    prescriber_organization.website = website
                    prescriber_organization.address_line_1 = address_line_1
                    prescriber_organization.address_line_2 = address_line_2
                    prescriber_organization.post_code = post_code
                    prescriber_organization.city = city
                    prescriber_organization.department = department

                    geocoding_data = get_geocoding_data(
                        f"{prescriber_organization.address_line_1}, "
                        f"{prescriber_organization.post_code} {prescriber_organization.city}"
                    )
                    if geocoding_data:
                        prescriber_organization.coords = geocoding_data["coords"]
                    else:
                        # `get_geocoding_data` can come back empty: report it and save
                        # the organization without coordinates instead of aborting the
                        # whole import on a subscript error.
                        self.stderr.write(
                            f"No geocoding data found for {name} - {address_line_1} - {post_code} {city}."
                        )

                    prescriber_organization.save()

        self.stdout.write("-" * 80)
        self.stdout.write("Done.")