Example #1
0
    def c4_create_siae(self, c1_siae, dry_run):
        """
        Prepare a C1 record and, unless `dry_run` is truthy, save it as a Siae.

        `c1_siae` is modified in place: its "id" key is renamed to "c1_id" and
        the description / contact / delisting fields are initialised. When
        `dry_run` is truthy nothing is created and nothing more is written.
        """
        self.stdout.write("Creating Siae...")

        # the C1 identifier is stored under "c1_id"
        rename_dict_key(c1_siae, "id", "c1_id")

        # contact fields start from the C1 values
        c1_siae["description"] = ""
        c1_siae["contact_website"] = c1_siae["website"]
        contact_email = c1_siae["admin_email"]
        if not contact_email:
            # no admin e-mail: fall back to the generic one
            contact_email = c1_siae["email"]
        c1_siae["contact_email"] = contact_email
        c1_siae["contact_phone"] = c1_siae["phone"]

        # TODO: call API Entreprise

        c1_siae["is_delisted"] = False

        if dry_run:
            return

        new_siae = Siae.objects.create(**c1_siae)
        self.add_siae_to_contact_list(new_siae)
        self.stdout.write(
            f"New Siae created / {new_siae.id} / {new_siae.name} / {new_siae.siret}"
        )
    def migrate_siae_label(self, cur):
        """
        Rebuild SiaeLabel from the `directory_label` table.

        Every existing SiaeLabel is deleted, then one object is created per
        row fetched through `cur` after its keys have been renamed and its
        date fields cleaned up.
        """
        print("-" * 80)
        print("Migrating SiaeLabel...")

        SiaeLabel.objects.all().delete()

        cur.execute("SELECT * FROM directory_label")

        for row in cur.fetchall():
            # "directory_id" becomes "siae_id"
            rename_dict_key(row, "directory_id", "siae_id")

            # date handling
            cleanup_date_field_names(row)
            make_aware_dates(row)

            # the source row's "id" is not passed on to the new object
            row.pop("id")

            SiaeLabel.objects.create(**row)

        label_count = SiaeLabel.objects.count()
        print(f"Created {label_count} labels !")
    def migrate_siae_client_reference_logo(self, cur):
        """
        Rebuild SiaeClientReference from the `directory_client_image` table.

        Every existing SiaeClientReference is deleted, then one object is
        created per row fetched through `cur` after its date fields have been
        cleaned up and its keys renamed.
        """
        print("-" * 80)
        print("Migrating SiaeClientReference...")

        SiaeClientReference.objects.all().delete()

        cur.execute("SELECT * FROM directory_client_image")

        # (old key, new key) pairs, applied in this exact order: "name" must be
        # moved to "image_name" before "description" takes over the "name" key
        key_renames = (
            ("name", "image_name"),
            ("description", "name"),
            ("position", "order"),
            ("directory_id", "siae_id"),
        )

        for row in cur.fetchall():
            # date handling
            cleanup_date_field_names(row)
            make_aware_dates(row)

            for old_key, new_key in key_renames:
                rename_dict_key(row, old_key, new_key)

            # the source row's "id" is not passed on to the new object
            row.pop("id")

            SiaeClientReference.objects.create(**row)

        reference_count = SiaeClientReference.objects.count()
        print(f"Created {reference_count} client references !")
def extract_duplicates(esat_list):
    """
    Print every ESAT record whose SIRET already appeared earlier in the list.

    Each record of `esat_list` is modified in place: "Raison Sociale" is
    renamed to "name", "Siret" to "siret", and spaces are removed from the
    SIRET. For every record whose SIRET was already seen, the current record
    and the first record seen with that SIRET are printed.

    Returns None. Raises KeyError if a record has no "siret" or "name" key
    once the renaming is done.
    """
    # siret -> name of the first record seen with that siret.
    # A dict gives O(1) lookups instead of rescanning a list for every record.
    first_name_by_siret = {}

    for esat in esat_list:
        rename_dict_key(esat, "Raison Sociale", "name")
        rename_dict_key(esat, "Siret", "siret")
        if "siret" in esat:
            esat["siret"] = esat["siret"].replace(" ", "")

        siret = esat["siret"]
        if siret not in first_name_by_siret:
            first_name_by_siret[siret] = esat["name"]
        else:
            print("===========")
            print("Current", esat["name"], siret)
            print("Duplicate", first_name_by_siret[siret], siret)
    def migrate_user(self, cur):
        """
        Re-import users from the `user` table.

        Users whose api_key is null are deleted and the SQL sequences of the
        "users" app are reset before the import. Each row fetched through
        `cur` is renamed and cleaned up, then saved as a User. Rows whose
        "kind" is falsy are skipped; errors raised while creating a user are
        printed and the import carries on.
        """
        print("-" * 80)
        print("Migrating User...")

        User.objects.filter(api_key__isnull=True).delete()
        reset_app_sql_sequences("users")

        cur.execute("SELECT * FROM user")

        # (old key, new key) pairs, applied in order.
        # The renames of "accept_survey" and "accept_rgpd" are disabled.
        key_renames = (
            ("enabled", "is_active"),
            ("id", "c4_id"),
            ("phone_prefix", "c4_phone_prefix"),
            ("time_zone", "c4_time_zone"),
            ("website", "c4_website"),
            ("siret", "c4_siret"),
            ("naf", "c4_naf"),
            ("phone_verified", "c4_phone_verified"),
            ("email_verified", "c4_email_verified"),
            ("id_card_verified", "c4_id_card_verified"),
            ("offers_for_pro_sector", "accept_offers_for_pro_sector"),
            ("quote_promise", "accept_quote_promise"),
        )

        # prefix of the serialized "roles" value -> flag switched on
        role_flags = (
            ("a:1:{i:0;s:10", "is_staff"),
            ("a:1:{i:0;s:16", "is_superuser"),
        )

        for row in cur.fetchall():
            for old_key, new_key in key_renames:
                rename_dict_key(row, old_key, new_key)

            # field cleanup
            cleanup_date_field_names(row)
            make_aware_dates(row)
            integer_to_boolean(row)

            # "person_type" is mapped to "kind"
            if "person_type" in row:
                row["kind"] = map_user_kind(row["person_type"])

            # staff / superuser flags are derived from "roles"
            if "roles" in row:
                roles = row["roles"]
                for prefix, flag in role_flags:
                    if roles.startswith(prefix):
                        row[flag] = True

            # drop the keys that are not passed to User
            for key in USER_EXTRA_KEYS:
                row.pop(key)

            # users without a kind are ignored
            if not row["kind"]:
                continue

            try:
                User.objects.create(**row)
            except Exception as e:
                print("a", e)

        user_count = User.objects.count()
        print(f"Created {user_count} users !")
    def migrate_siae_image(self, cur):  # noqa C901
        """
        Migrate SiaeImage data
        - first get list from 'listing'
        - enrich with 'listing_translation'
        - finally get all the images from 'listing_image'

        User -- Listing(s) -- Image(s)

        All existing SiaeImage objects are deleted first. An image is only
        created when the user of its listing is found (User.c4_id) and is
        linked to exactly one siae; every other case is counted in an error
        summary printed at the end.

        Listings are looked up by id through a dict. The previous index-based
        lookup tested the index for truthiness, so the listing at index 0 was
        never enriched and never received its images.
        """
        print("-" * 80)
        print("Migrating SiaeImage...")

        SiaeImage.objects.all().delete()

        # listings in fetch order, plus an id -> listing index for O(1) lookups
        siae_listing_list = []
        listing_by_id = {}

        cur.execute("SELECT * FROM listing")
        resp = cur.fetchall()

        print(f"Found {len(resp)} Siae listings...")

        for elem in resp:
            # cleanup dates
            cleanup_date_field_names(elem)
            make_aware_dates(elem)

            # rename fields
            rename_dict_key(elem, "id", "listing_id")

            # keep only the useful keys
            listing = {
                key: elem[key]
                for key in
                ["listing_id", "user_id", "created_at", "updated_at"]
            }
            listing["images"] = []

            siae_listing_list.append(listing)
            # if an id ever repeats, the first listing wins (as before)
            listing_by_id.setdefault(listing["listing_id"], listing)

        cur.execute("SELECT * FROM listing_translation")
        resp = cur.fetchall()

        for elem in resp:
            # rename fields
            rename_dict_key(elem, "title", "name")

            # keep only the useful keys ("rules" is left out)
            translation = {
                key: elem[key]
                for key in ["translatable_id", "name", "description"]
            }

            # enrich the corresponding listing, if any
            listing = listing_by_id.get(translation["translatable_id"])
            if listing is not None:
                listing.update(translation)

        cur.execute("SELECT * FROM listing_image")
        resp = cur.fetchall()

        print(f"Found {len(resp)} Siae images...")

        for elem in resp:
            # rename fields
            rename_dict_key(elem, "name", "image_name")

            # keep only the useful keys
            image = {
                key: elem[key]
                for key in ["listing_id", "image_name", "position"]
            }

            # attach the image to the corresponding listing, if any
            listing = listing_by_id.get(image["listing_id"])
            if listing is not None:
                listing["images"].append(image)

        error_count = {
            "listing_without_image": 0,
            "user_not_found": 0,
            "user_no_siae": 0,
            "user_multiple_siae": 0
        }
        for siae_listing in siae_listing_list:
            images = siae_listing["images"]
            if not images:
                error_count["listing_without_image"] += 1
                continue

            # All images of a listing share the listing's user, so the
            # user / siae lookup is done once per listing. The counters are
            # still expressed in number of images.
            user = User.objects.prefetch_related("siaes").filter(
                c4_id=siae_listing["user_id"]).first()
            if user is None:
                error_count["user_not_found"] += len(images)
                continue

            siae_count = user.siaes.count()
            if siae_count > 1:
                error_count["user_multiple_siae"] += len(images)
                continue
            if siae_count == 0:
                error_count["user_no_siae"] += len(images)
                continue

            siae = user.siaes.first()

            for siae_image in images:
                # listing fields, overridden / completed by the image fields
                siae_image_dict = siae_listing | siae_image

                # rename fields
                rename_dict_key(siae_image_dict, "listing_id",
                                "c4_listing_id")
                rename_dict_key(siae_image_dict, "position", "order")

                siae_image_dict["siae_id"] = siae.id

                # remove useless keys; "translatable_id" is absent when the
                # listing has no translation, hence the tolerant pop
                for key in ["translatable_id", "user_id", "images"]:
                    siae_image_dict.pop(key, None)

                # create object
                SiaeImage.objects.create(**siae_image_dict)

        print(f"Created {SiaeImage.objects.count()} siae images !")
        print("Errors", error_count)
    def migrate_siae(self, cur):
        """
        Migrate Siae data

        All existing Siae objects are deleted, then one Siae is created per
        row of the `directory` table (fetched through `cur`, active rows
        first). Rows are renamed, cleaned and mapped before creation; when
        both latitude and longitude are set, a "coords" point is built from
        them. Errors raised while creating a Siae are printed and the
        migration carries on.
        """
        # local import: only this method needs it
        import json

        print("-" * 80)
        print("Migrating Siae...")

        Siae.objects.all().delete()

        cur.execute("SELECT * FROM directory ORDER BY is_active DESC")
        resp = cur.fetchall()

        for elem in resp:
            # rename fields
            rename_dict_key(elem, "geo_range", "geo_range_custom_distance")
            rename_dict_key(elem, "pol_range", "geo_range")
            rename_dict_key(elem, "c4_id", "c4_id_old")
            rename_dict_key(elem, "c1_source",
                            "source")  # changed after the migration

            # cleanup fields
            cleanup_date_field_names(elem)
            make_aware_dates(elem)
            integer_to_boolean(elem)

            # cleanup nature
            if "nature" in elem:
                elem["nature"] = map_siae_nature(elem["nature"])

            # cleanup presta_type
            if "presta_type" in elem:
                elem["presta_type"] = map_siae_presta_type(elem["presta_type"])

            # cleanup geo_range
            if "geo_range" in elem:
                elem["geo_range"] = map_geo_range(elem["geo_range"])

            # create coords from latitude & longitude (both must be set)
            if elem.get("latitude") and elem.get("longitude"):
                coords = {
                    "type": "Point",
                    "coordinates": [
                        float(elem["longitude"]),
                        float(elem["latitude"]),
                    ],
                }
                # Feed `GEOSGeometry` with real GeoJSON: json.dumps emits
                # double-quoted JSON, whereas str(dict) is a Python repr
                # with single quotes.
                elem["coords"] = GEOSGeometry(json.dumps(coords))

            # remove useless keys
            for key in DIRECTORY_EXTRA_KEYS:
                elem.pop(key)

            # create object
            try:
                Siae.objects.create(**elem)
            except Exception as e:
                print(e)

        print(f"Created {Siae.objects.count()} siaes !")
Example #8
0
    def import_sep(self, siae, source="sep"):  # noqa C901
        """
        Import one SEP record as a Siae.

        `siae` is a dict of raw columns and is modified in place: columns are
        renamed to Siae field names, values are normalised, the record is
        enriched with geocoding data, and the columns that are not passed to
        Siae are removed. A Siae is then created and its sectors are set.
        Errors raised during creation are printed, not raised.

        `source` is stored under "import_source" in the raw copy of the
        record ("import_raw_object").

        The stripped name, siret and phone are now stored; the results of
        `.strip()` used to be discarded.
        """
        # store raw dict
        siae["import_source"] = source
        siae["import_raw_object"] = siae.copy()

        # defaults
        siae["kind"] = Siae.KIND_SEP
        # NOTE(review): "source" receives the KIND_SEP constant - confirm that
        # no dedicated SOURCE_* constant is expected here
        siae["source"] = Siae.KIND_SEP
        siae["geo_range"] = Siae.GEO_RANGE_DEPARTMENT

        # basic fields
        rename_dict_key(siae, "Raison sociale", "name")
        siae["name"] = siae["name"].strip()
        rename_dict_key(siae, "Enseigne", "brand")
        rename_dict_key(siae, "Siret", "siret")
        if "siret" in siae:
            # drop every whitespace character (regular and non-breaking
            # spaces included), wherever it is in the value
            siae["siret"] = "".join(siae["siret"].split())
            if validate_siret(siae["siret"]):
                siae["siret_is_valid"] = True

        # presta_type
        siae["presta_type"] = []
        for presta_type_name in PRESTA_TYPE_NAME_LIST:
            if presta_type_name in siae and siae[presta_type_name]:
                siae["presta_type"].append(
                    PRESTA_TYPE_MAPPING[siae[presta_type_name]])

        # contact fields
        rename_dict_key(siae, "Prénom 1", "contact_first_name")
        rename_dict_key(siae, "Nom 1", "contact_last_name")
        rename_dict_key(siae, "Site internet", "website")
        siae["contact_website"] = siae["website"]
        rename_dict_key(siae, "Email 1", "email")
        siae["contact_email"] = siae["email"]
        rename_dict_key(siae, "Téléphone", "phone")
        siae["phone"] = siae["phone"].strip()
        siae["contact_phone"] = siae["phone"]

        # geo fields
        rename_dict_key(siae, "Adresse", "address")
        rename_dict_key(siae, "Code Postal", "post_code")
        if "post_code" in siae:
            siae["department"] = department_from_postcode(siae["post_code"])
            siae["region"] = DEPARTMENT_TO_REGION[siae["department"]]
        rename_dict_key(siae, "Ville", "city")

        # enrich with geocoding
        geocoding_data = get_geocoding_data(siae["address"] + " " +
                                            siae["city"],
                                            post_code=siae["post_code"])
        if geocoding_data:
            if siae["post_code"] != geocoding_data["post_code"]:
                if siae["post_code"][:2] == geocoding_data["post_code"][:2]:
                    # same first two characters: trust the geocoder and
                    # update post_code as well
                    siae["coords"] = geocoding_data["coords"]
                    siae["post_code"] = geocoding_data["post_code"]
                else:
                    print(
                        f"Geocoding found a different place,{siae['name']},{siae['post_code']},{geocoding_data['post_code']}"  # noqa
                    )
            else:
                siae["coords"] = geocoding_data["coords"]
        else:
            print(f"Geocoding not found,{siae['name']},{siae['post_code']}")

        # enrich with API Entreprise, API QPV, API ZRR?
        # done in weekly CRON job

        # sectors
        siae_sectors = [
            Sector.objects.get(name=sector_name)
            for sector_name in siae["Secteurs d'act list"]
        ]

        # cleanup unused fields
        # columns that must be present (KeyError otherwise)
        for key in [
                "Secteurs d'act list",  # temporary fields
                "Gestionnaires",
                "import_source",
                "Type de structure",
                "Département",
                "Région",
                "Périmètre d'intervention",
        ]:
            siae.pop(key)
        # columns that may be absent
        for key in [
                "Prénom de l'utilisateur principal",
                "Nom 2",
                "Prénom 2",
                "Email 2",
                "Nom 3",
                "Prénom 3",
                "Email 3",
        ]:
            siae.pop(key, None)
        # columns that must be present (KeyError otherwise)
        for key in [
                "Logo",
                "nombre de salariés",
                "nombre d'opérateurs",
                "Date de création",
                "Ouvert à la co-traitance ?",
                "Liste des labels",
                "Liste des réseaux",
        ]:
            siae.pop(key)
        # columns that may be absent
        for key in SECTOR_COLUMN_NAME_LIST:
            siae.pop(key, None)
        for key in PRESTA_TYPE_NAME_LIST:
            siae.pop(key, None)

        # create object
        try:
            # `siae` stays bound to the dict so that the failure log below
            # always shows the data that was submitted
            created_siae = Siae.objects.create(**siae)
            created_siae.sectors.set(siae_sectors)
        except Exception as e:
            print(e)
            print(siae)

        # avoid DDOSing APIs
        time.sleep(0.1)
Example #9
0
    def import_esat(self, esat):  # noqa C901
        """
        Import one ESAT record ("esat_handeco" source) as a Siae.

        `esat` is a dict and is modified in place: keys are renamed to Siae
        field names, values are normalised, the record is enriched with
        geocoding and API Entreprise data, and the keys that are not passed to
        Siae are removed. A Siae is then created and its sectors are set.
        Errors raised during creation are printed, not raised.

        The cleaned name (stripped, double spaces collapsed) is now stored;
        the result of the cleanup used to be discarded.
        """
        # store raw dict
        esat["import_source"] = "esat_handeco"
        esat["import_raw_object"] = esat.copy()

        # defaults
        esat["kind"] = Siae.KIND_ESAT
        esat["source"] = Siae.SOURCE_ESAT
        esat["geo_range"] = Siae.GEO_RANGE_DEPARTMENT

        # basic fields
        rename_dict_key(esat, "title", "name")
        esat["name"] = esat["name"].strip().replace("  ", " ")
        if "siret" in esat:
            esat["siret_is_valid"] = True

        # contact fields
        rename_dict_key(esat, "mail", "email")
        esat["contact_phone"] = esat["phone"]
        esat["contact_email"] = esat["email"]

        # geo fields
        rename_dict_key(esat, "zip", "post_code")
        if "post_code" in esat:
            esat["post_code"] = esat["post_code"].replace(
                " ", "")  # sometimes formatted '12 345'
            esat["department"] = department_from_postcode(esat["post_code"])
        esat["city"] = esat["city"].strip()  # space at the beginning
        esat["region"] = esat["region"].strip()  # just to be sure
        # manually fix region="DOM"

        # enrich with geocoding
        geocoding_data = get_geocoding_data(esat["city"],
                                            post_code=esat["post_code"])
        if geocoding_data:
            if esat["post_code"] != geocoding_data["post_code"]:
                if esat["post_code"][:2] == geocoding_data["post_code"][:2]:
                    # same first two characters: trust the geocoder and
                    # update post_code as well
                    esat["city"] = geocoding_data["city"]  # avoid uppercase
                    esat["coords"] = geocoding_data["coords"]
                    esat["post_code"] = geocoding_data["post_code"]
                else:
                    print(
                        f"Geocoding found a different place,{esat['name']},{esat['post_code']},{geocoding_data['post_code']}"  # noqa
                    )
            else:
                esat["city"] = geocoding_data["city"]  # avoid uppercase
                esat["coords"] = geocoding_data["coords"]
        else:
            print(f"Geocoding not found,{esat['name']},{esat['post_code']}")

        # enrich with API Entreprise
        etablissement, error = etablissement_get_or_error(
            esat["siret"],
            reason="Mise à jour donnéés Marché de la plateforme de l'Inclusion"
        )
        if etablissement:
            esat["nature"] = Siae.NATURE_HEAD_OFFICE if etablissement[
                "is_head_office"] else Siae.NATURE_ANTENNA
            esat["api_entreprise_employees"] = etablissement["employees"]
            if etablissement["date_constitution"]:
                esat["api_entreprise_date_constitution"] = timezone.make_aware(
                    etablissement["date_constitution"])
        # TODO: if 404, siret_is_valid = False
        # TODO: `error` is currently ignored

        # sectors
        esat_sectors = []
        for domaine in esat["secteurs"]:
            esat_sectors.extend(SECTORS_DICT.get(domaine["secteurnom"], []))

        # cleanup unused fields (all of them must be present)
        for key in ["type", "effectif", "secteurs", "import_source"]:
            esat.pop(key)

        # create object
        try:
            siae = Siae.objects.create(**esat)
            siae.sectors.set(esat_sectors)
        except Exception as e:
            print(e)
            print(esat)

        # avoid DDOSing APIs
        time.sleep(0.1)
    def import_esat(self, esat):  # noqa C901
        """
        Import one ESAT record ("esat_gesat" source) as a Siae.

        `esat` is a dict of raw columns and is modified in place: columns are
        renamed to Siae field names, values are normalised, the record is
        enriched with geocoding and API Entreprise data, and the columns that
        are not passed to Siae are removed. A Siae is then created and its
        sectors are set. Errors raised during creation are printed, not
        raised.

        The stripped name, phone and region are now stored; the results of
        `.strip()` used to be discarded.
        """
        # store raw dict
        esat["import_source"] = "esat_gesat"
        esat["import_raw_object"] = esat.copy()

        # defaults
        esat["kind"] = Siae.KIND_ESAT
        esat["source"] = Siae.SOURCE_ESAT
        esat["geo_range"] = Siae.GEO_RANGE_DEPARTMENT

        # basic fields
        rename_dict_key(esat, "Raison Sociale", "name")
        esat["name"] = esat["name"].strip().replace("  ", " ")
        rename_dict_key(esat, "Siret", "siret")
        if "siret" in esat:
            esat["siret"] = esat["siret"].replace(" ", "")
            esat["siret_is_valid"] = True

        # contact fields
        rename_dict_key(esat, "Email", "email")
        rename_dict_key(esat, "Tel", "phone")
        esat["phone"] = esat["phone"].strip()
        esat["contact_email"] = esat["email"]
        esat["contact_phone"] = esat["phone"]

        # geo fields
        rename_dict_key(esat, "Adresse", "address")
        rename_dict_key(esat, "Code Postal", "post_code")
        if "post_code" in esat:
            esat["post_code"] = esat["post_code"].replace(" ", "")
            esat["department"] = department_from_postcode(esat["post_code"])
        rename_dict_key(esat, "Ville", "city")
        rename_dict_key(esat, "Région", "region")
        esat["region"] = esat["region"].strip()
        # manually fix some regions

        # enrich with geocoding
        geocoding_data = get_geocoding_data(esat["address"] + " " +
                                            esat["city"],
                                            post_code=esat["post_code"])
        if geocoding_data:
            if esat["post_code"] != geocoding_data["post_code"]:
                if esat["post_code"][:2] == geocoding_data["post_code"][:2]:
                    # same first two characters: trust the geocoder and
                    # update post_code as well
                    esat["coords"] = geocoding_data["coords"]
                    esat["post_code"] = geocoding_data["post_code"]
                else:
                    print(
                        f"Geocoding found a different place,{esat['name']},{esat['post_code']},{geocoding_data['post_code']}"  # noqa
                    )
            else:
                esat["coords"] = geocoding_data["coords"]
        else:
            print(f"Geocoding not found,{esat['name']},{esat['post_code']}")

        # enrich with API Entreprise
        etablissement, error = etablissement_get_or_error(
            esat["siret"],
            reason="Mise à jour donnéés Marché de la plateforme de l'Inclusion"
        )
        if etablissement:
            esat["nature"] = Siae.NATURE_HEAD_OFFICE if etablissement[
                "is_head_office"] else Siae.NATURE_ANTENNA
            esat["api_entreprise_employees"] = etablissement["employees"]
            if etablissement["date_constitution"]:
                esat["api_entreprise_date_constitution"] = timezone.make_aware(
                    etablissement["date_constitution"])
        # TODO: if 404, siret_is_valid = False
        # TODO: `error` is currently ignored

        # sectors
        esat_sectors = []
        for domaine in esat["Domaines list"]:
            esat_sectors.extend(SECTORS_DICT.get(domaine, []))

        # cleanup unused fields (all of them must be present)
        for key in [
                # dates
                "Date d'ouverture",
                "Denière date de MAJ",
                # other unused columns
                "Domaines list",
                "Pôles de compétences list",
                "import_source",
                "Lieudit/BP",
                "Capacité d'accueil (nombre de TH total)",
        ]:
            esat.pop(key)

        # create object
        try:
            siae = Siae.objects.create(**esat)
            siae.sectors.set(esat_sectors)
        except Exception as e:
            print(e)
            print(esat)

        # avoid DDOSing APIs
        time.sleep(0.1)