def c4_create_siae(self, c1_siae, dry_run):
    """
    Create a new Siae out of a C1 record.

    ``c1_siae`` is a dict of C1 fields; it is modified in place (key rename
    plus extra fields) and then used as keyword arguments for the new Siae.
    When ``dry_run`` is truthy the dict is still prepared, but nothing is
    created and nobody is added to the contact list.
    """
    self.stdout.write("Creating Siae...")

    # the C1 primary key is stored under its own name
    rename_dict_key(c1_siae, "id", "c1_id")

    # contact fields start as copies of the C1 values
    c1_siae["description"] = ""
    c1_siae["contact_website"] = c1_siae["website"]
    # admin_email wins over email when it is set
    c1_siae["contact_email"] = c1_siae["admin_email"] or c1_siae["email"]
    c1_siae["contact_phone"] = c1_siae["phone"]

    # TODO: call API Entreprise

    c1_siae["is_delisted"] = False

    if dry_run:
        return

    new_siae = Siae.objects.create(**c1_siae)
    self.add_siae_to_contact_list(new_siae)
    summary = f"New Siae created / {new_siae.id} / {new_siae.name} / {new_siae.siret}"
    self.stdout.write(summary)
def migrate_siae_label(self, cur):
    """
    Migrate SiaeLabel data

    Deletes every existing SiaeLabel, then creates one per row of the
    'directory_label' table read through the ``cur`` cursor.
    """
    separator = "-" * 80
    print(separator)
    print("Migrating SiaeLabel...")

    SiaeLabel.objects.all().delete()

    cur.execute("SELECT * FROM directory_label")
    for row in cur.fetchall():
        # the foreign key is named differently on the new model
        rename_dict_key(row, "directory_id", "siae_id")

        cleanup_date_field_names(row)
        make_aware_dates(row)

        # the legacy primary key is not carried over
        row.pop("id")

        SiaeLabel.objects.create(**row)

    label_count = SiaeLabel.objects.count()
    print(f"Created {label_count} labels !")
def migrate_siae_client_reference_logo(self, cur):
    """
    Migrate SiaeClientReference data

    Deletes every existing SiaeClientReference, then creates one per row of
    the 'directory_client_image' table read through the ``cur`` cursor.
    """
    print("-" * 80)
    print("Migrating SiaeClientReference...")

    SiaeClientReference.objects.all().delete()

    # applied in this exact order: "name" is moved to "image_name" before
    # "description" is renamed to "name"
    key_renames = (
        ("name", "image_name"),
        ("description", "name"),
        ("position", "order"),
        ("directory_id", "siae_id"),
    )

    cur.execute("SELECT * FROM directory_client_image")
    for row in cur.fetchall():
        cleanup_date_field_names(row)
        make_aware_dates(row)

        for old_key, new_key in key_renames:
            rename_dict_key(row, old_key, new_key)

        # the legacy primary key is not carried over
        row.pop("id")

        SiaeClientReference.objects.create(**row)

    reference_count = SiaeClientReference.objects.count()
    print(f"Created {reference_count} client references !")
def extract_duplicates(esat_list):
    """
    Print every ESAT whose SIRET already appeared earlier in ``esat_list``.

    Each dict of ``esat_list`` is modified in place: "Raison Sociale" and
    "Siret" are renamed to "name" and "siret", and spaces are removed from
    the SIRET. For each duplicate, the current entry and the first entry
    seen with the same SIRET are printed. Nothing is returned.

    Entries without a SIRET cannot be compared and are skipped.
    """
    # siret -> first entry seen with that siret (constant-time lookup)
    first_seen_by_siret = {}

    for esat in esat_list:
        rename_dict_key(esat, "Raison Sociale", "name")
        rename_dict_key(esat, "Siret", "siret")

        if "siret" not in esat:
            # nothing to compare against; previously this raised a KeyError
            continue

        esat["siret"] = esat["siret"].replace(" ", "")

        first_seen = first_seen_by_siret.get(esat["siret"])
        if first_seen is None:
            first_seen_by_siret[esat["siret"]] = {
                "name": esat["name"],
                "siret": esat["siret"],
            }
        else:
            print("===========")
            print("Current", esat["name"], esat["siret"])
            print("Duplicate", first_seen["name"], first_seen["siret"])
def migrate_user(self, cur):
    """
    Migrate User data

    Deletes every User that has no ``api_key``, resets the SQL sequences of
    the "users" app, then creates one User per row of the legacy 'user'
    table read through the ``cur`` cursor.

    Rows whose computed ``kind`` is empty are ignored. A row that fails to
    be created is reported on stdout and does not stop the migration.
    """
    print("-" * 80)
    print("Migrating User...")

    User.objects.filter(api_key__isnull=True).delete()

    reset_app_sql_sequences("users")

    # legacy column name -> new field name
    # NOTE: the "accept_survey" / "accept_rgpd" renames were disabled in the
    # original script and are left out here as well.
    key_renames = (
        ("enabled", "is_active"),
        ("id", "c4_id"),
        ("phone_prefix", "c4_phone_prefix"),
        ("time_zone", "c4_time_zone"),
        ("website", "c4_website"),
        ("siret", "c4_siret"),
        ("naf", "c4_naf"),
        ("phone_verified", "c4_phone_verified"),
        ("email_verified", "c4_email_verified"),
        ("id_card_verified", "c4_id_card_verified"),
        ("offers_for_pro_sector", "accept_offers_for_pro_sector"),
        ("quote_promise", "accept_quote_promise"),
    )

    cur.execute("SELECT * FROM user")
    resp = cur.fetchall()

    for elem in resp:
        # rename fields
        for old_key, new_key in key_renames:
            rename_dict_key(elem, old_key, new_key)

        # cleanup fields
        cleanup_date_field_names(elem)
        make_aware_dates(elem)
        integer_to_boolean(elem)

        # cleanup person_type
        if "person_type" in elem:
            elem["kind"] = map_user_kind(elem["person_type"])

        # set staff users
        # NOTE(review): "roles" looks like a PHP-serialized array; the prefix
        # (length of the first entry) is what tells the roles apart. Which
        # role each prefix stands for should be confirmed.
        if "roles" in elem:
            if elem["roles"].startswith("a:1:{i:0;s:10"):
                elem["is_staff"] = True
            if elem["roles"].startswith("a:1:{i:0;s:16"):
                elem["is_superuser"] = True

        # remove useless keys (a missing key still raises KeyError)
        for key in USER_EXTRA_KEYS:
            elem.pop(key)

        # create object
        # Note: we ignore users with kind=None. ".get" avoids a KeyError when
        # the row had no "person_type" and therefore no "kind" at all.
        if elem.get("kind"):
            try:
                User.objects.create(**elem)
            except Exception as e:
                # best effort: report the failing row and keep migrating
                print("a", e)

    print(f"Created {User.objects.count()} users !")
def migrate_siae_image(self, cur):  # noqa C901
    """
    Migrate SiaeImage data
    - first get list from 'listing'
    - enrich with 'listing_translation'
    - finally get all the images from 'listing_image'

    User -- Listing(s) -- Image(s)

    Every existing SiaeImage is deleted first. An image is only created
    when its listing's user exists and is linked to exactly one Siae; the
    other cases are counted and the counters are printed at the end.
    """
    print("-" * 80)
    print("Migrating SiaeImage...")

    SiaeImage.objects.all().delete()

    siae_listing_list = []
    # listing_id -> listing dict. Replaces the positional lookup, whose
    # truthiness test ("if index:") wrongly treated index 0 as "not found"
    # and silently dropped the first listing's translation and images.
    listing_by_id = {}

    cur.execute("SELECT * FROM listing")
    resp = cur.fetchall()
    print(f"Found {len(resp)} Siae listings...")

    for elem in resp:
        # cleanup dates
        cleanup_date_field_names(elem)
        make_aware_dates(elem)

        # rename fields
        rename_dict_key(elem, "id", "listing_id")

        # keep only the useful keys
        elem_thin = {key: elem[key] for key in ["listing_id", "user_id", "created_at", "updated_at"]}
        elem_thin["images"] = []
        siae_listing_list.append(elem_thin)
        # setdefault: on a repeated id the first listing wins, as before
        listing_by_id.setdefault(elem_thin["listing_id"], elem_thin)

    cur.execute("SELECT * FROM listing_translation")
    resp = cur.fetchall()

    for elem in resp:
        # rename fields
        rename_dict_key(elem, "title", "name")

        # keep only the useful keys ("rules" is not migrated)
        elem_thin = {key: elem[key] for key in ["translatable_id", "name", "description"]}

        # find corresponding siae_listing item, and enrich it
        siae_listing = listing_by_id.get(elem_thin["translatable_id"])
        if siae_listing is not None:
            siae_listing.update(elem_thin)

    cur.execute("SELECT * FROM listing_image")
    resp = cur.fetchall()
    print(f"Found {len(resp)} Siae images...")

    for elem in resp:
        # rename fields
        rename_dict_key(elem, "name", "image_name")

        # keep only the useful keys
        elem_thin = {key: elem[key] for key in ["listing_id", "image_name", "position"]}

        # find corresponding siae_listing item, and attach the image to it
        siae_listing = listing_by_id.get(elem_thin["listing_id"])
        if siae_listing is not None:
            siae_listing["images"].append(elem_thin)

    error_count = {"listing_without_image": 0, "user_not_found": 0, "user_no_siae": 0, "user_multiple_siae": 0}

    for siae_listing in siae_listing_list:
        if not siae_listing["images"]:
            error_count["listing_without_image"] += 1
            continue

        for siae_image in siae_listing["images"]:
            # listing fields first, image fields on top
            siae_image_dict = siae_listing | siae_image

            # rename fields
            rename_dict_key(siae_image_dict, "listing_id", "c4_listing_id")
            rename_dict_key(siae_image_dict, "position", "order")

            user = User.objects.prefetch_related("siaes").filter(c4_id=siae_image_dict["user_id"]).first()
            if user is None:
                error_count["user_not_found"] += 1
                continue

            siae_count = user.siaes.count()
            if siae_count > 1:
                # no way to tell which siae the image belongs to
                error_count["user_multiple_siae"] += 1
            elif siae_count == 0:
                error_count["user_no_siae"] += 1
            else:
                # exactly one siae: attach the image to it
                siae_image_dict["siae_id"] = user.siaes.first().id

                # TODO: group the images by their listing (by updating their order)

                # remove useless keys; "translatable_id" is absent when the
                # listing had no translation, hence the tolerant pop
                for key in ("translatable_id", "user_id", "images"):
                    siae_image_dict.pop(key, None)

                # create object
                SiaeImage.objects.create(**siae_image_dict)

    print(f"Created {SiaeImage.objects.count()} siae images !")
    print("Errors", error_count)
def migrate_siae(self, cur):
    """
    Migrate Siae data

    Deletes every existing Siae, then creates one per row of the legacy
    'directory' table read through the ``cur`` cursor (active rows first).
    A row that fails to be created is printed and does not stop the
    migration.
    """
    import json

    print("-" * 80)
    print("Migrating Siae...")

    Siae.objects.all().delete()

    cur.execute("SELECT * FROM directory ORDER BY is_active DESC")
    resp = cur.fetchall()

    for elem in resp:
        # rename fields
        # order matters: the legacy "geo_range" is moved away before
        # "pol_range" takes over the "geo_range" name
        rename_dict_key(elem, "geo_range", "geo_range_custom_distance")
        rename_dict_key(elem, "pol_range", "geo_range")
        rename_dict_key(elem, "c4_id", "c4_id_old")
        rename_dict_key(elem, "c1_source", "source")  # changed after the migration

        # cleanup fields
        cleanup_date_field_names(elem)
        make_aware_dates(elem)
        integer_to_boolean(elem)

        # cleanup nature
        if "nature" in elem:
            elem["nature"] = map_siae_nature(elem["nature"])

        # cleanup presta_type
        if "presta_type" in elem:
            elem["presta_type"] = map_siae_presta_type(elem["presta_type"])

        # cleanup geo_range
        if "geo_range" in elem:
            elem["geo_range"] = map_geo_range(elem["geo_range"])

        # create coords from latitude & longitude
        if "latitude" in elem and "longitude" in elem:
            if elem["latitude"] and elem["longitude"]:
                coords = {
                    "type": "Point",
                    # GeoJSON order is longitude first, then latitude
                    "coordinates": [float(elem["longitude"]), float(elem["latitude"])],
                }
                # Feed `GEOSGeometry` with real GeoJSON: json.dumps emits
                # double-quoted JSON, whereas formatting the dict directly
                # produced a single-quoted Python repr.
                elem["coords"] = GEOSGeometry(json.dumps(coords))

        # remove useless keys (a missing key still raises KeyError)
        for key in DIRECTORY_EXTRA_KEYS:
            elem.pop(key)

        # create object
        try:
            Siae.objects.create(**elem)
        except Exception as e:
            # best effort: report the failing row and keep migrating
            print(e)

    print(f"Created {Siae.objects.count()} siaes !")
def import_sep(self, siae, source="sep"):  # noqa C901
    """
    Import one SEP structure as a Siae.

    ``siae`` is a dict keyed by the column names of the import file. It is
    modified in place (keys renamed, cleaned up, enriched with geocoding
    data) and finally used as keyword arguments to create the Siae.
    ``source`` is only kept inside ``import_raw_object``.

    A creation failure is printed, together with the prepared dict, and
    does not raise. The method sleeps 0.1s before returning to throttle the
    calls made to the external APIs.
    """
    # store raw dict (the copy is taken before any cleanup)
    siae["import_source"] = source
    siae["import_raw_object"] = siae.copy()

    # defaults
    siae["kind"] = Siae.KIND_SEP
    # NOTE(review): "source" receives a KIND_* constant here — confirm this is intended
    siae["source"] = Siae.KIND_SEP
    siae["geo_range"] = Siae.GEO_RANGE_DEPARTMENT

    # basic fields
    rename_dict_key(siae, "Raison sociale", "name")
    # str.strip() returns a new string: the result has to be stored
    siae["name"] = siae["name"].strip()
    rename_dict_key(siae, "Enseigne", "brand")
    rename_dict_key(siae, "Siret", "siret")
    if "siret" in siae:
        # drop every whitespace character (leading, trailing and inner)
        siae["siret"] = "".join(siae["siret"].split())
        if validate_siret(siae["siret"]):
            siae["siret_is_valid"] = True

    # presta_type
    siae["presta_type"] = []
    for presta_type_name in PRESTA_TYPE_NAME_LIST:
        if presta_type_name in siae and siae[presta_type_name]:
            siae["presta_type"].append(PRESTA_TYPE_MAPPING[siae[presta_type_name]])

    # contact fields
    rename_dict_key(siae, "Prénom 1", "contact_first_name")
    rename_dict_key(siae, "Nom 1", "contact_last_name")
    rename_dict_key(siae, "Site internet", "website")
    siae["contact_website"] = siae["website"]
    rename_dict_key(siae, "Email 1", "email")
    siae["contact_email"] = siae["email"]
    rename_dict_key(siae, "Téléphone", "phone")
    siae["phone"] = siae["phone"].strip()
    siae["contact_phone"] = siae["phone"]

    # geo fields
    rename_dict_key(siae, "Adresse", "address")
    rename_dict_key(siae, "Code Postal", "post_code")
    if "post_code" in siae:
        siae["department"] = department_from_postcode(siae["post_code"])
        siae["region"] = DEPARTMENT_TO_REGION[siae["department"]]
    rename_dict_key(siae, "Ville", "city")

    # enrich with geocoding
    geocoding_data = get_geocoding_data(siae["address"] + " " + siae["city"], post_code=siae["post_code"])
    if geocoding_data:
        if siae["post_code"] != geocoding_data["post_code"]:
            if siae["post_code"][:2] == geocoding_data["post_code"][:2]:
                # same 2-character prefix: trust the geocoder, update post_code as well
                siae["coords"] = geocoding_data["coords"]
                siae["post_code"] = geocoding_data["post_code"]
            else:
                print(
                    f"Geocoding found a different place,{siae['name']},{siae['post_code']},{geocoding_data['post_code']}"  # noqa
                )
        else:
            siae["coords"] = geocoding_data["coords"]
    else:
        print(f"Geocoding not found,{siae['name']},{siae['post_code']}")

    # enrich with API Entreprise, API QPV, API ZRR?
    # done in weekly CRON job

    # sectors (Sector.objects.get raises if a name is unknown)
    siae_sectors = [Sector.objects.get(name=sector_name) for sector_name in siae["Secteurs d'act list"]]

    # cleanup unused fields (these keys must exist: a missing one raises KeyError)
    for key in ["Secteurs d'act list", "Gestionnaires", "import_source"]:
        siae.pop(key)

    # temporary fields (must exist as well)
    for key in ["Type de structure", "Département", "Région", "Périmètre d'intervention"]:
        siae.pop(key)

    # optional columns: removed only when present
    for key in [
        "Prénom de l'utilisateur principal",
        "Nom 2",
        "Prénom 2",
        "Email 2",
        "Nom 3",
        "Prénom 3",
        "Email 3",
    ]:
        siae.pop(key, None)

    # mandatory columns
    for key in [
        "Logo",
        "nombre de salariés",
        "nombre d'opérateurs",
        "Date de création",
        "Ouvert à la co-traitance ?",
        "Liste des labels",
        "Liste des réseaux",
    ]:
        siae.pop(key)

    # optional columns
    for key in SECTOR_COLUMN_NAME_LIST:
        siae.pop(key, None)
    for key in PRESTA_TYPE_NAME_LIST:
        siae.pop(key, None)

    # create object
    try:
        # separate name so that the dict is still what gets printed on failure
        siae_obj = Siae.objects.create(**siae)
        siae_obj.sectors.set(siae_sectors)
    except Exception as e:
        # best effort: report and move on to the next structure
        print(e)
        print(siae)

    # avoid DDOSing APIs
    time.sleep(0.1)
def import_esat(self, esat):  # noqa C901
    """
    Import one ESAT (source "esat_handeco") as a Siae.

    ``esat`` is a dict of the source fields. It is modified in place (keys
    renamed, cleaned up, enriched with geocoding and API Entreprise data)
    and finally used as keyword arguments to create the Siae.

    A creation failure is printed, together with the prepared dict, and
    does not raise. The method sleeps 0.1s before returning to throttle the
    calls made to the external APIs.
    """
    # store raw dict (the copy is taken before any cleanup)
    esat["import_source"] = "esat_handeco"
    esat["import_raw_object"] = esat.copy()

    # defaults
    esat["kind"] = Siae.KIND_ESAT
    esat["source"] = Siae.SOURCE_ESAT
    esat["geo_range"] = Siae.GEO_RANGE_DEPARTMENT

    # basic fields
    rename_dict_key(esat, "title", "name")
    # strip the name and collapse whitespace runs into a single space; the
    # previous strip()/replace() chain discarded its result
    esat["name"] = " ".join(esat["name"].split())
    if "siret" in esat:
        esat["siret_is_valid"] = True

    # contact fields
    rename_dict_key(esat, "mail", "email")
    esat["contact_phone"] = esat["phone"]
    esat["contact_email"] = esat["email"]

    # geo fields
    rename_dict_key(esat, "zip", "post_code")
    if "post_code" in esat:
        esat["post_code"] = esat["post_code"].replace(" ", "")  # sometimes formated '12 345'
        esat["department"] = department_from_postcode(esat["post_code"])
    esat["city"] = esat["city"].strip()  # space at the beginning
    esat["region"] = esat["region"].strip()  # just to be sure
    # TODO: manually fix region="DOM"

    # enrich with geocoding
    geocoding_data = get_geocoding_data(esat["city"], post_code=esat["post_code"])
    if geocoding_data:
        if esat["post_code"] != geocoding_data["post_code"]:
            if esat["post_code"][:2] == geocoding_data["post_code"][:2]:
                # same 2-character prefix: trust the geocoder, update post_code as well
                esat["city"] = geocoding_data["city"]  # avoid uppercase
                esat["coords"] = geocoding_data["coords"]
                esat["post_code"] = geocoding_data["post_code"]
            else:
                print(
                    f"Geocoding found a different place,{esat['name']},{esat['post_code']},{geocoding_data['post_code']}"  # noqa
                )
        else:
            esat["city"] = geocoding_data["city"]  # avoid uppercase
            esat["coords"] = geocoding_data["coords"]
    else:
        print(f"Geocoding not found,{esat['name']},{esat['post_code']}")

    # enrich with API Entreprise (the error part of the result is not used)
    etablissement, _ = etablissement_get_or_error(
        esat["siret"], reason="Mise à jour donnéés Marché de la plateforme de l'Inclusion"
    )
    if etablissement:
        esat["nature"] = Siae.NATURE_HEAD_OFFICE if etablissement["is_head_office"] else Siae.NATURE_ANTENNA
        esat["api_entreprise_employees"] = etablissement["employees"]
        if etablissement["date_constitution"]:
            esat["api_entreprise_date_constitution"] = timezone.make_aware(etablissement["date_constitution"])
    # TODO: if 404, siret_is_valid = False
    # TODO: is_active (from etablissement["is_closed"]) and the "exercice"
    # revenue lookup are not imported yet

    # sectors (unknown sector names are ignored)
    esat_sectors = []
    for domaine in esat["secteurs"]:
        esat_sectors.extend(SECTORS_DICT.get(domaine["secteurnom"], []))

    # cleanup unused fields (these keys must exist: a missing one raises KeyError)
    for key in ["type", "effectif", "secteurs", "import_source"]:
        esat.pop(key)

    # create object
    try:
        siae = Siae.objects.create(**esat)
        siae.sectors.set(esat_sectors)
    except Exception as e:
        # best effort: report and move on to the next structure
        print(e)
        print(esat)

    # avoid DDOSing APIs
    time.sleep(0.1)
def import_esat(self, esat):  # noqa C901
    """
    Import one ESAT (source "esat_gesat") as a Siae.

    ``esat`` is a dict keyed by the column names of the import file. It is
    modified in place (keys renamed, cleaned up, enriched with geocoding
    and API Entreprise data) and finally used as keyword arguments to
    create the Siae.

    A creation failure is printed, together with the prepared dict, and
    does not raise. The method sleeps 0.1s before returning to throttle the
    calls made to the external APIs.
    """
    # store raw dict (the copy is taken before any cleanup)
    esat["import_source"] = "esat_gesat"
    esat["import_raw_object"] = esat.copy()

    # defaults
    esat["kind"] = Siae.KIND_ESAT
    esat["source"] = Siae.SOURCE_ESAT
    esat["geo_range"] = Siae.GEO_RANGE_DEPARTMENT

    # basic fields
    rename_dict_key(esat, "Raison Sociale", "name")
    # strip the name and collapse whitespace runs into a single space; the
    # previous strip() call discarded its result
    esat["name"] = " ".join(esat["name"].split())
    rename_dict_key(esat, "Siret", "siret")
    if "siret" in esat:
        esat["siret"] = esat["siret"].replace(" ", "")
        esat["siret_is_valid"] = True

    # contact fields
    rename_dict_key(esat, "Email", "email")
    rename_dict_key(esat, "Tel", "phone")
    # str.strip() returns a new string: the result has to be stored
    esat["phone"] = esat["phone"].strip()
    esat["contact_email"] = esat["email"]
    esat["contact_phone"] = esat["phone"]

    # geo fields
    rename_dict_key(esat, "Adresse", "address")
    rename_dict_key(esat, "Code Postal", "post_code")
    if "post_code" in esat:
        esat["post_code"] = esat["post_code"].replace(" ", "")
        esat["department"] = department_from_postcode(esat["post_code"])
    rename_dict_key(esat, "Ville", "city")
    rename_dict_key(esat, "Région", "region")
    esat["region"] = esat["region"].strip()
    # TODO: manually fix some regions

    # enrich with geocoding
    geocoding_data = get_geocoding_data(esat["address"] + " " + esat["city"], post_code=esat["post_code"])
    if geocoding_data:
        if esat["post_code"] != geocoding_data["post_code"]:
            if esat["post_code"][:2] == geocoding_data["post_code"][:2]:
                # same 2-character prefix: trust the geocoder, update post_code as well
                esat["coords"] = geocoding_data["coords"]
                esat["post_code"] = geocoding_data["post_code"]
            else:
                print(
                    f"Geocoding found a different place,{esat['name']},{esat['post_code']},{geocoding_data['post_code']}"  # noqa
                )
        else:
            esat["coords"] = geocoding_data["coords"]
    else:
        print(f"Geocoding not found,{esat['name']},{esat['post_code']}")

    # enrich with API Entreprise (the error part of the result is not used)
    etablissement, _ = etablissement_get_or_error(
        esat["siret"], reason="Mise à jour donnéés Marché de la plateforme de l'Inclusion"
    )
    if etablissement:
        esat["nature"] = Siae.NATURE_HEAD_OFFICE if etablissement["is_head_office"] else Siae.NATURE_ANTENNA
        esat["api_entreprise_employees"] = etablissement["employees"]
        if etablissement["date_constitution"]:
            esat["api_entreprise_date_constitution"] = timezone.make_aware(etablissement["date_constitution"])
    # TODO: if 404, siret_is_valid = False
    # TODO: is_active (from etablissement["is_closed"]) and the "exercice"
    # revenue lookup are not imported yet

    # sectors (unknown domain names are ignored)
    esat_sectors = []
    for domaine in esat["Domaines list"]:
        esat_sectors.extend(SECTORS_DICT.get(domaine, []))

    # the keys below must exist: a missing one raises KeyError
    # dates
    for key in ["Date d'ouverture", "Denière date de MAJ"]:
        esat.pop(key)

    # cleanup unused fields
    for key in ["Domaines list", "Pôles de compétences list", "import_source"]:
        esat.pop(key)
    for key in ["Lieudit/BP", "Capacité d'accueil (nombre de TH total)"]:
        esat.pop(key)

    # create object
    try:
        siae = Siae.objects.create(**esat)
        siae.sectors.set(esat_sectors)
    except Exception as e:
        # best effort: report and move on to the next structure
        print(e)
        print(esat)

    # avoid DDOSing APIs
    time.sleep(0.1)