def save(self, *args, **kwargs):
    """Auto-fill English fields from the Ukrainian ones before saving.

    Name parts keep a manually entered translation if present; the alias
    is always re-transliterated; city of birth is looked up in the
    Ua2En dictionary only when not already set.
    """
    # Name parts: keep an existing English value, otherwise transliterate.
    for field in ("first_name", "last_name", "patronymic"):
        uk_value = getattr(self, field + "_uk")
        if uk_value:
            en_value = getattr(self, field + "_en") or translitua(uk_value)
        else:
            en_value = ""
        setattr(self, field + "_en", en_value)

    # Alias is always overwritten with a fresh transliteration.
    self.also_known_as_en = (
        translitua(self.also_known_as_uk) if self.also_known_as_uk else "")

    # City of birth goes through the translation dictionary, and only
    # when the English value is still empty.
    if self.city_of_birth_uk and not self.city_of_birth_en:
        entry = Ua2EnDictionary.objects.filter(
            term__iexact=lookup_term(self.city_of_birth_uk)).first()
        if entry and entry.translation:
            self.city_of_birth_en = entry.translation

    super(Person, self).save(*args, **kwargs)
def move_names(apps, schema_editor):
    """Data migration: copy the legacy single-language name fields into
    the ``*_ua`` columns and fill ``*_en`` with transliterations."""
    Person = apps.get_model("core", "Person")

    for person in Person.objects.all():
        for attr in ("first_name", "last_name", "patronymic"):
            original = getattr(person, attr)
            # Ukrainian original moves verbatim; English is machine-made.
            setattr(person, attr + "_ua", original)
            setattr(person, attr + "_en", translitua(original))
        person.save()
def handle(self, *args, **options):
    """Attach profile links and photos from a scraped CSV to the MPs of
    the given convocation.

    The CSV must have ``name.text``, ``name.href`` and ``image.src``
    columns; MPs are matched by case-insensitive name.
    """
    source_path = options["source"]
    if not source_path or not os.path.exists(source_path):
        raise CommandError(
            "CSV file with parliament members photos doesn't exists")

    # Index scraped rows by lower-cased MP name for O(1) lookup.
    scraped = {}
    with open(source_path, "r", encoding="utf-8") as fp:
        for row in DictReader(fp):
            scraped[row["name.text"].lower()] = {
                "link": row["name.href"],
                "img": row["image.src"],
            }

    memberships = MP2Convocation.objects.filter(
        convocation_id=options["convocation"]).select_related("mp")

    for membership in memberships:
        key = membership.mp.name.lower()
        if key not in scraped:
            print("%s not found" % membership.mp.name)
            continue

        data = scraped[key]
        membership.link = data["link"]
        membership.save()

        # Never overwrite an already stored photo.
        if membership.mp.img:
            print("Image for %s already exists" % membership.mp.name)
            continue

        resp = requests.get(data["img"])
        if resp.status_code != 200:
            print("Cannot download image %s for %s" % (
                data["img"], membership.mp.name
            ))
            continue

        membership.mp.img.save(
            translitua(membership.mp.name) + ".jpg",
            ContentFile(resp.content))
        membership.mp.save()
def handle(self, *args, **options):
    """Import PEP persons and their company positions from a Google
    spreadsheet, downloading photos and supporting documents, and write
    the created DB ids back into the sheet.
    """
    # Uploader account attached to every Document created below.
    peklun = User.objects.get(username="******")
    wks = get_spreadsheet().sheet1

    for i, l in enumerate(wks.get_all_records()):
        # Reopen the sheet from time to time to avoid being
        # disconnected by the API timeout.
        if i % 2000 == 0 and i:
            wks = get_spreadsheet().sheet1

        self.stdout.write('Processing line #{}'.format(i))

        # Spreadsheet columns (Ukrainian headers): tax id, company name,
        # previously stored person/company DB ids, photo URL.
        company_ipn = l.get("ІПН", "")
        company_name = l.get("Назва", "")
        person_id = l.get("id персони", "")
        company_id = l.get("id компанії", "")
        photo_url = l.get("Фото", "")
        person = None

        # First let's search for an appropriate company.
        company = self.process_company(company_id, company_ipn, company_name)

        # No company — no go.
        if company is None:
            continue

        # Write the company id back to the spreadsheet (last column)
        # for further use.
        if company.pk != company_id:
            company_id = company.pk
            wks.update_cell(i + 2, len(l.keys()), company.pk)

        person_name = l.get("ПІБ", "").strip()
        position = l.get("Посада", "").strip()
        # Kept as raw text too: its length encodes the precision below.
        person_dob = unicode(l.get("Дата народження", "")).strip()
        person_from = parse_date(l.get("Дата призначення", ""))
        person_to = parse_date(l.get("Дата звільнення", ""))
        doc_received = parse_date(l.get("Дата відповіді", ""))
        docs = l.get("Лінк на відповідь", "").strip()
        website = l.get("лінк на сайт", "").strip()

        # Now let's search for the person.
        if person_name:
            last_name, first_name, patronymic, _ = parse_fullname(
                person_name)

            if not last_name:
                continue

            # First we search by person_id (if it's present).
            if person_id:
                try:
                    person = Person.objects.get(pk=person_id)
                except Person.DoesNotExist:
                    pass

            # If nothing is found we search by name (for now).
            if not person:
                try:
                    person = Person.objects.get(
                        first_name_uk__iexact=first_name,
                        last_name_uk__iexact=last_name,
                        patronymic_uk__iexact=patronymic)
                except Person.MultipleObjectsReturned:
                    self.stderr.write(
                        "Double person {}!".format(person_name))
                except Person.DoesNotExist:
                    pass

            # If nothing is found, create a record for that person.
            if not person:
                person = Person()
                self.stderr.write(
                    "Created new person {}".format(person_name))

            person.first_name_uk = first_name
            person.last_name_uk = last_name
            person.patronymic_uk = patronymic

            # Seed the Ua->Ru dictionary so translators can fill it in.
            Ua2RuDictionary.objects.get_or_create(term=first_name)
            Ua2RuDictionary.objects.get_or_create(term=last_name)
            Ua2RuDictionary.objects.get_or_create(term=patronymic)

            person.first_name_en = translitua(first_name)
            person.last_name_en = translitua(last_name)
            person.patronymic_en = translitua(patronymic)
            person.is_pep = True
            person.imported = True
            person.type_of_official = 1

            # Parsing date (can be a full date or just a year or
            # year/month); dob_details records the precision.
            if person_dob:
                person.dob = parse_date(person_dob)

                if len(person_dob) == 4:
                    person.dob_details = 2  # Only year

                if len(person_dob) > 4 and len(person_dob) < 7:
                    person.dob_details = 1  # month and year

            # Let's download the photo (if any), unless one is stored.
            if not person.photo and photo_url:
                photo_name, photo_san_name, photo_content = download(
                    photo_url, translitua(person_name))

                if photo_name:
                    person.photo.save(
                        photo_san_name, ContentFile(photo_content))
                else:
                    self.stdout.write("Cannot download image %s for %s" %
                                      (photo_url, person_name))

            person.save()

            # Let's write the person id back to the table
            # (second-to-last column).
            if person.pk != person_id:
                person_id = person.pk
                wks.update_cell(i + 2, len(l.keys()) - 1, person.pk)

            # Now let's download all supporting docs.
            docs_downloaded = []
            first_doc_name = False  # sentinel: no doc seen yet

            # There might be many of them, comma-separated.
            for doc in docs.split(", "):
                doc_instance = None

                # We cannot download folders from google docs, so let's
                # skip them.
                if doc and "folderview" not in doc \
                        and "drive/#folders" not in doc:
                    doc = expand_gdrive_download_url(doc)
                    # URL hash is the dedup key across runs.
                    doc_hash = sha1(doc).hexdigest()

                    # Check if the doc is already stored.
                    try:
                        doc_instance = Document.objects.get(hash=doc_hash)
                    except Document.DoesNotExist:
                        self.stdout.write(
                            'Downloading file {}'.format(doc))
                        doc_name, doc_san_name, doc_content = download(doc)
                        doc_san_name = translitua(doc_san_name)

                        if doc_name:
                            doc_instance = Document(name_uk=doc_name,
                                                    uploader=peklun,
                                                    hash=doc_hash)
                            doc_instance.doc.save(
                                doc_san_name, ContentFile(doc_content))
                            doc_instance.save()
                        else:
                            self.stdout.write(
                                'Cannot download file {}'.format(doc))

                if doc_instance:
                    first_doc_name = doc_instance.name_uk
                    docs_downloaded.append(doc_instance.doc.url)

            # Now let's set up links between person and companies.
            links = Person2Company.objects.filter(
                (Q(date_established=person_from) |
                 Q(date_established=mangle_date(person_from)) |
                 Q(date_established__isnull=True)),
                (Q(date_finished=person_to) |
                 Q(date_finished=mangle_date(person_to)) |
                 Q(date_finished__isnull=True)),
                from_person=person,
                to_company=company)

            # Delete if there are doubling links, including those cases
            # when dates were imported incorrectly because of parse_date.
            if len(links) > 1:
                links.delete()

            link, _ = Person2Company.objects.update_or_create(
                from_person=person,
                to_company=company,
                date_established=person_from,
                date_established_details=0,
                date_finished=person_to,
                date_finished_details=0)

            if not link.relationship_type:
                link.relationship_type = position

            # And queue the position for translation.
            Ua2EnDictionary.objects.get_or_create(
                term=lookup_term(position))

            # Oh, and add links to supporting docs.
            all_docs = docs_downloaded + website.split(", ")
            if all_docs:
                link.proof = ", ".join(filter(None, all_docs))

            if first_doc_name:
                link.proof_title = first_doc_name

            link.date_confirmed = doc_received
            link.is_employee = True
            link.save()
def load_peps(apps, schema_editor):
    """Data migration: import PEP persons, their companies and the
    person-to-company links from ``core/dicts/new_peps.csv``.

    Companies are matched by EDRPOU first, then by name; persons by the
    (first, last, patronymic) triple. Supporting documents are
    downloaded once and deduplicated by the hash of their URL.
    """
    User = apps.get_model("auth", "User")
    Company = apps.get_model("core", "Company")
    Person = apps.get_model("core", "Person")
    Person2Company = apps.get_model("core", "Person2Company")
    Document = apps.get_model("core", "Document")
    # Uploader account attached to every Document created below.
    peklun = User.objects.get(username="******")

    with open("core/dicts/new_peps.csv", "r") as fp:
        # NOTE(review): stdlib csv.DictReader takes no "errors" kwarg —
        # presumably this is another DictReader (e.g. unicodecsv);
        # confirm against the file's imports.
        r = DictReader(fp, errors="ignore")

        for i, l in enumerate(r):
            print(i)

            company_ipn = l.get("ІПН", "")
            company_name = l.get("Назва", "")
            company = None

            if not company_ipn and not company_name:
                continue

            # Search by IPN first (if it's present).
            if company_ipn:
                try:
                    company = Company.objects.get(edrpou=company_ipn)
                except Company.DoesNotExist:
                    pass

            # Then search by name (if it's present).
            if company_name:
                if company is None:
                    try:
                        company = Company.objects.get(name=company_name)
                    except Company.DoesNotExist:
                        pass

            if company is None:
                company = Company(state_company=True)

            # Set missing params.
            if not company.name:
                company.name = company_name

            if not company.edrpou:
                company.edrpou = company_ipn

            company.save()

            person_name = l.get("ПІБ", "").strip()
            position = l.get("Посада", "").strip()
            person_dob = l.get("Дата народження", "").strip()
            person_from = l.get("Дата призначення", "").strip()
            person_to = l.get("Дата звільнення", "").strip()
            doc_received = l.get("Дата відповіді", "").strip()
            doc = l.get("Лінк на відповідь", "").strip()
            website = l.get("лінк на сайт", "").strip()

            if person_name:
                chunks = person_name.split(" ")
                # BUGFIX: previously `patronymic` was left unassigned
                # for two-word names, raising NameError (or silently
                # reusing the value from a previous row) in the
                # Person lookup below.
                patronymic = ""

                if len(chunks) == 2:
                    last_name = title(chunks[0])
                    first_name = title(chunks[1])
                else:
                    last_name = title(" ".join(chunks[:-2]))
                    first_name = title(chunks[-2])
                    patronymic = title(chunks[-1])

                # Kind of get_or_create.
                try:
                    person = Person.objects.get(
                        first_name__iexact=first_name,
                        last_name__iexact=last_name,
                        patronymic__iexact=patronymic)
                except Person.DoesNotExist:
                    person = Person(first_name=first_name,
                                    last_name=last_name,
                                    patronymic=patronymic)

                person.is_pep = True
                person.type_of_official = 1

                # Date can be a full date or just a year or year/month;
                # dob_details records the precision.
                if person_dob:
                    person.dob = parse_date(person_dob)

                    if len(person_dob) == 4:
                        person.dob_details = 2  # Only year

                    if len(person_dob) > 4 and len(person_dob) < 7:
                        person.dob_details = 1  # month and year

                person.save()

                doc_instance = None
                # We cannot download folders from google docs — skip.
                if doc and "folderview" not in doc \
                        and "drive/#folders" not in doc:
                    print(doc)
                    doc = expand_gdrive_download_url(doc)
                    # URL hash is the dedup key across runs.
                    doc_hash = sha1(doc).hexdigest()

                    try:
                        doc_instance = Document.objects.get(hash=doc_hash)
                    except Document.DoesNotExist:
                        doc_name, doc_san_name, doc_content = download(doc)
                        doc_san_name = translitua(doc_san_name)

                        if doc_name:
                            doc_instance = Document(name=doc_name,
                                                    uploader=peklun,
                                                    hash=doc_hash)
                            doc_instance.doc.save(
                                doc_san_name, ContentFile(doc_content))
                            doc_instance.save()

                link, link_created = Person2Company.objects.update_or_create(
                    from_person=person,
                    to_company=company,
                    date_established=parse_date(person_from),
                    date_finished=parse_date(person_to))

                if not link.relationship_type:
                    link.relationship_type = position

                if doc_instance is not None:
                    link.proof_title = doc_instance.name
                    link.proof = doc_instance.doc.url

                link.date_confirmed = parse_date(doc_received)

                # Fall back to the website link when no doc was given.
                if not doc and website:
                    link.proof = website

                link.save()