Beispiel #1
0
    def process_company(self, company_id, company_ipn, company_name):
        """Find or create the company described by one spreadsheet row.

        The company is looked up by primary key, then by ``edrpou``, then by
        ``name_uk``; the first non-empty identifier that matches wins. When
        nothing matches, a new ``Company`` with ``state_company=True`` is
        created. Missing ``name_uk``/``edrpou`` values on the company are
        filled in from the arguments and the company is saved.

        :param company_id: primary key of the company, may be empty
        :param company_ipn: company code stored in ``Company.edrpou``
        :param company_name: Ukrainian name of the company
        :return: the saved ``Company``, or ``None`` when the row carries
            neither a code nor a name, or when the name exceeds 250 chars
        """
        # A missing name is treated as an empty one, so that the length check
        # below cannot fail on None and None is never written to name_uk.
        company_name = company_name or ""

        if not company_ipn and not company_name:
            return None

        if len(company_name) > 250:
            self.stderr.write(
                'Company name {} is too long'.format(company_name))
            return None

        company = None

        # NOTE(review): Company.MultipleObjectsReturned is not handled here;
        # ambiguous edrpou/name matches will still abort the import.
        for k, v in [("pk", company_id), ("edrpou", company_ipn),
                     ("name_uk", company_name)]:
            if not v:
                continue

            try:
                company = Company.objects.get(**{k: v})
                break
            except Company.DoesNotExist:
                pass

        if company is None:
            company = Company(state_company=True)

        # Set missing params
        if not company.name_uk:
            company.name_uk = company_name

        # Register the name for translation, but never an empty term: the
        # command that collects untranslated terms skips empty ones as well.
        term = lookup_term(company_name) if company_name else ""
        if term:
            Ua2EnDictionary.objects.get_or_create(term=term)

        if not company.edrpou:
            company.edrpou = company_ipn

        company.save()
        return company
Beispiel #2
0
    def save(self, *args, **kwargs):
        """Fill in the English name fields and city of birth, then save.

        First name, last name and patronymic keep an existing English value
        and are transliterated from Ukrainian only when it is missing; they
        are blanked when the Ukrainian value is empty. ``also_known_as_en``
        is always regenerated. The city of birth is taken from
        ``Ua2EnDictionary`` when no English value is set yet.
        """
        for prefix in ("first_name", "last_name", "patronymic"):
            en_attr = prefix + "_en"
            uk_value = getattr(self, prefix + "_uk")

            if not uk_value:
                setattr(self, en_attr, "")
                continue

            setattr(self, en_attr,
                    getattr(self, en_attr) or translitua(uk_value))

        self.also_known_as_en = (
            translitua(self.also_known_as_uk) if self.also_known_as_uk else "")

        city_needs_translation = (
            self.city_of_birth_uk and not self.city_of_birth_en)

        if city_needs_translation:
            entry = Ua2EnDictionary.objects.filter(
                term__iexact=lookup_term(self.city_of_birth_uk)).first()

            if entry and entry.translation:
                self.city_of_birth_en = entry.translation

        super(Person, self).save(*args, **kwargs)
    def handle(self, *args, **options):
        """Apply known English translations to declarations and models.

        Confirmed declarations get their region/position/office translated
        from an in-memory copy of the dictionary. Companies, person-to-company
        links and persons with missing English fields are simply re-saved,
        which lets their own ``save`` methods pick up the translation.
        """
        en_translations = {
            lookup_term(entry.term): entry.translation
            for entry in Ua2EnDictionary.objects.exclude(
                translation="").nocache()
        }

        declarations = Declaration.objects.filter(
            confirmed="a").defer("source").all().nocache()

        for declaration in declarations:
            for prefix in ("region", "position", "office"):
                term = lookup_term(getattr(declaration, prefix + "_uk"))

                if term in en_translations:
                    setattr(declaration, prefix + "_en",
                            en_translations[term])

            declaration.save()

        companies = Company.objects.filter(
            Q(name_en="") | Q(short_name_en="") | Q(name_en__isnull=True)
            | Q(short_name_en__isnull=True)).nocache()

        for company in companies:
            # Saving triggers the translation done in the model's save()
            company.save()

        links = Person2Company.objects.filter(
            Q(relationship_type_en="")
            | Q(relationship_type_en__isnull=True)).nocache()

        for link in links:
            # Saving triggers the translation done in the model's save()
            link.save()

        has_no_city_uk = (
            Q(city_of_birth_uk="") | Q(city_of_birth_uk__isnull=True))
        has_no_city_en = (
            Q(city_of_birth_en="") | Q(city_of_birth_en__isnull=True))

        persons = Person.objects.exclude(has_no_city_uk).filter(
            has_no_city_en).nocache()

        for person in persons:
            # Saving triggers the translation done in the model's save()
            person.save()
Beispiel #4
0
    def save(self, *args, **kwargs):
        """Translate the relationship type when possible, then save.

        An existing English relationship type is never overwritten; otherwise
        the Ukrainian value is looked up (case-insensitively) in
        ``Ua2EnDictionary`` and used when a non-empty translation exists.
        """
        needs_translation = not self.relationship_type_en

        if needs_translation:
            term = lookup_term(self.relationship_type_uk)
            entry = Ua2EnDictionary.objects.filter(term__iexact=term).first()
            translation = entry.translation if entry else None

            if translation:
                self.relationship_type_en = translation

        super(Person2Company, self).save(*args, **kwargs)
Beispiel #5
0
    def save(self, *args, **kwargs):
        """Translate the names, normalise ``edrpou``, then save.

        ``name_en`` and ``short_name_en`` are filled from ``Ua2EnDictionary``
        only when they are empty. Spaces are removed from ``edrpou`` unless
        the value is blank or contains a colon.
        """
        for prefix in ("name", "short_name"):
            en_attr = prefix + "_en"

            if getattr(self, en_attr):
                continue

            term = lookup_term(getattr(self, prefix + "_uk"))
            entry = Ua2EnDictionary.objects.filter(term__iexact=term).first()

            if entry and entry.translation:
                setattr(self, en_attr, entry.translation)

        edrpou = self.edrpou or ""
        has_spaces = " " in edrpou
        is_blank = not edrpou.strip()

        if has_spaces and not is_blank and ":" not in edrpou:
            # edrpou is non-empty here, so it is the very value of self.edrpou
            self.edrpou = edrpou.replace(" ", "")

        super(Company, self).save(*args, **kwargs)
Beispiel #6
0
def fix_terms(apps, schema_editor):
    """Normalise every dictionary term and drop duplicates of translated ones.

    Each ``Ua2EnDictionary`` row gets its term replaced by
    ``lookup_term(term)``. When the row has a translation, other rows that
    already carry the same normalised term are reported on stdout and
    deleted.

    :param apps: historical app registry supplied by the migration runner
    :param schema_editor: unused, required by the ``RunPython`` signature
    :raises utils.IntegrityError: re-raised after printing the offending row
    """
    Ua2EnDictionary = apps.get_model("core", "Ua2EnDictionary")

    # Rows deleted as duplicates may still be waiting in the queryset being
    # iterated. Saving such an instance would re-insert it (Django falls back
    # to INSERT when the UPDATE matches no row), and it could even delete the
    # row that was meant to survive, so those instances are skipped.
    removed_pks = set()

    for t in Ua2EnDictionary.objects.all():
        if t.pk in removed_pks:
            continue

        t.term = lookup_term(t.term)

        if t.translation:
            duplicates = Ua2EnDictionary.objects.filter(term=t.term).exclude(
                pk=t.pk)
            duplicate_rows = list(duplicates.values_list("pk", "translation"))

            if duplicate_rows:
                print(t.translation + "::")
                print(";".join(
                    translation or "" for _, translation in duplicate_rows))
                print("=" * 30)

                removed_pks.update(pk for pk, _ in duplicate_rows)
                duplicates.delete()

        try:
            t.save()
        except utils.IntegrityError:
            print(t.term, t.translation)
            raise
    def handle(self, *args, **options):
        """Replace relationship types with their unified spelling.

        Reads ``core/dicts/unified_positions.csv`` (two columns: original
        position, unified position), rewrites ``relationship_type_uk`` of
        every matching ``Person2Company`` and refreshes its English
        translation from ``Ua2EnDictionary``. Positions without a translation
        are reported on stderr and registered in the dictionary as
        untranslated terms.
        """
        positions = {}
        with open("core/dicts/unified_positions.csv", "r") as fp:
            for row in reader(fp):
                if len(row) != 2:
                    self.stderr.write(
                        "CSV file doesn't look sane, check this out: {}".
                        format(", ".join(row)))
                    return

                positions[row[0].lower().strip()] = row[1].strip()

        for p2c in Person2Company.objects.annotate(
                rt_lower=Lower("relationship_type_uk")).filter(
                    rt_lower__in=positions.keys()).nocache():

            p2c.relationship_type_uk = positions[
                p2c.relationship_type_uk.lower()]

            term = lookup_term(p2c.relationship_type_uk)
            t = Ua2EnDictionary.objects.filter(term__iexact=term).first()

            if t and t.translation:
                # The translation belongs to the link being processed, not to
                # the command object, otherwise it is never saved.
                p2c.relationship_type_en = t.translation
            else:
                self.stderr.write(
                    "Cannot translate {} into english, leaving translation {}".
                    format(p2c.relationship_type_uk, p2c.relationship_type_en))

                try:
                    Ua2EnDictionary.objects.create(term=term)
                except IntegrityError:
                    # No need to turn alarm on, if the value is already in db
                    pass

            p2c.save()
Beispiel #8
0
    def handle(self, *args, **options):
        """Import companies, persons and their links from a spreadsheet.

        For every record of the first worksheet the command:

        1. finds or creates the company (``process_company``) and writes its
           primary key back to the sheet;
        2. if the row names a person, finds or creates that person, updates
           the name/date-of-birth/photo fields and writes the person's
           primary key back to the sheet;
        3. downloads the supporting documents listed in the row;
        4. creates or reuses the ``Person2Company`` link for the given dates
           and stores proofs, confirmation date and employee flag on it.

        Rows without a resolvable company, or whose person name yields no
        last name, are skipped.
        """
        # User recorded as the uploader of every downloaded document
        peklun = User.objects.get(username="******")

        wks = get_spreadsheet().sheet1

        for i, l in enumerate(wks.get_all_records()):
            # Reopen the worksheet from time to time to avoid being
            # disconnected by a timeout
            if i % 2000 == 0 and i:
                wks = get_spreadsheet().sheet1

            self.stdout.write('Processing line #{}'.format(i))

            company_ipn = l.get("ІПН", "")
            company_name = l.get("Назва", "")
            person_id = l.get("id персони", "")
            company_id = l.get("id компанії", "")
            photo_url = l.get("Фото", "")

            person = None
            # First let's search for appropriate company
            company = self.process_company(company_id, company_ipn,
                                           company_name)

            # No company — no go
            if company is None:
                continue

            # Let's backwrite company id to the spreadsheet for further use.
            # Row is i + 2 (presumably 1-based rows plus a header row); the
            # column is the number of keys in the record, i.e. this assumes
            # the company id lives in the last column — TODO confirm.
            if company.pk != company_id:
                company_id = company.pk
                wks.update_cell(i + 2, len(l.keys()), company.pk)

            person_name = l.get("ПІБ", "").strip()
            position = l.get("Посада", "").strip()
            # NOTE(review): `unicode` is a Python 2 builtin; this line needs
            # a shim to run under Python 3.
            person_dob = unicode(l.get("Дата народження", "")).strip()
            person_from = parse_date(l.get("Дата призначення", ""))
            person_to = parse_date(l.get("Дата звільнення", ""))

            doc_received = parse_date(l.get("Дата відповіді", ""))
            docs = l.get("Лінк на відповідь", "").strip()
            website = l.get("лінк на сайт", "").strip()

            # Now let's search for the person. Everything below (documents,
            # links) happens only for rows that name a person.
            if person_name:
                last_name, first_name, patronymic, _ = parse_fullname(
                    person_name)

                if not last_name:
                    continue

                # First we search by person_id (if it's present)
                if person_id:
                    try:
                        person = Person.objects.get(pk=person_id)
                    except Person.DoesNotExist:
                        pass

                # If nothing is found we search for name (for now)
                if not person:
                    try:
                        person = Person.objects.get(
                            first_name_uk__iexact=first_name,
                            last_name_uk__iexact=last_name,
                            patronymic_uk__iexact=patronymic)
                    except Person.MultipleObjectsReturned:
                        # Ambiguous name: report it; `person` stays None, so
                        # a new record is created below.
                        self.stderr.write(
                            "Double person {}!".format(person_name))
                    except Person.DoesNotExist:
                        pass

                # If nothing is found, let's create a record for that person
                if not person:
                    person = Person()
                    self.stderr.write(
                        "Created new person {}".format(person_name))

                person.first_name_uk = first_name
                person.last_name_uk = last_name
                person.patronymic_uk = patronymic

                # Register the name parts as terms of the Ua2Ru dictionary
                Ua2RuDictionary.objects.get_or_create(term=first_name)
                Ua2RuDictionary.objects.get_or_create(term=last_name)
                Ua2RuDictionary.objects.get_or_create(term=patronymic)

                # English names are always regenerated from the Ukrainian
                # ones, overwriting whatever was stored before
                person.first_name_en = translitua(first_name)
                person.last_name_en = translitua(last_name)
                person.patronymic_en = translitua(patronymic)

                person.is_pep = True
                person.imported = True
                person.type_of_official = 1

                # Parsing date (can be a full date or just a year or
                # year/month); the precision is derived from the length of
                # the raw string
                if person_dob:
                    person.dob = parse_date(person_dob)
                    if len(person_dob) == 4:
                        person.dob_details = 2  # Only year

                    if len(person_dob) > 4 and len(person_dob) < 7:
                        person.dob_details = 1  # month and year

                # Let's download the photo (if any), never replacing an
                # existing one
                if not person.photo and photo_url:
                    photo_name, photo_san_name, photo_content = download(
                        photo_url, translitua(person_name))

                    if photo_name:
                        person.photo.save(photo_san_name,
                                          ContentFile(photo_content))
                    else:
                        self.stdout.write("Cannot download image %s for %s" %
                                          (photo_url, person_name))

                person.save()

                # Let's write the person id back to the table. The column is
                # one before the company id column (see the assumption about
                # the column layout above).
                if person.pk != person_id:
                    person_id = person.pk
                    wks.update_cell(i + 2, len(l.keys()) - 1, person.pk)

                # Now let's download all supporting docs
                docs_downloaded = []
                # Reassigned for every resolved document, so despite its name
                # it ends up holding the name of the last one
                first_doc_name = False

                # There might be many of them
                for doc in docs.split(", "):
                    doc_instance = None

                    # we cannot download folders from google docs, so let's
                    # skip them

                    if doc and "folderview" not in doc \
                            and "drive/#folders" not in doc:
                        doc = expand_gdrive_download_url(doc)
                        # Documents are identified by the hash of their
                        # (expanded) URL, not of their content.
                        # NOTE(review): sha1() of a text string works on
                        # Python 2 only; Python 3 requires bytes.
                        doc_hash = sha1(doc).hexdigest()

                        # Reuse the document if this URL was imported before,
                        # otherwise download and store it
                        try:
                            doc_instance = Document.objects.get(hash=doc_hash)
                        except Document.DoesNotExist:
                            self.stdout.write(
                                'Downloading file {}'.format(doc))
                            doc_name, doc_san_name, doc_content = download(doc)
                            doc_san_name = translitua(doc_san_name)

                            if doc_name:
                                doc_instance = Document(name_uk=doc_name,
                                                        uploader=peklun,
                                                        hash=doc_hash)

                                doc_instance.doc.save(doc_san_name,
                                                      ContentFile(doc_content))
                                doc_instance.save()
                            else:
                                self.stdout.write(
                                    'Cannot download file {}'.format(doc))

                        if doc_instance:
                            first_doc_name = doc_instance.name_uk
                            docs_downloaded.append(doc_instance.doc.url)

                # Now let's setup links between person and companies.
                # Existing links match when each date equals the parsed date,
                # its mangle_date() variant, or is NULL.
                links = Person2Company.objects.filter(
                    (Q(date_established=person_from)
                     | Q(date_established=mangle_date(person_from))
                     | Q(date_established__isnull=True)),
                    (Q(date_finished=person_to)
                     | Q(date_finished=mangle_date(person_to))
                     | Q(date_finished__isnull=True)),
                    from_person=person,
                    to_company=company)

                # Delete if there are doubling links
                # including those cases when dates were imported incorrectly
                # because of parse_date
                if len(links) > 1:
                    links.delete()

                # No `defaults` are passed, so every argument is a lookup
                # field: an exactly matching link is reused, otherwise a new
                # one is created with these values.
                link, _ = Person2Company.objects.update_or_create(
                    from_person=person,
                    to_company=company,
                    date_established=person_from,
                    date_established_details=0,
                    date_finished=person_to,
                    date_finished_details=0)

                # Keep a position that is already set on the link
                if not link.relationship_type:
                    link.relationship_type = position

                # And translate them
                Ua2EnDictionary.objects.get_or_create(
                    term=lookup_term(position))

                # oh, and add links to supporting docs
                all_docs = docs_downloaded + website.split(", ")
                if all_docs:
                    # filter(None, ...) drops the empty strings produced by
                    # splitting an empty website cell
                    link.proof = ", ".join(filter(None, all_docs))

                    if first_doc_name:
                        link.proof_title = first_doc_name

                link.date_confirmed = doc_received
                link.is_employee = True

                link.save()
    def handle(self, *args, **options):
        """Register every untranslated term in ``Ua2EnDictionary``.

        Collects the normalised Ukrainian terms (regions, positions, offices,
        cities of birth, company names, relationship types and proof titles)
        that are not present in the dictionary yet and creates an empty
        dictionary entry for each of them. Empty terms are skipped.
        """
        # Only the keys are consulted below; the values are kept as real
        # lists so the mapping looks the same on Python 2 and Python 3.
        en_translations = {}

        for t in Ua2EnDictionary.objects.all().nocache():
            en_translations[lookup_term(t.term)] = [
                translation
                for translation in (t.translation, t.alt_translation)
                if translation
            ]

        not_translated_regions = set()
        not_translated_positions = set()
        not_translated_offices = set()
        not_translated_cities = set()
        not_translated_proofs = set()

        for p in Declaration.objects.filter(
                confirmed="a").defer("source").all().nocache():
            region = lookup_term(p.region_uk)
            if region not in en_translations:
                not_translated_regions.add(region)

            position = lookup_term(p.position_uk)
            if position not in en_translations:
                not_translated_positions.add(position)

            office = lookup_term(p.office_uk)
            if office not in en_translations:
                not_translated_offices.add(office)

        for p in Person.objects.all().nocache():
            if p.city_of_birth_en:
                continue

            # The Ukrainian value is the one to look up: the English one is
            # known to be empty at this point.
            city = lookup_term(p.city_of_birth_uk)
            if city not in en_translations:
                not_translated_cities.add(city)

        for c in Company.objects.all().nocache():
            if not c.name_en:
                name = lookup_term(c.name_uk)
                if name not in en_translations:
                    not_translated_offices.add(name)

            if not c.short_name_en:
                short_name = lookup_term(c.short_name_uk)
                if short_name not in en_translations:
                    not_translated_offices.add(short_name)

        for p2c in Person2Company.objects.all().nocache():
            if not p2c.relationship_type_en:
                relationship = lookup_term(p2c.relationship_type_uk)
                if relationship not in en_translations:
                    not_translated_positions.add(relationship)

        for r in RelationshipProof.objects.all().nocache():
            if not r.proof_title_en:
                title = lookup_term(r.proof_title_uk)
                if title not in en_translations:
                    not_translated_proofs.add(title)

        missing_terms = (
            not_translated_regions | not_translated_positions |
            not_translated_offices | not_translated_cities |
            not_translated_proofs
        )

        for term in missing_terms:
            if not term:
                continue

            try:
                Ua2EnDictionary.objects.create(
                    term=term
                )
            except IntegrityError:
                # No need to turn alarm on, if the value is already in db
                pass