Example #1
0
    def to_dict(self):
        """
        Build the indexable (ES) representation of this minion record.

        The result starts from the model's ``paid`` field and is extended
        with the nested MP document, the minion's id and name, the party
        (stored under ``companies``) and the person / autocomplete variants
        generated for both the MP and the minion.
        """
        doc = model_to_dict(self, fields=["paid"])
        convocation = self.mp2convocation
        doc["mp"] = convocation.to_dict()

        minion = self.minion
        persons = set()
        suggestions = set()

        # Both people get the same processing; only the role label differs.
        for full_name, role in (
            (convocation.mp.name, "Депутат"),
            (minion.name, "Помічник"),
        ):
            persons |= parse_and_generate(full_name, role)
            suggestions |= autocomplete_suggestions(full_name)

        doc.update(
            {
                "_id": self.id,
                "id": minion.id,
                "name": minion.name,
                "companies": [convocation.party],
                # Drop falsy entries from the generated person variants.
                "persons": [variant for variant in persons if variant],
                "names_autocomplete": list(suggestions),
            }
        )
        return doc
Example #2
0
    def to_dict(self):
        """
        Build the indexable document for this record.

        The raw ``self.data`` payload is copied into the result and then
        extended with the derived ``persons``, ``companies``, ``addresses``,
        ``names_autocomplete`` and ``raw_records`` lists (falsy entries are
        dropped from every list).
        """
        dt = self.data
        party_keys = ("customer", "designer", "contractor")

        raw_records = {
            dt["obj"],
            dt["land_plot_info"],
            dt["number"],
            dt["tech_oversee"],
        }
        persons = {dt["authors_oversee"]}

        # Company names with surrounding spaces and zeros stripped.
        companies = {dt[key].strip(" 0") for key in party_keys}
        for key in party_keys:
            code = parse_edrpou(dt[key])
            if code:
                companies |= generate_edrpou_options(code)

        # Everything after the first ";" of the object description is kept
        # as the address (non-breaking spaces normalised first).
        addresses = set()
        obj_description = dt["obj"]
        if ";" in obj_description:
            addresses = {
                obj_description.replace("\xa0", " ").split(";", 1)[1]
            }

        names_autocomplete = set(companies)

        oversee = dt["tech_oversee"]
        found = re.search(r"\d{2,}(\s*.*)", oversee) if oversee else None
        if found:
            tail = found.group(1).replace(";", ",")
            for piece in tail.split(","):
                if re.search(r"\d{2,}", piece):
                    # Pieces with a run of digits are kept as raw records.
                    raw_records.add(piece)
                else:
                    names_autocomplete |= parse_and_generate(
                        piece, "технічний нагляд")

        res = {"_id": self.pk}
        res.update(dt)
        res.update({
            "persons": [p for p in persons if p],
            "companies": [
                c for c in companies
                if c.lower() != "фізична особа" and c
            ],
            "addresses": [a for a in addresses if a],
            "names_autocomplete": [n for n in names_autocomplete if n],
            "raw_records": [r for r in raw_records if r],
        })

        return res
Example #3
0
    def to_dict(self):
        """
        Build the indexable document for this debtor record.

        Starts from the primary key and the dataset timestamps, merges in
        the raw ``self.data`` payload and adds the derived ``companies``,
        ``addresses``, ``persons`` and ``names_autocomplete`` lists (falsy
        entries are dropped from every list).
        """
        dt = self.data

        companies = set()
        persons = set()
        addresses = set()  # never populated here; emitted as an empty list

        if not dt["TIN_S"]:
            # No TIN_S: NAME is passed through the person-name generator.
            persons |= parse_and_generate(dt["NAME"], "боржник")
        else:
            # With TIN_S: NAME goes to companies, PIB to persons.
            companies |= deal_with_mixed_lang(dt["NAME"])
            companies |= generate_edrpou_options(dt["TIN_S"])
            persons |= parse_and_generate(dt["PIB"], "боржник")

        companies |= deal_with_mixed_lang(dt["DPI"])
        persons |= parse_and_generate(dt["DPI_BOSS"], "керівник податкової")

        # Work on a copy so that `companies` itself stays untouched.
        names_autocomplete = set(companies)
        for field in ("NAME", "PIB", "DPI_BOSS"):
            names_autocomplete |= autocomplete_suggestions(dt[field])

        res = {
            "_id": self.pk,
            "last_updated_from_dataset": self.last_updated_from_dataset,
            "first_updated_from_dataset": self.first_updated_from_dataset,
        }
        res.update(dt)
        res.update(
            {
                "companies": [c for c in companies if c],
                "addresses": [a for a in addresses if a],
                "persons": [p for p in persons if p],
                "names_autocomplete": [n for n in names_autocomplete if n],
            }
        )

        return res
Example #4
0
    def to_dict(self):
        """
        Build the indexable document for this donation record.

        The raw ``self.data`` payload is merged into the result together
        with the derived ``companies``, ``addresses``, ``persons``,
        ``names_autocomplete`` and ``raw_records`` lists (falsy entries
        dropped) and the model-level ``type``, ``period`` and
        ``ultimate_recepient`` values.
        """
        dt = self.data

        companies = {dt["party"]}
        for key in ("donator_code", "party"):
            companies |= generate_edrpou_options(dt[key])

        # Branch fields are optional and only used when non-empty.
        for optional_key in ("branch_code", "branch_name"):
            value = dt.get(optional_key)
            if value:
                companies |= generate_edrpou_options(value)

        addresses = {dt["donator_location"]}
        persons = {dt.get("candidate_name")}
        names_autocomplete = set()

        if dt["donator_code"]:
            # A donor with a code is recorded among the companies.
            companies.add(dt["donator_name"])
        else:
            # Without a code the donor name is processed as a person name.
            donor = dt["donator_name"]
            persons |= parse_and_generate(donor, "Донор")
            names_autocomplete |= autocomplete_suggestions(donor)

        names_autocomplete |= companies

        raw_records = {
            dt.get("account_number"),
            dt.get("payment_subject"),
            dt["transaction_doc_number"],
        }

        res = {"_id": self.pk}
        res.update(dt)
        res.update(
            {
                "companies": [c for c in companies if c],
                "addresses": [a for a in addresses if a],
                "persons": [p for p in persons if p],
                "names_autocomplete": [n for n in names_autocomplete if n],
                "raw_records": [r for r in raw_records if r],
                "type": self.get_type_display(),
                "period": self.period,
                "ultimate_recepient": self.ultimate_recepient,
            }
        )

        return res
Example #5
0
    def to_dict(self):
        """
        Build the indexable document for this deal record.

        Starts from the primary key, a details URL derived from the deal id
        and the dataset timestamps, merges in the raw ``self.data`` payload
        and adds the derived ``companies``, ``addresses``, ``persons``,
        ``names_autocomplete`` and ``raw_records`` lists (falsy entries are
        dropped from every list).

        Returns:
            dict: the document to be indexed.
        """
        dt = self.data
        res = {
            "_id": self.pk,
            "details_url": "https://z.texty.org.ua/deal/{}".format(dt["id"]),
            "last_updated_from_dataset": self.last_updated_from_dataset,
            "first_updated_from_dataset": self.first_updated_from_dataset,
        }

        purchase = dt["purchase"]
        buyer = purchase["buyer"]
        seller = dt["seller"]

        companies = (
            set([buyer["name"], buyer["name_en"], seller["name"]])
            | generate_edrpou_options(buyer["code"])
            | generate_edrpou_options(seller["code"])
            | generate_edrpou_options(purchase["cost_dispatcher_code"])
        )

        addresses = set([
            seller["address"],
            seller["address_full"],
            buyer["address"],
            buyer["address_en"],
        ])

        raw_records = set(
            [purchase["goods_name"], purchase["goods_name_short"]])

        persons = set()

        # Copy rather than alias: ``|=`` mutates a set in place, so sharing
        # one object would leak the person suggestions below into the
        # ``companies`` field.
        names_autocomplete = set(companies)

        buyer_person = buyer["person"]
        if buyer_person:
            persons |= parse_and_generate(buyer_person,
                                          "Представник замовника")
            names_autocomplete |= autocomplete_suggestions(buyer_person)

        res.update(dt)
        res.update({
            "companies": list(filter(None, companies)),
            "addresses": list(filter(None, addresses)),
            "persons": list(filter(None, persons)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
            "raw_records": list(filter(None, raw_records)),
        })

        return res
Example #6
0
    def to_dict(self):
        """
        Build the indexable document for this candidate record.

        The raw ``self.data`` payload is merged into the result together
        with the derived ``companies``, ``persons`` and
        ``names_autocomplete`` lists (falsy entries dropped).

        Side effect: when the ``body`` field can be split on "область" or
        "київ", ``body_name`` and ``body_region`` are written back into
        ``self.data`` before it is merged into the result.
        """
        dt = self.data

        companies = {dt["party"]}
        persons = set() | parse_and_generate(dt["name"], "Кандидат в депутати")
        names_autocomplete = set() | autocomplete_suggestions(dt["name"])

        body = dt["body"]
        body_lowered = body.lower()
        if "область" in body_lowered or "київ" in body_lowered:
            # Try the region marker first and fall back to the city marker.
            pieces = []
            for marker in (r"область", r"київ"):
                pieces = re.split(marker, body, flags=re.I, maxsplit=1)
                if len(pieces) == 2:
                    break

            if len(pieces) != 2:
                logger.warning("Cannot parse body name out of {}".format(
                    dt["body"]))
            else:
                body_name = pieces[1]
                companies.add(body_name)
                dt["body_name"] = body_name
                dt["body_region"] = body.replace(body_name, "").strip()

        names_autocomplete |= companies

        res = {"_id": self.pk}
        res.update(dt)
        res.update({
            "companies": [c for c in companies if c],
            "persons": [p for p in persons if p],
            "names_autocomplete": [n for n in names_autocomplete if n],
        })

        return res
Example #7
0
    def to_dict(self):
        """
        Build the indexable document for this report record.

        The result holds the primary key, the report id and parsed
        timestamp, the dataset timestamps, the selected title sections, the
        current and dismissed associates, and the derived ``companies``,
        ``addresses``, ``persons`` and ``names_autocomplete`` lists (falsy
        entries are dropped from every list).

        Returns ``None`` when the ``title3_jmespath`` search yields nothing.

        NOTE(review): date fields of the objects returned by the jmespath
        searches are overwritten in place with ``dt_parse`` results;
        presumably those objects are shared with ``self.data`` -- confirm.
        """
        dt = self.data
        res = {
            "_id": self.pk,
            "report_id": dt["report"]["id"],
            "timestamp": dt_parse(dt["report"]["timestamp"]),
            "last_updated_from_dataset": self.last_updated_from_dataset,
            "first_updated_from_dataset": self.first_updated_from_dataset,
        }

        names_autocomplete = set()
        companies = set()
        persons = set()
        addresses = set()

        title1 = self.title1_jmespath.search(dt)
        title2 = self.title2_jmespath.search(dt)
        report_title = self.title3_jmespath.search(dt)
        # Without a report title there is nothing to index.
        if not report_title:
            return None

        # Only the first match is used.
        report_title = report_title[0]

        report_title["STD"] = dt_parse(report_title["STD"])
        report_title["FID"] = dt_parse(report_title["FID"])

        # Candidates from both title searches; only the first one is used.
        titles = title1 + title2
        if titles:
            title = titles[0]

            # Join whichever address components are present.
            address = ", ".join(
                filter(
                    None,
                    [
                        title.get("E_CONT"),
                        title.get("E_ADRES"),
                        title.get("E_POST"),
                        title.get("E_RAYON"),
                        title.get("E_STREET"),
                    ],
                )
            )
            addresses.add(address)

            res["detailed_title"] = title
            companies |= deal_with_mixed_lang(title.get("E_NAME"))

            if title.get("FIO_PODP"):
                # Each variant of FIO_PODP is processed as a person name,
                # with POS_PODP (or "") as the second argument.
                for p in deal_with_mixed_lang(title["FIO_PODP"]):
                    persons |= parse_and_generate(
                        p, title["POS_PODP"] or ""
                    )

                    names_autocomplete |= autocomplete_suggestions(p)

        res["report_title"] = report_title
        companies |= generate_edrpou_options(report_title.get("D_EDRPOU"))
        companies |= deal_with_mixed_lang(report_title.get("D_NAME"))

        associates = self.current_persons_jmespath.search(dt)
        dismissed_associates = self.fired_persons_jmespath.search(dt)

        res["associates"] = associates
        res["dismissed_associates"] = dismissed_associates

        for assoc in associates + dismissed_associates:
            # Make sure the key exists (None when absent), then parse it
            # when it holds a value.
            assoc["DAT_PASP"] = assoc.get("DAT_PASP")
            if assoc["DAT_PASP"]:
                assoc["DAT_PASP"] = dt_parse(assoc["DAT_PASP"])

            full_name = assoc.get("P_I_B", "") or ""

            if full_name.strip():
                parsed_name = ""
                parsed_chunks = []

                # Collect leading chunks until the first one that looks
                # like it is not part of a personal name.
                # TODO: better word splitting
                for chunk in full_name.split():
                    # TODO: better detection of latin
                    chunk = try_to_fix_mixed_charset(chunk)

                    # `chunk in "-"` is a substring test: it is true for
                    # "-" and for the empty string.
                    if (
                        is_eng(chunk)
                        or chunk.startswith("(")
                        or chunk.endswith(")")
                        or chunk in "-"
                        or chunk.startswith("-")
                    ):
                        break
                    elif chunk:
                        parsed_chunks.append(chunk)

                # Looks like real person
                if len(parsed_chunks) in [2, 3]:
                    # Process both the cleaned-up chunks and the original
                    # string.
                    persons |= parse_and_generate(
                        " ".join(parsed_chunks), assoc.get("POSADA", "") or ""
                    )

                    names_autocomplete |= autocomplete_suggestions(" ".join(parsed_chunks))

                    persons |= parse_and_generate(
                        full_name, assoc.get("POSADA", "") or ""
                    )
                    names_autocomplete |= autocomplete_suggestions(full_name)
                else:
                    # Any other chunk count is recorded as a company name.
                    companies.add(" ".join(parsed_chunks))
                    companies |= deal_with_mixed_lang(full_name)


        names_autocomplete |= companies

        res.update(
            {
                "companies": list(filter(None, companies)),
                "addresses": list(filter(None, addresses)),
                "persons": list(filter(None, persons)),
                "names_autocomplete": list(filter(None, names_autocomplete)),
            }
        )

        return res
Example #8
0
    def to_dict(self):
        """
        Build the indexable document for this company.

        Aggregates addresses, names, contact details and profiles from all
        related records, collects names, addresses and countries from the
        related persons, and attaches the flags of the snapshot with the
        highest ``revision_id`` (if there is one).

        Returns:
            dict: the document to be indexed. ``latest_record`` is the
            serialised record with the highest revision, or ``None`` when
            no record has a revision above 0 (including the case of a
            company with no records at all).
        """
        addresses = set()
        persons = set()
        all_persons = set()
        names_autocomplete = set()
        companies = set()
        company_profiles = set()
        raw_records = set()

        companies.add(self.full_edrpou)
        companies.add(str(self.pk))

        latest_record = None
        latest_revision = 0
        for company_record in (
            self.records.all()
            .defer("company_hash", "location_parsing_quality")
            .nocache()
        ):
            addresses.add(company_record.location)
            addresses.add(company_record.parsed_location)
            addresses.add(company_record.validated_location)
            raw_records |= phone_variants(company_record.phone1)
            raw_records |= phone_variants(company_record.phone2)
            raw_records |= phone_variants(company_record.fax)
            raw_records.add(company_record.email)
            raw_records.add(company_record.form)

            company_profiles.add(company_record.company_profile)
            companies.add(company_record.name)

            names_autocomplete.add(company_record.name)
            names_autocomplete.add(company_record.short_name)
            names_autocomplete.add(self.full_edrpou)
            names_autocomplete.add(str(self.pk))

            companies.add(company_record.short_name)

            if company_record.revisions:
                # Track the record that carries the highest revision.
                newest_revision = max(company_record.revisions)
                if newest_revision > latest_revision:
                    latest_record = company_record
                    latest_revision = newest_revision
            else:
                logger.warning(
                    "Cannot find revisions for the CompanyRecord {}".format(self.pk)
                )

        for person in (
            self.persons.all().defer("tokenized_record", "share", "revisions").nocache()
        ):
            # NOTE(review): addresses and countries are only collected when
            # the person has at least one name, because these loops are
            # nested under the name loop -- confirm this is intended.
            for name in person.name:
                persons.add((name, person.get_person_type_display()))

                for addr in person.address:
                    addresses.add(addr)

                for country in person.country:
                    addresses.add(country)

            raw_records.add(person.raw_record)

        snapshot = self.snapshot_stats.order_by("-revision_id").first()
        flags = None
        if snapshot:
            flags = snapshot.to_dict()

        for name, position in persons:
            all_persons |= parse_and_generate(name, position)
            names_autocomplete |= autocomplete_suggestions(name)

        return {
            "full_edrpou": self.full_edrpou,
            "addresses": list(filter(None, addresses)),
            "raw_persons": list(filter(None, persons)),
            "persons": list(filter(None, all_persons)),
            "companies": list(filter(None, companies)),
            "company_profiles": list(filter(None, company_profiles)),
            # No record may have been selected above; emit None instead of
            # failing with AttributeError on ``None.to_dict()``.
            "latest_record": (
                latest_record.to_dict() if latest_record is not None else None
            ),
            "raw_records": list(filter(None, raw_records)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
            "internals": {"flags": flags},
        }