def to_dict(self):
    """Convert a Minion (MP aide) model into an ES-indexable dict."""
    persons_bucket = set()
    autocomplete_bucket = set()

    doc = model_to_dict(self, fields=["paid"])
    doc["mp"] = self.mp2convocation.to_dict()

    # Index both the MP and the aide under their respective roles.
    mp_name = self.mp2convocation.mp.name
    persons_bucket |= parse_and_generate(mp_name, "Депутат")
    autocomplete_bucket |= autocomplete_suggestions(mp_name)

    aide_name = self.minion.name
    persons_bucket |= parse_and_generate(aide_name, "Помічник")
    autocomplete_bucket |= autocomplete_suggestions(aide_name)

    doc["_id"] = self.id
    doc["id"] = self.minion.id
    doc["name"] = aide_name
    doc["companies"] = [self.mp2convocation.party]
    doc["persons"] = list(filter(None, persons_bucket))
    doc["names_autocomplete"] = list(autocomplete_bucket)
    return doc
def to_dict(self):
    """Convert a construction-permit record into an ES-indexable dict."""
    dt = self.data
    res = {"_id": self.pk}

    names_autocomplete = set()
    addresses = set()
    raw_records = {
        dt["obj"],
        dt["land_plot_info"],
        dt["number"],
        dt["tech_oversee"],
    }
    persons = {dt["authors_oversee"]}

    party_fields = ("customer", "designer", "contractor")
    companies = {dt[field].strip(" 0") for field in party_fields}

    # Add EDRPOU spelling variants for each party that carries a code.
    for field in party_fields:
        code = parse_edrpou(dt[field])
        if code:
            companies |= generate_edrpou_options(code)

    # The address, when present, follows the first semicolon in the object field.
    if ";" in dt["obj"]:
        addresses = {dt["obj"].replace("\xa0", " ").split(";", 1)[1]}

    names_autocomplete |= companies

    if dt["tech_oversee"]:
        match = re.search(r"\d{2,}(\s*.*)", dt["tech_oversee"])
        if match:
            for piece in match.group(1).replace(";", ",").split(","):
                # Pieces without a long digit run look like person names;
                # the rest are kept as raw records.
                if re.search(r"\d{2,}", piece) is None:
                    names_autocomplete |= parse_and_generate(
                        piece, "технічний нагляд")
                else:
                    raw_records.add(piece)

    res.update(dt)
    res.update({
        "persons": list(filter(None, persons)),
        "companies": list(
            filter(
                None,
                [c for c in companies if not c.lower() == "фізична особа"])),
        "addresses": list(filter(None, addresses)),
        "names_autocomplete": list(filter(None, names_autocomplete)),
        "raw_records": list(filter(None, raw_records)),
    })
    return res
def to_dict(self):
    """Convert a tax-debtor record into an ES-indexable dict.

    Splits NAME between the person/company buckets depending on whether
    a company code (TIN_S) is present.
    """
    dt = self.data
    res = {
        "_id": self.pk,
        "last_updated_from_dataset": self.last_updated_from_dataset,
        "first_updated_from_dataset": self.first_updated_from_dataset,
    }

    companies, addresses, persons = set(), set(), set()

    if dt["TIN_S"]:
        # Company code present: NAME is the company, PIB is the debtor.
        companies |= deal_with_mixed_lang(dt["NAME"])
        companies |= generate_edrpou_options(dt["TIN_S"])
        persons |= parse_and_generate(dt["PIB"], "боржник")
    else:
        persons |= parse_and_generate(dt["NAME"], "боржник")

    # Tax office and its head are indexed regardless of debtor type.
    companies |= deal_with_mixed_lang(dt["DPI"])
    persons |= parse_and_generate(dt["DPI_BOSS"], "керівник податкової")

    names_autocomplete = companies.union(
        autocomplete_suggestions(dt["NAME"]),
        autocomplete_suggestions(dt["PIB"]),
        autocomplete_suggestions(dt["DPI_BOSS"]),
    )

    res.update(dt)
    res.update(
        {
            "companies": list(filter(None, companies)),
            "addresses": list(filter(None, addresses)),
            "persons": list(filter(None, persons)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
        }
    )
    return res
def to_dict(self):
    """Convert a party-donation transaction into an ES-indexable dict."""
    dt = self.data
    res = {"_id": self.pk}

    names_autocomplete = set()
    companies = {dt["party"]}
    companies |= generate_edrpou_options(dt["donator_code"])
    companies |= generate_edrpou_options(dt["party"])
    if dt.get("branch_code"):
        companies |= generate_edrpou_options(dt["branch_code"])
    if dt.get("branch_name"):
        companies |= generate_edrpou_options(dt["branch_name"])

    addresses = {dt["donator_location"]}
    persons = {dt.get("candidate_name")}

    if dt["donator_code"]:
        # A donor with a code is a legal entity.
        companies.add(dt["donator_name"])
    else:
        persons |= parse_and_generate(dt["donator_name"], "Донор")
        names_autocomplete |= autocomplete_suggestions(dt["donator_name"])

    names_autocomplete |= companies

    raw_records = {
        dt.get("account_number"),
        dt.get("payment_subject"),
        dt["transaction_doc_number"],
    }

    res.update(dt)
    res.update(
        {
            "companies": list(filter(None, companies)),
            "addresses": list(filter(None, addresses)),
            "persons": list(filter(None, persons)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
            "raw_records": list(filter(None, raw_records)),
            "type": self.get_type_display(),
            "period": self.period,
            "ultimate_recepient": self.ultimate_recepient,
        }
    )
    return res
def to_dict(self):
    """Convert a procurement deal into an ES-indexable dict."""
    dt = self.data
    res = {
        "_id": self.pk,
        "details_url": "https://z.texty.org.ua/deal/{}".format(dt["id"]),
        "last_updated_from_dataset": self.last_updated_from_dataset,
        "first_updated_from_dataset": self.first_updated_from_dataset,
    }

    buyer = dt["purchase"]["buyer"]
    seller = dt["seller"]

    companies = {buyer["name"], buyer["name_en"], seller["name"]}
    companies |= generate_edrpou_options(buyer["code"])
    companies |= generate_edrpou_options(seller["code"])
    companies |= generate_edrpou_options(dt["purchase"]["cost_dispatcher_code"])

    addresses = {
        seller["address"],
        seller["address_full"],
        buyer["address"],
        buyer["address_en"],
    }

    persons = set()
    # NOTE: alias, not a copy — the |= below also extends `companies`,
    # matching the original behavior.
    names_autocomplete = companies
    if buyer["person"]:
        persons |= parse_and_generate(buyer["person"], "Представник замовника")
        names_autocomplete |= autocomplete_suggestions(buyer["person"])

    raw_records = {
        dt["purchase"]["goods_name"],
        dt["purchase"]["goods_name_short"],
    }

    res.update(dt)
    res.update({
        "companies": list(filter(None, companies)),
        "addresses": list(filter(None, addresses)),
        "persons": list(filter(None, persons)),
        "names_autocomplete": list(filter(None, names_autocomplete)),
        "raw_records": list(filter(None, raw_records)),
    })
    return res
def to_dict(self):
    """Convert an MP-candidate record into an ES-indexable dict.

    Attempts to split the election body string into a region part and a
    body-name part on "область"/"київ".
    """
    dt = self.data
    res = {
        "_id": self.pk,
    }

    names_autocomplete = set()
    companies = {dt["party"]}
    persons = set(parse_and_generate(dt["name"], "Кандидат в депутати"))
    names_autocomplete |= autocomplete_suggestions(dt["name"])

    body = dt["body"]
    lowered = body.lower()
    if "область" in lowered or "київ" in lowered:
        pieces = re.split(r"область", body, flags=re.I, maxsplit=1)
        if len(pieces) != 2:
            pieces = re.split(r"київ", body, flags=re.I, maxsplit=1)
        if len(pieces) == 2:
            companies.add(pieces[1])
            dt["body_name"] = pieces[1]
            dt["body_region"] = body.replace(dt["body_name"], "").strip()
        else:
            logger.warning("Cannot parse body name out of {}".format(
                dt["body"]))

    names_autocomplete |= companies

    res.update(dt)
    res.update({
        "companies": list(filter(None, companies)),
        "persons": list(filter(None, persons)),
        "names_autocomplete": list(filter(None, names_autocomplete)),
    })
    return res
def to_dict(self):
    """Convert a party financial report into an ES-indexable dict.

    Returns:
        A dict with parsed title pages, associates and the search buckets
        (companies/persons/addresses/names_autocomplete), or None when the
        report title page (title3 jmespath) is missing.
    """
    dt = self.data
    res = {
        "_id": self.pk,
        "report_id": dt["report"]["id"],
        "timestamp": dt_parse(dt["report"]["timestamp"]),
        "last_updated_from_dataset": self.last_updated_from_dataset,
        "first_updated_from_dataset": self.first_updated_from_dataset,
    }
    names_autocomplete = set()
    companies = set()
    persons = set()
    addresses = set()

    title1 = self.title1_jmespath.search(dt)
    title2 = self.title2_jmespath.search(dt)
    report_title = self.title3_jmespath.search(dt)

    # Without the report title page there is nothing meaningful to index.
    if not report_title:
        return None

    report_title = report_title[0]
    report_title["STD"] = dt_parse(report_title["STD"])
    report_title["FID"] = dt_parse(report_title["FID"])

    titles = title1 + title2
    if titles:
        title = titles[0]
        address = ", ".join(
            filter(
                None,
                [
                    title.get("E_CONT"),
                    title.get("E_ADRES"),
                    title.get("E_POST"),
                    title.get("E_RAYON"),
                    title.get("E_STREET"),
                ],
            )
        )
        addresses.add(address)
        res["detailed_title"] = title
        companies |= deal_with_mixed_lang(title.get("E_NAME"))
        # Signatory of the report, indexed under their position.
        if title.get("FIO_PODP"):
            for p in deal_with_mixed_lang(title["FIO_PODP"]):
                persons |= parse_and_generate(p, title["POS_PODP"] or "")
                names_autocomplete |= autocomplete_suggestions(p)

    res["report_title"] = report_title
    companies |= generate_edrpou_options(report_title.get("D_EDRPOU"))
    companies |= deal_with_mixed_lang(report_title.get("D_NAME"))

    associates = self.current_persons_jmespath.search(dt)
    dismissed_associates = self.fired_persons_jmespath.search(dt)
    res["associates"] = associates
    res["dismissed_associates"] = dismissed_associates

    for assoc in associates + dismissed_associates:
        # Normalize a missing passport date to None; parse real ones.
        assoc["DAT_PASP"] = assoc.get("DAT_PASP")
        if assoc["DAT_PASP"]:
            assoc["DAT_PASP"] = dt_parse(assoc["DAT_PASP"])

        full_name = assoc.get("P_I_B", "") or ""
        if full_name.strip():
            parsed_chunks = []
            # TODO: better word splitting
            for chunk in full_name.split():
                # TODO: better detection of latin
                chunk = try_to_fix_mixed_charset(chunk)
                # Stop collecting at the first latin/parenthesised/dashed
                # chunk. (Was `chunk in "-"` — a substring test that is
                # only true for the exact "-" chunk; made explicit.)
                if (
                    is_eng(chunk)
                    or chunk.startswith("(")
                    or chunk.endswith(")")
                    or chunk == "-"
                    or chunk.startswith("-")
                ):
                    break
                elif chunk:
                    parsed_chunks.append(chunk)

            # Two or three clean chunks look like a real person name.
            if len(parsed_chunks) in [2, 3]:
                position = assoc.get("POSADA", "") or ""
                parsed_name = " ".join(parsed_chunks)
                persons |= parse_and_generate(parsed_name, position)
                names_autocomplete |= autocomplete_suggestions(parsed_name)
                persons |= parse_and_generate(full_name, position)
                names_autocomplete |= autocomplete_suggestions(full_name)
            else:
                # Anything else is treated as a company name.
                companies.add(" ".join(parsed_chunks))
                companies |= deal_with_mixed_lang(full_name)

    names_autocomplete |= companies
    res.update(
        {
            "companies": list(filter(None, companies)),
            "addresses": list(filter(None, addresses)),
            "persons": list(filter(None, persons)),
            "names_autocomplete": list(filter(None, names_autocomplete)),
        }
    )
    return res
def to_dict(self):
    """Aggregate a company's records, persons and snapshot flags into an
    ES-indexable dict.

    Iterates all company records collecting addresses, phones, names and
    the record with the highest revision; iterates related persons for the
    person/address buckets; attaches the latest snapshot flags.

    Returns:
        dict ready for ES indexing. `latest_record` is None when the
        company has no records carrying revisions.
    """
    addresses = set()
    persons = set()
    all_persons = set()
    names_autocomplete = set()
    companies = set()
    company_profiles = set()
    raw_records = set()

    companies.add(self.full_edrpou)
    companies.add(str(self.pk))

    latest_record = None
    latest_revision = 0
    for company_record in (
        self.records.all()
        .defer("company_hash", "location_parsing_quality")
        .nocache()
    ):
        addresses.add(company_record.location)
        addresses.add(company_record.parsed_location)
        addresses.add(company_record.validated_location)

        raw_records |= phone_variants(company_record.phone1)
        raw_records |= phone_variants(company_record.phone2)
        raw_records |= phone_variants(company_record.fax)
        raw_records.add(company_record.email)
        raw_records.add(company_record.form)

        company_profiles.add(company_record.company_profile)
        companies.add(company_record.name)
        companies.add(company_record.short_name)
        names_autocomplete.add(company_record.name)
        names_autocomplete.add(company_record.short_name)
        names_autocomplete.add(self.full_edrpou)
        names_autocomplete.add(str(self.pk))

        # Track the record with the highest revision number.
        if company_record.revisions:
            max_revision = max(company_record.revisions)
            if max_revision > latest_revision:
                latest_record = company_record
                latest_revision = max_revision
        else:
            logger.warning(
                "Cannot find revisions for the CompanyRecord {}".format(self.pk)
            )

    for person in (
        self.persons.all().defer("tokenized_record", "share", "revisions").nocache()
    ):
        for name in person.name:
            persons.add((name, person.get_person_type_display()))
        for addr in person.address:
            addresses.add(addr)
        for country in person.country:
            addresses.add(country)
        raw_records.add(person.raw_record)

    snapshot = self.snapshot_stats.order_by("-revision_id").first()
    flags = snapshot.to_dict() if snapshot else None

    for name, position in persons:
        all_persons |= parse_and_generate(name, position)
        names_autocomplete |= autocomplete_suggestions(name)

    return {
        "full_edrpou": self.full_edrpou,
        "addresses": list(filter(None, addresses)),
        "raw_persons": list(filter(None, persons)),
        "persons": list(filter(None, all_persons)),
        "companies": list(filter(None, companies)),
        "company_profiles": list(filter(None, company_profiles)),
        # Bug fix: previously `latest_record.to_dict()` raised
        # AttributeError when no record carried revisions (latest_record
        # stayed None); guard and emit None instead.
        "latest_record": latest_record.to_dict() if latest_record else None,
        "raw_records": list(filter(None, raw_records)),
        "names_autocomplete": list(filter(None, names_autocomplete)),
        "internals": {"flags": flags},
    }