コード例 #1
0
    def _search_edr(self, company, fuzziness):
        """
        Look a company up in the EDRPOU index.

        When the record carries a registration code, an exact ``term``
        lookup by that code (leading zeros stripped) is tried first. If
        there is no code, or that lookup yields a falsy response, a fuzzy
        search by company name (plus address, when present) is executed
        instead, with highlighting enabled for name, short_name and location.

        :param company: mapping with "beneficial_owner_company_code",
            "company_name" and "address" keys
        :param fuzziness: fuzziness value handed to the fuzzy queries
        :returns: response of whichever search was executed last
        """
        code = company["beneficial_owner_company_code"]
        if code:
            by_code = EDRPOU.search().query("term", edrpou=code.lstrip("0"))
            found = by_code.execute()
            if found:
                return found

            self.stdout.write(
                "Cannot find a company by code %s, falling back to search by name %s"
                % (
                    company["beneficial_owner_company_code"],
                    company["company_name"],
                ))

        # The name clause is always present and boosted.
        name_clause = Q(
            "multi_match",
            query=company["company_name"],
            fuzziness=fuzziness,
            fields=["name", "short_name", "location"],
            boost=2.0,
        )
        clauses = [name_clause]

        # The address clause is optional and only matches against location.
        if company["address"]:
            address_clause = Q(
                "match",
                location={
                    "query": company["address"],
                    "fuzziness": fuzziness
                },
            )
            clauses.append(address_clause)

        fuzzy = EDRPOU.search().query(Q("bool", should=clauses))
        fuzzy = fuzzy.highlight_options(
            order="score",
            fragment_size=500,
            number_of_fragments=100,
            pre_tags=['<u class="match">'],
            post_tags=["</u>"],
        )
        fuzzy = fuzzy.highlight("name", "short_name", "location")

        return fuzzy.execute()
コード例 #2
0
ファイル: admin.py プロジェクト: rodionvolovik/pep.org.ua
    def edr_export(self, request):
        """
        Export the EDR records selected in the POSTed form as a CSV download.

        Record ids are read from the "iswear" list; for each one the matching
        "company_<id>_id" value is used to fetch the EDRPOU document. A
        "founders" list is flattened into a single ";;;"-separated string.

        The CSV header is the ordered union of the keys of all exported
        records. Using only the first record's keys makes ``DictWriter``
        raise ``ValueError`` as soon as a later record carries a key the
        first one lacks; records missing a column get an empty cell.

        :param request: HTTP request carrying the selection in ``POST``
        :returns: CSV attachment response, or a redirect to the search page
            when nothing was selected/found
        """
        data = []
        fieldnames = []
        known_fields = set()

        for rec_id in request.POST.getlist("iswear"):
            meta_id = request.POST.get("company_%s_id" % rec_id)
            res = EDRPOU.get(id=meta_id)
            if not res:
                continue

            rec = res.to_dict()

            if isinstance(rec.get("founders"), list):
                rec["founders"] = ";;;".join(rec["founders"])
            data.append(rec)

            # Keep first-seen order so that homogeneous exports look exactly
            # as they did when the header came from the first record only.
            for key in rec:
                if key not in known_fields:
                    known_fields.add(key)
                    fieldnames.append(key)

        if not data:
            self.message_user(request, "Нічого експортувати")
            return redirect(reverse("admin:edr_search"))

        fp = StringIO()
        try:
            w = DictWriter(fp, fieldnames=fieldnames)
            w.writeheader()
            w.writerows(data)
            payload = fp.getvalue()
        finally:
            # Release the buffer even if serialisation fails.
            fp.close()

        response = HttpResponse(payload, content_type="text/csv")

        response[
            "Content-Disposition"] = "attachment; filename=edr_{:%Y%m%d_%H%M}.csv".format(
                datetime.datetime.now())

        response["Content-Length"] = len(response.content)

        return response
コード例 #3
0
    def iter_docs(self):
        """
        Walk through the input file and yield registry records one by one.

        For a zip input only archive members whose lowercased name contains
        "uo" or "юо" are read, and only when they end with ".xml" or ".csv".
        Plain xml/csv inputs are parsed directly from ``self.file``.

        :returns: iterator over company records from registry
        :rtype: collections.Iterable[dict]
        """

        if self.file_type == "zip":
            with ZipFile(self.file) as archive:
                for member in archive.namelist():
                    # Fall back to cp866 when the name cannot be decoded
                    # by the default conversion.
                    try:
                        readable = unicode(member)
                    except UnicodeDecodeError:
                        readable = member.decode("cp866")

                    lowered = readable.lower()
                    if not ("uo" in lowered or "юо" in lowered):
                        continue

                    logger.info("Reading {} file from archive {}".format(
                        readable, self.file))

                    if lowered.endswith(".xml"):
                        parse = self._iter_xml
                    elif lowered.endswith(".csv"):
                        parse = self._iter_csv
                    else:
                        continue

                    with archive.open(member, 'r') as raw:
                        for row in parse(raw):
                            yield EDRPOU(**row).to_dict(True)
            return

        if self.file_type == "xml":
            rows = self._iter_xml(self.file)
        elif self.file_type == "csv":
            rows = self._iter_csv(self.file)
        else:
            return

        for row in rows:
            yield EDRPOU(**row).to_dict(True)
コード例 #4
0
ファイル: admin.py プロジェクト: rodionvolovik/pep.org.ua
    def edr_search(self, request):
        """
        Render the EDR search page.

        When a "q" GET parameter is present, run a ``multi_match`` query
        (all terms required) over name, short_name, edrpou, head and
        founders, limited to the first 200 hits. Without a query the
        template receives ``None`` as the result set.

        :param request: incoming HTTP request
        :returns: rendered "admin/core/company/edr_search.html" response
        """
        query = request.GET.get("q")
        search_results = None

        if query:
            searched_fields = [
                "name", "short_name", "edrpou", "head", "founders"
            ]
            every_term = ES_Q(
                "multi_match",
                operator="and",
                query=query,
                fields=searched_fields,
            )
            limited = EDRPOU.search().query(every_term)[:200]
            search_results = limited.execute()

        context = {
            "query": query,
            "search_results": search_results
        }
        return render(request, "admin/core/company/edr_search.html", context)
コード例 #5
0
    def connect_domestic_companies(self, save_it):
        """
        Connect persons to domestic companies for every approved match.

        For each ``BeneficiariesMatching`` row with status "m" the company is
        resolved (EDR index first, then an existing ``Company`` looked up by
        the zero-padded code), the person is resolved, and a connection is
        created or updated for every declaration listed on the match.
        Counters on ``self`` (``failed``, ``successful``, ``companies_*``,
        ``connections_*``) are updated and problems go to ``self.stderr``.

        :param save_it: dry-run switch; it is forwarded to both importers and
            every ``save()`` issued directly by this method is guarded by it
        """
        for ownership in BeneficiariesMatching.objects.filter(status="m"):
            k = ownership.edrpou_match.lstrip("0")

            # "NONE" is a marker value that is skipped without counting
            # the record as failed.
            if k == "NONE":
                continue

            if not k:
                self.stderr.write(
                    "Approved company with the key %s has no edrpou!, skipping"
                    % (ownership.company_key, ))

                self.failed += 1
                continue

            ans = EDRPOU.find_by_edrpou(k)

            if len(ans) > 1:
                self.stderr.write(
                    "Too many companies found by code %s, for the key %s, skipping"
                    % (ownership.edrpou_match, ownership.company_key))

                self.failed += 1
                continue

            if not ans:
                # Nothing in the EDR index: fall back to a company that is
                # already stored, matched by the 8-character padded code.
                try:
                    company = Company.objects.get(
                        edrpou=unicode(ownership.edrpou_match).rjust(8, "0"))
                except Company.DoesNotExist:
                    self.stderr.write(
                        "Cannot find a company by code %s, for the key %s, skipping"
                        % (ownership.edrpou_match, ownership.company_key))

                    self.failed += 1
                    continue
            else:
                company, created = self.importer.get_or_create_from_edr_record(
                    ans[0].to_dict(), save_it)

                if not company:
                    self.stderr.write(
                        "Cannot create a company by code %s, for the key %s, skipping"
                        % (ownership.edrpou_match, ownership.company_key))

                    self.failed += 1
                    continue

                company.affiliated_with_pep = True
                # Honour the dry-run switch: previously the company was
                # saved unconditionally, so a dry run still wrote to the
                # database.
                if save_it:
                    company.save()

                if created:
                    self.companies_created += 1
                    self.stdout.write("Created company %s" % company)
                else:
                    self.companies_updated += 1
                    self.stdout.write("Updated company %s" % company)

            try:
                person = Person.objects.get(pk=ownership.person)
            except Person.DoesNotExist:
                self.stderr.write(
                    "Cannot find a person by code %s, for the key %s, skipping"
                    % (ownership.person, ownership.company_key))
                self.failed += 1
                continue

            most_recent_record = self.get_latest_declaration_record(ownership)
            for d in ownership.declarations:
                try:
                    decl = Declaration.objects.get(pk=d)
                except Declaration.DoesNotExist:
                    # A missing declaration only skips this one connection;
                    # the ownership record is still counted as successful.
                    self.stderr.write(
                        "Cannot find a declaration by id %s, for the key %s, skipping"
                        % (d, ownership.company_key))
                    continue

                conn, conn_created = self.conn_importer.get_or_create_from_declaration(
                    person, company,
                    most_recent_record.get("link_type",
                                           "Бенефіціарний власник"), decl,
                    save_it)

                if most_recent_record.get("percent_of_cost"):
                    conn.share = most_recent_record["percent_of_cost"]

                    if save_it:
                        conn.save()

                if conn_created:
                    self.connections_created += 1
                    self.stdout.write("Created connection %s" % conn)
                else:
                    self.connections_updated += 1
                    self.stdout.write("Updated connection %s" % conn)

            self.successful += 1
コード例 #6
0
    def handle(self, *args, **options):
        """
        Rebuild the EDRPOU index from a registry dump.

        The dump is either downloaded (no "filename" option) or read from a
        local file ("filename" together with "revision" and "dump_date").
        When no "revision" is given, the newest revision created after the
        ``last_update`` of the current index is picked. The index is dropped
        and re-created only after the first 1000 records were read
        successfully.

        :param options: command options; reads "guid", "revision",
            "filename" and "dump_date"
        :raises EDRImportException: when the current index is empty and no
            revision was given, or when the dump yields no records
        """
        # NOTE(review): self.proxies is populated here but is not passed to
        # the requests.get() calls below - confirm whether that is intended.
        self.proxies = {}
        if hasattr(settings, "PROXY"):
            self.proxies["http"] = settings.PROXY
            self.proxies["https"] = settings.PROXY

        GUID = options["guid"]
        fp = None
        if not options["revision"]:
            latest = EDRPOU.search().aggs.metric(
                "max_last_update", "max", field="last_update")[:1].execute()
            if latest:
                update_after = latest[0].last_update
                self.stdout.write(
                    "Only loading dumps after {}".format(update_after))
            else:
                raise EDRImportException(
                    "Current index is empty, please run manual import. For f**k sake"
                )

        if not options["filename"]:
            data_url = None
            timestamp = None
            revision = None

            try:
                response = requests.get(
                    "https://data.gov.ua/api/3/action/resource_show", {
                        "id": GUID,
                        "nocache": randrange(100)
                    }).json()

                if not response.get("success"):
                    self.stderr.write("Unsuccessful response from api.")
                    return

                # Oldest revision first, so the loop stops at the earliest
                # revision that satisfies the condition.
                revisions = sorted(response["result"]["resource_revisions"],
                                   key=lambda x: parse(x["resource_created"]))

                for rev in revisions:
                    revision = rev["url"].strip("/").rsplit('/', 1)[-1]

                    if not options["revision"]:
                        timestamp = parse(rev["resource_created"])

                        if update_after is None or update_after < timestamp:
                            data_url = rev["url"]
                            break

                    if revision == options["revision"]:
                        timestamp = parse(rev["resource_created"])
                        data_url = rev["url"]
                        break

            except (TypeError, IndexError, KeyError):
                self.stderr.write("Cannot obtain information about dump file")
                raise

            if not data_url:
                self.stderr.write("Can not get dataset url from api.")
                return

            self.stdout.write(
                "Loading data of revision: {}, created at: {}".format(
                    revision, timestamp))

            r = requests.get(data_url, stream=True)

            # The file type is derived from the subtype of Content-Type.
            ext = r.headers["Content-Type"].split("/")[-1]
            ext = ext.lower().lstrip(".")
            if ext not in ["zip", "xml", "csv"]:
                self.stderr.write(
                    "Unsupported dataset file type: {}".format(ext))
                return

            reader_args = (StringIO(r.content), timestamp, revision, ext)
        elif options["revision"] and options["dump_date"]:
            dump_date = timezone.make_aware(
                parse(options["dump_date"], dayfirst=True))
            _, ext = os.path.splitext(options["filename"])

            fp = open(options["filename"], "rb")
            reader_args = (fp, dump_date, options["revision"],
                           ext.lower().lstrip("."))
        else:
            self.stderr.write(
                "You should provide (possibly fake) revision id and date of dump when loading files manually"
            )
            # Without a reader there is nothing to import; falling through
            # used to fail on the unbound ``reader`` name.
            return

        try:
            reader = EDR_Reader(*reader_args)
            iterator = reader.iter_docs()

            # Peek at the first chunk before touching the index, so an
            # unreadable dump never wipes the existing data.
            first_portion = list(islice(iterator, 1000))
            if first_portion:
                Index(EDRPOU._doc_type.index).delete(ignore=404)
                EDRPOU.init()
                es = connections.get_connection()

                bulk(es, first_portion)
                bulk(es, iterator, chunk_size=10000)
            else:
                raise EDRImportException(
                    "Less than 1000 valid records, for f**k sake")
        finally:
            # Close the local dump file even when the import fails.
            if fp:
                fp.close()
コード例 #7
0
    def search_me(self, company, fuzziness=1, candidates=10):
        """
        Find EDRPOU candidates for a company.

        A company with an "edrpou" value is looked up by an exact ``term``
        query on that code (leading zeros stripped). Otherwise a ``bool``
        query is assembled from the city, the Ukrainian full and short names
        and every head name, with highlighting turned on.

        :param company: mapping with "edrpou", "city", "name_uk",
            "short_name_uk" and "heads" keys
        :param fuzziness: fuzziness value used by the fuzzy clauses
        :param candidates: maximum number of hits to return
        :returns: list of dicts with name, short_name, head, location,
            edrpou, status, company_profile and score; highlighted fragments
            replace the plain field value where a highlight is available
        """
        if company["edrpou"]:
            search = EDRPOU.search().query(
                "term", edrpou=company["edrpou"].lstrip("0"))
        else:
            clauses = [
                Q("match",
                  location={
                      "query": company["city"],
                      "fuzziness": fuzziness
                  }),
                Q("multi_match",
                  query=u"%s %s" %
                  (company["name_uk"], company["short_name_uk"]),
                  fuzziness=fuzziness,
                  fields=["name", "short_name", "location"],
                  boost=1.5)
            ]

            # One extra clause per known head of the company.
            clauses.extend(
                Q("match",
                  head={
                      "query": headname,
                      "operator": "or",
                      "minimum_should_match": 3,
                      "fuzziness": fuzziness
                  }) for headname in company["heads"])

            search = EDRPOU.search().query(Q("bool", should=clauses))
            search = search.highlight_options(
                order='score',
                fragment_size=500,
                number_of_fragments=100,
                pre_tags=['<u class="match">'], post_tags=["</u>"])
            search = search.highlight("name", "head", "short_name", "location")

        hits = search.execute()
        found = []
        for hit in hits[:candidates]:
            marked = getattr(hit.meta, "highlight", {})

            entry = {}
            for field in ("name", "short_name", "head", "location"):
                if field in marked:
                    # Highlight fragments are joined into a single string.
                    entry[field] = " ".join(getattr(hit.meta.highlight, field))
                else:
                    entry[field] = getattr(hit, field)

            entry["edrpou"] = hit.edrpou
            entry["status"] = hit.status
            entry["company_profile"] = hit.company_profile
            entry["score"] = hit._score
            found.append(entry)

        return found
コード例 #8
0
    def handle(self, *args, **options):
        """
        Copy address and registration code from the EDR index onto companies.

        Processes ``CompanyMatching`` rows with status "m" and a usable
        ``edrpou_match``. For each one the EDR hits for that code are ordered
        by the position of their status in ``self.company_types`` and the
        first hit is used. A parsed address is applied only when it does not
        contradict any address part already stored on the company; an
        unparseable address is stored as ``raw_address``. Changes are saved
        only when the "real_run" option is set.

        :param options: command options; reads "real_run"
        """
        tasks = CompanyMatching.objects \
            .exclude(edrpou_match="NONE") \
            .exclude(edrpou_match="") \
            .exclude(edrpou_match__isnull=True) \
            .filter(status="m")

        for task in tasks:
            try:
                company = Company.objects.get(pk=task.company_id)
            except Company.DoesNotExist:
                self.stderr.write("Cannot find company %s" % task.company_id)
                continue

            hits = EDRPOU.search().query(
                "term", edrpou=task.edrpou_match.lstrip("0")).execute()

            ranked = sorted(
                hits, key=lambda hit: self.company_types.index(hit.status))

            if not ranked:
                continue

            best = ranked[0]
            parsed = parse_address(best.location)
            best.edrpou = best.edrpou.rjust(8, "0")

            if parsed:
                zip_code, city, street, appt = parsed

                # (stored value, value from EDR, message for a mismatch)
                comparisons = (
                    (company.zip_code, zip_code,
                     "NOT replacing zipcode %s with %s for company %s, %s"),
                    (company.city, city,
                     "NOT replacing city %s with %s for company %s, %s"),
                    (company.street, street,
                     "NOT replacing street %s with %s for company %s, %s"),
                    (company.appt, appt,
                     "NOT replacing appt %s with %s for company %s, %s"),
                )

                conflict = False
                for stored, incoming, template in comparisons:
                    if stored and stored != incoming:
                        self.stdout.write(
                            template %
                            (stored, incoming, company.name, company.id))
                        conflict = True

                # Any contradiction leaves the company entirely untouched.
                if conflict:
                    self.stdout.write("=======\n\n")
                    continue

                company.zip_code = zip_code
                company.city = city
                company.street = street
                company.appt = appt
            else:
                company.raw_address = best.location

            if company.edrpou and company.edrpou != best.edrpou:
                self.stdout.write(
                    "Replacing edrpou %s with %s for company %s, %s" %
                    (company.edrpou, best.edrpou, company.name, company.id))

            company.edrpou = best.edrpou

            if options["real_run"]:
                company.save()
コード例 #9
0
    def handle(self, *args, **options):
        """
        Create shareholder connections from approved "smida_10" matches.

        For every approved ``AdHocMatch`` of that dataset the company is
        resolved through the EDR index and created/updated, then an
        "Акціонер" connection between the matched person and the company is
        created or updated, optionally with the share taken from the match.
        Finally the company code is checked against the person's first
        declaration returned by ``get_declarations()``. A summary is written
        to ``self.stdout`` at the end.

        :param options: command options; reads "real_run" (dry run when
            falsy)
        """
        company_code_path = jmespath.compile(
            "nacp_orig.step_7.*.emitent_ua_company_code")
        save_it = options["real_run"]
        activate(settings.LANGUAGE_CODE)

        self.importer = CompanyImporter(logger=PythonLogger("cli_commands"))
        self.conn_importer = Person2CompanyImporter(
            logger=PythonLogger("cli_commands"))

        successful = 0
        failed = 0
        total = 0
        companies_created = 0
        companies_updated = 0
        connections_created = 0
        connections_updated = 0

        for rec in AdHocMatch.objects.filter(
                status="a",
                dataset_id="smida_10").prefetch_related("person").nocache():

            total += 1
            if "EDRPOU" not in rec.matched_json:
                self.stderr.write(
                    "Approved company {} has no edrpou!, skipping".format(
                        rec.pk))

                failed += 1
                continue

            if rec.person is None:
                self.stderr.write(
                    "Cannot find a person rec {}, skipping".format(rec.pk))
                failed += 1
                continue

            ans = EDRPOU.find_by_edrpou(rec.matched_json["EDRPOU"])

            if len(ans) > 1:
                self.stderr.write(
                    "Too many companies found by code {}, skipping".format(
                        rec.matched_json["EDRPOU"]))

                failed += 1
                continue

            if not ans:
                self.stderr.write(
                    "No company found by code {}, skipping".format(
                        rec.matched_json["EDRPOU"]))

                failed += 1
                continue

            company, created = self.importer.get_or_create_from_edr_record(
                ans[0].to_dict(), save_it)

            if not company:
                self.stderr.write(
                    "Cannot create a company by code {}, for the rec {}, skipping"
                    .format(rec.matched_json["EDRPOU"], rec.pk))

                failed += 1
                continue

            if created:
                companies_created += 1
                self.stdout.write("Created company {}".format(company))
            else:
                companies_updated += 1
                self.stdout.write("Updated company {}".format(company))

            # Report connections of any other type between the same pair;
            # they are only reported, not modified.
            existing_connections = Person2Company.objects.filter(
                from_person=rec.person,
                to_company=company).exclude(relationship_type_uk="Акціонер")

            for ex_conn in existing_connections:
                self.stderr.write(
                    "Connection between {} and {} already exists but has type {}"
                    .format(ex_conn.from_person, ex_conn.to_company,
                            ex_conn.relationship_type))

            conn, conn_created = self.conn_importer.get_or_create(
                rec.person, company, "Акціонер",
                rec.last_updated_from_dataset.date(),
                "https://smida.gov.ua/db/emitent/{}".format(
                    rec.matched_json["EDRPOU"]),
                "За інформацією Агентства з розвитку інфраструктури фондового ринку України (АРІФРУ)",
                "According to the information Stock market infrastructure development agency of Ukraine (SMIDA)",
                save_it)
            if conn_created:
                connections_created += 1
            else:
                connections_updated += 1

            if "share" in rec.matched_json:
                # Shares may use a decimal comma.
                conn.share = float(rec.matched_json["share"].replace(
                    ",", ".").strip())
                if save_it:
                    conn.save()

            decls = rec.person.get_declarations()
            if decls:
                decl = decls[0]
                if decl.nacp_declaration:
                    # Compare codes without leading zeros and ignore codes
                    # that are empty after stripping.
                    declared_companies = set(
                        code.lstrip("0")
                        for code in (company_code_path.search(decl.source)
                                     or []))
                    declared_companies.discard("")

                    if rec.matched_json["EDRPOU"].lstrip(
                            "0") not in declared_companies:
                        self.stderr.write(
                            "Cannot find company {} ({}) in declaration {} of {}"
                            .format(company, company.edrpou, decl.url,
                                    rec.person))
                else:
                    # NOTE(review): this branch runs when the first
                    # declaration is not an NACP one; nothing is reported
                    # when the person has no declarations at all - confirm
                    # that this pairing is intended.
                    self.stderr.write(
                        "No declaration found for person {}".format(
                            rec.person))

            # The record made it through every step; without this increment
            # the summary below always reported zero successes.
            successful += 1

        self.stdout.write(
            "{} records processed, failed: {}, successed: {}".format(
                total, failed, successful))

        self.stdout.write(
            "Companies created: {}, companies updated: {}".format(
                companies_created, companies_updated))

        self.stdout.write(
            "Connections created: {}, connections updated: {}".format(
                connections_created, connections_updated))
コード例 #10
0
    def handle(self, *args, **options):
        self.stdout.write("Starting matching job ...")
        activate(settings.LANGUAGE_CODE)

        # region Companies
        self.stdout.write("Starting import Companies.")
        pep_heads = self.company_heads_mapping()

        companies_dict = {}
        created_companies_total = 0
        updated_companies_total = 0
        failed_companies_total = 0

        company_importer = CompanyImporter(logger=PythonLogger("cli_commands"))

        smida_candidates = SMIDACandidate.objects.filter(status="a")

        for candidate in tqdm(smida_candidates.nocache().iterator(),
                              total=smida_candidates.count()):
            edrpou = candidate.smida_edrpou

            if companies_dict.get(edrpou):
                continue

            ans = EDRPOU.find_by_edrpou(candidate.smida_edrpou)

            if len(ans) > 1:
                self.stderr.write(
                    "Too many companies found by code {}, skipping".format(edrpou)
                )

                failed_companies_total += 1
                continue

            if not ans:
                self.stderr.write(
                    "No company found by code {}, skipping".format(edrpou)
                )

                failed_companies_total += 1
                continue

            company, created = company_importer.get_or_create_from_edr_record(
                ans[0].to_dict(),
                options["real_run"])

            if created and edrpou in pep_heads:
                company.state_company = True
                if options["real_run"]:
                    company.save()

            if not company:
                self.stderr.write(
                    "Cannot create a company by code {}, for the rec {}, skipping".format(
                        edrpou,
                        candidate.pk
                    )
                )

                failed_companies_total += 1
                continue

            if created:
                created_companies_total += 1
                tqdm.write("Created {} {}".format("state company" if company.state_company else "company", company))
            else:
                updated_companies_total += 1
                tqdm.write("Updated company {}".format(company))

            companies_dict[edrpou] = company

        self.stdout.write("Finished import companies.")
        # endregion

        # region Persons and P2C
        self.stdout.write("Starting import Persons and Person2Company relations.")
        smida_candidates = SMIDACandidate.objects.filter(status="a",
                                                         smida_is_real_person=True)\
                                                 .order_by("dt_of_first_entry")

        peps = self.all_peps_names()
        self.persons_dict = {}
        self.new_persons_pk = []
        self.persons_stats = {"created_total": 0, "matched_resolved": 0, "matched_not_resolved": 0}
        p2c_links_created = 0
        p2c_links_updated = 0
        self.smida_p2c = self.person_2_companies_relations()

        for candidate in tqdm(smida_candidates.nocache().iterator(),
                              total=smida_candidates.count()):
            person_name = candidate.smida_parsed_name.strip().lower()

            # If can't tie person with company skip it to avoid duplicates
            if not any(edrpou in companies_dict for edrpou in self.smida_p2c[person_name]):
                tqdm.write("Skipped person: {} from processing as he not tied to any valid EDRPOU."
                           .format(person_name))
                continue

            is_pep = person_name in peps

            person = self.persons_dict.get(person_name)
            if not person:
                person = self.create_person(person_name, is_pep, candidate.smida_yob,
                                            options["real_run"])
            # The same person might have been created from a record without smida_yob
            else:
                self.update_person_dob(person, candidate.smida_yob, real_run=options["real_run"])

            if person:
                company = companies_dict.get(candidate.smida_edrpou)

                if not company:
                    continue

                pb_key = "{} {}".format(candidate.smida_position_class, candidate.smida_position_body)
                relationship_type = SMIDA_POSITIONS_MAPPING.get(pb_key)

                if not relationship_type:
                    relationship_type = candidate.smida_position
                    tqdm.write("Relation missing from a mapping for SMIDACandidate ID: {}"
                               .format(candidate.id))

                # Calc date finished
                last_entry = candidate.dt_of_last_entry
                date_finished = self.p2c_get_date_finished(candidate)

                if not date_finished and last_entry and last_entry.date() < self.threshold_quarter_end():
                    date_finished = last_entry

                # Calc date established
                date_established = self.p2c_get_date_established(candidate)

                if date_established and (not date_finished or date_established.date() < date_finished.date()):
                    # update previous position on this work
                    prev_position = Person2Company.objects \
                        .filter(from_person=person, to_company=company,
                                is_employee=True, date_established__lt=date_established) \
                        .exclude(relationship_type__icontains=relationship_type)\
                        .order_by("-date_established").first()

                    if prev_position:
                        prev_position.date_finished = date_established
                        prev_position.date_finished_details = 0

                        if options["real_run"]:
                            prev_position.save()

                        tqdm.write("Updated previous position for SMIDACandidate ID: {}"
                                   .format(candidate.id))

                else:
                    date_established = candidate.dt_of_first_entry

                # Get or create p2c
                try:
                    p2c = Person2Company.objects.get(from_person=person,
                                   to_company=company,
                                   relationship_type__icontains=relationship_type,
                                   is_employee=True)

                    updated = False

                    if (not p2c.date_finished and date_finished) or\
                            ((p2c.date_finished and date_finished) and
                            (p2c.date_finished_details > 0 or date_finished.date() > p2c.date_finished)):
                        tqdm.write("Updated date_finished for P2C relation with id: {} Old: {}, New: {}"
                                   .format(p2c.id,
                                           p2c.date_finished,
                                           date_finished))

                        p2c.date_finished = date_finished
                        p2c.date_finished_details = 0
                        updated = True

                    if (not p2c.date_established and date_established and date_established.date() < p2c.date_finished) or\
                            ((p2c.date_established and date_established) and
                            (p2c.date_established_details > 0 or date_established.date() < p2c.date_established)):
                        tqdm.write("Updated date_established for P2C relation with id: {} Old: {}, New: {}"
                                   .format(p2c.id,
                                           p2c.date_established,
                                           date_established))

                        p2c.date_established = date_established
                        p2c.date_established_details = 0
                        updated = True

                    p2c_links_updated += int(updated)

                    if options["real_run"]:
                        p2c.save()

                except Person2Company.DoesNotExist:
                    p2c = Person2Company(from_person=person,
                                         to_company=company,
                                         relationship_type=relationship_type,
                                         is_employee=True,
                                         date_established=date_established,
                                         date_finished=date_finished)

                    p2c_links_created += 1
                    tqdm.write("Created P2C relation: id: {} ({}) <=> id: {} ({}) EST. {}, FIN. {}"
                               .format(person.id or "N/A",
                                       person_name,
                                       company.id or "N/A",
                                       company.name_uk,
                                       p2c.date_established or "N/A",
                                       p2c.date_finished or "N/A"))

                    if options["real_run"]:
                        p2c.save()

        self.stdout.write("Finished import Persons and Person2Company relations.")
        # endregion
        self.stdout.write("New persons having multiple companies related")
        self.new_persons_having_multiple_company_relations()

        # region Create P2P connections

        smida_candidates = SMIDACandidate.objects.filter(status="a",
                                                         smida_is_real_person=True) \
            .order_by("dt_of_last_entry")

        p2p_links_total = 0

        for candidate in tqdm(smida_candidates.nocache().iterator(),
                              total=smida_candidates.count()):
            person_name = candidate.smida_parsed_name.strip().lower()
            heads_of_company = pep_heads.get(candidate.smida_edrpou) or []
            from_person = self.persons_dict.get(person_name)

            for head in heads_of_company:
                to_person = self.persons_dict.get(head)

                if from_person == to_person:
                    continue

                try:
                    p2p = Person2Person.objects.get(from_person=from_person,
                                  to_person=to_person,
                                  from_relationship_type="ділові зв'язки",
                                  to_relationship_type="ділові зв'язки")

                    p2p.date_confirmed = candidate.dt_of_last_entry\
                                         or p2p.date_confirmed\
                                         or datetime.now()
                    if options["real_run"]:
                        p2p.save()

                    tqdm.write("Updated P2P relation: id: {} ({}) <=> id: {} ({})// DC: {}"
                               .format(from_person.id or "N/A",
                                       from_person.full_name,
                                       to_person.id or "N/A",
                                       to_person.full_name,
                                       p2p.date_confirmed))

                except Person2Person.DoesNotExist:
                    p2p = Person2Person(from_person=from_person,
                                        to_person=to_person,
                                        from_relationship_type="ділові зв'язки",
                                        to_relationship_type="ділові зв'язки",
                                        date_confirmed=candidate.dt_of_last_entry or datetime.now())

                    tqdm.write("Created P2P relation: id: {} ({}) <=> id: {} ({})"
                               .format(from_person.id or "N/A",
                                       from_person.full_name,
                                       to_person.id or "N/A",
                                       to_person.full_name))
                    p2p_links_total += 1

                    if options["real_run"]:
                        p2p.save()

        self.stdout.write("Finished import Person2Person relations.")
        # endregion

        self.stdout.write(
            "Updated existing companies: {}.\n"
            "Created new companies: {}.\n"
            "Failed create companies: {}.\n"
            "Created new persons: {}.\n"
            "Matched existing resolved: {}.\n"
            "Matched existing not resolved: {}.\n"
            "Created P2C links: {}.\n"
            "Updated P2C links: {}.\n"
            "Created P2P links: {}."
            .format(updated_companies_total,
                    created_companies_total,
                    failed_companies_total,
                    self.persons_stats["created_total"],
                    self.persons_stats["matched_resolved"],
                    self.persons_stats["matched_not_resolved"],
                    p2c_links_created,
                    p2c_links_updated,
                    p2p_links_total)
        )
コード例 #11
0
    def handle(self, *args, **options):
        """Confirm or create "head of company" links for state companies.

        For every ``Company`` flagged ``state_company`` with a non-empty
        ``edrpou`` code, the matching EDRPOU registry record is looked up and
        the name stored in its ``head`` field is compared with the persons
        already linked to the company:

        * exact match (first and last name, plus patronymic when present) --
          the existing links are marked as confirmed from EDR;
        * fuzzy match (same last name, same initials) -- likewise, and the
          pair of names is printed for manual review;
        * no match -- a new ``Person`` and a ``Person2Company`` link are
          created.

        Database writes happen only when ``options["real_run"]`` is truthy;
        otherwise the command only reports what it would do. The creation
        counters are incremented in both modes.

        Summary counters are written to stdout at the end.
        """
        activate(settings.LANGUAGE_CODE)
        real_run = options["real_run"]

        successful = 0
        failed = 0

        exact_matches = 0
        fuzzy_matches = 0
        connections_created = 0
        persons_created = 0

        def confirm(link, edr_company):
            # Mark an existing link as confirmed by the registry record.
            # Persisted only on a real run so that a dry run stays read-only.
            link.created_from_edr = True
            link.date_confirmed = edr_company.last_update
            link.date_confirmed_details = 0
            if real_run:
                link.save()

        state_companies = Company.objects.filter(
            state_company=True).exclude(edrpou="")

        for company in state_companies:
            # The registry lookup is done with leading zeros stripped.
            k = company.edrpou.lstrip("0")

            # Because open copy of registry has no dates and some of companies
            # has more than one record we are using heuristic here to determine
            # latest record using registration status (they have "priorities")
            ans = None  # stays None if self.status_order is empty
            for order in self.status_order:
                ans = EDRPOU.search().query(
                    "bool",
                    must=[Q("term", edrpou=k),
                          Q("term", status=order)]).execute()
                if ans:
                    break

            # Last attempt: ignore the status altogether
            if not ans:
                ans = EDRPOU.search().query("term", edrpou=k).execute()

            if len(ans) > 1:
                self.stderr.write(
                    "Too many companies found by code %s, for the name %s, skipping"
                    % (k, company))

                failed += 1
                continue

            if len(ans) == 0:
                self.stderr.write("Cannot find the company by code %s" % (k, ))

                failed += 1
                continue

            edr_company = ans[0]
            if not edr_company.head:
                self.stderr.write("Cannot find head for the company %s, (%s)" %
                                  (edr_company.name, k))

                failed += 1
                continue

            lastname, firstname, patronymic, _ = parse_fullname(
                edr_company.head)

            # parse_fullname is defined elsewhere; if it yields an empty first
            # name the initial-based fuzzy lookup below (firstname[0]) would
            # raise IndexError and abort the whole run. Skip such records.
            if not firstname:
                self.stderr.write(
                    "Cannot parse the name of the head %s for the company %s"
                    % (edr_company.head, k))

                failed += 1
                continue

            successful += 1

            company_links = Person2Company.objects.select_related(
                "from_person").filter(to_company_id=company.pk)

            # --- 1. Exact match on the full name ---
            exact_links = company_links.filter(
                from_person__first_name__iexact=firstname,
                from_person__last_name__iexact=lastname)

            if patronymic:
                exact_links = exact_links.filter(
                    from_person__patronymic__iexact=patronymic)

            # Truthiness evaluates and caches the queryset, so the loop below
            # does not hit the database a second time.
            if exact_links:
                exact_matches += 1
                for link in exact_links:
                    confirm(link, edr_company)

                    # "Керівник" is Ukrainian for "head/manager"
                    if link.relationship_type != "Керівник":
                        self.stdout.write(
                            "Relation %s exists but has different type: %s" %
                            (link, link.relationship_type))

                continue

            # --- 2. Fuzzy match: same last name, same initials ---
            fuzzy_links = company_links.filter(
                from_person__last_name__iexact=lastname,
                from_person__first_name__istartswith=firstname[0],
            )

            if patronymic:
                fuzzy_links = fuzzy_links.filter(
                    from_person__patronymic__istartswith=patronymic[0])

            if fuzzy_links:
                fuzzy_matches += 1
                for link in fuzzy_links:
                    confirm(link, edr_company)

                    self.stdout.write(
                        "Fuzzy match: %s vs %s" %
                        (edr_company.head, link.from_person.full_name))

                    if link.relationship_type != "Керівник":
                        self.stdout.write(
                            "Relation %s exists but has different type: %s"
                            % (link, link.relationship_type))

                continue

            # --- 3. Nothing matched: create the person and the link ---
            try:
                if real_run:
                    person = Person.objects.create(first_name=firstname,
                                                   last_name=lastname,
                                                   patronymic=patronymic,
                                                   is_pep=True,
                                                   type_of_official=1)
                persons_created += 1

                if real_run:
                    Person2Company.objects.create(
                        from_person=person,
                        to_company=company,
                        relationship_type="Керівник",
                        is_employee=True,
                        created_from_edr=True,
                        date_confirmed=edr_company.last_update,
                        # TODO: decide what to do with connection proofs
                        proof_title="Інформація, отримана з ЄДР",
                    )

                connections_created += 1
            except DataError:
                # NOTE(review): no transaction is visible here, so a Person
                # created just above may remain without its link if the second
                # create fails -- confirm whether that is acceptable.
                self.stdout.write("Cannot create %s person or connection" %
                                  edr_company.head)

        self.stdout.write("Creation failed: %s, creation successful: %s" %
                          (failed, successful))
        self.stdout.write("Exact matches: %s, fuzzy matches: %s" %
                          (exact_matches, fuzzy_matches))
        self.stdout.write("Persons created: %s, connections created: %s" %
                          (persons_created, connections_created))