Beispiel #1
0
    def handle(self, *args, **options):
        """Push all published persons and companies into Elasticsearch.

        Every published ``Person`` and ``Company`` is converted with
        ``to_dict()`` into its Elasticsearch document and written through
        ``self.bulk_write``. When the ``drop_indices`` option is truthy the
        target index is deleted and re-initialised before the write, and the
        cached "all documents" listing is invalidated and rebuilt after it.
        """
        activate(settings.LANGUAGE_CODE)
        es = connections.get_connection('default')

        def reindex(queryset, doc_cls, listing_name, report_template):
            # Documents are fully materialised before the index is touched,
            # exactly one progress bar per document type.
            documents = [
                doc_cls(**row.to_dict())
                for row in tqdm(queryset.nocache().iterator(), total=queryset.count())
            ]

            if options["drop_indices"]:
                # Start from an empty index; a missing index (404) is fine.
                Index(doc_cls._doc_type.index).delete(ignore=404)
                doc_cls.init()

                es.indices.put_settings(
                    index=doc_cls._doc_type.index,
                    body={
                        'index.max_result_window': 100000
                    }
                )

            self.bulk_write(es, documents)

            if options["drop_indices"]:
                # Invalidate old values and immediately cache again.
                getattr(doc_cls, listing_name).invalidate(doc_cls)
                getattr(doc_cls, listing_name)()

            self.stdout.write(report_template.format(len(documents)))

        reindex(
            Person.objects.filter(publish=True),
            ElasticPerson,
            "get_all_persons",
            'Loaded {} persons to persistence storage',
        )
        reindex(
            Company.objects.filter(publish=True),
            ElasticCompany,
            "get_all_companies",
            'Loaded {} companies to persistence storage',
        )
Beispiel #2
0
def _search_related(request):
    """Search persons through the names of the people related to them.

    The ``q`` GET parameter is matched against the related-person name
    fields of every document, and additionally against the own name fields
    of documents whose ``is_pep`` is False. If the strict ("and") query finds
    nothing, a looser "or" query requiring at least two matching terms is
    used instead. Without a query, all documents with ``is_pep`` False are
    listed.

    Returns whatever ``paginated_search`` returns for the resulting search.
    """
    query = request.GET.get("q", "")
    related_name_fields = ["related_persons.person_uk", "related_persons.person_en"]
    own_name_fields = ["full_name", "names"]

    if not query:
        found = ElasticPerson.search().query("match_all")
        found = found.filter("term", is_pep=False)
    else:
        by_related = Q(
            "multi_match", query=query, operator="and",
            fields=related_name_fields)
        by_own_name = Q(
            "multi_match", query=query, operator="and",
            fields=own_name_fields)
        not_pep = by_own_name & Q("match", is_pep=False)

        found = ElasticPerson.search().query(by_related | not_pep)

        if found.count() == 0:
            # Fallback: any two of the query terms are enough.
            by_related = Q(
                "multi_match",
                query=query,
                operator="or",
                minimum_should_match="2",
                fields=related_name_fields,
            )
            by_own_name = Q(
                "multi_match",
                query=query,
                operator="or",
                minimum_should_match="2",
                fields=own_name_fields,
            )
            not_pep = by_own_name & Q("match", is_pep=False)

            found = ElasticPerson.search().query(by_related | not_pep)

    # Highlighting (with empty tags) is used to find out which exact related
    # person caused the match, so it can be shown in the person's card at the
    # top of the list. See Person.relevant_related_persons for details.
    for highlighted_field in ("related_persons.person_uk",
                              "related_persons.person_en"):
        found = found.highlight(
            highlighted_field,
            order="score",
            pre_tags=[""],
            post_tags=[""],
        )

    return paginated_search(request, found)
Beispiel #3
0
def _search_person(request, country_obj=None):
    """Run the main person search for the ``q`` GET parameter.

    With a query, all terms must match (``operator="and"``) across the
    boosted name, alias, related-person, document-number and country fields;
    an ``is_pep=True`` match is added as a ``should`` clause. Without a query
    every document matches. If ``country_obj`` is given, results are further
    restricted to documents whose ``related_countries.to_country_uk`` matches
    ``country_obj.name_uk``.

    Returns the result of ``paginated_search`` with
    ``settings.CATALOG_PER_PAGE`` as its third argument.
    """
    query = request.GET.get("q", "")

    searched_fields = [
        "full_name^3",
        "names^2",
        "full_name_en^3",
        "also_known_as_uk^2",
        "also_known_as_en^2",
        "related_persons.person_uk",
        "related_persons.person_en",
        "inn",
        "passport",
        "related_countries.to_country_uk",
        "related_countries.to_country_en",
    ]

    if not query:
        persons = ElasticPerson.search().query("match_all")
    else:
        text_match = Q(
            "multi_match",
            query=query,
            operator="and",
            fields=searched_fields,
        )
        combined = Q(
            "bool",
            should=[Q("match", is_pep=True)],
            must=[text_match],
        )
        persons = ElasticPerson.search().query(combined)

    if country_obj is not None:
        country_clause = {"query": country_obj.name_uk, "operator": "and"}
        persons = persons.query(
            "match", related_countries__to_country_uk=country_clause)

    # Empty highlight tags: the highlight is only used to learn which related
    # person produced the hit, not to decorate the text.
    for highlighted_field in ("related_persons.person_uk",
                              "related_persons.person_en"):
        persons = persons.highlight(
            highlighted_field,
            order="score",
            pre_tags=[""],
            post_tags=[""],
        )

    return paginated_search(request, persons, settings.CATALOG_PER_PAGE)
Beispiel #4
0
    def search_for_person(self, name):
        """Look up a person by name, loosening fuzziness until something hits.

        The name is matched (all terms required) against the name and alias
        fields with fuzziness 0, then 1, then 2. The first level that yields
        at least one hit wins.

        Returns a ``(results, fuzziness)`` tuple: the executed search response
        and the fuzziness level that produced it, or ``([], 0)`` when no level
        matched.
        """
        match_spec = {
            "query": name,
            "operator": "and",
            "fuzziness": 0,
            "fields": [
                "full_name",
                "names",
                "full_name_en",
                "also_known_as_uk",
                "also_known_as_en",
            ],
        }

        for fuzziness in range(3):
            match_spec["fuzziness"] = fuzziness
            attempt = ElasticPerson.search().query({"multi_match": match_spec})

            # count() is a cheap probe; the full fetch happens only on a hit.
            if attempt.count():
                return attempt.execute(), fuzziness

        return [], 0
Beispiel #5
0
def export_persons(request, fmt):
    """Download the full person dataset as a JSON or XML attachment.

    Args:
        request: the current request; ``request.user`` must hold the
            ``core.export_persons`` permission. Users without
            ``core.export_id_and_last_modified`` get the ``id`` and
            ``last_change`` fields stripped from every record.
        fmt: export format, either ``"json"`` or ``"xml"``.

    Returns:
        ``HttpResponseForbidden`` when the permission is missing,
        ``HttpResponseBadRequest`` for an unsupported ``fmt``, otherwise the
        rendered dataset with ``Content-Disposition`` and ``Content-Length``
        headers set.
    """
    if not request.user.has_perm("core.export_persons"):
        return HttpResponseForbidden()

    # Reject unknown formats up front: previously they fell through both
    # format branches and crashed on the unbound ``response`` after the
    # download had already been logged.
    if fmt not in ("json", "xml"):
        from django.http import HttpResponseBadRequest

        return HttpResponseBadRequest("Unsupported export format")

    if request.user.has_perm("core.export_id_and_last_modified"):
        fields_to_blacklist = []
    else:
        fields_to_blacklist = ["id", "last_change"]

    # Materialise the records: a lazy ``map`` object (Python 3) is not JSON
    # serialisable and could only be consumed once.
    data = [
        blacklist(
            add_encrypted_url(p, request.user, "encrypted_person_redirect"),
            fields_to_blacklist)
        for p in ElasticPerson.get_all_persons()
    ]

    ActionLog(user=request.user, action="download_dataset", details=fmt).save()

    if fmt == "json":
        response = JsonResponse(data, safe=False)
    else:
        response = render(request,
                          "xml.jinja", {"data": data},
                          content_type="application/xhtml+xml")

    response[
        "Content-Disposition"] = "attachment; filename=peps_{:%Y%m%d_%H%M}.{}".format(
            datetime.now(), fmt)

    response["Content-Length"] = len(response.content)

    return response
Beispiel #6
0
def _suggest_person(request):
    """Return the single best fuzzy person match for the ``q`` GET parameter.

    All query terms must match the boosted name and alias fields with
    ``fuzziness="auto"``; an ``is_pep=True`` match is added as a ``should``
    clause. Only the first hit is requested.

    Returns the top hit, or ``None`` when the query is empty or nothing
    matched.
    """
    query = request.GET.get("q", "")
    if not query:
        return None

    name_fields = [
        "full_name^3",
        "names^2",
        "full_name_en^3",
        "also_known_as_uk^2",
        "also_known_as_en^2",
    ]

    fuzzy_match = Q(
        "multi_match",
        query=query,
        operator="and",
        fields=name_fields,
        fuzziness="auto",
    )
    suggestion = ElasticPerson.search().query(
        Q("bool", should=[Q("match", is_pep=True)], must=[fuzzy_match])
    )[:1]

    hits = suggestion.execute()
    return hits[0] if hits else None
Beispiel #7
0
def countries(request, sources=("persons", "companies"), country_id=None):
    """Render the per-country listing of persons and companies.

    Args:
        request: the current request, forwarded to ``paginated_search``.
        sources: which listings to build; any of ``"persons"`` and
            ``"companies"``. Only the requested listings are added to the
            template context.
        country_id: optional ISO2 code. When given, the country must exist
            (404 otherwise) and both listings are restricted to documents
            whose ``related_countries.to_country_uk`` matches its ``name_uk``.

    Raises:
        Http404: unknown country, empty page, or non-integer page.
    """
    country = None
    if country_id is not None:
        country = get_object_or_404(Country, iso2=country_id)

    # Countries that are referenced by at least one person or company,
    # most-used first; entries without an ISO2 code are skipped.
    used_countries = (Country.objects.annotate(
        persons_count=Count("person2country", distinct=True),
        companies_count=Count("company2country", distinct=True),
    ).annotate(usages=F("persons_count") + F("companies_count")).exclude(
        usages=0).exclude(iso2="").order_by("-usages"))

    params = {"used_countries": used_countries, "country": country}

    def _listing(doc_cls):
        # Everything when no country is selected, otherwise only documents
        # related to the selected country.
        if country_id is None:
            return doc_cls.search().query("match_all")
        return doc_cls.search().query(
            "match",
            related_countries__to_country_uk={
                "query": country.name_uk,
                "operator": "and"
            })

    try:
        # Paginate only what was asked for: unconditionally paginating both
        # used to fail with NameError whenever ``sources`` omitted one.
        if "persons" in sources:
            params["persons"] = paginated_search(
                request, _listing(ElasticPerson))
        if "companies" in sources:
            params["companies"] = paginated_search(
                request, _listing(ElasticCompany))
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "countries.jinja", params)
Beispiel #8
0
def search(request, sources=("persons", "companies")):
    """Render the search results page for the ``q`` GET parameter.

    When ``is_exact=on`` is passed, persons and then companies are probed
    with a strict name match; if a probe finds exactly one document the user
    is redirected straight to its details page. Otherwise the regular person
    and company searches are run for each entry in ``sources``, and a
    suggested person is looked up when the person search comes back empty.

    Raises:
        Http404: on an empty or non-integer page.
    """
    query = request.GET.get("q", "")
    is_exact = request.GET.get("is_exact", "") == "on"

    params = {"query": query, "sources": sources, "today": datetime.now()}

    if is_exact:
        # (document class, name fields, details URL name, URL kwarg)
        exact_probes = (
            (
                ElasticPerson,
                [
                    "full_name",
                    "names",
                    "full_name_en",
                    "also_known_as_uk",
                    "also_known_as_en",
                ],
                "person_details",
                "person_id",
            ),
            (
                ElasticCompany,
                ["short_name_en", "short_name_uk", "name_en", "name_uk"],
                "company_details",
                "company_id",
            ),
        )

        for doc_cls, name_fields, url_name, id_kwarg in exact_probes:
            probe = doc_cls.search().query(
                "multi_match",
                query=query,
                operator="and",
                fields=name_fields,
            )

            # Special case: we looked for one exact entity and found it.
            if probe.count() == 1:
                only_hit = probe.execute()[0]
                return redirect(
                    reverse(url_name, kwargs={id_kwarg: only_hit.id}))

    try:
        if "persons" in sources:
            found_persons = _search_person(request)
            params["persons"] = found_persons

            if not found_persons:
                params["suggested_person"] = _suggest_person(request)

        if "companies" in sources:
            params["companies"] = _search_company(request)
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "search.jinja", params)
Beispiel #9
0
def countries(request, sources=("persons", "companies"), country_id=None):
    """Render the per-country listing of persons and companies.

    Args:
        request: the current request, forwarded to ``paginated_search``.
        sources: which listings to build; any of ``"persons"`` and
            ``"companies"``. Only the requested listings are added to the
            template context.
        country_id: optional ISO2 code. When given, the country must exist
            (404 otherwise) and both listings are restricted to documents
            whose ``related_countries.to_country_uk`` matches its ``name_uk``.

    Raises:
        Http404: unknown country, empty page, or non-integer page.
    """
    country = None
    if country_id is not None:
        country = get_object_or_404(Country, iso2=country_id)

    params = {
        "country": country,
        "today": now(),
        "query": "",
        "include_related_persons": False,
    }

    def _listing(doc_cls):
        # Everything when no country is selected, otherwise only documents
        # related to the selected country.
        if country_id is None:
            return doc_cls.search().query("match_all")
        return doc_cls.search().query(
            "match",
            related_countries__to_country_uk={
                "query": country.name_uk,
                "operator": "and"
            })

    try:
        # Paginate only what was asked for: unconditionally paginating both
        # used to fail with NameError whenever ``sources`` omitted one.
        if "persons" in sources:
            params["persons"] = paginated_search(
                request, _listing(ElasticPerson))
        if "companies" in sources:
            params["companies"] = paginated_search(
                request, _listing(ElasticCompany))
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "countries.jinja", params)
Beispiel #10
0
    def assume(q, fuzziness):
        """Collect name suggestions for ``q`` from persons and companies.

        Runs one completion suggester against ``full_name_suggest`` on the
        person index (up to 10 options) and one against ``name_suggest`` on
        the company index (up to 5 options), both with the given fuzziness.
        The options are merged, ordered by ``_score`` (highest first) and
        reduced to the display value read from ``company_field`` or, failing
        that, ``field`` of each option's ``_source``; both names come from
        the enclosing scope.

        Returns the values passed through ``unique``, or ``[]`` when there
        are no options at all.
        """

        def completion(suggest_field, size):
            # Shared shape of the completion suggester request.
            return {
                "field": suggest_field,
                "size": size,
                "fuzzy": {
                    "fuzziness": fuzziness,
                    "unicode_aware": True
                },
            }

        results = []

        search = (ElasticPerson.search()
                  .source(["full_name_suggest", field])
                  .params(size=0)
                  .suggest("name", q,
                           completion=completion("full_name_suggest", 10)))

        res = search.execute()
        # The response may lack a ``suggest`` section altogether; guard the
        # person lookup the same way the company lookup is guarded instead of
        # failing with AttributeError.
        if res.success and hasattr(res, "suggest"):
            results += res.suggest["name"][0]["options"]

        search = (ElasticCompany.search()
                  .source(["name_suggest", company_field])
                  .params(size=0)
                  .suggest("name", q,
                           completion=completion("name_suggest", 5)))

        # TODO: Investigate, completion doesn't work with numbers

        res = search.execute()
        if res.success and hasattr(res, "suggest"):
            results += res.suggest["name"][0]["options"]

        results = sorted(results, key=itemgetter("_score"), reverse=True)

        if not results:
            return []

        return unique(
            getattr(val._source, company_field, "")
            or getattr(val._source, field, "") for val in results)
Beispiel #11
0
    def handle(self, *args, **options):
        """Push all published persons and companies into Elasticsearch.

        Every published ``Person`` and ``Company`` is converted with
        ``to_dict()`` into its Elasticsearch document and written through
        ``self.bulk_write``. When the ``drop_indices`` option is truthy, each
        index is deleted and recreated before its write; afterwards the
        cached listings are rebuilt and their sizes are compared with the
        number of documents that were sent, reporting any mismatch on stderr.
        """
        activate(settings.LANGUAGE_CODE)
        es = connections.get_connection("default")

        # --- persons -------------------------------------------------------
        published_persons = Person.objects.filter(publish=True)
        batch = [
            ElasticPerson(**person.to_dict())
            for person in tqdm(
                published_persons.nocache().iterator(),
                total=published_persons.count(),
            )
        ]
        persons_total = len(batch)

        if options["drop_indices"]:
            # A missing index (404) is not an error when dropping.
            person_idx.delete(ignore=404)
            person_idx.create()

            ElasticPerson.init()

            es.indices.put_settings(
                index=ElasticPerson._doc_type.index,
                body={"index.max_result_window": settings.ES_MAX_RESULT_WINDOW},
            )

        self.bulk_write(es, batch)
        self.stdout.write(
            "Loaded {} persons to persistence storage".format(len(batch))
        )

        # --- companies -----------------------------------------------------
        published_companies = Company.objects.filter(publish=True)
        batch = [
            ElasticCompany(**company.to_dict())
            for company in tqdm(
                published_companies.nocache().iterator(),
                total=published_companies.count(),
            )
        ]
        companies_total = len(batch)

        if options["drop_indices"]:
            company_idx.delete(ignore=404)
            company_idx.create()

            ElasticCompany.init()
            es.indices.put_settings(
                index=ElasticCompany._doc_type.index,
                body={"index.max_result_window": settings.ES_MAX_RESULT_WINDOW},
            )

        self.bulk_write(es, batch)
        self.stdout.write(
            "Loaded {} companies to persistence storage".format(len(batch))
        )

        if not options["drop_indices"]:
            return

        # NOTE(review): presumably waits for the freshly written documents to
        # become searchable before re-reading them - confirm.
        sleep(60)

        # Invalidate old values and immediately cache again.
        ElasticPerson.get_all_persons.invalidate(ElasticPerson)
        indexed_persons_total = len(ElasticPerson.get_all_persons())

        # Invalidate old values and immediately cache again.
        ElasticCompany.get_all_companies.invalidate(ElasticCompany)
        indexed_companies_total = len(ElasticCompany.get_all_companies())

        consistency_checks = (
            (
                "Mismatch between persons in DB ({}) and indexed persons ({})",
                persons_total,
                indexed_persons_total,
            ),
            (
                "Mismatch between companies in DB ({}) and indexed companies ({})",
                companies_total,
                indexed_companies_total,
            ),
        )
        for template, sent, indexed in consistency_checks:
            if sent != indexed:
                self.stderr.write(template.format(sent, indexed))
Beispiel #12
0
def search(request, sources=("persons", "companies")):
    """Render the search results page for the ``q`` GET parameter.

    An optional ``country`` GET parameter (ISO2 code) narrows every search to
    documents related to that country; an unknown code yields no filter
    object (``None``). When ``is_exact=on`` is passed, persons and then
    companies are probed with a strict name match; if a probe finds exactly
    one document the user is redirected straight to its details page.
    Otherwise the regular person and company searches are run for each entry
    in ``sources``, and a suggested person is looked up when the person
    search comes back empty.

    Raises:
        Http404: on an empty or non-integer page.
    """
    query = request.GET.get("q", "")
    country = request.GET.get("country", "")

    country_obj = (
        Country.objects.filter(iso2=country).first() if country else None
    )

    is_exact = request.GET.get("is_exact", "") == "on"

    params = {
        "query": query,
        "sources": sources,
        "today": now(),
        "country_obj": country_obj,
        "include_related_persons": True,
    }

    if is_exact:
        # (document class, name fields, details URL name, URL kwarg)
        exact_probes = (
            (
                ElasticPerson,
                [
                    "full_name",
                    "names",
                    "full_name_en",
                    "also_known_as_uk",
                    "also_known_as_en",
                ],
                "person_details",
                "person_id",
            ),
            (
                ElasticCompany,
                ["short_name_en", "short_name_uk", "name_en", "name_uk"],
                "company_details",
                "company_id",
            ),
        )

        for doc_cls, name_fields, url_name, id_kwarg in exact_probes:
            probe = doc_cls.search().query(
                "multi_match",
                query=query,
                operator="and",
                fields=name_fields,
            )

            if country_obj is not None:
                probe = probe.query(
                    "match",
                    related_countries__to_country_uk={
                        "query": country_obj.name_uk,
                        "operator": "and",
                    },
                )

            # Special case: we looked for one exact entity and found it.
            if probe.count() == 1:
                only_hit = probe.execute()[0]
                return redirect(
                    reverse(url_name, kwargs={id_kwarg: only_hit.id}))

    try:
        if "persons" in sources:
            found_persons = _search_person(request, country_obj)
            params["persons"] = found_persons

            if not found_persons:
                params["suggested_person"] = _suggest_person(request)

        if "companies" in sources:
            params["companies"] = _search_company(request, country_obj)
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "search.jinja", params)