def handle(self, *args, **options):
    """Index every published person and company into Elasticsearch.

    Persons are processed first, then companies.  For each of them, when
    the ``drop_indices`` option is truthy, the index is deleted and
    re-initialised (with ``index.max_result_window`` set to 100000) before
    the bulk write, and the cached listing of all documents is invalidated
    and rebuilt right after it.
    """
    activate(settings.LANGUAGE_CODE)
    es = connections.get_connection('default')

    # ---- persons -------------------------------------------------------
    published_persons = Person.objects.filter(publish=True)
    person_progress = tqdm(
        published_persons.nocache().iterator(),
        total=published_persons.count(),
    )
    person_docs = [
        ElasticPerson(**person.to_dict()) for person in person_progress
    ]

    if options["drop_indices"]:
        person_index = ElasticPerson._doc_type.index
        # ignore=404: a missing index is not an error when dropping it.
        Index(person_index).delete(ignore=404)
        ElasticPerson.init()
        es.indices.put_settings(
            index=person_index,
            body={'index.max_result_window': 100000},
        )

    self.bulk_write(es, person_docs)

    if options["drop_indices"]:
        # Drop the stale cached listing and warm the cache up again at once.
        ElasticPerson.get_all_persons.invalidate(ElasticPerson)
        ElasticPerson.get_all_persons()

    person_report = 'Loaded {} persons to persistence storage'.format(
        len(person_docs))
    self.stdout.write(person_report)

    # ---- companies -----------------------------------------------------
    published_companies = Company.objects.filter(publish=True)
    company_progress = tqdm(
        published_companies.nocache().iterator(),
        total=published_companies.count(),
    )
    company_docs = [
        ElasticCompany(**company.to_dict()) for company in company_progress
    ]

    if options["drop_indices"]:
        company_index = ElasticCompany._doc_type.index
        Index(company_index).delete(ignore=404)
        ElasticCompany.init()
        es.indices.put_settings(
            index=company_index,
            body={'index.max_result_window': 100000},
        )

    self.bulk_write(es, company_docs)

    if options["drop_indices"]:
        # Drop the stale cached listing and warm the cache up again at once.
        ElasticCompany.get_all_companies.invalidate(ElasticCompany)
        ElasticCompany.get_all_companies()

    company_report = 'Loaded {} companies to persistence storage'.format(
        len(company_docs))
    self.stdout.write(company_report)
def _search_related(request):
    """Search persons by the names of people related to them.

    Reads the ``q`` GET parameter.  With a query, a person matches when
    either one of its related persons matches all query terms, or the
    person itself is not a PEP and its own name matches.  If that strict
    search yields nothing, it is retried requiring only two matching terms.
    Without a query every non-PEP person is returned.

    Returns whatever ``paginated_search`` produces for the resulting search.
    """
    term = request.GET.get("q", "")
    related_fields = ["related_persons.person_uk", "related_persons.person_en"]
    own_name_fields = ["full_name", "names"]

    if not term:
        found = ElasticPerson.search().query("match_all")
        found = found.filter("term", is_pep=False)
    else:
        via_related = Q(
            "multi_match", query=term, operator="and", fields=related_fields)
        own_name = Q(
            "multi_match", query=term, operator="and", fields=own_name_fields)
        found = ElasticPerson.search().query(
            via_related | (own_name & Q("match", is_pep=False)))

        if not found.count():
            # Fallback: nothing matched all terms, so relax the query to
            # "at least two terms must match".
            via_related = Q(
                "multi_match",
                query=term,
                operator="or",
                minimum_should_match="2",
                fields=related_fields,
            )
            own_name = Q(
                "multi_match",
                query=term,
                operator="or",
                minimum_should_match="2",
                fields=own_name_fields,
            )
            found = ElasticPerson.search().query(
                via_related | (own_name & Q("match", is_pep=False)))

    # Highlighting is used to learn which exact related person caused the
    # match, so it can be shown at the top of the list on the person's card.
    # See Person.relevant_related_persons for details.
    highlighted = found.highlight(
        "related_persons.person_uk",
        order="score",
        pre_tags=[""],
        post_tags=[""],
    )
    highlighted = highlighted.highlight(
        "related_persons.person_en",
        order="score",
        pre_tags=[""],
        post_tags=[""],
    )
    return paginated_search(request, highlighted)
def _search_person(request, country_obj=None):
    """Run the paginated person search for the ``q`` GET parameter.

    With a query, all terms must match across the weighted name, alias,
    related-person, document and country fields; being a PEP is an optional
    ("should") clause.  Without a query every person matches.  When
    ``country_obj`` is given, results are additionally restricted to
    persons related to that country (matched by its ``name_uk``).

    Returns the result of ``paginated_search`` with
    ``settings.CATALOG_PER_PAGE`` as its third argument.
    """
    term = request.GET.get("q", "")
    weighted_fields = [
        "full_name^3",
        "names^2",
        "full_name_en^3",
        "also_known_as_uk^2",
        "also_known_as_en^2",
        "related_persons.person_uk",
        "related_persons.person_en",
        "inn",
        "passport",
        "related_countries.to_country_uk",
        "related_countries.to_country_en",
    ]

    if not term:
        found = ElasticPerson.search().query("match_all")
    else:
        all_terms = Q(
            "multi_match", query=term, operator="and", fields=weighted_fields)
        is_pep = Q("match", is_pep=True)
        found = ElasticPerson.search().query(
            Q("bool", should=[is_pep], must=[all_terms]))

    if country_obj is not None:
        country_clause = {"query": country_obj.name_uk, "operator": "and"}
        found = found.query(
            "match", related_countries__to_country_uk=country_clause)

    # Tag-less highlights on related persons, ordered by score.
    highlighted = found.highlight(
        "related_persons.person_uk",
        order="score",
        pre_tags=[""],
        post_tags=[""],
    )
    highlighted = highlighted.highlight(
        "related_persons.person_en",
        order="score",
        pre_tags=[""],
        post_tags=[""],
    )
    return paginated_search(request, highlighted, settings.CATALOG_PER_PAGE)
def search_for_person(self, name):
    """Look a person up by name, loosening fuzziness until something matches.

    Tries fuzziness 0, 1 and 2 in turn and stops at the first level whose
    search has at least one hit.

    Returns a ``(results, fuzziness)`` tuple: the executed search response
    and the fuzziness level that produced it, or ``([], 0)`` when no level
    matched.
    """
    name_fields = [
        "full_name",
        "names",
        "full_name_en",
        "also_known_as_uk",
        "also_known_as_en",
    ]

    for level in range(3):
        lookup = ElasticPerson.search().query({
            "multi_match": {
                "query": name,
                "operator": "and",
                "fuzziness": level,
                "fields": name_fields,
            }
        })
        if lookup.count():
            return lookup.execute(), level

    return [], 0
def export_persons(request, fmt):
    """Serve the full persons dataset as a downloadable JSON or XML file.

    Args:
        request: the current request; ``request.user`` must hold the
            ``core.export_persons`` permission.
        fmt: export format, either ``"json"`` or ``"xml"``.

    Returns:
        ``HttpResponseForbidden`` when the user lacks the export permission,
        ``HttpResponseBadRequest`` for an unsupported ``fmt``, otherwise an
        attachment response with ``Content-Disposition`` and
        ``Content-Length`` headers set.

    The ``id`` and ``last_change`` fields are stripped from every record
    unless the user also holds ``core.export_id_and_last_modified``.  Each
    successful download is recorded in ``ActionLog``.
    """
    if not request.user.has_perm("core.export_persons"):
        return HttpResponseForbidden()

    if fmt not in ("json", "xml"):
        # Previously an unknown format fell through both branches and failed
        # on an unbound ``response``.  Reject it before doing any work or
        # logging a download that never happens.
        # Imported locally so the block does not rely on the module's imports.
        from django.http import HttpResponseBadRequest

        return HttpResponseBadRequest("Unsupported export format")

    if request.user.has_perm("core.export_id_and_last_modified"):
        fields_to_blacklist = []
    else:
        fields_to_blacklist = ["id", "last_change"]

    # A real list (not a lazy ``map`` object) so that it is JSON-serialisable
    # and can be iterated by the template on Python 3 as well.
    data = [
        blacklist(
            add_encrypted_url(p, request.user, "encrypted_person_redirect"),
            fields_to_blacklist,
        )
        for p in ElasticPerson.get_all_persons()
    ]

    ActionLog(user=request.user, action="download_dataset", details=fmt).save()

    if fmt == "json":
        # safe=False: the top-level payload is a list, not a dict.
        response = JsonResponse(data, safe=False)
    else:
        response = render(
            request,
            "xml.jinja",
            {"data": data},
            content_type="application/xhtml+xml",
        )

    response["Content-Disposition"] = (
        "attachment; filename=peps_{:%Y%m%d_%H%M}.{}".format(
            datetime.now(), fmt)
    )
    response["Content-Length"] = len(response.content)
    return response
def _suggest_person(request): query = request.GET.get("q", "") if query: _fields = [ "full_name^3", "names^2", "full_name_en^3", "also_known_as_uk^2", "also_known_as_en^2" ] persons = ElasticPerson.search().query( Q( "bool", should=[Q("match", is_pep=True)], must=[ Q("multi_match", query=query, operator="and", fields=_fields, fuzziness="auto") ], ))[:1] res = persons.execute() if res: return res[0]
def countries(request, sources=("persons", "companies"), country_id=None):
    """Render the countries page, optionally narrowed to a single country.

    Args:
        request: the current request.
        sources: which document kinds to list; any of ``"persons"`` and
            ``"companies"``.
        country_id: ISO2 code of the country to filter by, or ``None`` to
            list everything.

    Returns:
        The rendered ``countries.jinja`` template.  ``params["persons"]`` /
        ``params["companies"]`` are only present for the requested sources.

    Raises:
        Http404: when the country does not exist, or when the requested
            page is empty or not an integer.
    """
    country = None
    if country_id is not None:
        country = get_object_or_404(Country, iso2=country_id)

    # Countries referenced by at least one person or company, most used
    # first; entries with an empty ISO2 code are left out.
    used_countries = (
        Country.objects.annotate(
            persons_count=Count("person2country", distinct=True),
            companies_count=Count("company2country", distinct=True),
        )
        .annotate(usages=F("persons_count") + F("companies_count"))
        .exclude(usages=0)
        .exclude(iso2="")
        .order_by("-usages")
    )

    params = {"used_countries": used_countries, "country": country}

    def _country_search(doc_cls):
        # Everything when no country was requested, otherwise only documents
        # related to the selected country.
        if country_id is None:
            return doc_cls.search().query("match_all")
        return doc_cls.search().query(
            "match",
            related_countries__to_country_uk={
                "query": country.name_uk,
                "operator": "and",
            },
        )

    try:
        # Only touch the requested sources: previously both searches were
        # paginated unconditionally, which failed on an unbound local when
        # ``sources`` did not contain both kinds.
        if "persons" in sources:
            params["persons"] = paginated_search(
                request, _country_search(ElasticPerson))
        if "companies" in sources:
            params["companies"] = paginated_search(
                request, _country_search(ElasticCompany))
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "countries.jinja", params)
def search(request, sources=("persons", "companies")):
    """Main search view over persons and companies.

    Reads ``q`` and ``is_exact`` from the GET parameters.  In exact mode
    (``is_exact=on``), if exactly one person matches, the view redirects to
    that person's page; otherwise, if exactly one company matches, it
    redirects to that company's page.  In every other case the regular
    paginated searches for the requested ``sources`` are rendered with
    ``search.jinja``; when the person search is empty, a suggested person
    is looked up as well.

    Raises:
        Http404: when the requested page is empty or not an integer.
    """
    query = request.GET.get("q", "")
    exact_mode = request.GET.get("is_exact", "") == "on"
    params = {"query": query, "sources": sources, "today": datetime.now()}

    if exact_mode:
        person_name_fields = [
            "full_name",
            "names",
            "full_name_en",
            "also_known_as_uk",
            "also_known_as_en",
        ]
        exact_persons = ElasticPerson.search().query(
            "multi_match",
            query=query,
            operator="and",
            fields=person_name_fields,
        )
        # A single exact person hit: go straight to that person's page.
        if exact_persons.count() == 1:
            only_person = exact_persons.execute()[0]
            person_url = reverse(
                "person_details", kwargs={"person_id": only_person.id})
            return redirect(person_url)

        company_name_fields = [
            "short_name_en", "short_name_uk", "name_en", "name_uk"]
        exact_companies = ElasticCompany.search().query(
            "multi_match",
            query=query,
            operator="and",
            fields=company_name_fields,
        )
        # A single exact company hit: go straight to that company's page.
        if exact_companies.count() == 1:
            only_company = exact_companies.execute()[0]
            company_url = reverse(
                "company_details", kwargs={"company_id": only_company.id})
            return redirect(company_url)

    try:
        if "persons" in sources:
            person_page = _search_person(request)
            params["persons"] = person_page
            if not person_page:
                params["suggested_person"] = _suggest_person(request)

        if "companies" in sources:
            params["companies"] = _search_company(request)
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "search.jinja", params)
def countries(request, sources=("persons", "companies"), country_id=None):
    """Render the countries page, optionally narrowed to a single country.

    Args:
        request: the current request.
        sources: which document kinds to list; any of ``"persons"`` and
            ``"companies"``.
        country_id: ISO2 code of the country to filter by, or ``None`` to
            list everything.

    Returns:
        The rendered ``countries.jinja`` template.  ``params["persons"]`` /
        ``params["companies"]`` are only present for the requested sources.

    Raises:
        Http404: when the country does not exist, or when the requested
            page is empty or not an integer.
    """
    country = None
    if country_id is not None:
        country = get_object_or_404(Country, iso2=country_id)

    params = {
        "country": country,
        "today": now(),
        "query": "",
        "include_related_persons": False,
    }

    def _country_search(doc_cls):
        # Everything when no country was requested, otherwise only documents
        # related to the selected country.
        if country_id is None:
            return doc_cls.search().query("match_all")
        return doc_cls.search().query(
            "match",
            related_countries__to_country_uk={
                "query": country.name_uk,
                "operator": "and",
            },
        )

    try:
        # Only touch the requested sources: previously both searches were
        # paginated unconditionally, which failed on an unbound local when
        # ``sources`` did not contain both kinds.
        if "persons" in sources:
            params["persons"] = paginated_search(
                request, _country_search(ElasticPerson))
        if "companies" in sources:
            params["companies"] = paginated_search(
                request, _country_search(ElasticCompany))
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "countries.jinja", params)
def assume(q, fuzziness):
    """Collect name suggestions for ``q`` from persons and companies.

    Runs a fuzzy completion suggester against the person index (up to 10
    options) and the company index (up to 5 options), merges the options
    and orders them by ``_score``, best first.

    ``field`` and ``company_field`` are taken from the enclosing scope and
    name the source attribute holding the display value for persons and
    companies respectively.

    Args:
        q: the text typed so far.
        fuzziness: fuzziness passed to both completion suggesters.

    Returns:
        The result of ``unique`` over the display values (the company value
        when it is non-empty, otherwise the person value), or ``[]`` when
        there are no suggestions.
    """
    results = []

    person_search = (
        ElasticPerson.search()
        .source(["full_name_suggest", field])
        .params(size=0)
        .suggest(
            "name",
            q,
            completion={
                "field": "full_name_suggest",
                "size": 10,
                "fuzzy": {"fuzziness": fuzziness, "unicode_aware": True},
            },
        )
    )
    res = person_search.execute()
    # NOTE(review): ``success`` is referenced without being called here and
    # below; if it is a method on the response this check is always truthy.
    # Left unchanged -- confirm against the elasticsearch-dsl version in use.
    #
    # The ``hasattr`` guard mirrors the company branch: a response without a
    # "suggest" section must yield no options instead of raising.
    if res.success and hasattr(res, "suggest"):
        results += res.suggest["name"][0]["options"]

    company_search = (
        ElasticCompany.search()
        .source(["name_suggest", company_field])
        .params(size=0)
        .suggest(
            "name",
            q,
            completion={
                "field": "name_suggest",
                "size": 5,
                "fuzzy": {"fuzziness": fuzziness, "unicode_aware": True},
            },
        )
    )
    # TODO: Investigate, completion doesn't work with numbers
    res = company_search.execute()
    if res.success and hasattr(res, "suggest"):
        results += res.suggest["name"][0]["options"]

    if not results:
        return []

    results = sorted(results, key=itemgetter("_score"), reverse=True)
    return unique(
        getattr(val._source, company_field, "")
        or getattr(val._source, field, "")
        for val in results
    )
def handle(self, *args, **options):
    """Index every published person and company into Elasticsearch.

    Persons are processed first, then companies.  When the ``drop_indices``
    option is truthy, each index is deleted, re-created and re-initialised
    (with ``index.max_result_window`` taken from
    ``settings.ES_MAX_RESULT_WINDOW``) before its bulk write.  In that mode
    the command finally waits a minute, rebuilds the cached listings of all
    persons and companies, and reports on stderr any difference between the
    number of documents prepared from the database and the number found in
    the rebuilt listings.
    """
    activate(settings.LANGUAGE_CODE)
    es = connections.get_connection("default")

    # ---- persons -------------------------------------------------------
    published_persons = Person.objects.filter(publish=True)
    person_progress = tqdm(
        published_persons.nocache().iterator(),
        total=published_persons.count(),
    )
    person_docs = [
        ElasticPerson(**person.to_dict()) for person in person_progress
    ]
    persons_total = len(person_docs)

    if options["drop_indices"]:
        # ignore=404: a missing index is not an error when dropping it.
        person_idx.delete(ignore=404)
        person_idx.create()
        ElasticPerson.init()
        es.indices.put_settings(
            index=ElasticPerson._doc_type.index,
            body={"index.max_result_window": settings.ES_MAX_RESULT_WINDOW},
        )

    self.bulk_write(es, person_docs)
    self.stdout.write(
        "Loaded {} persons to persistence storage".format(len(person_docs))
    )

    # ---- companies -----------------------------------------------------
    published_companies = Company.objects.filter(publish=True)
    company_progress = tqdm(
        published_companies.nocache().iterator(),
        total=published_companies.count(),
    )
    company_docs = [
        ElasticCompany(**company.to_dict()) for company in company_progress
    ]
    companies_total = len(company_docs)

    if options["drop_indices"]:
        company_idx.delete(ignore=404)
        company_idx.create()
        ElasticCompany.init()
        es.indices.put_settings(
            index=ElasticCompany._doc_type.index,
            body={"index.max_result_window": settings.ES_MAX_RESULT_WINDOW},
        )

    self.bulk_write(es, company_docs)
    self.stdout.write(
        "Loaded {} companies to persistence storage".format(len(company_docs))
    )

    # ---- post-reindex sanity check -------------------------------------
    if options["drop_indices"]:
        # Presumably gives Elasticsearch time to make the freshly written
        # documents visible before they are counted -- TODO confirm.
        sleep(60)

        # Drop the stale cached listings and warm the caches up again at once.
        ElasticPerson.get_all_persons.invalidate(ElasticPerson)
        indexed_persons_total = len(ElasticPerson.get_all_persons())

        ElasticCompany.get_all_companies.invalidate(ElasticCompany)
        indexed_companies_total = len(ElasticCompany.get_all_companies())

        if indexed_persons_total != persons_total:
            self.stderr.write(
                "Mismatch between persons in DB ({}) and indexed persons ({})".format(
                    persons_total, indexed_persons_total
                )
            )

        if indexed_companies_total != companies_total:
            self.stderr.write(
                "Mismatch between companies in DB ({}) and indexed companies ({})".format(
                    companies_total, indexed_companies_total
                )
            )
def search(request, sources=("persons", "companies")):
    """Main search view over persons and companies, with a country filter.

    Reads ``q``, ``country`` (an ISO2 code) and ``is_exact`` from the GET
    parameters.  In exact mode (``is_exact=on``), if exactly one person
    matches, the view redirects to that person's page; otherwise, if
    exactly one company matches, it redirects to that company's page.  In
    every other case the regular paginated searches for the requested
    ``sources`` are rendered with ``search.jinja``; when the person search
    is empty, a suggested person is looked up as well.  An unknown country
    code results in no country filtering.

    Raises:
        Http404: when the requested page is empty or not an integer.
    """
    query = request.GET.get("q", "")
    country = request.GET.get("country", "")
    country_obj = (
        Country.objects.filter(iso2=country).first() if country else None
    )
    exact_mode = request.GET.get("is_exact", "") == "on"

    params = {
        "query": query,
        "sources": sources,
        "today": now(),
        "country_obj": country_obj,
        "include_related_persons": True,
    }

    def _within_country(es_search):
        # Narrow a search to the selected country, if there is one.
        if country_obj is None:
            return es_search
        return es_search.query(
            "match",
            related_countries__to_country_uk={
                "query": country_obj.name_uk,
                "operator": "and",
            },
        )

    if exact_mode:
        person_name_fields = [
            "full_name",
            "names",
            "full_name_en",
            "also_known_as_uk",
            "also_known_as_en",
        ]
        exact_persons = _within_country(
            ElasticPerson.search().query(
                "multi_match",
                query=query,
                operator="and",
                fields=person_name_fields,
            )
        )
        # A single exact person hit: go straight to that person's page.
        if exact_persons.count() == 1:
            only_person = exact_persons.execute()[0]
            person_url = reverse(
                "person_details", kwargs={"person_id": only_person.id})
            return redirect(person_url)

        company_name_fields = [
            "short_name_en", "short_name_uk", "name_en", "name_uk"]
        exact_companies = _within_country(
            ElasticCompany.search().query(
                "multi_match",
                query=query,
                operator="and",
                fields=company_name_fields,
            )
        )
        # A single exact company hit: go straight to that company's page.
        if exact_companies.count() == 1:
            only_company = exact_companies.execute()[0]
            company_url = reverse(
                "company_details", kwargs={"company_id": only_company.id})
            return redirect(company_url)

    try:
        if "persons" in sources:
            person_page = _search_person(request, country_obj)
            params["persons"] = person_page
            if not person_page:
                params["suggested_person"] = _suggest_person(request)

        if "companies" in sources:
            params["companies"] = _search_company(request, country_obj)
    except EmptyPage:
        raise Http404("Page is empty")
    except PageNotAnInteger:
        raise Http404("No page")

    return render(request, "search.jinja", params)