def load_entries_from_database(entries):
    """Look up the papers that correspond to the given entries.

    Depending on ``settings.USING_ELASTICSEARCH`` each entry is resolved
    either through the ``PaperDocument`` index or through the ``Paper``
    table. Entries are updated in place: ``result_found`` is set to ``True``
    when a paper was found, and ``inexact_result_found`` is additionally set
    to ``True`` when the paper was only found by the fuzzy title fallback.

    :param entries: Iterable of entry objects exposing ``doi`` and ``title``.
        ``inexact_result_found`` is read from every matched entry, so the
        entry type presumably provides a default for it -- TODO confirm.
    :return: ``Paper`` queryset restricted to the matched primary keys and
        annotated with ``entry_title``: the title of the entry for papers
        matched inexactly, ``None`` for all others.
    """
    # Only the document ids are needed, so the source of the hits is excluded.
    # Setting this once is enough; ``query()`` below is applied per entry and
    # its result is never assigned back to ``search``.
    search = PaperDocument.search().source(excludes=['*'])
    dois_for_entry = []

    for entry in entries:
        if settings.USING_ELASTICSEARCH:
            # Try to find the record using its doi or its full title.
            if entry.doi:
                query = QEs({"match": {"_id": entry.doi}})
            elif entry.title:
                query = QEs({"match_phrase": {"title": entry.title}})
            else:
                # Nothing to search by. Previously ``query`` stayed ``None``
                # here and was handed to ``search.query``, which is not a
                # valid query; such entries are now left unmatched instead.
                continue

            results = search.query(query).execute()

            if results:
                dois_for_entry.extend(
                    (result.meta.id, entry) for result in results)
                entry.result_found = True
            elif entry.title:
                # If not found, try to find the entry by its title using a
                # looser ``match`` query and keep only the first hit.
                results = PaperDocument.search().query(
                    QEs({
                        'match': {
                            'title': {
                                'query': entry.title,
                                'minimum_should_match': "90%"
                            }
                        }
                    })).execute()

                if results:
                    dois_for_entry.append((results[0].meta.id, entry))
                    entry.result_found = True
                    entry.inexact_result_found = True
        else:
            # NOTE(review): these prints look like leftover debugging output;
            # kept to preserve behaviour -- consider moving them to logging.
            print(entry.title, entry.doi)
            papers = Paper.objects.filter(
                Q(pk=entry.doi) | Q(title=entry.title))
            # Count once and reuse the value instead of querying twice.
            paper_count = papers.count()
            print(paper_count)
            if paper_count > 0:
                dois_for_entry.append((papers.first().doi, entry))
                entry.result_found = True

    # Only inexactly matched papers carry the title of the entry they were
    # matched against; every other row falls through to the ``None`` default.
    whens = [
        When(pk=doi, then=Value(entry.title))
        for doi, entry in dois_for_entry
        if entry.inexact_result_found
    ]

    return Paper.objects.filter(
        pk__in=[doi for doi, _ in dois_for_entry]).annotate(entry_title=Case(
            *whens, default=None, output_field=TextField()))
def _get_abstract_match(query: str, boost=0.8):
    """Build a fuzzy ``match`` query against the ``abstract`` field.

    :param query: Text to search for.
    :param boost: Boost value placed in the query (default ``0.8``).
    :return: The query object created by ``QEs``.
    """
    abstract_options = {
        'query': query,
        'fuzziness': 'AUTO',
        'boost': boost,
    }
    return QEs('match', abstract=abstract_options)
def _get_title_match(query: str, boost=0.8):
    """Build a fuzzy ``match`` query against the ``title`` field.

    :param query: Text to search for.
    :param boost: Boost value placed in the query (default ``0.8``).
    :return: The query object created by ``QEs``.
    """
    title_options = dict(query=query, fuzziness='AUTO', boost=boost)
    return QEs('match', title=title_options)
def _build_search_request(must_match, should_match):
    """Create a ``PaperDocument`` search from ``must`` and ``should`` clauses.

    The clauses are combined in a single ``bool`` query with
    ``minimum_should_match=1``. The source of the hits is excluded from the
    request.

    :param must_match: Clauses passed as ``must`` of the ``bool`` query.
    :param should_match: Clauses passed as ``should`` of the ``bool`` query.
    :return: The configured search object (not yet executed).
    """
    request = PaperDocument.search()
    combined = QEs(
        'bool',
        must=must_match,
        should=should_match,
        minimum_should_match=1,
    )
    return request.query(combined).source(excludes=['*'])
def authors(authors: List, query: str, excluded_author_ids: List, max_author_count: int = 8):
    """Search ``AuthorDocument`` by name and append the hits to ``authors``.

    The query text is first passed through
    ``ElasticsearchQueryHelper.remove_common_words`` and matched against
    ``full_name``; documents whose id is in ``excluded_author_ids`` are
    excluded. Only hits that carry a ``full_name`` highlight are used.

    :param authors: List that is extended in place with dicts of the form
        ``{'pk': <document id>, 'full_name': <first highlight fragment>}``.
    :param query: Text to search for.
    :param excluded_author_ids: Document ids that must not be returned.
    :param max_author_count: Upper bound on the number of requested hits.
    :return: ``None``; results are delivered through ``authors``.
    """
    request = AuthorDocument.search()

    name_clause = QEs('match', full_name={
        'query': ElasticsearchQueryHelper.remove_common_words(query)})
    excluded_clause = QEs('ids', values=excluded_author_ids)

    request = request.query(
        QEs('bool', should=[name_clause], must_not=[excluded_clause]))
    request = request.highlight(
        'full_name', number_of_fragments=0, fragment_size=0)

    # Limit the request to the first ``max_author_count`` hits.
    limited = request[0:max_author_count]

    for hit in limited.execute():
        # Skip hits without a highlight on the full name.
        if not hasattr(hit.meta, 'highlight'):
            continue
        if not hasattr(hit.meta.highlight, 'full_name'):
            continue

        authors.append({
            'pk': hit.meta.id,
            'full_name': hit.meta.highlight.full_name[0]
        })
def _get_ids_match(ids: List[str]):
    """Build an ``ids`` query for the given document ids.

    :param ids: Document ids passed as ``values`` of the query.
    :return: The query object created by ``QEs``.
    """
    ids_query = QEs('ids', values=ids)
    return ids_query
def _get_author_match(query: str, boost=0.8):
    """Build a ``match`` query keyed by ``authors__full_name``.

    :param query: Text to search for.
    :param boost: Boost value placed in the query (default ``0.8``).
    :return: The query object created by ``QEs``.
    """
    name_options = {'query': query, 'boost': boost}
    # Passed as a keyword argument, exactly as a literal
    # ``authors__full_name=...`` would be.
    return QEs('match', **{'authors__full_name': name_options})
def _get_doi_match(query: str, boost=10):
    """Build an ``ids`` query from the whitespace-separated parts of ``query``.

    :param query: Text that is split on whitespace; each part is used as a
        candidate document id.
    :param boost: Boost value placed in the query (default ``10``).
    :return: The query object created by ``QEs``.
    """
    candidate_ids = query.split()
    ids_body = {'values': candidate_ids, 'boost': boost}
    return QEs({'ids': ids_body})
def _get_title_exact_match(query: str, boost=1):
    """Build a ``match_phrase`` query against the ``title`` field.

    :param query: Phrase that has to occur in the title.
    :param boost: Boost value placed in the query (default ``1``). It was
        previously accepted but never used; it is now forwarded like in the
        other match helpers.
    :return: The query object created by ``QEs``.
    """
    return QEs({
        'match_phrase': {
            'title': {
                'query': query,
                'boost': boost,
            }
        }
    })