def load_entries_from_database(entries):
    """Resolve bibliography entries to stored papers.

    Each entry is looked up either through Elasticsearch (when
    ``settings.USING_ELASTICSEARCH`` is truthy) or through the Django ORM.

    Elasticsearch lookup order:
      1. exact ``_id`` match on ``entry.doi`` if the entry has a doi,
         otherwise a ``match_phrase`` on ``entry.title``;
      2. if that yields nothing and the entry has a title, a ``match`` on
         the title with ``minimum_should_match`` of 90%; only the first hit
         of this fallback is used.

    Side effects on matched entries: ``result_found`` is set to ``True``;
    ``inexact_result_found`` is additionally set to ``True`` when only the
    fallback title match succeeded.

    Args:
        entries: Iterable of objects exposing ``doi`` and ``title``.

    Returns:
        A ``Paper`` queryset restricted to the matched primary keys and
        annotated with ``entry_title``: the entry's own title for inexact
        matches, ``None`` for everything else.
    """
    # Only document ids are read from the hits, so no source fields are
    # requested. The restriction is loop-invariant and applied once.
    search = PaperDocument.search().source(excludes=['*'])

    dois_for_entry = []
    for entry in entries:
        if settings.USING_ELASTICSEARCH:
            # Try to find the record using its doi or full title.
            if entry.doi:
                query = QEs({"match": {"_id": entry.doi}})
            elif entry.title:
                query = QEs({"match_phrase": {"title": entry.title}})
            else:
                # Nothing to search by. Passing ``None`` as the query is
                # not a valid elasticsearch_dsl query, so skip the entry
                # instead of failing the whole batch.
                continue

            results = search.query(query).execute()

            if results:
                dois_for_entry.extend(
                    (result.meta.id, entry) for result in results)
                entry.result_found = True
            elif entry.title:
                # If not found try to find the entry by its title using
                # a looser match.
                results = PaperDocument.search().query(
                    QEs({
                        'match': {
                            'title': {
                                'query': entry.title,
                                'minimum_should_match': "90%"
                            }
                        }
                    })).execute()

                if results:
                    dois_for_entry.append((results[0].meta.id, entry))
                    entry.result_found = True
                    entry.inexact_result_found = True
        else:
            # ``first()`` returns ``None`` on an empty queryset, which
            # replaces the separate ``count()`` round trips.
            paper = Paper.objects.filter(
                Q(pk=entry.doi) | Q(title=entry.title)).first()

            if paper is not None:
                dois_for_entry.append((paper.doi, entry))
                entry.result_found = True

    # Only inexact matches carry the entry's title along. The flag is only
    # ever set on the fallback path above, so a missing attribute is read
    # as "exact match".
    whens = [
        When(pk=doi, then=Value(entry.title))
        for doi, entry in dois_for_entry
        if getattr(entry, 'inexact_result_found', False)
    ]

    matched_pks = [doi for doi, _ in dois_for_entry]
    return Paper.objects.filter(pk__in=matched_pks).annotate(
        entry_title=Case(*whens, default=None, output_field=TextField()))
Beispiel #2
0
 def _get_abstract_match(query: str, boost=0.8):
     """Build a ``match`` query on the ``abstract`` field.

     The clause uses ``fuzziness`` ``'AUTO'`` and the given ``boost``.
     """
     abstract_clause = dict(query=query, fuzziness='AUTO', boost=boost)
     return QEs('match', abstract=abstract_clause)
Beispiel #3
0
 def _get_title_match(query: str, boost=0.8):
     """Build a ``match`` query on the ``title`` field.

     The clause uses ``fuzziness`` ``'AUTO'`` and the given ``boost``.
     """
     title_clause = dict(query=query, fuzziness='AUTO', boost=boost)
     return QEs('match', title=title_clause)
Beispiel #4
0
    def _build_search_request(must_match, should_match):
        """Assemble a paper search from ``must`` and ``should`` clauses.

        The clauses are combined in a ``bool`` query with
        ``minimum_should_match=1``. The returned search excludes all
        source fields (``excludes=['*']``).
        """
        request = PaperDocument.search()
        combined = QEs('bool',
                       must=must_match,
                       should=should_match,
                       minimum_should_match=1)
        return request.query(combined).source(excludes=['*'])
Beispiel #5
0
    def authors(authors: List,
                query: str,
                excluded_author_ids: List,
                max_author_count: int = 8):
        """Append highlighted author matches for ``query`` to ``authors``.

        Searches author documents whose ``full_name`` matches the query
        (after ``ElasticsearchQueryHelper.remove_common_words``), leaving
        out the ids in ``excluded_author_ids``. At most
        ``max_author_count`` hits are requested.

        For every hit that carries a ``full_name`` highlight, a dict with
        ``pk`` (the document id) and ``full_name`` (the first highlight
        fragment) is appended to ``authors`` in place. Nothing is returned.
        """
        request = AuthorDocument.search()

        cleaned_query = ElasticsearchQueryHelper.remove_common_words(query)
        name_clause = QEs('match', full_name={'query': cleaned_query})
        exclusion_clause = QEs('ids', values=excluded_author_ids)

        request = request.query(
            QEs('bool', should=[name_clause], must_not=[exclusion_clause]))
        request = request.highlight(
            'full_name', number_of_fragments=0, fragment_size=0)

        for hit in request[0:max_author_count].execute():
            meta = hit.meta
            has_name_highlight = (hasattr(meta, 'highlight')
                                  and hasattr(meta.highlight, 'full_name'))
            if not has_name_highlight:
                continue

            authors.append({
                'pk': meta.id,
                'full_name': meta.highlight.full_name[0],
            })
Beispiel #6
0
 def _get_ids_match(ids: List[str]):
     """Build an ``ids`` query whose ``values`` are the given ids."""
     ids_params = {'values': ids}
     return QEs('ids', **ids_params)
Beispiel #7
0
 def _get_author_match(query: str, boost=0.8):
     """Build a ``match`` query on ``authors__full_name`` with a boost."""
     # NOTE(review): elasticsearch_dsl is expected to expand the double
     # underscore into the dotted path ``authors.full_name`` - confirm.
     author_clause = dict(query=query, boost=boost)
     return QEs('match', authors__full_name=author_clause)
Beispiel #8
0
    def _get_doi_match(query: str, boost=10):
        """Build a boosted ``ids`` query from the tokens of ``query``.

        ``query`` is split on whitespace and every token becomes one of
        the ``values`` of the ``ids`` clause.
        """
        candidate_ids = query.split()
        ids_clause = {'values': candidate_ids, 'boost': boost}
        return QEs({'ids': ids_clause})
Beispiel #9
0
 def _get_title_exact_match(query: str, boost=1):
     """Build a ``match_phrase`` query on the ``title`` field.

     Args:
         query: Phrase that has to occur in the title.
         boost: Weight of this clause. It used to be accepted but never
             forwarded to the query; it is now passed through. The
             default of 1 is the neutral boost, so callers relying on the
             default see the same scoring as before.
     """
     return QEs({
         'match_phrase': {
             'title': {
                 'query': query,
                 'boost': boost,
             }
         }
     })