def test_generate_minimal_name_variations_with_dotted_initial():
    """Check the variations generated for a lastname followed by a dotted initial ('Oz, Y.')."""
    name = 'Oz, Y.'
    expected_variations = {
        'oz y.',
        'oz y',
        'y. oz',
    }

    result = generate_minimal_name_variations(name)

    # Equal sizes guarantee that the returned collection holds no duplicated variation.
    assert len(expected_variations) == len(result)

    # Compare the very same result whose length was checked above, instead of generating it a second time.
    assert expected_variations == set(result)
    def _generate_author_query(self, author_name):
        """Build the nested query used specifically for author searches.

        Notes:
            A ``match`` on the author field alone is generic enough to return many results, so a filter clause is
            added to truncate them, imitating legacy's behaviour of returning more "exact" results. E.g. searching
            for `Smith, John` shouldn't return papers of 'Smith, Bob'.

            When the searched name contains full names, each filter alternative pairs a ``term`` on the name
            variations field with a match query generated with ``with_operator_and=True``, in order to be even more
            exact in the search.

        Args:
            author_name: The author name to search for.

        Returns:
            Whatever ``generate_nested_query`` returns for the authors nested path and the assembled bool query.
        """
        lowered_variations = [
            variation.lower() for variation in generate_minimal_name_variations(author_name)
        ]

        filter_alternatives = []
        if not author_name_contains_fullnames(author_name):
            # Initials, or even a single lastname, were searched: filter with only the name variations.
            for variation in lowered_variations:
                filter_alternatives.append(
                    {'term': {ElasticSearchVisitor.AUTHORS_NAME_VARIATIONS_FIELD: variation}}
                )
        else:
            # The query contains sufficient data, i.e. full names, e.g. ``Mele, Salvatore`` (and not ``Mele, S`` or
            # ``Mele``), so the filtering can be improved in order to filter out records with authors that have
            # the same non lastnames prefix, e.g. 'Mele, Samuele'.
            for term_variation, match_variation in product(lowered_variations, lowered_variations):
                variation_term = {
                    'term': {ElasticSearchVisitor.AUTHORS_NAME_VARIATIONS_FIELD: term_variation}
                }
                variation_match = generate_match_query(
                    ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['author'],
                    match_variation,
                    with_operator_and=True
                )
                filter_alternatives.append({'bool': {'must': [variation_term, variation_match]}})

        # Any one of the alternatives is enough for a record to pass the filter.
        author_filter = {'bool': {'should': filter_alternatives}}
        author_match = {'match': {ElasticSearchVisitor.KEYWORD_TO_ES_FIELDNAME['author']: author_name}}
        bool_query = {'bool': {'filter': author_filter, 'must': author_match}}

        return generate_nested_query(ElasticSearchVisitor.AUTHORS_NESTED_QUERY_PATH, bool_query)
def test_generate_minimal_name_variations_with_initial_strips_multiple_consecutive_whitespace(
):
    """Check that 'oz,y' yields exactly the two single-space separated variations."""
    # NOTE(review): the test name mentions consecutive whitespace, yet the input holds none - confirm the input.
    variations = generate_minimal_name_variations('oz,y')

    assert set(variations) == {'oz y', 'y oz'}
def test_generate_minimal_name_variations_firstname_lastname():
    """Check the variations generated for a name given without a comma ('John Ellis')."""
    variations = generate_minimal_name_variations('John Ellis')

    assert set(variations) == {'ellis john', 'ellis j', 'john ellis', 'john e'}
def test_generate_minimal_name_variations_without_dotted_initial_doesnt_generate_same_variation(
):
    """Check that an undotted initial ('Oz, Y') doesn't produce the same variation more than once."""
    expected = {'oz y', 'y oz'}

    variations = generate_minimal_name_variations('Oz, Y')

    # The sizes can only agree when the returned collection holds no duplicates.
    assert len(variations) == len(expected)
    assert set(variations) == expected
def test_generate_minimal_name_variations_with_dashed_lastname():
    """Check that the dashed lastname 'Caro-Estevez' yields the single variation 'caro estevez'."""
    variations = generate_minimal_name_variations('Caro-Estevez')

    assert variations == ['caro estevez']
def test_generate_minimal_name_variations_with_lastname_lowercases():
    """Check that the lone lastname 'Mele' yields only its lowercased form."""
    variations = generate_minimal_name_variations('Mele')

    assert variations == ['mele']