Exemple #1
0
    def test_detect_multimatch_for_query_and_single_match_for_content(self):
        """Check extraction when the selector matches several nodes.

        Two ``<div>`` nodes answer to the same CSS selector, but only one
        of them holds the expected text.
        """
        expected_text = "oui"
        other_text = "non"

        # Both nodes share the same markup; only their content differs
        node_template = '<div class="cible" id="bob">{content}</div>'
        matching_node = node_template.format(content=expected_text)
        non_matching_node = node_template.format(content=other_text)

        page_template = """<!DOCTYPE html>
            <html lang="fr">
                <head>
                    <meta charset="utf-8">
                    <title>test page</title>
                </head>
                <body>
                    {target_1}
                    {target_2}
                </body>
            </html>"""
        html_page = page_template.format(target_1=matching_node,
                                         target_2=non_matching_node)
        selector = ".cible#bob"

        extracted = mincer.utils.extract_content_from_html(
            selector, expected_text, html_page)

        # The extraction yields a <div> holding the expected text
        assert is_div(extracted)
        assert expected_text in extracted
Exemple #2
0
    def test_return_a_no_result_partial_if_no_result_are_found(
            self, client, tmp_db, fake_serv, fake_prov):
        """Check the partial served when the query yields no result."""
        query = "search without result"
        response = client.get(self._build_url_from_query(query))

        # The request succeeds...
        assert response.status_code == OK

        # ...the content may be embedded from any origin...
        assert response.headers["Access-Control-Allow-Origin"] == "*"

        # ...and it is served as HTML
        assert response.mimetype == "text/html"

        # Work on the decoded body from here on
        body = response.get_data(as_text=True)

        # The root <div> carries the "no result" class and the provider id
        assert is_div(body,
                      cls_name=HtmlClasses.NO_RESULT,
                      id_name=fake_prov.slug)

        # The provider block is there, with its name and remote URL
        assert has_div_with_class(body, cls_name=HtmlClasses.PROVIDER)
        provider_divs = all_div_content(
            body, query=HtmlClasses.provider_query())
        assert is_substring_in(fake_prov.name, provider_divs)
        remote_url = fake_prov.remote_url.format(param=quote_plus(query))
        assert is_substring_in(remote_url, provider_divs)
Exemple #3
0
    def test_return_a_no_result_partial_if_no_result_are_found(
            self, client, tmp_db, koha_search_prov):
        """Check the partial returned when the search finds nothing.

        The response must be an HTML document, usable from any origin,
        whose <div> has the "no result" class and the provider id, and
        which contains the provider name and remote URL.
        """
        # This search returns absolutely no result
        SEARCH_QUERY = 'zxkml'

        URL = self._build_url(SEARCH_QUERY)
        response = client.get(URL)

        # We have an answer...
        assert response.status_code == OK

        # Any web page can use this content
        assert response.headers["Access-Control-Allow-Origin"] == "*"

        # ...it's an HTML document...
        assert response.mimetype == "text/html"

        # Let's convert it for easy inspection
        data = response.get_data(as_text=True)

        # ...containing a <div> with correct class and id
        assert is_div(data,
                      cls_name=HtmlClasses.NO_RESULT,
                      id_name="koha-search")

        # And we have the provider info in it
        assert has_div_with_class(data, cls_name=HtmlClasses.PROVIDER)
        prov_data = all_div_content(data, query=HtmlClasses.provider_query())
        assert is_substring_in(koha_search_prov.name, prov_data)
        # The URL is escaped before comparison with the page content
        REMOTE_URL = koha_search_prov.remote_url.format(
            param=quote_plus(SEARCH_QUERY))
        assert is_substring_in(dominescape(REMOTE_URL), prov_data)
Exemple #4
0
    def test_search_works_with_unicode_query(self, client, tmp_db,
                                             koha_search_prov):
        """Check that a non-ASCII (japanese) query returns its results.

        The response must be an HTML document, usable from any origin,
        whose <div> has the result class and the provider id, and which
        lists the expected book titles.
        """
        # This search returns only a few results (in japanese)
        SEARCH_QUERY = '龍 車 日'  # dragon car day

        url = self._build_url(SEARCH_QUERY)
        response = client.get(url)

        # We have an answer...
        assert response.status_code == OK

        # Any web page can use this content
        assert response.headers["Access-Control-Allow-Origin"] == "*"

        # ...it's an HTML document...
        assert response.mimetype == "text/html"

        # Let's convert it for easy inspection
        data = response.get_data(as_text=True)

        # ...containing a <div> with correct class and id
        assert is_div(data, cls_name=HtmlClasses.RESULT, id_name="koha-search")

        # And we have the correct books in it
        results = all_div_content(data, query=HtmlClasses.result_item_query())
        assert is_substring_in("新疆史志", results)
        assert is_substring_in("永井龍男集", results)
Exemple #5
0
    def test_return_result_partial_if_result_are_found(self, client, tmp_db,
                                                       koha_booklist_prov):
        """Check the partial served for an existing book list."""
        # ID of an existing list
        list_id = "9896"

        response = client.get(self._build_url(list_id))

        # The request succeeds...
        assert response.status_code == OK

        # ...the content may be embedded from any origin...
        assert response.headers["Access-Control-Allow-Origin"] == "*"

        # ...and it is served as HTML
        assert response.mimetype == "text/html"

        # Work on the decoded body from here on
        body = response.get_data(as_text=True)

        # The root <div> carries the result class
        assert is_div(body, cls_name=HtmlClasses.RESULT)

        # The provider block is there, with its name and remote URL
        assert has_div_with_class(body, cls_name=HtmlClasses.PROVIDER)
        provider_divs = all_div_content(
            body, query=HtmlClasses.provider_query())
        assert is_substring_in(koha_booklist_prov.name, provider_divs)
        remote_url = koha_booklist_prov.remote_url.format(
            param=quote_plus(list_id))
        assert is_substring_in(dominescape(remote_url), provider_divs)

        # Every expected book title shows up among the result items
        items = all_div_content(body, query=HtmlClasses.result_item_query())
        expected_titles = (
            "Africa in Russia, Russia in Africa",
            "Cahiers d'études africaines",
            "Étudier à l'Est",
            "Forced labour in colonial Africa",
            "Le gel",
            "Revue européenne des migrations internationales",
            "The Cold War in the Third World",
        )
        for title in expected_titles:
            assert is_substring_in(title, items)
Exemple #6
0
    def test_detect_matching_content(self):
        """Check extraction of a single node holding the expected text."""
        expected_text = "ca va matcher"
        node_template = '<div class="cible" id="bob">{content}</div>'
        target_node = node_template.format(content=expected_text)

        page_template = """<!DOCTYPE html>
            <html lang="fr">
                <head>
                    <meta charset="utf-8">
                    <title>test page</title>
                </head>
                <body>
                    {node}
                </body>
            </html>"""
        html_page = page_template.format(node=target_node)
        selector = ".cible#bob"

        extracted = mincer.utils.extract_content_from_html(
            selector, expected_text, html_page)

        # The extraction yields a <div> holding the expected text
        assert is_div(extracted)
        assert expected_text in extracted
Exemple #7
0
    def test_search_works(self, client, tmp_db, koha_search_prov):
        """Check the partial served for a search that has a few results."""
        # This query only yields a handful of results
        search_query = 'afrique voiture'

        response = client.get(self._build_url(search_query))

        # The request succeeds...
        assert response.status_code == OK

        # ...the content may be embedded from any origin...
        assert response.headers["Access-Control-Allow-Origin"] == "*"

        # ...and it is served as HTML
        assert response.mimetype == "text/html"

        # Work on the decoded body from here on
        body = response.get_data(as_text=True)

        # TODO add this new assert to the other query related tests
        # The root <div> carries the result class and the provider id
        assert is_div(body, cls_name=HtmlClasses.RESULT, id_name="koha-search")

        # The provider block is there, with its name and remote URL
        assert has_div_with_class(body, cls_name=HtmlClasses.PROVIDER)
        provider_divs = all_div_content(
            body, query=HtmlClasses.provider_query())
        assert is_substring_in(koha_search_prov.name, provider_divs)
        remote_url = koha_search_prov.remote_url.format(
            param=quote_plus(search_query))
        assert is_substring_in(dominescape(remote_url), provider_divs)

        # Every expected book title shows up among the result items
        items = all_div_content(body, query=HtmlClasses.result_item_query())
        expected_titles = (
            "Différenciation régionale et régionalisation en Afrique francophone et à Madagascar",
            "Qui se nourrit de la famine en Afrique ?",
            "Les jachères en Afrique tropicale",
        )
        for title in expected_titles:
            assert is_substring_in(title, items)
Exemple #8
0
    def test_search_works_with_unicode_query(self, client, tmp_db, fake_serv,
                                             fake_prov):
        """Check that a query holding japanese characters is handled."""
        # The query mixes ASCII and japanese characters
        query = "search with unicode 龍 車 日"  # dragon car day
        response = client.get(self._build_url_from_query(query))

        # The request succeeds...
        assert response.status_code == OK

        # ...the content may be embedded from any origin...
        assert response.headers["Access-Control-Allow-Origin"] == "*"

        # ...and it is served as HTML
        assert response.mimetype == "text/html"

        # Work on the decoded body from here on
        body = response.get_data(as_text=True)

        # The root <div> carries the result class and the provider id
        assert is_div(body,
                      cls_name=HtmlClasses.RESULT,
                      id_name=fake_prov.slug)

        # The provider block is there, with its name and remote URL
        assert has_div_with_class(body, cls_name=HtmlClasses.PROVIDER)
        provider_divs = all_div_content(
            body, query=HtmlClasses.provider_query())
        assert is_substring_in(fake_prov.name, provider_divs)
        remote_url = fake_prov.remote_url.format(param=quote_plus(query))
        assert is_substring_in(remote_url, provider_divs)

        # Select the result items nested inside the result block
        item_selector = ".{surrounding} .{item}".format(
            surrounding=HtmlClasses.RESULT,
            item=HtmlClasses.RESULT_ITEM)
        items = all_div_content(body, query=item_selector)

        # Both expected titles show up among the result items
        assert is_substring_in("Result with japanese 新疆史志", items)
        assert is_substring_in("Result with japanese 永井龍男集", items)