def test_detect_multimatch_for_query_and_single_match_for_content(self):
    """Extract a <div> when two nodes match the query but only one the content.

    Both nodes in the page share the class and id targeted by the selector;
    only the first one holds the text that is passed as expected content.
    """
    expected_text = "oui"
    rejected_text = "non"
    node_template = '<div class="cible" id="bob">{content}</div>'
    matching_node = node_template.format(content=expected_text)
    other_node = node_template.format(content=rejected_text)
    page_template = (
        '<!DOCTYPE html> <html lang="fr"> <head> <meta charset="utf-8"> '
        '<title>test page</title> </head> <body> {target_1} {target_2} '
        '</body> </html>'
    )
    page = page_template.format(target_1=matching_node, target_2=other_node)
    selector = ".cible#bob"

    extracted = mincer.utils.extract_content_from_html(
        selector, expected_text, page)

    # The extraction is a <div> and it carries the expected text
    assert is_div(extracted)
    assert expected_text in extracted
def test_return_a_no_result_partial_if_no_result_are_found(
        self, client, tmp_db, fake_serv, fake_prov):
    """Check the partial served for a query that finds nothing."""
    search_terms = "search without result"
    target_url = self._build_url_from_query(search_terms)
    reply = client.get(target_url)

    # The request succeeded...
    assert reply.status_code == OK
    # ...the content is usable from any web page...
    assert reply.headers["Access-Control-Allow-Origin"] == "*"
    # ...and it is an HTML document.
    assert reply.mimetype == "text/html"

    # Work on the decoded body from here on
    html = reply.get_data(as_text=True)

    # It holds a <div> with the "no result" class and the provider slug as id
    assert is_div(html, cls_name=HtmlClasses.NO_RESULT,
                  id_name=fake_prov.slug)

    # The provider info (name and remote URL) is part of the answer
    assert has_div_with_class(html, cls_name=HtmlClasses.PROVIDER)
    provider_blocks = all_div_content(
        html, query=HtmlClasses.provider_query())
    assert is_substring_in(fake_prov.name, provider_blocks)
    expected_link = fake_prov.remote_url.format(
        param=quote_plus(search_terms))
    assert is_substring_in(expected_link, provider_blocks)
def test_return_a_no_result_partial_if_no_result_are_found(
        self, client, tmp_db, koha_search_prov):
    """Check the partial served when the search finds nothing."""
    # This search returns absolutely no result
    search_terms = 'zxkml'
    target_url = self._build_url(search_terms)
    reply = client.get(target_url)

    # The request succeeded...
    assert reply.status_code == OK
    # ...the content is usable from any web page...
    assert reply.headers["Access-Control-Allow-Origin"] == "*"
    # ...and it is an HTML document.
    assert reply.mimetype == "text/html"

    # Work on the decoded body from here on
    html = reply.get_data(as_text=True)

    # It holds a <div> with the "no result" class
    assert is_div(html, cls_name=HtmlClasses.NO_RESULT)

    # The provider info (name and escaped remote URL) is part of the answer
    assert has_div_with_class(html, cls_name=HtmlClasses.PROVIDER)
    provider_blocks = all_div_content(
        html, query=HtmlClasses.provider_query())
    assert is_substring_in(koha_search_prov.name, provider_blocks)
    expected_link = koha_search_prov.remote_url.format(
        param=quote_plus(search_terms))
    assert is_substring_in(dominescape(expected_link), provider_blocks)
def test_search_works_with_unicode_query(self, client, tmp_db,
                                         koha_search_prov):
    """Check that a query written in Japanese returns the expected books."""
    # This search returns only a few results (in Japanese)
    search_terms = '龍 車 日'  # dragon car day
    reply = client.get(self._build_url(search_terms))

    # The request succeeded...
    assert reply.status_code == OK
    # ...the content is usable from any web page...
    assert reply.headers["Access-Control-Allow-Origin"] == "*"
    # ...and it is an HTML document.
    assert reply.mimetype == "text/html"

    # Work on the decoded body from here on
    html = reply.get_data(as_text=True)

    # It holds a <div> with the "result" class
    assert is_div(html, cls_name=HtmlClasses.RESULT)

    # The expected books are listed among the result items
    found_items = all_div_content(
        html, query=HtmlClasses.result_item_query())
    for title in ("新疆史志", "永井龍男集"):
        assert is_substring_in(title, found_items)
def test_return_result_partial_if_result_are_found(self, client, tmp_db,
                                                   koha_booklist_prov):
    """Check the partial served for an existing book list."""
    # We are using the ID of an existing list
    booklist_id = "9896"
    target_url = self._build_url(booklist_id)
    reply = client.get(target_url)

    # The request succeeded...
    assert reply.status_code == OK
    # ...the content is usable from any web page...
    assert reply.headers["Access-Control-Allow-Origin"] == "*"
    # ...and it is an HTML document.
    assert reply.mimetype == "text/html"

    # Work on the decoded body from here on
    html = reply.get_data(as_text=True)

    # It holds a <div> with the "result" class
    assert is_div(html, cls_name=HtmlClasses.RESULT)

    # The provider info (name and escaped remote URL) is part of the answer
    assert has_div_with_class(html, cls_name=HtmlClasses.PROVIDER)
    provider_blocks = all_div_content(
        html, query=HtmlClasses.provider_query())
    assert is_substring_in(koha_booklist_prov.name, provider_blocks)
    expected_link = koha_booklist_prov.remote_url.format(
        param=quote_plus(booklist_id))
    assert is_substring_in(dominescape(expected_link), provider_blocks)

    # The expected books are listed among the result items
    found_items = all_div_content(
        html, query=HtmlClasses.result_item_query())
    expected_titles = (
        "Africa in Russia, Russia in Africa",
        "Cahiers d'études africaines",
        "Étudier à l'Est",
        "Forced labour in colonial Africa",
        "Le gel",
        "Revue européenne des migrations internationales",
        "The Cold War in the Third World",
    )
    for title in expected_titles:
        assert is_substring_in(title, found_items)
def test_detect_matching_content(self):
    """Extract a <div> whose text is the expected content.

    The page holds a single node that satisfies the selector and carries
    the text passed as expected content.
    """
    expected_text = "ca va matcher"
    target_node = '<div class="cible" id="bob">{content}</div>'.format(
        content=expected_text)
    page_template = (
        '<!DOCTYPE html> <html lang="fr"> <head> <meta charset="utf-8"> '
        '<title>test page</title> </head> <body> {node} </body> </html>'
    )
    page = page_template.format(node=target_node)
    selector = ".cible#bob"

    extracted = mincer.utils.extract_content_from_html(
        selector, expected_text, page)

    # The extraction is a <div> and it carries the expected text
    assert is_div(extracted)
    assert expected_text in extracted
def test_search_works(self, client, tmp_db, koha_search_prov):
    """Check the partial served for a search that finds a few books."""
    # This search returns only a few results
    search_terms = 'afrique voiture'
    target_url = self._build_url(search_terms)
    reply = client.get(target_url)

    # The request succeeded...
    assert reply.status_code == OK
    # ...the content is usable from any web page...
    assert reply.headers["Access-Control-Allow-Origin"] == "*"
    # ...and it is an HTML document.
    assert reply.mimetype == "text/html"

    # Work on the decoded body from here on
    html = reply.get_data(as_text=True)

    # TODO add this new assert to the other query related tests
    # It holds a <div> with the "result" class and the expected id
    assert is_div(html, cls_name=HtmlClasses.RESULT, id_name="koha-search")

    # The provider info (name and escaped remote URL) is part of the answer
    assert has_div_with_class(html, cls_name=HtmlClasses.PROVIDER)
    provider_blocks = all_div_content(
        html, query=HtmlClasses.provider_query())
    assert is_substring_in(koha_search_prov.name, provider_blocks)
    expected_link = koha_search_prov.remote_url.format(
        param=quote_plus(search_terms))
    assert is_substring_in(dominescape(expected_link), provider_blocks)

    # The expected books are listed among the result items
    found_items = all_div_content(
        html, query=HtmlClasses.result_item_query())
    expected_titles = (
        "Différenciation régionale et régionalisation en Afrique francophone et à Madagascar",
        "Qui se nourrit de la famine en Afrique ?",
        "Les jachères en Afrique tropicale",
    )
    for title in expected_titles:
        assert is_substring_in(title, found_items)
def test_search_works_with_unicode_query(self, client, tmp_db, fake_serv,
                                         fake_prov):
    """Check that a query containing Japanese text returns its results."""
    # A query with some Japanese
    search_terms = "search with unicode 龍 車 日"  # dragon car day
    target_url = self._build_url_from_query(search_terms)
    reply = client.get(target_url)

    # The request succeeded...
    assert reply.status_code == OK
    # ...the content is usable from any web page...
    assert reply.headers["Access-Control-Allow-Origin"] == "*"
    # ...and it is an HTML document.
    assert reply.mimetype == "text/html"

    # Work on the decoded body from here on
    html = reply.get_data(as_text=True)

    # It holds a <div> with the "result" class and the provider slug as id
    assert is_div(html, cls_name=HtmlClasses.RESULT, id_name=fake_prov.slug)

    # The provider info (name and remote URL) is part of the answer
    assert has_div_with_class(html, cls_name=HtmlClasses.PROVIDER)
    provider_blocks = all_div_content(
        html, query=HtmlClasses.provider_query())
    assert is_substring_in(fake_prov.name, provider_blocks)
    expected_link = fake_prov.remote_url.format(
        param=quote_plus(search_terms))
    assert is_substring_in(expected_link, provider_blocks)

    # The expected books are listed among the result items
    item_selector = ".{surrounding} .{item}".format(
        surrounding=HtmlClasses.RESULT, item=HtmlClasses.RESULT_ITEM)
    found_items = all_div_content(html, query=item_selector)
    for title in ("Result with japanese 新疆史志",
                  "Result with japanese 永井龍男集"):
        assert is_substring_in(title, found_items)