def run_filetype_test(filename, result_dict, filetype, evaluate_scraper,
                      allowed_mimetypes=None):
    """
    Runs scraper result evaluation for a scraper with forced MIME type/version

    :filename: Name of the file, not containing the tests/data/mime_type/ part
    :result_dict: Result dict to be given to Correct
    :filetype: A dict containing the forced, expected and real file types
               under the following keys:
                * given_mimetype: the forced MIME type
                * given_version: the forced version
                * expected_mimetype: the expected resulting MIME type
                * expected_version: the expected resulting version
                * correct_mimetype: the real MIME type of the file
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       once the file has been scraped
    :allowed_mimetypes: Optional list passed on to force_correct_filetype;
                        a new empty list is used when it is omitted
    """
    # Build the list per call: a literal [] default would be one shared
    # object reused (and possibly modified) across every invocation.
    if allowed_mimetypes is None:
        allowed_mimetypes = []

    correct = force_correct_filetype(filename, result_dict, filetype,
                                     allowed_mimetypes)
    if correct.mimetype == "application/xhtml+xml":
        correct.streams[0]["stream_type"] = "text"

    # Use the forced MIME type as the guess, falling back to the real one
    # when no MIME type is forced.
    mimetype_guess = (filetype["given_mimetype"]
                      or filetype["correct_mimetype"])
    params = {"mimetype": filetype["given_mimetype"],
              "version": filetype["given_version"],
              "mimetype_guess": mimetype_guess}
    scraper = PilScraper(correct.filename, True, params)
    scraper.scrape_file()

    evaluate_scraper(scraper, correct)
def test_is_supported(mime, ver):
    """
    Check which MIME type/version combinations is_supported accepts.

    :mime: MIME type
    :ver: File format version
    """
    # (version, third positional argument) pairs that must be reported as
    # supported for the given MIME type.
    accepted_cases = (
        (ver, True),
        (None, True),
        (ver, False),
        ("foo", True),
    )
    for version, flag in accepted_cases:
        assert PilScraper.is_supported(mime, version, flag)

    # An unknown MIME type must not be reported as supported.
    assert not PilScraper.is_supported("foo", ver, True)
def test_scraper_gif(filename, result_dict, evaluate_scraper):
    """
    Test scraper with gif files.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       when the file is expected to be well-formed
    """
    # NOTE(review): a later definition of test_scraper_gif in this file
    # rebinds this name -- confirm which variant is meant to run.
    correct = parse_results(filename, "image/gif", result_dict, True)

    # GIF is an index image
    if correct.well_formed:
        correct.streams[0]["samples_per_pixel"] = "1"
    for stream_info in correct.streams.values():
        stream_info["version"] = "(:unav)"

    # Expected message parts depend on the expected well-formedness.
    if correct.well_formed:
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    scraper = PilScraper(correct.filename, True, correct.params)
    scraper.scrape_file()

    if not correct.well_formed:
        # Invalid file: only the verdict, messages and empty streams are
        # checked.
        assert not scraper.well_formed
        assert partial_message_included(correct.stdout_part,
                                        scraper.messages())
        assert partial_message_included(correct.stderr_part,
                                        scraper.errors())
        assert not scraper.streams
        return

    evaluate_scraper(scraper, correct)
def test_scraper_tif(filename, result_dict, evaluate_scraper):
    """
    Test scraper with tiff files.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected
                  streams
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       when the file is expected to be well-formed
    """
    # NOTE(review): a later definition of test_scraper_tif in this file
    # rebinds this name -- confirm which variant is meant to run.
    correct = parse_results(filename, "image/tiff", result_dict, True)

    # Expected message parts depend on the expected well-formedness.
    if correct.well_formed:
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    scraper = PilScraper(filename=correct.filename, mimetype="image/tiff")
    scraper.scrape_file()

    if not correct.well_formed:
        # Invalid file: only the verdict, messages and empty streams are
        # checked.
        assert not scraper.well_formed
        assert partial_message_included(correct.stdout_part,
                                        scraper.messages())
        assert partial_message_included(correct.stderr_part,
                                        scraper.errors())
        assert not scraper.streams
        return

    # The expected version of every stream is replaced with UNAV before
    # the full comparison.
    for position, _entry in enumerate(correct.streams):
        correct.streams[position]["version"] = UNAV
    evaluate_scraper(scraper, correct)
def test_scraper_gif(filename, result_dict, evaluate_scraper):
    """
    Test scraper with gif files.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected
                  streams
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       when the file is expected to be well-formed
    """
    correct = parse_results(filename, "image/gif", result_dict, True)

    # GIF is an index image
    if correct.well_formed:
        correct.streams[0]["samples_per_pixel"] = "1"
    for stream_info in correct.streams.values():
        stream_info["version"] = UNAV

    # Expected message parts depend on the expected well-formedness.
    if correct.well_formed:
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    scraper = PilScraper(filename=correct.filename, mimetype="image/gif")
    scraper.scrape_file()

    if not correct.well_formed:
        # Invalid file: only the verdict, messages and empty streams are
        # checked.
        assert not scraper.well_formed
        assert partial_message_included(correct.stdout_part,
                                        scraper.messages())
        assert partial_message_included(correct.stderr_part,
                                        scraper.errors())
        assert not scraper.streams
        return

    evaluate_scraper(scraper, correct)
def test_scraper_jp2(filename, result_dict, evaluate_scraper):
    """
    Test scraper with jp2 files.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       when the file is expected to be well-formed
    """
    correct = parse_results(filename, "image/jp2", result_dict, True)

    # Expected message parts depend on the expected well-formedness.
    if correct.well_formed:
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    scraper = PilScraper(correct.filename, True, correct.params)
    scraper.scrape_file()

    if not correct.well_formed:
        # Invalid file: only the verdict, messages and empty streams are
        # checked.
        assert not scraper.well_formed
        assert partial_message_included(correct.stdout_part,
                                        scraper.messages())
        assert partial_message_included(correct.stderr_part,
                                        scraper.errors())
        assert not scraper.streams
        return

    evaluate_scraper(scraper, correct)
def test_scraper_tif(filename, result_dict, evaluate_scraper):
    """
    Test scraper with tiff files.

    :filename: Test file name
    :result_dict: Result dict handed to parse_results
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       when the file is expected to be well-formed
    """
    correct = parse_results(filename, "image/tiff", result_dict, True)

    # Expected message parts depend on the expected well-formedness.
    if correct.well_formed:
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    scraper = PilScraper(correct.filename, True, correct.params)
    scraper.scrape_file()

    if not correct.well_formed:
        # Invalid file: only the verdict, messages and empty streams are
        # checked.
        assert not scraper.well_formed
        assert partial_message_included(correct.stdout_part,
                                        scraper.messages())
        assert partial_message_included(correct.stderr_part,
                                        scraper.errors())
        assert not scraper.streams
        return

    # The expected version of every stream is replaced with "(:unav)"
    # before the full comparison.
    for position in range(len(correct.streams)):
        correct.streams[position]["version"] = "(:unav)"
    evaluate_scraper(scraper, correct)
def test_no_wellformed():
    """
    Test scraper without well-formed check.

    Scrapes a GIF test file with False as the second PilScraper argument
    and expects no "Skipping scraper" message and a well_formed of None.
    """
    gif_scraper = PilScraper("tests/data/image_gif/valid_1987a.gif", False)
    gif_scraper.scrape_file()

    skip_reported = partial_message_included("Skipping scraper",
                                             gif_scraper.messages())
    assert not skip_reported
    assert gif_scraper.well_formed is None