def run_filetype_test(filename, result_dict, filetype, evaluate_scraper,
                      allowed_mimetypes=None):
    """
    Runs scraper result evaluation for a scraper with forced MIME type/version

    :filename: Name of the file, not containing the tests/data/mime_type/ part
    :result_dict: Result dict to be given to Correct
    :filetype: A dict containing the forced, expected and real
               file types under the following keys:
                * given_mimetype: the forced MIME type
                * given_version: the forced version
                * expected_mimetype: the expected resulting MIME type
                * expected_version: the expected resulting version
                * correct_mimetype: the real MIME type of the file
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       after the file has been scraped
    :allowed_mimetypes: Optional list forwarded to force_correct_filetype.
                        When omitted, a new empty list is used for each call.
    """
    # A literal [] default would be created once and shared by every call;
    # build a fresh list per call instead.
    if allowed_mimetypes is None:
        allowed_mimetypes = []

    correct = force_correct_filetype(filename, result_dict, filetype,
                                     allowed_mimetypes)

    # Use the forced MIME type as the guess when one is given, otherwise
    # fall back to the real MIME type of the file.
    mimetype_guess = filetype["given_mimetype"] or filetype["correct_mimetype"]

    params = {
        "mimetype": filetype["given_mimetype"],
        "version": filetype["given_version"],
        "mimetype_guess": mimetype_guess,
    }
    scraper = WandScraper(correct.filename, True, params)
    scraper.scrape_file()

    evaluate_scraper(scraper, correct)
def test_scraper_jpg(filename, result_dict, evaluate_scraper):
    """
    Test scraper with jpeg files.

    :filename: Test file name, passed on to parse_results
    :result_dict: Result dict passed on to parse_results
    :evaluate_scraper: Callable invoked with the scraper and the expected
                       results once the file has been scraped
    """
    correct = parse_results(filename, "image/jpeg", result_dict, True)
    # The compression expectation is only set for well-formed files.
    if correct.well_formed:
        correct.streams[0]["compression"] = "jpeg"
    # NOTE(review): the version override is assumed to apply regardless of
    # well-formedness -- confirm against the intended nesting.
    correct.streams[0]["version"] = "(:unav)"
    scraper = WandScraper(correct.filename)
    scraper.scrape_file()
    evaluate_scraper(scraper, correct)
def test_scraper_colorspace(mimetype, filename, expected):
    """
    Check that the first scraped stream reports the expected colorspace.

    The comparison is case-insensitive.

    :mimetype: File MIME type; also selects the data directory, with "/"
               replaced by "_"
    :filename: Test file name inside that data directory
    :expected: Expected colorspace name
    """
    data_dir = mimetype.replace("/", "_")
    path = os.path.join("tests/data/", data_dir, filename)

    wand = WandScraper(filename=path, mimetype=mimetype)
    wand.scrape_file()

    detected = wand.streams[0].colorspace()
    assert detected.lower() == expected.lower()
def test_scraper_invalid(filename, mimetype, stderr_part):
    """
    Test WandScraper with invalid files.

    The scraper is expected to produce no streams and no messages, to report
    an error containing the given text, and to be not well-formed.

    :filename: Test file name
    :mimetype: MIME type used to pick the data directory ("/" becomes "_")
    :stderr_part: Part of the expected error output
    """
    data_dir = mimetype.replace("/", "_")
    path = os.path.join("tests/data/", data_dir, filename)

    wand = WandScraper(path)
    wand.scrape_file()

    assert not wand.streams
    assert wand.info()["class"] == "WandScraper"
    assert not wand.messages()
    error_found = partial_message_included(stderr_part, wand.errors())
    assert error_found
    assert not wand.well_formed
def test_scraper_gif(filename, result_dict, evaluate_scraper):
    """
    Test scraper with gif files.

    :filename: Test file name, passed on to parse_results
    :result_dict: Result dict passed on to parse_results
    :evaluate_scraper: Callable invoked with the scraper and the expected
                       results once the file has been scraped
    """
    correct = parse_results(filename, "image/gif", result_dict, True)
    # The compression expectation is only set for well-formed files.
    if correct.well_formed:
        correct.streams[0]["compression"] = "lzw"
    # NOTE(review): the version override is assumed to apply to every stream
    # regardless of well-formedness -- confirm against the intended nesting.
    for stream in correct.streams.values():
        stream["version"] = "(:unav)"
    scraper = WandScraper(correct.filename)
    scraper.scrape_file()
    evaluate_scraper(scraper, correct)
def test_scraper_jp2(filename, result_dict, evaluate_scraper):
    """
    Test scraper with jp2 files.

    :filename: Test file name, passed on to parse_results
    :result_dict: Result dict passed on to parse_results
    :evaluate_scraper: Callable invoked with the scraper and the expected
                       results once the file has been scraped
    """
    correct = parse_results(filename, "image/jp2", result_dict, True)
    # Stream-level expectations are only set for well-formed files.
    if correct.well_formed:
        correct.streams[0]["compression"] = "jpeg2000"
        correct.streams[0]["colorspace"] = "rgb"
    # NOTE(review): clearing the expected version is assumed to happen
    # regardless of well-formedness -- confirm against the intended nesting.
    correct.version = None
    scraper = WandScraper(correct.filename)
    scraper.scrape_file()
    evaluate_scraper(scraper, correct)
def test_scraper_jpg(filename, result_dict, evaluate_scraper):
    """
    Check WandScraper results for valid jpeg files.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected streams
    :evaluate_scraper: Callable invoked with the scraper and the expected
                       results once the file has been scraped
    """
    expected = parse_results(filename, "image/jpeg", result_dict, True)
    # Compression is only expected for well-formed files.
    if expected.well_formed:
        first_stream = expected.streams[0]
        first_stream["compression"] = "jpeg"

    wand = WandScraper(filename=expected.filename, mimetype="image/jpeg")
    wand.scrape_file()

    evaluate_scraper(wand, expected)
def test_scraper_tif(filename, result_dict, evaluate_scraper):
    """
    Test scraper with valid tiff files.

    Every expected stream gets the same compression, byte order and version
    values, and takes its MIME type and stream type from stream 0.

    :filename: Test file name, passed on to parse_results
    :result_dict: Result dict passed on to parse_results
    :evaluate_scraper: Callable invoked with the scraper and the expected
                       results once the file has been scraped
    """
    correct = parse_results(filename, "image/tiff", result_dict, True)

    # Streams are addressed by their position 0..n-1; enumerate yields those
    # positions without the manual range(0, len(...)) construct.
    for index, _ in enumerate(correct.streams):
        stream = correct.streams[index]
        stream["compression"] = "zip"
        stream["byte_order"] = "little endian"
        stream["mimetype"] = correct.streams[0]["mimetype"]
        stream["stream_type"] = correct.streams[0]["stream_type"]
        stream["version"] = "(:unav)"

    scraper = WandScraper(correct.filename)
    scraper.scrape_file()

    evaluate_scraper(scraper, correct)
def test_scraper_gif(filename, result_dict, evaluate_scraper):
    """
    Check WandScraper results for valid gif files.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected streams
    :evaluate_scraper: Callable invoked with the scraper and the expected
                       results once the file has been scraped
    """
    expected = parse_results(filename, "image/gif", result_dict, True)
    # Compression is only expected for well-formed files.
    if expected.well_formed:
        first_stream = expected.streams[0]
        first_stream["compression"] = "lzw"
    # NOTE(review): the version override is assumed to apply to every stream
    # regardless of well-formedness -- confirm against the intended nesting.
    for gif_stream in expected.streams.values():
        gif_stream["version"] = UNAV

    wand = WandScraper(filename=expected.filename, mimetype="image/gif")
    wand.scrape_file()

    evaluate_scraper(wand, expected)
def test_scraper_invalid(filename, mimetype, stderr_part):
    """
    Check how WandScraper reports invalid files.

    The scraper is expected to produce no streams and no messages, to report
    an error containing the given text, and to be not well-formed.

    :filename: Test file name
    :mimetype: File MIME type; also selects the data directory, with "/"
               replaced by "_"
    :stderr_part: Part of the expected stderr
    """
    data_dir = mimetype.replace("/", "_")
    path = os.path.join("tests/data/", data_dir, filename)

    wand = WandScraper(filename=path, mimetype=mimetype)
    wand.scrape_file()

    assert not wand.streams
    assert wand.info()["class"] == "WandScraper"
    assert not wand.messages()
    error_found = partial_message_included(stderr_part, wand.errors())
    assert error_found
    assert not wand.well_formed
def test_scraper_tif(filename, result_dict, evaluate_scraper):
    """
    Check WandScraper results for valid tiff files.

    Every expected stream gets the same compression, byte order and version
    values, and takes its MIME type and stream type from stream 0.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected streams
    :evaluate_scraper: Callable invoked with the scraper and the expected
                       results once the file has been scraped
    """
    expected = parse_results(filename, "image/tiff", result_dict, True)

    for position, _ in enumerate(expected.streams):
        current = expected.streams[position]
        current["compression"] = "zip"
        current["byte_order"] = "little endian"
        # MIME type and stream type are copied from the first stream.
        current["mimetype"] = expected.streams[0]["mimetype"]
        current["stream_type"] = expected.streams[0]["stream_type"]
        current["version"] = UNAV

    wand = WandScraper(filename=expected.filename, mimetype="image/tiff")
    wand.scrape_file()

    evaluate_scraper(wand, expected)
def test_no_wellformed():
    """
    Test scraper without well-formed check.

    Scrapes a valid tiff file with False passed as the second constructor
    argument, then verifies that no "Skipping scraper" message was produced
    and that well_formed is None.
    """
    wand = WandScraper("tests/data/image_tiff/valid_6.0.tif", False)
    wand.scrape_file()

    skipped = partial_message_included("Skipping scraper", wand.messages())
    assert not skipped
    assert wand.well_formed is None