def test_scraper_valid(filename, result_dict, params, evaluate_scraper):
    """
    Test XmllintScraper against a file and its expected results.

    :filename: Test file name
    :result_dict: Result dict containing test purpose, and parts of
                  expected results of stdout and stderr
    :params: Extra parameters for Scraper
    :evaluate_scraper: Callable that compares the scraper with the
                       expected results
    """
    xml_mimetype = "text/xml"
    correct = parse_results(
        filename, xml_mimetype, result_dict, True, params)

    scraper = XmllintScraper(
        filename=correct.filename,
        mimetype=xml_mimetype,
        params=correct.params,
    )
    scraper.scrape_file()

    if correct.well_formed:
        # Expected to be well-formed: run the full comparison.
        evaluate_scraper(scraper, correct)
    else:
        # Expected to fail: no streams, and the expected parts of the
        # output must be present in messages and errors.
        assert not scraper.well_formed
        assert not scraper.streams
        assert partial_message_included(
            correct.stdout_part, scraper.messages())
        assert partial_message_included(
            correct.stderr_part, scraper.errors())

    # In both cases the messages must not contain the "<note>" text.
    assert not partial_message_included("<note>", scraper.messages())
def test_scraper_pdf(filename, result_dict, evaluate_scraper):
    """
    Test GhostscriptScraper with each listed PDF version.

    :filename: Test file name template, where "X" is replaced with each
               version in turn
    :result_dict: Result dict containing test purpose, and parts of
                  expected results of stdout and stderr
    :evaluate_scraper: Callable that compares the scraper with the
                       expected results
    """
    for ver in ["1.7", "A-1a", "A-2b", "A-3b"]:
        # Build the name from the untouched template on every round.
        # Rebinding ``filename`` itself would consume the "X" placeholder
        # during the first iteration, so the remaining versions would
        # silently re-test the first file.
        versioned_filename = filename.replace("X", ver)
        correct = parse_results(versioned_filename, "application/pdf",
                                result_dict, True)
        scraper = GhostscriptScraper(correct.filename, True, correct.params)
        scraper.scrape_file()

        # Ghostscript cannot handle version or MIME type
        correct.version = "(:unav)"
        correct.streams[0]["version"] = "(:unav)"
        correct.mimetype = "(:unav)"
        correct.streams[0]["mimetype"] = "(:unav)"

        # Output is checked separately below, so skip it here.
        evaluate_scraper(scraper, correct, eval_output=False)

        if scraper.well_formed:
            assert not partial_message_included("Error", scraper.messages())
            assert not scraper.errors()
        else:
            assert partial_message_included(correct.stderr_part,
                                            scraper.errors())
            assert partial_message_included(correct.stdout_part,
                                            scraper.messages())
def test_existing_files(filepath):
    """
    Test that FileExists reports an existing file as found.

    :filepath: Path of the file given to the scraper
    """
    scraper = FileExists(filepath, True)
    scraper.scrape_file()
    expected_streams = DEFAULTSTREAMS.copy()

    # The scraper takes no stand on well-formedness; it only reports
    # that the file was found.
    assert scraper.well_formed is None
    assert not scraper.errors()
    assert partial_message_included("was found", scraper.messages())
    assert scraper.info()["class"] == "FileExists"

    # Each expected metadata key is a method name on the scraped stream.
    for index, expected_metadata in six.iteritems(expected_streams):
        scraped_stream = scraper.streams[index]
        for method_name, expected_value in six.iteritems(expected_metadata):
            metadata_method = getattr(scraped_stream, method_name)
            assert metadata_method() == expected_value
def test_overriding_filetype(given_mimetype, given_version,
                             expected_mimetype, expected_version,
                             expected_message):
    """
    Test forcing the base scraper to use certain MIME type and/or version.

    :given_mimetype: MIME type passed to the scraper in params
    :given_version: Version passed to the scraper in params
    :expected_mimetype: MIME type the first stream should report
    :expected_version: Version the first stream should report
    :expected_message: Part of the expected message; a falsy value means
                       that no messages are expected at all
    """
    forced_filetype = {
        "mimetype": given_mimetype,
        "version": given_version
    }
    scraper = BaseScraperBasic("testfilename", params=forced_filetype)
    scraper.scrape_file()

    first_stream = scraper.streams[0]
    assert first_stream.mimetype() == expected_mimetype
    assert first_stream.version() == expected_version

    if not expected_message:
        assert not scraper.messages()
    else:
        assert partial_message_included(expected_message,
                                        scraper.messages())
def test_check_supported(scraper_class, mimetype, version, errors):
    """
    Test the _check_supported() method of a scraper.

    :scraper_class: Test scraper class
    :mimetype: File MIME type
    :version: File format version
    :errors: Part of the expected errors; a falsy value means that no
             errors are expected
    """
    # pylint: disable=protected-access
    scraper = scraper_class("testfilename", mimetype)

    # Append a metadata stream by hand instead of scraping a file.
    custom_stream = BaseMetaCustom(mimetype=mimetype, version=version)
    scraper.streams.append(custom_stream)
    scraper._check_supported()

    if errors:
        assert partial_message_included(errors, scraper.errors())
    else:
        assert not scraper.errors()
def test_scraper_invalid(filename, mimetype, stderr_part):
    """
    Test WandScraper with invalid files.

    :filename: Test file name
    :mimetype: File MIME type
    :stderr_part: Part of the expected stderr
    """
    # Test data is located in a directory named after the MIME type,
    # with "/" replaced by "_".
    data_dir = mimetype.replace("/", "_")
    testfile = os.path.join("tests/data/", data_dir, filename)

    scraper = WandScraper(filename=testfile, mimetype=mimetype)
    scraper.scrape_file()

    # An invalid file yields no streams and no messages, only errors.
    assert not scraper.streams
    assert scraper.info()["class"] == "WandScraper"
    assert not scraper.messages()
    assert partial_message_included(stderr_part, scraper.errors())
    assert not scraper.well_formed
def test_existing_files(filename, mimetype, is_textfile, evaluate_scraper):
    """
    Test that TextfileScraper detects whether a file is a text file.

    :filename: Test file name
    :mimetype: File MIME type
    :is_textfile: Whether the file is expected to be a text file
    :evaluate_scraper: Callable that compares the scraper with the
                       expected results
    """
    correct = parse_results(filename, mimetype, {}, True)
    scraper = TextfileScraper(correct.filename, True)
    scraper.scrape_file()

    # Override the expected results: no version, and the stream values
    # below are all expected to be "(:unav)".
    correct.version = None
    expected_stream = correct.streams[0]
    for key in ("version", "mimetype", "stream_type"):
        expected_stream[key] = "(:unav)"
    correct.well_formed = is_textfile

    if not correct.well_formed:
        assert partial_message_included(INVALID_MSG, scraper.errors())
        assert scraper.errors()
        assert not scraper.well_formed
    else:
        correct.stdout_part = VALID_MSG
        correct.stderr_part = ""
        evaluate_scraper(scraper, correct)
def test_scraper_correct_application(filename, mimetype, application):
    """
    Test that the correct LibreOffice application is selected.

    If all necessary LibreOffice components are not installed, some files
    may be scraped with a different application than intended (e.g. using
    Impress for ODG files), and this may work, but it should not be relied
    on. This test makes sure that all components are present and they are
    used for the correct files.

    :filename: Test file name
    :mimetype: File MIME type
    :application: Correct office application
    """
    mimetype_dir = mimetype.replace("/", "_")
    testfile = os.path.join("tests/data", mimetype_dir, filename)

    scraper = OfficeScraper(filename=testfile, mimetype=mimetype)
    scraper.scrape_file()

    # The messages are expected to name the filter that was used.
    expected_filter = "using filter : {}".format(application)
    assert partial_message_included(expected_filter, scraper.messages())
def test_no_wellformed():
    """Test that VnuScraper is skipped without well-formed check."""
    testfile = "tests/data/text_html/valid_5.0.html"
    scraper = VnuScraper(testfile, False)
    scraper.scrape_file()

    # The scraper reports being skipped and gives no verdict.
    scraper_messages = scraper.messages()
    assert partial_message_included("Skipping scraper", scraper_messages)
    assert scraper.well_formed is None
def test_no_wellformed():
    """Test that WandScraper is not skipped without well-formed check."""
    testfile = "tests/data/image_tiff/valid_6.0.tif"
    scraper = WandScraper(testfile, False)
    scraper.scrape_file()

    # No skip message is expected here, but there is still no verdict
    # on well-formedness.
    scraper_messages = scraper.messages()
    assert not partial_message_included("Skipping scraper",
                                        scraper_messages)
    assert scraper.well_formed is None
def test_no_wellformed():
    """Test that PngcheckScraper is skipped without well-formed check."""
    testfile = "tests/data/image_png/valid_1.2.png"
    scraper = PngcheckScraper(testfile, False)
    scraper.scrape_file()

    # The scraper reports being skipped and gives no verdict.
    scraper_messages = scraper.messages()
    assert partial_message_included("Skipping scraper", scraper_messages)
    assert scraper.well_formed is None
def test_no_wellformed():
    """Test that PsppScraper is skipped without well-formed check."""
    testfile = "tests/data/application_x-spss-por/valid.por"
    scraper = PsppScraper(testfile, False)
    scraper.scrape_file()

    # The scraper reports being skipped and gives no verdict.
    scraper_messages = scraper.messages()
    assert partial_message_included("Skipping scraper", scraper_messages)
    assert scraper.well_formed is None