Example #1
0
def run_filetype_test(filename, result_dict, filetype, evaluate_scraper,
                      allowed_mimetypes=None):
    """
    Runs scraper result evaluation for a scraper with forced MIME type/version

    :filename: Name of the file, not containing the tests/data/mime_type/ part
    :result_dict: Result dict to be given to Correct
    :filetype: A dict containing the forced, expected and real file types under
               the following keys:
                * given_mimetype: the forced MIME type
                * given_version: the forced version
                * expected_mimetype: the expected resulting MIME type
                * expected_version: the expected resulting version
                * correct_mimetype: the real MIME type of the file
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       once the file has been scraped
    :allowed_mimetypes: Optional list handed on to force_correct_filetype;
                        a new empty list is used when omitted
    """
    # A fresh list per call: a literal [] default would be one shared object
    # that any callee mutation would leak into later calls.
    if allowed_mimetypes is None:
        allowed_mimetypes = []

    correct = force_correct_filetype(filename, result_dict,
                                     filetype, allowed_mimetypes)
    if correct.mimetype == "application/xhtml+xml":
        correct.streams[0]["stream_type"] = "text"

    # Guess the forced MIME type when one is given, otherwise the real one.
    mimetype_guess = filetype["given_mimetype"] or filetype["correct_mimetype"]
    params = {"mimetype": filetype["given_mimetype"],
              "version": filetype["given_version"],
              "mimetype_guess": mimetype_guess}
    scraper = PilScraper(correct.filename, True, params)
    scraper.scrape_file()

    evaluate_scraper(scraper, correct)
def test_is_supported(mime, ver):
    """
    Check the answers of PilScraper.is_supported for several argument sets.

    :mime: MIME type
    :ver: File format version
    """
    is_supported = PilScraper.is_supported

    # Every one of these argument combinations must be reported as supported.
    accepted_arguments = (
        (mime, ver, True),
        (mime, None, True),
        (mime, ver, False),
        (mime, "foo", True),
    )
    for arguments in accepted_arguments:
        assert is_supported(*arguments)

    # The MIME type "foo" must be rejected.
    assert not is_supported("foo", ver, True)
Example #3
0
def test_scraper_gif(filename, result_dict, evaluate_scraper):
    """Scrape a gif file with PilScraper and verify the outcome."""
    correct = parse_results(filename, "image/gif", result_dict, True)

    if correct.well_formed:
        # GIF is an index image
        correct.streams[0]["samples_per_pixel"] = "1"
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    # The expected version is marked unavailable in every stream.
    for stream_info in correct.streams.values():
        stream_info["version"] = "(:unav)"

    scraper = PilScraper(correct.filename, True, correct.params)
    scraper.scrape_file()

    if correct.well_formed:
        evaluate_scraper(scraper, correct)
        return

    # Expected failure: only messages are compared and no streams may exist.
    assert not scraper.well_formed
    info_messages = scraper.messages()
    assert partial_message_included(correct.stdout_part, info_messages)
    error_messages = scraper.errors()
    assert partial_message_included(correct.stderr_part, error_messages)
    assert not scraper.streams
def test_scraper_tif(filename, result_dict, evaluate_scraper):
    """
    Scrape a tiff file with PilScraper and verify the outcome.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected streams
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       when the file is expected to be well-formed
    """
    mimetype = "image/tiff"
    correct = parse_results(filename, mimetype, result_dict, True)

    if correct.well_formed:
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    scraper = PilScraper(filename=correct.filename, mimetype=mimetype)
    scraper.scrape_file()

    if not correct.well_formed:
        # Expected failure: only messages are compared, no streams may exist.
        assert not scraper.well_formed
        info_messages = scraper.messages()
        assert partial_message_included(correct.stdout_part, info_messages)
        error_messages = scraper.errors()
        assert partial_message_included(correct.stderr_part, error_messages)
        assert not scraper.streams
        return

    # The expected version is set to UNAV for each stream index.
    for index in range(len(correct.streams)):
        correct.streams[index]["version"] = UNAV
    evaluate_scraper(scraper, correct)
def test_scraper_gif(filename, result_dict, evaluate_scraper):
    """
    Scrape a gif file with PilScraper and verify the outcome.

    :filename: Test file name
    :result_dict: Result dict containing the test purpose, parts of
                  expected results of stdout and stderr, and expected streams
    :evaluate_scraper: Callable invoked as evaluate_scraper(scraper, correct)
                       when the file is expected to be well-formed
    """
    mimetype = "image/gif"
    correct = parse_results(filename, mimetype, result_dict, True)

    if correct.well_formed:
        # GIF is an index image
        correct.streams[0]["samples_per_pixel"] = "1"
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    # The expected version is set to UNAV in every stream.
    for stream_info in correct.streams.values():
        stream_info["version"] = UNAV

    scraper = PilScraper(filename=correct.filename, mimetype=mimetype)
    scraper.scrape_file()

    if correct.well_formed:
        evaluate_scraper(scraper, correct)
        return

    # Expected failure: only messages are compared and no streams may exist.
    assert not scraper.well_formed
    info_messages = scraper.messages()
    assert partial_message_included(correct.stdout_part, info_messages)
    error_messages = scraper.errors()
    assert partial_message_included(correct.stderr_part, error_messages)
    assert not scraper.streams
Example #6
0
def test_scraper_jp2(filename, result_dict, evaluate_scraper):
    """Scrape a jp2 file with PilScraper and verify the outcome."""
    correct = parse_results(filename, "image/jp2", result_dict, True)

    if correct.well_formed:
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    scraper = PilScraper(correct.filename, True, correct.params)
    scraper.scrape_file()

    if correct.well_formed:
        evaluate_scraper(scraper, correct)
        return

    # Expected failure: only messages are compared and no streams may exist.
    assert not scraper.well_formed
    info_messages = scraper.messages()
    assert partial_message_included(correct.stdout_part, info_messages)
    error_messages = scraper.errors()
    assert partial_message_included(correct.stderr_part, error_messages)
    assert not scraper.streams
Example #7
0
def test_scraper_tif(filename, result_dict, evaluate_scraper):
    """Scrape a tiff file with PilScraper and verify the outcome."""
    correct = parse_results(filename, "image/tiff", result_dict, True)

    if correct.well_formed:
        correct.stdout_part, correct.stderr_part = VALID_MSG, ""
    else:
        correct.stdout_part, correct.stderr_part = "", INVALID_MSG

    scraper = PilScraper(correct.filename, True, correct.params)
    scraper.scrape_file()

    if not correct.well_formed:
        # Expected failure: only messages are compared, no streams may exist.
        assert not scraper.well_formed
        info_messages = scraper.messages()
        assert partial_message_included(correct.stdout_part, info_messages)
        error_messages = scraper.errors()
        assert partial_message_included(correct.stderr_part, error_messages)
        assert not scraper.streams
        return

    # The expected version is marked unavailable for each stream index.
    for index, _ in enumerate(correct.streams):
        correct.streams[index]["version"] = "(:unav)"
    evaluate_scraper(scraper, correct)
Example #8
0
def test_no_wellformed():
    """Scrape a gif file with the well-formed check turned off."""
    gif_path = "tests/data/image_gif/valid_1987a.gif"
    pil_scraper = PilScraper(gif_path, False)
    pil_scraper.scrape_file()

    # The scraper must not report that it was skipped.
    reported = pil_scraper.messages()
    skipped = partial_message_included("Skipping scraper", reported)
    assert not skipped
    assert pil_scraper.well_formed is None