def test_grade(file_path, expected_grade):
    """Test that scraper returns correct digital preservation grade."""
    scraper = Scraper(file_path)

    # The grade is unavailable until the file has actually been scraped.
    assert scraper.grade() == "(:unav)"

    # Once scraped, the file should receive the expected grade.
    scraper.scrape()
    assert scraper.grade() == expected_grade
def test_grading(fullname, mimetype, version):
    """Test grading for a valid test file.

    Test that file format is graded as recommended unless the file is
    explicitly listed as acceptable or unacceptable.
    """
    if fullname in UNAV_VERSION:
        pytest.skip(
            "File format version of file {} can not be defined.".format(
                fullname))

    charset = GIVEN_CHARSETS.get(fullname, None)
    scraper = Scraper(fullname, mimetype=mimetype, version=version,
                      charset=charset)
    scraper.scrape()

    # Each special-case listing maps to its grade; a file absent from
    # every listing defaults to the recommended grade. Order matters:
    # the first listing containing the file wins, mirroring precedence
    # of unacceptable > bit-level > bit-level-with-recommended > acceptable.
    listings = (
        (UNACCEPTABLE_FILES, UNACCEPTABLE),
        (BIT_LEVEL_FILES, BIT_LEVEL),
        (BIT_LEVEL_WITH_RECOMMENDED_FILES, BIT_LEVEL_WITH_RECOMMENDED),
        (ACCEPTABLE_FILES, ACCEPTABLE),
    )
    expected_grade = next(
        (grade for files, grade in listings if fullname in files),
        RECOMMENDED)

    assert scraper.grade() == expected_grade
def scrape_file(ctx, filename, check_wellformed, tool_info, mimetype, version):
    """
    Identify file type, collect metadata, and optionally check
    well-formedness.

    In addition to the given options, the user can provide any extra options
    that are passed onto the scraper. These options must be in the long form,
    e.g. "--charset=UTF-8" or "--charset UTF-8".
    \f
    :ctx: Context object
    :filename: Path to the file that should be scraped
    :check_wellformed: Flag whether the scraper checks wellformedness
    :tool_info: Flag whether the scraper includes messages from different 3rd
                party tools
    :mimetype: Specified mimetype for the scraped file
    :version: Specified version for the scraped file
    """
    # Any unrecognized long-form CLI options are forwarded to the scraper.
    extra_options = _extra_options_to_dict(ctx.args)
    scraper = Scraper(filename, mimetype=mimetype, version=version,
                      **extra_options)
    scraper.scrape(check_wellformed=check_wellformed)

    results = {
        "path": ensure_text(scraper.filename),
        "MIME type": ensure_text(scraper.mimetype),
        "version": ensure_text(scraper.version),
        "metadata": scraper.streams,
        "grade": scraper.grade()
    }
    if check_wellformed:
        results["well-formed"] = scraper.well_formed
    if tool_info:
        results["tool_info"] = scraper.info

    # If no proper scraper could handle the file, abort with a CLI error
    # instead of emitting results.
    info_items = list(scraper.info.values())
    if any("ScraperNotFound" in item["class"] for item in info_items):
        raise click.ClickException("Proper scraper was not found. The "
                                   "file was not analyzed.")

    # Collect per-scraper error messages, keyed by scraper class name.
    errors = {item["class"]: item["errors"]
              for item in info_items if item["errors"]}
    if errors:
        results["errors"] = errors

    click.echo(json.dumps(results, indent=4))