Beispiel #1
0
def test_dataset_card(dataset_name):
    """Run three independent checks on the README of one dataset card.

    The checks are: parsing the README, validating the README (parsed with
    ``suppress_parsing_errors=True``), and validating the metadata read from
    the same file. A failing check does not stop the later ones; all
    collected messages are raised together in a single ``ValueError``.
    """
    readme_file = repo_path / "datasets" / dataset_name / "README.md"
    assert readme_file.exists()
    problems = []

    # Check 1: parse the README with the default arguments.
    try:
        ReadMe.from_readme(readme_file)
    except Exception as parsing_failure:
        problems += [
            f"The following issues have been found in the dataset cards:\nREADME Parsing:\n{parsing_failure}"
        ]

    # Check 2: parse again with parsing errors suppressed, then validate.
    try:
        ReadMe.from_readme(readme_file, suppress_parsing_errors=True).validate()
    except Exception as validation_failure:
        problems += [
            f"The following issues have been found in the dataset cards:\nREADME Validation:\n{validation_failure}"
        ]

    # Check 3: load the metadata from the README and validate it.
    try:
        DatasetMetadata.from_readme(readme_file).validate()
    except Exception as tags_failure:
        problems += [
            f"The following issues have been found in the dataset cards:\nYAML tags:\n{tags_failure}"
        ]

    if not problems:
        return
    raise ValueError("\n".join(problems))
Beispiel #2
0
def test_readme_from_readme_suppress_parsing_errors(readme_md):
    """Load a README from disk with ``suppress_parsing_errors=True``.

    The markdown is written to ``README.md`` inside a temporary directory and
    read back through ``ReadMe.from_readme``; the test passes as long as that
    call does not raise.
    """
    with tempfile.TemporaryDirectory() as workdir:
        readme_path = Path(workdir) / "README.md"
        readme_path.write_text(readme_md)
        ReadMe.from_readme(
            readme_path, example_yaml_structure, suppress_parsing_errors=True
        )
Beispiel #3
0
def test_readme_from_readme_parsing_errors(readme_md, expected_error):
    """Expect ``ReadMe.from_readme`` to raise ``ValueError`` with a given message.

    ``expected_error`` is a template whose ``{path}`` placeholder is filled
    with the temporary README path before it is matched literally (via
    ``re.escape``) against the raised error.
    """
    with tempfile.TemporaryDirectory() as workdir:
        readme_path = Path(workdir) / "README.md"
        readme_path.write_text(readme_md)
        message = expected_error.format(path=readme_path)
        pattern = re.escape(message)
        with pytest.raises(ValueError, match=pattern):
            ReadMe.from_readme(readme_path, example_yaml_structure)
Beispiel #4
0
def test_dataset_card(dataset_name):
    """Check that one dataset card's README and its metadata can both be loaded.

    Both loads are attempted even if the first one fails; every failure
    message is collected and raised together in a single ``ValueError``.
    """
    readme_file = repo_path / "datasets" / dataset_name / "README.md"
    assert readme_file.exists()
    problems = []

    # Check 1: load the README itself.
    try:
        ReadMe.from_readme(readme_file)
    except Exception as readme_failure:
        problems += [
            f"The following issues have been found in the dataset cards:\nREADME:\n{readme_failure}"
        ]

    # Check 2: load the metadata from the same file.
    try:
        DatasetMetadata.from_readme(readme_file)
    except Exception as tags_failure:
        problems += [
            f"The following issues have been found in the dataset cards:\nYAML tags:\n{tags_failure}"
        ]

    if not problems:
        return
    raise ValueError("\n".join(problems))
Beispiel #5
0
def test_readme_from_readme_correct(readme_md, expected_dict):
    """Load a well-formed README from disk and compare its dict form.

    The root entry is expected to be named after the file path, to have empty
    text, and to carry the same subsections as ``expected_dict``.
    """
    with tempfile.TemporaryDirectory() as workdir:
        readme_path = Path(workdir) / "README.md"
        readme_path.write_text(readme_md)
        parsed = ReadMe.from_readme(readme_path, example_yaml_structure)
        result = parsed.to_dict()
        assert result["name"] == readme_path
        assert result["text"] == ""
        assert result["is_empty_text"]
        assert result["subsections"] == expected_dict["subsections"]
    # NOTE(review): the enclosing function header is not visible in this excerpt;
    # this looks like the body of a standalone README-checking script rather than
    # part of the test above -- confirm against the original file.

    # Choose the README files to check.
    if args.check_all:
        # One "README.md" path per entry found under <repo_path>/datasets.
        readmes = [
            dd / "README.md" for dd in (repo_path / "datasets").iterdir()
        ]
    else:
        # Only files reported by get_changed_files() that still exist, are named
        # "readme.md" (case-insensitive) and whose grandparent directory is
        # called "datasets".
        changed_files = get_changed_files(repo_path)
        readmes = [
            f for f in changed_files
            if f.exists() and f.name.lower() == "readme.md"
            and f.parent.parent.name == "datasets"
        ]

    # Every README that could not be loaded; drives the exit status below.
    failed: List[Path] = []
    for readme in sorted(readmes):
        try:
            ReadMe.from_readme(readme)
            logging.debug(f"✅️ Validated '{readme.relative_to(repo_path)}'")
        except ValueError as e:
            # ValueError is reported as a validation failure.
            failed.append(readme)
            logging.warning(
                f"❌ Validation failed for '{readme.relative_to(repo_path)}':\n{e}"
            )
        except Exception as e:
            # Any other exception is reported as unexpected, but the loop still
            # continues with the remaining files.
            failed.append(readme)
            logging.warning(
                f"⁉️ Something unexpected happened on '{readme.relative_to(repo_path)}':\n{e}"
            )

    # Exit with status 1 when at least one file failed.
    if len(failed) > 0:
        logging.info(f"❌ Failed on {len(failed)} files.")
        exit(1)
Beispiel #7
0
def test_readme_from_string_suppress_parsing_errors(readme_md):
    """Build a ``ReadMe`` from a string with ``suppress_parsing_errors=True``.

    The test passes as long as ``ReadMe.from_string`` does not raise.
    """
    ReadMe.from_string(
        readme_md, example_yaml_structure, suppress_parsing_errors=True
    )
Beispiel #8
0
def test_readme_from_string_parsing_errors(readme_md, expected_error):
    """Expect ``ReadMe.from_string`` to raise ``ValueError`` with a given message.

    The ``{path}`` placeholder in ``expected_error`` is filled with ``"root"``
    and the result is matched literally (via ``re.escape``) against the error.
    """
    message = expected_error.format(path="root")
    pattern = re.escape(message)
    with pytest.raises(ValueError, match=pattern):
        ReadMe.from_string(readme_md, example_yaml_structure)
Beispiel #9
0
def test_readme_from_string_correct(readme_md, expected_dict):
    """Build a ``ReadMe`` from a string and compare its dict form to ``expected_dict``."""
    parsed = ReadMe.from_string(readme_md, example_yaml_structure)
    result = parsed.to_dict()
    assert result == expected_dict