def gettree(lp: LocalPath):
    """Recursively snapshot *lp*: a directory becomes a dict mapping child
    basenames to subtrees; a file becomes its UTF-8 text content.

    Raises a plain ``Exception`` for anything that is neither.
    """
    assert lp.check()
    if lp.isdir():
        tree = {}
        for child in lp.listdir():
            tree[child.basename] = gettree(child)
        return tree
    if lp.isfile():
        return lp.read_text("utf8")
    raise Exception("not directory or file: {}".format(lp))
def test_increment_without_metadata_without_schema(
    self,
    capsys: CaptureFixture,
    archive_dir: LocalPath,
    archive_fixture: "TestArchive.ArchiveCacheAndHashPassthruChecker",
    schema_file: Optional[LocalPath],
    verbose: bool,
):
    """Archive each data file in sequence and verify the cache, output
    capture, and hash file after every increment."""
    # (expected frame filename, data filename) pairs, applied in order.
    targets: List[Tuple[str, str]] = [
        ("iris-part-1-of-6-combined.csv", "iris-part-1-of-6.csv"),
        ("iris-part-1-2.csv", "iris-part-2-of-6.csv"),
        ("iris-part-1-2-3.csv", "iris-part-3-of-6.csv"),
        ("iris-part-1-2-3-4.csv", "iris-part-4-of-6.csv"),
        ("iris-part-1-2-3-4-5.csv", "iris-part-5-of-6.csv"),
        ("iris_plus.csv", "iris-part-6-of-6.csv"),
    ]

    # The hash file defaults to DEFAULT_HASH_FILE beside the cache file.
    if archive_fixture.hash_file is None:
        expected_hashfile = LocalPath(archive_fixture.cache_file).dirpath(
            DEFAULT_HASH_FILE
        )
    else:
        expected_hashfile = archive_fixture.hash_file

    # Preconditions: nothing has been archived yet.
    assert not os.path.exists(expected_hashfile)
    assert not os.path.exists(archive_fixture.cache_file)
    assert len(archive_dir.listdir()) == 0

    for expected_frame_filename, data_filename in targets:
        assert archive_fixture(
            archive_dir,
            [os.path.join(get_data_path(), data_filename)],
            cache_filepath=archive_fixture.cache_file,
            hash_filepath=archive_fixture.hash_file,
            verbose=verbose,
        )
        assert_captured_outerr(capsys.readouterr(), verbose, False)

        expected_frame = DataFrame(
            read_csv(
                os.path.join(get_data_path(), expected_frame_filename),
                dtype=str,
                index_col="Index",
            )
        )
        # Without metadata the cache has no Species/PetalColor columns,
        # so remove them from the expectation before comparing.
        del expected_frame["Species"]
        del expected_frame["PetalColor"]
        expected_frame.sort_index(inplace=True)

        actual_frame = DataFrame(
            read_csv(str(archive_fixture.cache_file), dtype=str, index_col="Index")
        )
        actual_frame.sort_index(inplace=True)

        assert_captured_outerr(capsys.readouterr(), False, False)
        assert_frame_equal(expected_frame, actual_frame)

    # The full archive must verify against the recorded hashes.
    assert os.path.exists(expected_hashfile)
    assert syphon.check(
        archive_fixture.cache_file,
        hash_filepath=expected_hashfile,
        verbose=verbose,
    )
def gettree(lp: LocalPath, max_len=120):
    """Return a nested-dict snapshot of the file tree rooted at *lp*.

    Directories map child basenames to subtrees; files map to their text,
    truncated to ``max_len`` characters with a ``...`` suffix (``max_len=0``
    disables truncation).
    """
    assert lp.check()
    if not lp.isdir():
        assert lp.isfile()
        text = lp.read_text('utf8')
        if max_len and len(text) > max_len:
            return text[:max_len - 3] + '...'
        return text
    subtree = {}
    for entry in lp.listdir():
        subtree[entry.basename] = gettree(entry, max_len=max_len)
    return subtree
def test_crash_in_file(
    linter: PyLinter, capsys: CaptureFixture, tmpdir: LocalPath
) -> None:
    """A crash during linting must emit exactly one crash-report file and
    nothing on stdout/stderr."""
    args = linter.load_command_line_configuration([__file__])
    # %Y expands to the current year in the generated report filename.
    linter.crash_file_path = str(tmpdir / "pylint-crash-%Y")
    linter.check(args)

    captured = capsys.readouterr()
    assert not captured.out
    assert not captured.err

    report_files = tmpdir.listdir()
    assert len(report_files) == 1
    assert "pylint-crash-20" in str(report_files[0])
    with open(report_files[0], encoding="utf8") as stream:
        report_text = stream.read()
    assert "Failed to import module spam." in report_text
def test_init(archive_dir: LocalPath):
    """An empty archive directory is initialized via the main entry point
    and ends up holding the expected schema file."""
    assert len(archive_dir.listdir()) == 0
    assert syphon.__main__.main(_init_args(archive_dir)) == 0
    schema_path = os.path.join(archive_dir, syphon.schema.DEFAULT_FILE)
    assert syphon.schema.load(schema_path) == SCHEMA
def test_increment_one_to_many_with_metadata_with_schema(
    self,
    capsys: CaptureFixture,
    archive_dir: LocalPath,
    archive_fixture: "TestArchive.ArchiveCacheAndHashPassthruChecker",
    schema_file: Optional[LocalPath],
    verbose: bool,
):
    """Archive each data file with its two metadata files (one-to-many
    mapping) under an initialized schema, checking the cache after every
    increment."""
    # Expected combined-frame filename for each successive increment.
    expected_names = [
        "iris-part-1-of-6-combined.csv",
        "iris-part-1-2.csv",
        "iris-part-1-2-3.csv",
        "iris-part-1-2-3-4.csv",
        "iris-part-1-2-3-4-5.csv",
        "iris_plus.csv",
    ]
    # (expected frame filename, data filename, metadata filenames) triples;
    # data and metadata names follow a fixed per-part pattern.
    targets: List[Tuple[str, str, List[str]]] = [
        (
            expected,
            "iris-part-{}-of-6.csv".format(part),
            [
                "iris-part-{}-of-6-meta-part-1-of-2.meta".format(part),
                "iris-part-{}-of-6-meta-part-2-of-2.meta".format(part),
            ],
        )
        for part, expected in enumerate(expected_names, start=1)
    ]

    # The hash file defaults to DEFAULT_HASH_FILE beside the cache file.
    if archive_fixture.hash_file is None:
        expected_hashfile = LocalPath(archive_fixture.cache_file).dirpath(
            DEFAULT_HASH_FILE
        )
    else:
        expected_hashfile = archive_fixture.hash_file

    # Preconditions: nothing has been archived yet.
    assert not os.path.exists(expected_hashfile)
    assert not os.path.exists(archive_fixture.cache_file)
    assert len(archive_dir.listdir()) == 0

    # The schema file defaults to DEFAULT_FILE inside the archive directory.
    if schema_file is None:
        expected_schemafile = archive_dir.join(syphon.schema.DEFAULT_FILE)
    else:
        expected_schemafile = schema_file
    assert not os.path.exists(expected_schemafile)
    syphon.init(
        SortedDict({"0": "PetalColor", "1": "Species"}), expected_schemafile
    )
    assert os.path.exists(expected_schemafile)

    for expected_frame_filename, data_filename, metadata_filenames in targets:
        assert archive_fixture(
            archive_dir,
            [os.path.join(get_data_path(), data_filename)],
            meta_files=[
                os.path.join(get_data_path(), m) for m in metadata_filenames
            ],
            filemap_behavior=MappingBehavior.ONE_TO_MANY,
            schema_filepath=schema_file,
            cache_filepath=archive_fixture.cache_file,
            hash_filepath=archive_fixture.hash_file,
            verbose=verbose,
        )
        assert_captured_outerr(capsys.readouterr(), verbose, False)

        expected_frame = DataFrame(
            read_csv(
                os.path.join(get_data_path(), expected_frame_filename),
                dtype=str,
                index_col="Index",
            )
        )
        expected_frame.sort_index(inplace=True)

        actual_frame = DataFrame(
            read_csv(str(archive_fixture.cache_file), dtype=str, index_col="Index")
        )
        # Align the cache's column order with the expectation before comparing.
        actual_frame = actual_frame.reindex(columns=expected_frame.columns)
        actual_frame.sort_index(inplace=True)

        assert_captured_outerr(capsys.readouterr(), False, False)
        assert_frame_equal(expected_frame, actual_frame)

    # The full archive must verify against the recorded hashes.
    assert os.path.exists(expected_hashfile)
    assert syphon.check(
        archive_fixture.cache_file,
        hash_filepath=expected_hashfile,
        verbose=verbose,
    )