def from_folder(folder: Path) -> List["DiskData"]:
    """Wrap every entry found recursively under *folder* in a ``DiskData``.

    Each path yielded by ``folder.rglob("*")`` becomes one ``DiskData`` whose
    ``relative_path`` is computed against *folder* and whose ``file_path`` is
    the yielded path itself.

    Args:
        folder: Directory that is walked recursively.

    Returns:
        One ``DiskData`` per path found, in the order ``rglob`` yields them.
    """
    # NOTE(review): rglob("*") yields directories as well as regular files,
    # so directories are wrapped too -- confirm this is intended.
    return [
        DiskData(
            relative_path=get_relative_path(base_dir=folder, file=entry),
            file_path=entry,
        )
        for entry in folder.rglob("*")
    ]
def get_template_destination(self, template_file: Path, template_source: Path):
    """Return the output path for *template_file* and ensure its parent exists.

    The path of *template_file* relative to *template_source* is passed
    through ``render_template_path`` together with ``self.collection_config``;
    the result is joined onto ``self.base_dir``.

    Args:
        template_file: Template file whose destination is wanted.
        template_source: Directory that *template_file* is made relative to.

    Returns:
        The destination path under ``self.base_dir``.
    """
    rendered = render_template_path(
        get_relative_path(template_source, template_file),
        self.collection_config,
    )
    target = self.base_dir / rendered

    # Side effect: the destination's parent directories are created eagerly.
    target.parent.mkdir(parents=True, exist_ok=True)
    return target
def test_rel_file():
    """Check ``get_relative_path`` for a nested file and for a directory.

    The base directory and the directory target are each spelled with and
    without a trailing slash; the expected relative string is the same in
    both spellings.
    """
    nested_file = Path("/home/user/datasets/name/train/01.jpg")

    # (base_dir, target, expected relative path)
    cases = [
        (Path("/home/user/datasets"), nested_file, "name/train/01.jpg"),
        (Path("/home/user/datasets/"), nested_file, "name/train/01.jpg"),
        (
            Path("/home/user/datasets/"),
            Path("/home/user/datasets/images"),
            "images",
        ),
        (
            Path("/home/user/datasets/"),
            Path("/home/user/datasets/images/"),
            "images",
        ),
    ]

    for base_dir, target, expected in cases:
        assert get_relative_path(base_dir=base_dir, file=target) == expected
def write_vdir_to_folder(self, directory: Path) -> Path:
    """Write this object's vdir description as JSON into *directory*.

    The file is named by ``self.get_vdir_name()`` and holds two keys:
    ``"data_folder"`` (``self.data_dir`` relative to ``self.base_dir``) and
    ``"tag"`` (``self.tag``), indented by four spaces.

    Args:
        directory: Folder in which the vdir file is created.

    Returns:
        Path of the file that was written.
    """
    vdir_file = directory / self.get_vdir_name()

    with open(vdir_file, "w") as handle:
        payload = {
            "data_folder": get_relative_path(self.base_dir, self.data_dir),
            "tag": self.tag,
        }
        json.dump(payload, handle, indent=4)

    return vdir_file
def test_python_package():
    """Install the generated package into a fresh venv and verify a download.

    Steps:
      1. Create a test package via ``setup_test_package``.
      2. Create a virtual environment and upgrade ``setuptools``/``pip`` in it.
      3. Install the project found three directories above this file, then
         the test package itself.
      4. Run the ``download_to_folder.py_test`` script with the venv's
         interpreter, targeting a temporary folder.
      5. Assert that every original file has a byte-identical counterpart at
         the same relative path in the download folder.
    """
    base_dir, _git_url, original_files = setup_test_package()
    print(base_dir)

    venv_dir = get_temp_path(prefix="venv_dir") / "venv"
    # Interpreter location inside the venv (the "bin/python" layout).
    interpreter = str(venv_dir / "bin" / "python")
    poif_setup_location = Path(__file__).parents[3]

    def pip_install(*args):
        # Run `pip install` inside the venv; a non-zero exit raises.
        subprocess.run([interpreter, "-m", "pip", "install", *args], check=True)

    subprocess.run(["python3", "-m", "venv", str(venv_dir)], check=True)
    pip_install("--upgrade", "setuptools", "pip")
    pip_install(str(poif_setup_location))
    pip_install(str(base_dir))

    script_dir = Path(__file__).parent / "python_package"
    download_script = script_dir / "download_to_folder.py_test"
    download_to_folder = get_temp_path(prefix="download_to_folder")
    print(download_to_folder)

    subprocess.run(
        [interpreter, str(download_script), str(download_to_folder)],
        check=True,
    )

    # Guard against a vacuous pass when the package contains no files.
    assert len(original_files) > 0

    for original_file in original_files:
        rel_path = get_relative_path(base_dir=base_dir, file=original_file)

        with open(original_file, "rb") as source:
            original_bytes = source.read()
        with open(download_to_folder / rel_path, "rb") as copy:
            downloaded_bytes = copy.read()

        assert original_bytes == downloaded_bytes
def write_to_package(self, package: Package):
    """Write this object's files into *package* and record each created file.

    ``self.package`` and ``self.readme`` are written into
    ``package.base_dir``; ``self.collection`` is written into the package's
    resource directory. Finally a ``.resource_folder`` file is written in
    ``package.base_dir`` containing the resource directory's path relative to
    ``package.base_dir``. Every written file is passed to
    ``self.add_created_file``.

    Args:
        package: Package providing ``base_dir`` and
            ``get_resource_directory()``.
    """
    resource_dir = package.get_resource_directory()

    # Files that are written in at the top level, used for updating the dataset
    # come first; resource-directory files follow.
    placements = (
        (package.base_dir, (self.package, self.readme)),
        (resource_dir, (self.collection,)),
    )

    for target_dir, items in placements:
        for item in items:
            written = target_dir / item.get_default_name()
            item.write(written)
            self.add_created_file(written)

    # Pointer file that records where the resource directory lives.
    link_file = package.base_dir / ".resource_folder"
    with open(link_file, "w") as handle:
        handle.write(
            get_relative_path(base_dir=package.base_dir, file=resource_dir)
        )
    self.add_created_file(link_file)
def _get_file_name(self):
    """Return ``self.data_dir`` relative to ``self.base_dir`` with "/" as "_".

    Returns:
        The relative path string with every forward slash replaced by an
        underscore.
    """
    return get_relative_path(self.base_dir, self.data_dir).replace("/", "_")
def __init__(self, base_dir: Path, file_path: Path, tag: FileHash = None):
    """Initialise from a file and the directory it is made relative to.

    The relative path is derived with ``get_relative_path(base_dir,
    file_path)`` and forwarded to the parent initialiser together with
    *file_path* and *tag*.

    Args:
        base_dir: Directory that *file_path* is made relative to.
        file_path: Path of the file this object represents.
        tag: Optional value forwarded unchanged to the parent initialiser.
    """
    super().__init__(
        file_path,
        get_relative_path(base_dir, file_path),
        tag=tag,
    )