def create_data_collection(remote: RemoteConfig):
    """Create a folder structure and a collection config describing it.

    Args:
        remote: Remote configuration stored as the collection's
            ``data_remote``.

    Returns:
        A ``(base_dir, config)`` tuple. ``base_dir`` is the first value
        returned by ``create_realistic_folder_structure()``; ``config`` is a
        ``DataCollectionConfig`` named ``"test"`` built from the folders and
        files that the same call returned.
    """
    root, folder_names, file_names = create_realistic_folder_structure()
    config = DataCollectionConfig(
        name="test",
        folders=folder_names,
        files=file_names,
        data_remote=remote,
    )
    return root, config
def read_from_package(cls, base_dir: Path):
    """Read the readme, package and collection configs of a package.

    The readme and package configs are read from their default file names
    directly inside ``base_dir``. The collection config is read from a
    resource directory whose path, relative to ``base_dir``, is stored as
    the content of the ``.resource_folder`` file.

    Args:
        base_dir: Root directory of the package.

    Returns:
        A ``DataRepoConfig`` combining the collection, readme and package
        configs.

    Raises:
        OSError: If the ``.resource_folder`` file cannot be opened.
    """
    readme_config = ReadmeConfig.read(base_dir / ReadmeConfig.get_default_name())
    package_config = PackageConfig.read(
        base_dir / PackageConfig.get_default_name())

    resource_dir_link = base_dir / ".resource_folder"
    with open(resource_dir_link, "r") as f:
        # Strip surrounding whitespace: a trailing newline left by an editor
        # or `echo` would otherwise become part of the directory name and
        # make the collection config lookup fail.
        relative_resource_dir = f.read().strip()

    resource_dir = base_dir / relative_resource_dir
    collection_config = DataCollectionConfig.read(
        resource_dir / DataCollectionConfig.get_default_name())

    return DataRepoConfig(
        collection=collection_config,
        readme=readme_config,
        package=package_config,
    )
def test_readme():
    """Write a ``DatasetReadme`` for a dummy collection into a test folder.

    Smoke test: there are no assertions, so it passes as long as building
    the readme and writing it to the folder does not raise.
    """
    root = create_standard_folder_structure()
    dummy_collection = DataCollectionConfig(
        name="dummy",
        files=[],
        folders=["train", "test", "val"],
        data_remote=None,
    )
    DatasetReadme(base_dir=root, config=dummy_collection).write_to_folder(root)
def __init__(self, resource_dir: Path):
    """Set up the tagged repo from the collection config in ``resource_dir``.

    Reads ``collection_config.json`` from ``resource_dir``, builds a
    ``FileRemoteTaggedRepo`` from the configured remote and data folder of
    the collection's ``data_remote``, and finally calls
    ``self._retrieve_mappings()``.

    Args:
        resource_dir: Directory containing ``collection_config.json``.
    """
    super().__init__()
    self._resource_dir = resource_dir

    config_path = self._resource_dir / "collection_config.json"
    remote_settings = DataCollectionConfig.read(config_path).data_remote

    # NOTE(review): data_remote is dereferenced unconditionally, so a
    # collection saved without a remote fails here with AttributeError.
    self._tagged_repo = FileRemoteTaggedRepo(
        remote=remote_settings.config.get_configured_remote(),
        data_folder=remote_settings.data_folder,
    )
    self._retrieve_mappings()
def get_collection_sequence(expected_result: DataCollectionConfig = None):
    """Build the list of answers that corresponds to a collection config.

    Args:
        expected_result: Config the answer sequence should correspond to.
            When ``None``, a default config (name ``"name"``, folders
            ``["val", "test"]``, files ``["01.jpg"]``) is created together
            with the remote config from ``get_remote_config_sequence()``.

    Returns:
        A ``(sequence, expected_result)`` tuple. ``sequence`` is the name,
        the folders, an empty string, the files, another empty string, and
        then the remote sequence.
    """
    if expected_result is not None:
        # Only the answer sequence is needed here; the remote config is
        # already part of the given expected_result.
        remote_sequence, _ = get_remote_config_sequence(
            expected_result.data_remote)
    else:
        remote_sequence, default_remote = get_remote_config_sequence()
        expected_result = DataCollectionConfig(
            name="name",
            folders=["val", "test"],
            files=["01.jpg"],
            data_remote=default_remote,
        )

    # Each list is followed by an empty string.
    folder_answers = expected_result.folders + [""]
    file_answers = expected_result.files + [""]
    sequence = ([expected_result.name] + folder_answers + file_answers
                + remote_sequence)
    return sequence, expected_result
def dummy_config() -> DataCollectionConfig:
    """Return a placeholder collection config backed by an empty S3 remote.

    Returns:
        A ``DataCollectionConfig`` named ``"dummy"`` with no files or
        folders, whose remote is an S3 config with empty url, profile and
        bucket and the data folder ``"data"``.
    """
    remote = RemoteConfig(
        config=S3Config(url="", profile="", bucket=""),
        data_folder="data",
        remote_type=RemoteType.S3,
    )
    return DataCollectionConfig(
        name="dummy",
        data_remote=remote,
        files=[],
        folders=[],
    )
def test_collection_prompt(monkeypatch):
    """Check that ``DataCollectionConfig.prompt()`` yields the expected config.

    ``builtins.input`` is replaced with a ``MonkeyPatchSequence`` over the
    prepared answers (presumably replayed one per call; confirm against
    ``MonkeyPatchSequence``).
    """
    answers, expected_config = get_collection_sequence()
    monkeypatch.setattr("builtins.input", MonkeyPatchSequence(answers))

    prompted_config = DataCollectionConfig.prompt()

    assert prompted_config == expected_config
def prompt():
    """Prompt for each sub-config and combine them into a ``DataRepoConfig``.

    Returns:
        A ``DataRepoConfig`` built from the prompted collection, readme and
        package configs.
    """
    # Keyword arguments are evaluated left to right, so the prompts run in
    # the order collection -> readme -> package.
    return DataRepoConfig(
        collection=DataCollectionConfig.prompt(),
        readme=ReadmeConfig.prompt(),
        package=PackageConfig.prompt(),
    )