def benchmark_stat_many(self) -> None:
    """Benchmark per-URL ``stat`` calls of gsutilwrap against gswrap.

    For every test size, that many local files containing ``"hello"`` are
    written into a temporary directory and handed to ``_setup`` together with
    ``self.url_prefix`` (presumably uploading them -- confirm against
    ``_setup``). Each library then lists the prefix and the values returned
    by ``timer`` for every single ``stat`` call are added up. ``_tear_down``
    is always invoked for the prefix before the totals are reported through
    ``print_benchmark``.
    """
    for file_count in (10, 100):
        with temppathlib.TemporaryDirectory() as local_dir:
            for number in range(file_count):
                (local_dir.path / "file{}".format(number)).write_text("hello")

            try:
                _setup(url=self.url_prefix, path=local_dir.path)

                # gsutilwrap: list with a "**" wildcard, then stat one by one.
                gsutil_total = 0.0
                for remote_url in gsutilwrap.ls(self.url_prefix + "**"):
                    gsutil_total += timer(gsutilwrap.stat, url=remote_url)

                # gswrap: recursive listing, then stat one by one.
                gs_client = gswrap.Client()
                gswrap_total = 0.0
                for remote_url in gs_client.ls(
                        url=self.url_prefix, recursive=True):
                    gswrap_total += timer(gs_client.stat, url=remote_url)
            finally:
                _tear_down(url=self.url_prefix)

        label = "Benchmark stat {} files".format(file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_total)],
            time_gswrap=gswrap_total)
def setUp(self) -> None:
    """Create the client, a unique bucket prefix and a temporary directory.

    The client is switched to ``tests.common.TEST_GCS_BUCKET``; afterwards
    ``tests.common.gcs_test_setup`` is called with the name of the temporary
    directory and the freshly generated UUID prefix.
    """
    self.client = gswrap.Client()
    self.client._change_bucket(tests.common.TEST_GCS_BUCKET)

    unique_prefix = str(uuid.uuid4())
    self.bucket_prefix = unique_prefix

    scratch = tempfile.TemporaryDirectory()
    self.tmp_dir = scratch

    tests.common.gcs_test_setup(
        tmp_dir_name=scratch.name, prefix=unique_prefix)
def benchmark_rm_many(self) -> None:
    """Benchmark recursive, multithreaded removal in gsutilwrap and gswrap.

    For every test size, that many local files containing ``"hello"`` are
    written and passed to ``_setup`` (presumably uploading them -- confirm
    against ``_setup``) once before each library's removal call, so both
    removals are timed against a freshly prepared prefix.
    """
    for file_count in (10, 100, 1000):
        with temppathlib.TemporaryDirectory() as local_dir:
            for number in range(file_count):
                (local_dir.path / "file{}".format(number)).write_text("hello")

            # First round: gsutilwrap removes what was just set up.
            _setup(url=self.url_prefix, path=local_dir.path)
            gsutil_elapsed = timer(
                gsutilwrap.remove,
                pattern=self.url_prefix,
                quiet=True,
                multithreaded=True,
                recursive=True)

            # Second round: set up again and let gswrap remove it.
            _setup(url=self.url_prefix, path=local_dir.path)
            gs_client = gswrap.Client()
            gswrap_elapsed = timer(
                gs_client.rm,
                url=self.url_prefix,
                recursive=True,
                multithreaded=True)

        label = "Benchmark remove {} files".format(file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_elapsed)],
            time_gswrap=gswrap_elapsed)
def benchmark_write_many(self) -> None:
    """Benchmark many single ``write_text`` calls in gsutilwrap and gswrap.

    For every test size, the text ``"hello"`` is written to that many URLs
    below ``<url_prefix>/gsutil/`` with gsutilwrap and below
    ``<url_prefix>/gswrap/`` with gswrap. The values returned by ``timer``
    are added up per library; ``_tear_down`` is always called for the prefix
    before the totals are reported.
    """
    for file_count in (10, 30):
        try:
            gsutil_total = 0.0
            for number in range(file_count):
                target = "{}/gsutil/file{}".format(self.url_prefix, number)
                gsutil_total += timer(
                    gsutilwrap.write_text,
                    url=target,
                    text="hello",
                    quiet=True)

            gs_client = gswrap.Client()
            gswrap_total = 0.0
            for number in range(file_count):
                target = "{}/gswrap/file{}".format(self.url_prefix, number)
                gswrap_total += timer(
                    gs_client.write_text, url=target, text="hello")
        finally:
            _tear_down(url=self.url_prefix)

        label = "Benchmark write {} files".format(file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_total)],
            time_gswrap=gswrap_total)
def benchmark_copy_many_files_on_remote(self) -> None:
    """Benchmark a remote-to-remote copy in gsutilwrap and gswrap.

    For every test size, that many local files containing ``"hello"`` are
    written and passed to ``_setup`` (presumably uploading them -- confirm
    against ``_setup``). The prefix is then copied with ``_gsutilwrap_cp`` to
    ``<copy_url>/gsutil`` and with ``_gswrap_cp`` to ``<copy_url>/gswrap``,
    where ``copy_url`` is a fresh UUID location in ``self.bucket``. Both the
    source prefix and the copy location are torn down afterwards.
    """
    for file_count in (10, 100, 1000):
        with temppathlib.TemporaryDirectory() as local_dir:
            for number in range(file_count):
                (local_dir.path / "file{}".format(number)).write_text("hello")

            copy_url = "gs://{}/{}".format(self.bucket, str(uuid.uuid4()))

            try:
                _setup(url=self.url_prefix, path=local_dir.path)

                gsutil_elapsed = timer(
                    _gsutilwrap_cp,
                    src=self.url_prefix,
                    dst=copy_url + "/gsutil")

                gs_client = gswrap.Client()
                gswrap_elapsed = timer(
                    _gswrap_cp,
                    client=gs_client,
                    src=self.url_prefix,
                    dst=copy_url + "/gswrap")
            finally:
                _tear_down(url=self.url_prefix)
                _tear_down(url=copy_url)

        label = "Benchmark copy on remote {} files".format(file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_elapsed)],
            time_gswrap=gswrap_elapsed)
def benchmark_copy_many_to_many_on_remote(self) -> None:
    """Benchmark a remote many-to-many copy in gswrap and gsutilwrap.

    For every test size, that many local ``<index>/file`` files containing
    ``"text"`` are written and passed to ``_setup`` (presumably uploading
    them -- confirm against ``_setup``). The source/destination pairs are
    built by the library-specific ``*_list_for_cp_many_to_many`` helper
    outside of the timed call; only the actual copy helper is timed. Both
    the source prefix and the copy location are torn down afterwards.
    """
    for file_count in (10, 100, 500):
        with temppathlib.TemporaryDirectory() as local_dir:
            for number in range(file_count):
                local_file = local_dir.path / "{}/file".format(number)
                local_file.parent.mkdir(parents=True, exist_ok=True)
                local_file.write_text("text")

            copy_url = "gs://{}/{}".format(self.bucket, str(uuid.uuid4()))

            try:
                _setup(url=self.url_prefix, path=local_dir.path)

                # gswrap goes first here.
                gs_client = gswrap.Client()
                pairs = _gswrap_list_for_cp_many_to_many(
                    client=gs_client,
                    src=self.url_prefix,
                    dst=copy_url + "/gswrap")
                gswrap_elapsed = timer(
                    _gswrap_copy_many_to_many_files,
                    client=gs_client,
                    srcs_dsts=pairs)

                pairs = _gsutilwrap_list_for_cp_many_to_many(
                    src=self.url_prefix, dst=copy_url + "/gsutil")
                gsutil_elapsed = timer(
                    _gsutilwrap_copy_many_to_many_files, srcs_dsts=pairs)
            finally:
                _tear_down(url=self.url_prefix)
                _tear_down(url=copy_url)

        label = "Benchmark copy-many-to-many-on-remote {} files".format(
            file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_elapsed)],
            time_gswrap=gswrap_elapsed)
def benchmark_download_many_files(self) -> None:
    """Benchmark copying a remote prefix to a local directory in both libraries.

    For every test size, that many local files containing ``"hello"`` are
    written and passed to ``_setup`` (presumably uploading them -- confirm
    against ``_setup``). ``_gsutilwrap_cp`` is then timed copying the prefix
    into a new ``gsutil`` sub-directory and ``_gswrap_cp`` into a new
    ``gswrap`` sub-directory of the same temporary directory. The prefix is
    always torn down afterwards.
    """
    for file_count in (10, 1000, 10**4):
        with temppathlib.TemporaryDirectory() as local_dir:
            for number in range(file_count):
                (local_dir.path / "file{}".format(number)).write_text("hello")

            try:
                _setup(url=self.url_prefix, path=local_dir.path)

                gsutil_target = local_dir.path / "gsutil"
                gsutil_target.mkdir()
                gsutil_elapsed = timer(
                    _gsutilwrap_cp, src=self.url_prefix, dst=gsutil_target)

                gswrap_target = local_dir.path / "gswrap"
                gswrap_target.mkdir()
                gs_client = gswrap.Client()
                gswrap_elapsed = timer(
                    _gswrap_cp,
                    client=gs_client,
                    src=self.url_prefix,
                    dst=gswrap_target)
            finally:
                _tear_down(url=self.url_prefix)

        label = "Benchmark download {} files".format(file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_elapsed)],
            time_gswrap=gswrap_elapsed)
def benchmark_upload_many_to_many(self) -> None:
    """Benchmark a local-to-remote many-to-many copy in gswrap and gsutilwrap.

    For every test size, that many local ``<index>/file`` files containing
    ``"text"`` are written. The source/destination pairs come from
    ``_upload_many_to_many_local_ls`` and are built outside of the timed
    call; gswrap targets ``<url_prefix>/gswrap`` and gsutilwrap targets
    ``<url_prefix>/gsutilwrap``. The prefix is always torn down afterwards.
    """
    for file_count in (10, 100, 500):
        with temppathlib.TemporaryDirectory() as local_dir:
            for number in range(file_count):
                local_file = local_dir.path / "{}/file".format(number)
                local_file.parent.mkdir(parents=True, exist_ok=True)
                local_file.write_text("text")

            try:
                gs_client = gswrap.Client()
                pairs = _upload_many_to_many_local_ls(
                    src=local_dir.path.as_posix(),
                    dst=self.url_prefix + "/gswrap")
                gswrap_elapsed = timer(
                    _gswrap_copy_many_to_many_files,
                    srcs_dsts=pairs,
                    client=gs_client)

                pairs = _upload_many_to_many_local_ls(
                    src=local_dir.path.as_posix(),
                    dst=self.url_prefix + "/gsutilwrap")
                gsutil_elapsed = timer(
                    _gsutilwrap_copy_many_to_many_files, srcs_dsts=pairs)
            finally:
                _tear_down(url=self.url_prefix)

        label = "Benchmark upload-many-to-many {} files".format(file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_elapsed)],
            time_gswrap=gswrap_elapsed)
def benchmark_upload_big_files(self) -> None:
    """Benchmark copying a few large local files to the remote prefix.

    For every size in the list, ``number_of_files`` local files consisting
    of ``size`` repetitions of ``"a"`` are written. The directory is then
    copied to ``self.url_prefix`` first with ``_gswrap_cp`` and afterwards
    with ``_gsutilwrap_cp``; the prefix is torn down after each of the two
    copies so the second one does not run against leftovers of the first.

    The benchmark label is derived from ``number_of_files`` instead of
    repeating the literal count, so it stays correct if the count changes.
    """
    number_of_files = 3
    for size in [10, 1024, 1024**2, 200 * 1024**2]:  # bytes
        with temppathlib.TemporaryDirectory() as tmp_dir:
            for index in range(number_of_files):
                file = tmp_dir.path / "file{}".format(index)
                file.write_text("a" * size)

            try:
                client = gswrap.Client()
                time_gswrap = timer(
                    _gswrap_cp,
                    src=tmp_dir.path,
                    dst=self.url_prefix,
                    client=client)
            finally:
                _tear_down(url=self.url_prefix)

            try:
                time_gsutilwrap = timer(
                    _gsutilwrap_cp, src=tmp_dir.path, dst=self.url_prefix)
            finally:
                _tear_down(url=self.url_prefix)

        print_benchmark(
            benchmark="Benchmark upload {} files with {} bytes".format(
                number_of_files, size),
            time_other_libraries=[(LibraryChecked.gsutilwrap, time_gsutilwrap)],
            time_gswrap=time_gswrap)
def benchmark_upload_many_single_files(self) -> None:
    """Benchmark copying local files one at a time to the remote prefix.

    For every test size, that many local files containing ``"text"`` are
    written. Each entry of the temporary directory is then copied
    individually, first with ``_gswrap_cp`` and afterwards with
    ``_gsutilwrap_cp``, adding up the values returned by ``timer``. The
    prefix is torn down after each library's round.
    """
    for file_count in (10, 25):
        with temppathlib.TemporaryDirectory() as local_dir:
            for number in range(file_count):
                (local_dir.path / "file{}".format(number)).write_text("text")

            try:
                gs_client = gswrap.Client()
                gswrap_total = 0.0
                for local_file in local_dir.path.iterdir():
                    gswrap_total += timer(
                        _gswrap_cp,
                        src=local_file,
                        dst=self.url_prefix,
                        client=gs_client)
            finally:
                _tear_down(url=self.url_prefix)

            try:
                gsutil_total = 0.0
                for local_file in local_dir.path.iterdir():
                    gsutil_total += timer(
                        _gsutilwrap_cp, src=local_file, dst=self.url_prefix)
            finally:
                _tear_down(url=self.url_prefix)

        label = "Benchmark upload {} single files".format(file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_total)],
            time_gswrap=gswrap_total)
def move_directory(bucket_name, source_path, destination_path):
    """Copy a path recursively within one bucket, then remove the source.

    Args:
        bucket_name: Bucket name, used without the ``gs://`` scheme.
        source_path: Path inside the bucket to copy from and then remove.
        destination_path: Path inside the bucket to copy to.

    Returns:
        A message string naming the source and destination paths.
    """
    client = gswrap.Client()

    bucket_root = "gs://" + bucket_name + "/"
    source_url = bucket_root + source_path
    destination_url = bucket_root + destination_path

    client.cp(src=source_url, dst=destination_url, recursive=True)
    # The source is only removed after the copy call has returned.
    client.rm(source_url, recursive=True)

    # NOTE(review): message wording is kept verbatim; callers may compare it.
    return "Source " + source_path + " has been move to " + destination_path
def benchmark_list_many_files(self) -> None:
    """Benchmark listing the remote prefix in gswrap and gsutilwrap.

    For every test size, that many local files containing ``"text"`` are
    written and passed to ``_setup`` (presumably uploading them -- confirm
    against ``_setup``). ``client.ls`` is timed with ``recursive=True`` and
    ``gsutilwrap.ls`` is timed with the bare prefix as positional argument.
    The prefix is always torn down afterwards.
    """
    for file_count in (10, 1000, 10**4):
        with temppathlib.TemporaryDirectory() as local_dir:
            for number in range(file_count):
                (local_dir.path / "file{}".format(number)).write_text("text")

            try:
                _setup(url=self.url_prefix, path=local_dir.path)

                gs_client = gswrap.Client()
                gswrap_elapsed = timer(
                    gs_client.ls, url=self.url_prefix, recursive=True)

                gsutil_elapsed = timer(gsutilwrap.ls, self.url_prefix)
            finally:
                _tear_down(url=self.url_prefix)

        label = "Benchmark list {} files".format(file_count)
        print_benchmark(
            benchmark=label,
            time_other_libraries=[(LibraryChecked.gsutilwrap, gsutil_elapsed)],
            time_gswrap=gswrap_elapsed)
def setUp(self) -> None:
    """Create the client on the test bucket and a unique bucket prefix.

    The client is switched to ``tests.common.TEST_GCS_BUCKET`` and a fresh
    UUID string is stored as ``self.bucket_prefix``.
    """
    self.client = gswrap.Client()
    self.client._change_bucket(tests.common.TEST_GCS_BUCKET)

    unique_prefix = uuid.uuid4()
    self.bucket_prefix = str(unique_prefix)
def setUp(self) -> None:
    """Create the client on the test bucket and an empty local directory.

    A temporary directory is stored as ``self.tmp_dir``; inside it a
    sub-directory named by a fresh UUID is created and stored as
    ``self.local_dir``.
    """
    self.client = gswrap.Client()
    self.client._change_bucket(tests.common.TEST_GCS_BUCKET)

    self.tmp_dir = tempfile.TemporaryDirectory()

    scratch_root = pathlib.Path(self.tmp_dir.name)
    self.local_dir = scratch_root / str(uuid.uuid4())
    self.local_dir.mkdir()