def test_set_package_entry_as_object(self):
    pkg = Package()
    nasty_string = 'a,"\tb'
    num_col = [11, 22, 33]
    str_col = ['a', 'b', nasty_string]
    df = pd.DataFrame({'col_num': num_col, 'col_str': str_col})

    # Test with serialization_location set
    pkg.set("mydataframe1.parquet", df, meta={'user_meta': 'blah'},
            serialization_location=SERIALIZATION_DIR / "df1.parquet")
    pkg.set("mydataframe2.csv", df, meta={'user_meta': 'blah2'},
            serialization_location=SERIALIZATION_DIR / "df2.csv")
    pkg.set("mydataframe3.tsv", df, meta={'user_meta': 'blah3'},
            serialization_location=SERIALIZATION_DIR / "df3.tsv")

    # Test without serialization_location set
    pkg.set("mydataframe4.parquet", df, meta={'user_meta': 'blah4'})
    pkg.set("mydataframe5.csv", df, meta={'user_meta': 'blah5'})
    pkg.set("mydataframe6.tsv", df, meta={'user_meta': 'blah6'})

    for lk, entry in pkg.walk():
        file_path = parse_file_url(urlparse(entry.get()))
        assert pathlib.Path(file_path).exists(), "The serialization files should exist"
        # Make sure the files get deleted even if the test fails
        self.file_sweeper_path_list.append(file_path)

    pkg._fix_sha256()
    for lk, entry in pkg.walk():
        assert df.equals(entry.deserialize()), "The deserialized PackageEntry should be equal " \
                                               "to the object that was serialized"

    # Test that push cleans up the temporary files, if and only if
    # serialization_location was not set
    with patch('botocore.client.BaseClient._make_api_call', new=mock_make_api_call), \
         patch('quilt3.Package._materialize') as materialize_mock, \
         patch('quilt3.Package.build') as build_mock:
        materialize_mock.return_value = pkg

        pkg.push('Quilt/test_pkg_name', 's3://test-bucket')

        for lk in ["mydataframe1.parquet", "mydataframe2.csv", "mydataframe3.tsv"]:
            file_path = parse_file_url(urlparse(pkg.get(lk)))
            assert pathlib.Path(file_path).exists(), \
                "These files should not have been deleted during push()"

        for lk in ["mydataframe4.parquet", "mydataframe5.csv", "mydataframe6.tsv"]:
            file_path = parse_file_url(urlparse(pkg.get(lk)))
            assert not pathlib.Path(file_path).exists(), \
                "These temp files should have been deleted during push()"
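
# NOTE: `mock_make_api_call` (patched in above) is defined elsewhere in this
# test module. A minimal sketch of what such a helper can look like, assuming
# the test only needs S3 PutObject calls short-circuited and everything else
# passed through (illustrative, not necessarily the module's actual
# implementation):

from botocore.client import BaseClient

_real_make_api_call = BaseClient._make_api_call

def mock_make_api_call(self, operation_name, api_params):
    # Fake a successful upload; defer all other operations to botocore.
    if operation_name == 'PutObject':
        return {'VersionId': 'v1'}
    return _real_make_api_call(self, operation_name, api_params)
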
def test_load_into_quilt(self):
    """ Verify loading local manifest and data into S3. """
    top_hash = '5333a204bbc6e21607c2bc842f4a77d2e21aa6147cf2bf493dbf6282188d01ca'

    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v1'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': 'Quilt/package/foo',
            'Metadata': {'helium': '{}'},
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v2'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/packages/' + top_hash,
            'Metadata': {'helium': 'null'},
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v3'},
        expected_params={
            'Body': top_hash.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/1234567890',
            'Metadata': {'helium': 'null'},
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v4'},
        expected_params={
            'Body': top_hash.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/latest',
            'Metadata': {'helium': 'null'},
        }
    )

    new_pkg = Package()

    # Create a dummy file to add to the package.
    contents = 'blah'
    test_file = Path('bar')
    test_file.write_text(contents)
    new_pkg = new_pkg.set('foo', test_file)

    with patch('time.time', return_value=1234567890):
        new_pkg.push('Quilt/package', 's3://my_test_bucket/')
def test_push_restrictions(self):
    p = Package()

    # disallow pushing not to the top level of a remote S3 registry
    with pytest.raises(QuiltException):
        p.push('Quilt/Test', 's3://test-bucket/foo/bar')

    # disallow pushing to the local filesystem (use install instead)
    with pytest.raises(QuiltException):
        p.push('Quilt/Test', './')

    # disallow pushing the package manifest to remote but package data to local
    with pytest.raises(QuiltException):
        p.push('Quilt/Test', 's3://test-bucket', dest='./')

    # disallow pushing the package manifest to remote but package data to a different remote
    with pytest.raises(QuiltException):
        p.push('Quilt/Test', 's3://test-bucket', dest='s3://other-test-bucket')
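
# For contrast with the failure cases above: a valid push targets the top
# level of a remote S3 registry, e.g. `p.push('Quilt/Test', 's3://test-bucket')`
# (bucket name illustrative), as the other push tests in this module do.
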
def upload_test_resources(args: Args):
    # Try running the upload pipeline
    try:
        # Get test resources dir
        resources_dir = (
            Path(__file__).parent.parent / "aicsimageio" / "tests" / "resources"
        ).resolve(strict=True)

        # Report which directory will be used for upload
        log.info(f"Using contents of directory: {resources_dir}")

        # Create quilt package
        package = Package()
        package.set_dir("resources", resources_dir)

        # Report package contents
        log.info(f"Package contents: {package}")

        # Construct package name
        package_name = "aicsimageio/test_resources"

        # Check for dry run
        if args.dry_run:
            # Attempt to build the package
            built = package.build(package_name)

            # Get resolved save path
            manifest_save_path = Path("upload_manifest.jsonl").resolve()
            with open(manifest_save_path, "w") as manifest_write:
                package.dump(manifest_write)

            # Report where the manifest was saved
            log.info(f"Dry run generated manifest stored to: {manifest_save_path}")
            log.info(f"Completed package dry run. Result hash: {built.top_hash}")

        # Upload
        else:
            # Check pre-approved push
            if args.preapproved:
                confirmation = True
            else:
                # Get upload confirmation
                confirmation = None
                while confirmation is None:
                    # Get user input
                    user_input = input("Upload [y]/n? ")

                    # If the user simply pressed enter, assume yes
                    if len(user_input) == 0:
                        user_input = "y"
                    # Otherwise, get the first character, lowercased
                    else:
                        user_input = user_input[0].lower()

                    # Set confirmation from None to a value
                    if user_input == "y":
                        confirmation = True
                    elif user_input == "n":
                        confirmation = False

            # Check confirmation
            if confirmation:
                pushed = package.push(
                    package_name,
                    "s3://aics-modeling-packages-test-resources",
                    message=f"Test resources for `aicsimageio` version: {__version__}.",
                )
                log.info(f"Completed package push. Result hash: {pushed.top_hash}")
            else:
                log.info("Upload canceled.")

    # Catch any exception
    except Exception as e:
        log.error("=============================================")
        if args.debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)
def run_benchmarks(args: Args):
    # Results are stored as they are returned
    all_results = {}

    # Try running the benchmarks
    try:
        # Get benchmark resources dir
        resources_dir = (
            Path(__file__).parent.parent / "aicsimageio" / "tests" / "resources"
        )

        # Store machine config
        _ = {
            "platform": platform.system(),
            "platform_version": platform.version(),
            "architecture": platform.machine(),
            "cpu_total_count": psutil.cpu_count(),
            "cpu_current_utilization": psutil.cpu_percent(),
            "memory_total_gb": psutil.virtual_memory().total / 10e8,
            "memory_available_gb": psutil.virtual_memory().available / 10e8,
        }

        # Store python config
        pyversion = sys.version_info
        _ = {
            "python_version": f"{pyversion.major}.{pyversion.minor}.{pyversion.micro}",
            "aicsimageio": aicsimageio.__version__,
            "czifile": czifile.__version__,
            "imageio": imageio.__version__,
            "tifffile": tifffile.__version__,
        }

        # Run tests
        #######################################################################

        log.info("Running tests: no cluster...")
        log.info("=" * 80)
        all_results["no-cluster"] = _run_benchmark_suite(resources_dir=resources_dir)

        #######################################################################

        for cluster_config in CLUSTER_CONFIGS:
            total_cores = cluster_config["per_worker_cores"] * cluster_config["workers"]
            log.info(
                f"Running tests: {cluster_config['name']} "
                f"(Total cores: {total_cores}) ..."
            )
            log.info("=" * 80)

            # Create or get log dir
            # Do not include ms
            log_dir_name = datetime.now().isoformat().split(".")[0]
            log_dir = Path(f".dask_logs/{log_dir_name}").expanduser()
            # Log dir settings
            log_dir.mkdir(parents=True, exist_ok=True)

            # Calc per_worker_memory
            per_worker_memory = cluster_config["per_worker_cores"] * 2
            per_worker_memory = f"{per_worker_memory}GB"

            # Create cluster
            cluster = SLURMCluster(
                cores=cluster_config["per_worker_cores"],
                memory=per_worker_memory,
                queue="aics_cpu_general",
                walltime="10:00:00",
                local_directory=str(log_dir),
                log_directory=str(log_dir),
            )

            # Scale cluster
            cluster.scale(cluster_config["workers"])

            # Create client connection
            client = Client(cluster)

            # Wait for a minute for the cluster to fully spin up
            time.sleep(60)

            # Run benchmark
            all_results[cluster_config["name"]] = _run_benchmark_suite(
                resources_dir=resources_dir
            )

            client.shutdown()
            cluster.close()

            # Wait for a minute for the cluster to fully shut down
            time.sleep(60)

        #######################################################################

        log.info("Completed all tests")
        log.info("=" * 80)

        # Ensure save dir exists and save results
        args.save_path.parent.mkdir(parents=True, exist_ok=True)
        with open(args.save_path, "w") as write_out:
            json.dump(all_results, write_out)

        # Construct and push package
        if args.upload:
            p = Package()
            p.set("results.json", args.save_path)
            p.push(
                "aicsimageio/benchmarks",
                "s3://aics-modeling-packages-test-resources",
                message=f"aicsimageio version: {aicsimageio.__version__}",
            )

    # Catch any exception
    except Exception as e:
        log.error("=============================================")
        if args.debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)
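
# `CLUSTER_CONFIGS` and `_run_benchmark_suite` are defined elsewhere in this
# script. Based on the keys accessed above, each cluster config entry has the
# shape sketched below (names and values illustrative, not the script's actual
# configuration):
#
#     CLUSTER_CONFIGS = [
#         {"name": "small-cluster", "per_worker_cores": 2, "workers": 4},
#         {"name": "large-cluster", "per_worker_cores": 4, "workers": 16},
#     ]
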
def test_load_into_quilt(self):
    """ Verify loading local manifest and data into S3. """
    top_hash1 = 'abbf5f171cf20bfb2313ecd8684546958cd72ac4f3ec635e4510d9c771168226'

    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v1'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': 'Quilt/package/foo1',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v1'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': 'Quilt/package/foo2',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v2'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/packages/' + top_hash1,
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v3'},
        expected_params={
            'Body': top_hash1.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/1234567890',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v4'},
        expected_params={
            'Body': top_hash1.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/latest',
        }
    )

    new_pkg = Package()

    # Create two dummy files to add to the package.
    test_file1 = Path('bar1')
    test_file1.write_text('blah')
    new_pkg.set('foo1', test_file1)
    test_file2 = Path('bar2')
    test_file2.write_text('omg')
    new_pkg.set('foo2', test_file2)

    with patch('time.time', return_value=1234567890), \
         patch('quilt3.data_transfer.s3_transfer_config.max_request_concurrency', 1):
        remote_pkg = new_pkg.push('Quilt/package', 's3://my_test_bucket/')

    # Modify one file, and check that only that file gets uploaded.
    top_hash2 = 'd4efbb1734a53726d97086824d153e6cb5e9d8bc31d15ead0dbc019022cfe539'

    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v2'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': 'Quilt/package/foo2',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v2'},
        expected_params={
            'Body': ANY,
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/packages/' + top_hash2,
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v3'},
        expected_params={
            'Body': top_hash2.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/1234567891',
        }
    )
    self.s3_stubber.add_response(
        method='put_object',
        service_response={'VersionId': 'v4'},
        expected_params={
            'Body': top_hash2.encode(),
            'Bucket': 'my_test_bucket',
            'Key': '.quilt/named_packages/Quilt/package/latest',
        }
    )

    test_file3 = Path('bar3')
    test_file3.write_text('!!!')
    remote_pkg.set('foo2', test_file3)

    with patch('time.time', return_value=1234567891), \
         patch('quilt3.data_transfer.s3_transfer_config.max_request_concurrency', 1):
        remote_pkg.push('Quilt/package', 's3://my_test_bucket/')