Example 1
    def test_set_package_entry_as_object(self):
        pkg = Package()
        nasty_string = 'a,"\tb'
        num_col = [11, 22, 33]
        str_col = ['a', 'b', nasty_string]
        df = pd.DataFrame({'col_num': num_col, 'col_str': str_col})

        # Test with serialization_dir set
        pkg.set("mydataframe1.parquet",
                df,
                meta={'user_meta': 'blah'},
                serialization_location=SERIALIZATION_DIR / "df1.parquet")
        pkg.set("mydataframe2.csv",
                df,
                meta={'user_meta': 'blah2'},
                serialization_location=SERIALIZATION_DIR / "df2.csv")
        pkg.set("mydataframe3.tsv",
                df,
                meta={'user_meta': 'blah3'},
                serialization_location=SERIALIZATION_DIR / "df3.tsv")

        # Test without serialization_dir set
        pkg.set("mydataframe4.parquet", df, meta={'user_meta': 'blah4'})
        pkg.set("mydataframe5.csv", df, meta={'user_meta': 'blah5'})
        pkg.set("mydataframe6.tsv", df, meta={'user_meta': 'blah6'})

        for lk, entry in pkg.walk():
            file_path = parse_file_url(urlparse(entry.get()))
            assert pathlib.Path(file_path).exists(), \
                "The serialization files should exist"

            # Make sure files get deleted even if the test fails
            self.file_sweeper_path_list.append(file_path)

        pkg._fix_sha256()
        for lk, entry in pkg.walk():
            assert df.equals(entry.deserialize()), "The deserialized PackageEntry should be equal to the object that " \
                                                   "was serialized"

        # Test that push cleans up the temporary files, if and only if the serialization_location was not set
        with patch('botocore.client.BaseClient._make_api_call', new=mock_make_api_call), \
            patch('quilt3.Package._materialize') as materialize_mock, \
            patch('quilt3.Package.build') as build_mock:
            materialize_mock.return_value = pkg

            pkg.push('Quilt/test_pkg_name', 's3://test-bucket')

        for lk in ["mydataframe1.parquet", "mydataframe2.csv", "mydataframe3.tsv"]:
            file_path = parse_file_url(urlparse(pkg.get(lk)))
            assert pathlib.Path(file_path).exists(), \
                "These files should not have been deleted during push()"

        for lk in ["mydataframe4.parquet", "mydataframe5.csv", "mydataframe6.tsv"]:
            file_path = parse_file_url(urlparse(pkg.get(lk)))
            assert not pathlib.Path(file_path).exists(), \
                "These temp files should have been deleted during push()"
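
A minimal standalone sketch of the object-serialization path exercised above, assuming quilt3 and pandas are installed; the logical keys, metadata, and file paths are illustrative:

import pathlib

import pandas as pd
import quilt3

pkg = quilt3.Package()
df = pd.DataFrame({"col_num": [11, 22, 33], "col_str": ["a", "b", "c"]})

# The logical key's extension (.parquet / .csv / .tsv) selects the serializer.
# Without serialization_location, quilt3 serializes to a temporary file that
# push() later cleans up; with it, the serialized copy is kept at that path.
pkg.set("table.parquet", df, meta={"user_meta": "example"})
pkg.set("table.csv", df, serialization_location=pathlib.Path("table_copy.csv"))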
Example 2
    def test_load_into_quilt(self):
        """ Verify loading local manifest and data into S3. """
        top_hash = '5333a204bbc6e21607c2bc842f4a77d2e21aa6147cf2bf493dbf6282188d01ca'

        self.s3_stubber.add_response(method='put_object',
                                     service_response={'VersionId': 'v1'},
                                     expected_params={
                                         'Body': ANY,
                                         'Bucket': 'my_test_bucket',
                                         'Key': 'Quilt/package/foo',
                                         'Metadata': {
                                             'helium': '{}'
                                         }
                                     })

        self.s3_stubber.add_response(method='put_object',
                                     service_response={'VersionId': 'v2'},
                                     expected_params={
                                         'Body': ANY,
                                         'Bucket': 'my_test_bucket',
                                         'Key': '.quilt/packages/' + top_hash,
                                         'Metadata': {
                                             'helium': 'null'
                                         }
                                     })

        self.s3_stubber.add_response(
            method='put_object',
            service_response={'VersionId': 'v3'},
            expected_params={
                'Body': top_hash.encode(),
                'Bucket': 'my_test_bucket',
                'Key': '.quilt/named_packages/Quilt/package/1234567890',
                'Metadata': {
                    'helium': 'null'
                }
            })

        self.s3_stubber.add_response(
            method='put_object',
            service_response={'VersionId': 'v4'},
            expected_params={
                'Body': top_hash.encode(),
                'Bucket': 'my_test_bucket',
                'Key': '.quilt/named_packages/Quilt/package/latest',
                'Metadata': {
                    'helium': 'null'
                }
            })

        new_pkg = Package()
        # Create a dummy file to add to the package.
        contents = 'blah'
        test_file = Path('bar')
        test_file.write_text(contents)
        new_pkg = new_pkg.set('foo', test_file)

        with patch('time.time', return_value=1234567890):
            new_pkg.push('Quilt/package', 's3://my_test_bucket/')
Example 3
    def test_push_restrictions(self):
        p = Package()

        # disallow pushing not to the top level of a remote S3 registry
        with pytest.raises(QuiltException):
            p.push('Quilt/Test', 's3://test-bucket/foo/bar')

        # disallow pushing to the local filesystem (use install instead)
        with pytest.raises(QuiltException):
            p.push('Quilt/Test', './')

        # disallow pushing the package manifest to remote but package data to local
        with pytest.raises(QuiltException):
            p.push('Quilt/Test', 's3://test-bucket', dest='./')

        # disallow pushing the package manifest to remote but package data to a different remote
        with pytest.raises(QuiltException):
            p.push('Quilt/Test', 's3://test-bucket', dest='s3://other-test-bucket')
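
For contrast, a minimal sketch of a push these restrictions do allow: the manifest and the data both go to the top level of a single remote registry (bucket name illustrative):

# Allowed: the registry is the root of one remote S3 bucket, with no conflicting dest.
p.push('Quilt/Test', 's3://test-bucket')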
Example 4
def upload_test_resources(args: Args):
    # Try running the download pipeline
    try:
        # Get test resources dir
        resources_dir = (Path(__file__).parent.parent / "aicsimageio" /
                         "tests" / "resources").resolve(strict=True)

        # Report which directory will be used for upload
        log.info(f"Using contents of directory: {resources_dir}")

        # Create quilt package
        package = Package()
        package.set_dir("resources", resources_dir)

        # Report package contents
        log.info(f"Package contents: {package}")

        # Construct package name
        package_name = "aicsimageio/test_resources"

        # Check for dry run
        if args.dry_run:
            # Attempt to build the package
            built = package.build(package_name)

            # Get resolved save path
            manifest_save_path = Path("upload_manifest.jsonl").resolve()
            with open(manifest_save_path, "w") as manifest_write:
                package.dump(manifest_write)

            # Report where manifest was saved
            log.info(
                f"Dry run generated manifest stored to: {manifest_save_path}")
            log.info(
                f"Completed package dry run. Result hash: {built.top_hash}")

        # Upload
        else:
            # Check pre-approved push
            if args.preapproved:
                confirmation = True
            else:
                # Get upload confirmation
                confirmation = None
                while confirmation is None:
                    # Get user input
                    user_input = input("Upload [y]/n? ")

                    # If the user simply pressed enter, assume yes
                    if len(user_input) == 0:
                        user_input = "y"
                    # Otherwise keep only the first character, lowercased
                    else:
                        user_input = user_input[0].lower()

                    # Set confirmation from None to a value
                    if user_input == "y":
                        confirmation = True
                    elif user_input == "n":
                        confirmation = False

            # Check confirmation
            if confirmation:
                pushed = package.push(
                    package_name,
                    "s3://aics-modeling-packages-test-resources",
                    message=f"Test resources for `aicsimageio` version: {__version__}.",
                )

                log.info(
                    f"Completed package push. Result hash: {pushed.top_hash}")
            else:
                log.info("Upload canceled.")

    # Catch any exception
    except Exception as e:
        log.error("=============================================")
        if args.debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)
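
The dry-run branch above reduces to a build-and-dump pattern; a minimal standalone sketch, assuming quilt3 is installed (the package name and paths are illustrative):

from pathlib import Path

from quilt3 import Package

package = Package()
package.set_dir("resources", "path/to/resources")

# build() registers the package locally (no upload) and yields a top hash;
# dump() writes the manifest out as JSON lines for inspection.
built = package.build("aicsimageio/test_resources")
with open(Path("upload_manifest.jsonl").resolve(), "w") as manifest_write:
    package.dump(manifest_write)
print(built.top_hash)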
Example 5
def run_benchmarks(args: Args):
    # Results are stored as they are returned
    all_results = {}

    # Try running the benchmarks
    try:
        # Get benchmark resources dir
        resources_dir = (Path(__file__).parent.parent / "aicsimageio" /
                         "tests" / "resources")

        # Store machine config
        _ = {
            "platform": platform.system(),
            "platform_version": platform.version(),
            "architecture": platform.machine(),
            "cpu_total_count": psutil.cpu_count(),
            "cpu_current_utilization": psutil.cpu_percent(),
            "memory_total_gb": psutil.virtual_memory().total / 10e8,
            "memory_available_gb": psutil.virtual_memory().available / 10e8,
        }

        # Store python config
        pyversion = sys.version_info
        _ = {
            "python_version":
            f"{pyversion.major}.{pyversion.minor}.{pyversion.micro}",
            "aicsimageio": aicsimageio.__version__,
            "czifile": czifile.__version__,
            "imageio": imageio.__version__,
            "tifffile": tifffile.__version__,
        }

        # Run tests
        #######################################################################

        log.info("Running tests: no cluster...")
        log.info("=" * 80)

        all_results["no-cluster"] = _run_benchmark_suite(
            resources_dir=resources_dir)

        #######################################################################

        for cluster_config in CLUSTER_CONFIGS:
            total_cores = cluster_config["per_worker_cores"] * cluster_config["workers"]
            log.info(f"Running tests: {cluster_config['name']} "
                     f"(Total cores: {total_cores}) ...")
            log.info("=" * 80)

            # Create or get log dir
            # Do not include ms
            log_dir_name = datetime.now().isoformat().split(".")[0]
            log_dir = Path(f".dask_logs/{log_dir_name}").expanduser()
            # Log dir settings
            log_dir.mkdir(parents=True, exist_ok=True)

            # Calc per_worker_memory
            per_worker_memory = cluster_config["per_worker_cores"] * 2
            per_worker_memory = f"{per_worker_memory}GB"

            # Create cluster
            cluster = SLURMCluster(
                cores=cluster_config["per_worker_cores"],
                memory=per_worker_memory,
                queue="aics_cpu_general",
                walltime="10:00:00",
                local_directory=str(log_dir),
                log_directory=str(log_dir),
            )

            # Scale cluster
            cluster.scale(cluster_config["workers"])

            # Create client connection
            client = Client(cluster)

            # Wait for a minute for the cluster to fully spin up
            time.sleep(60)

            # Run benchmark
            all_results[cluster_config["name"]] = _run_benchmark_suite(
                resources_dir=resources_dir)

            client.shutdown()
            cluster.close()

            # Wait for a minute for the cluster to fully shut down
            time.sleep(60)

        #######################################################################

        log.info("Completed all tests")
        log.info("=" * 80)

        # Ensure save dir exists and save results
        args.save_path.parent.mkdir(parents=True, exist_ok=True)
        with open(args.save_path, "w") as write_out:
            json.dump(all_results, write_out)

        # Construct and push package
        if args.upload:
            p = Package()
            p.set("results.json", args.save_path)
            p.push(
                "aicsimageio/benchmarks",
                "s3://aics-modeling-packages-test-resources",
                message=f"aicsimageio version: {aicsimageio.__version__}",
            )

    # Catch any exception
    except Exception as e:
        log.error("=============================================")
        if args.debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)
Example 6
    def test_load_into_quilt(self):
        """ Verify loading local manifest and data into S3. """
        top_hash1 = 'abbf5f171cf20bfb2313ecd8684546958cd72ac4f3ec635e4510d9c771168226'

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v1'
            },
            expected_params={
                'Body': ANY,
                'Bucket': 'my_test_bucket',
                'Key': 'Quilt/package/foo1',
            }
        )

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v1'
            },
            expected_params={
                'Body': ANY,
                'Bucket': 'my_test_bucket',
                'Key': 'Quilt/package/foo2',
            }
        )

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v2'
            },
            expected_params={
                'Body': ANY,
                'Bucket': 'my_test_bucket',
                'Key': '.quilt/packages/' + top_hash1,
            }
        )

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v3'
            },
            expected_params={
                'Body': top_hash1.encode(),
                'Bucket': 'my_test_bucket',
                'Key': '.quilt/named_packages/Quilt/package/1234567890',
            }
        )

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v4'
            },
            expected_params={
                'Body': top_hash1.encode(),
                'Bucket': 'my_test_bucket',
                'Key': '.quilt/named_packages/Quilt/package/latest',
            }
        )

        new_pkg = Package()
        # Create two dummy files to add to the package.
        test_file1 = Path('bar1')
        test_file1.write_text('blah')
        new_pkg.set('foo1', test_file1)
        test_file2 = Path('bar2')
        test_file2.write_text('omg')
        new_pkg.set('foo2', test_file1)

        with patch('time.time', return_value=1234567890), \
             patch('quilt3.data_transfer.s3_transfer_config.max_request_concurrency', 1):
            remote_pkg = new_pkg.push('Quilt/package', 's3://my_test_bucket/')

        # Modify one file, and check that only that file gets uploaded.
        top_hash2 = 'd4efbb1734a53726d97086824d153e6cb5e9d8bc31d15ead0dbc019022cfe539'

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v2'
            },
            expected_params={
                'Body': ANY,
                'Bucket': 'my_test_bucket',
                'Key': 'Quilt/package/foo2',
            }
        )

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v2'
            },
            expected_params={
                'Body': ANY,
                'Bucket': 'my_test_bucket',
                'Key': '.quilt/packages/' + top_hash2,
            }
        )

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v3'
            },
            expected_params={
                'Body': top_hash2.encode(),
                'Bucket': 'my_test_bucket',
                'Key': '.quilt/named_packages/Quilt/package/1234567891',
            }
        )

        self.s3_stubber.add_response(
            method='put_object',
            service_response={
                'VersionId': 'v4'
            },
            expected_params={
                'Body': top_hash2.encode(),
                'Bucket': 'my_test_bucket',
                'Key': '.quilt/named_packages/Quilt/package/latest',
            }
        )

        test_file3 = Path('bar3')
        test_file3.write_text('!!!')
        remote_pkg.set('foo2', test_file3)

        with patch('time.time', return_value=1234567891), \
             patch('quilt3.data_transfer.s3_transfer_config.max_request_concurrency', 1):
            remote_pkg.push('Quilt/package', 's3://my_test_bucket/')
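
Taken together, the second round of stubs encodes the incremental behaviour under test: push() returns a package whose entries reference the uploaded copies, so pushing again only re-uploads the entries whose local data changed (foo2 here), plus the new manifest and named_packages pointers. A condensed sketch of that workflow (bucket and file names illustrative):

# First push uploads every entry and returns a remote-backed package.
remote_pkg = new_pkg.push('Quilt/package', 's3://my_test_bucket/')

# Re-point one logical key at new local data and push again; only that
# entry's data object is re-uploaded, along with the new manifest and pointers.
remote_pkg.set('foo2', 'bar3')
remote_pkg.push('Quilt/package', 's3://my_test_bucket/')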