def download_test_resources(args: Args):
    # Try running the download pipeline
    try:
        # Get test resources dir
        resources_dir = (
            Path(__file__).parent.parent / "aicsimageio" / "tests" / "resources"
        ).resolve()
        resources_dir.mkdir(exist_ok=True)

        # Get quilt package
        package = Package.browse(
            "aicsimageio/test_resources",
            "s3://aics-modeling-packages-test-resources",
            top_hash=args.top_hash,
        )

        # Download
        package["resources"].fetch(resources_dir)

        log.info(f"Completed package download.")

    # Catch any exception
    except Exception as e:
        log.error("=============================================")
        if args.debug:
            log.error("\n\n" + traceback.format_exc())
            log.error("=============================================")
        log.error("\n\n" + str(e) + "\n")
        log.error("=============================================")
        sys.exit(1)
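A minimal sketch of how this helper might be invoked from the command line; the argparse wrapper and flag names below are hypothetical stand-ins, since the real Args class and CLI setup are not shown in this example.

import argparse

# Hypothetical CLI wrapper; the real script defines its own Args container
# with top_hash and debug fields.
parser = argparse.ArgumentParser(description="Download aicsimageio test resources")
parser.add_argument("--top-hash", dest="top_hash", default=None,
                    help="Specific Quilt top hash to fetch")
parser.add_argument("--debug", action="store_true",
                    help="Log the full traceback on failure")
download_test_resources(parser.parse_args())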
Example #2
    def exec_module(cls, module):
        """
        Module executor.
        """
        name_parts = module.__name__.split('.')
        registry = get_from_config('default_local_registry')

        if module.__name__ == 'quilt3.data':
            # __path__ must be set even if the package is virtual. Since __path__ will be
            # scanned by all other finders preceding this one in sys.meta_path order, make sure
            # it points to someplace lacking importable objects
            module.__path__ = MODULE_PATH
            return module

        elif len(name_parts) == 3:  # e.g. module.__name__ == quilt3.data.foo
            namespace = name_parts[2]

            # we do not know the name the user will ask for, so populate all valid names
            for pkg in list_packages():
                pkg_user, pkg_name = pkg.split('/')
                if pkg_user == namespace:
                    module.__dict__[pkg_name] = Package.browse(
                        pkg, registry=registry)

            module.__path__ = MODULE_PATH
            return module

        else:
            assert False
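The finder above makes packages in the default local registry importable as attributes of quilt3.data.<user>. A minimal usage sketch, assuming the finder is registered on sys.meta_path (quilt3 does this on import) and that a package named "foo/bar" already exists in the local registry:

import quilt3

# "foo" is the package namespace (user) and "bar" the package name; exec_module
# populates quilt3.data.foo.bar with the result of Package.browse("foo/bar").
from quilt3.data.foo import bar
print(bar)  # prints the package's top-level entries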
Example #3
    def test_diff(self):
        new_pkg = Package()

        # Create a dummy file to add to the package.
        test_file_name = 'bar'
        with open(test_file_name, "w") as fd:
            fd.write('test_file_content_string')
            test_file = Path(fd.name)

        # Build a new package into the local registry.
        new_pkg = new_pkg.set('foo', test_file_name)
        top_hash = new_pkg.build("Quilt/Test")

        p1 = Package.browse('Quilt/Test')
        p2 = Package.browse('Quilt/Test')
        assert p1.diff(p2) == ([], [], [])
Example #4
    def test_manifest(self):
        pkg = Package()
        pkg.set('as/df', LOCAL_MANIFEST)
        pkg.set('as/qw', LOCAL_MANIFEST)
        top_hash = pkg.build('foo/bar').top_hash
        manifest = list(pkg.manifest)

        pkg2 = Package.browse('foo/bar', top_hash=top_hash)
        assert list(pkg.manifest) == list(pkg2.manifest)
Example #5
    def test_top_hash_stable(self):
        """Ensure that top_hash() never changes for a given manifest"""

        registry = DATA_DIR.as_posix()
        top_hash = '20de5433549a4db332a11d8d64b934a82bdea8f144b4aecd901e7d4134f8e733'

        pkg = Package.browse('foo/bar', registry=registry, top_hash=top_hash)

        assert pkg.top_hash == top_hash, \
            "Unexpected top_hash for {}/packages/.quilt/packages/{}".format(registry, top_hash)
Example #6
    def test_rollback(self):
        p = Package()
        p.set('foo', DATA_DIR / 'foo.txt')
        p.build('quilt/tmp')

        good_hash = p.top_hash

        assert 'foo' in Package.browse('quilt/tmp')

        p.delete('foo')
        p.build('quilt/tmp')

        assert 'foo' not in Package.browse('quilt/tmp')

        Package.rollback('quilt/tmp', LOCAL_REGISTRY, good_hash)

        assert 'foo' in Package.browse('quilt/tmp')

        with self.assertRaises(QuiltException):
            Package.rollback('quilt/tmp', LOCAL_REGISTRY, '12345678' * 8)

        with self.assertRaises(QuiltException):
            Package.rollback('quilt/blah', LOCAL_REGISTRY, good_hash)
Example #7
    def test_browse_package_from_registry(self):
        """ Verify loading manifest locally and from s3 """
        with patch('quilt3.Package._from_path') as pkgmock:
            registry = LOCAL_REGISTRY.resolve().as_uri()
            pkg = Package()
            pkgmock.return_value = pkg
            top_hash = pkg.top_hash

            pkg = Package.browse('Quilt/nice-name', top_hash=top_hash)
            assert '{}/.quilt/packages/{}'.format(registry, top_hash) \
                    in [x[0][0] for x in pkgmock.call_args_list]

            pkgmock.reset_mock()

            with patch('quilt3.packages.get_bytes') as dl_mock:
                dl_mock.return_value = (top_hash.encode('utf-8'), None)
                pkg = Package.browse('Quilt/nice-name')
                assert registry + '/.quilt/named_packages/Quilt/nice-name/latest' \
                        == dl_mock.call_args_list[0][0][0]

            assert '{}/.quilt/packages/{}'.format(registry, top_hash) \
                    in [x[0][0] for x in pkgmock.call_args_list]
            pkgmock.reset_mock()

            remote_registry = 's3://asdf/foo'
            # remote load
            pkg = Package.browse('Quilt/nice-name',
                                 registry=remote_registry,
                                 top_hash=top_hash)
            assert '{}/.quilt/packages/{}'.format(remote_registry, top_hash) \
                    in [x[0][0] for x in pkgmock.call_args_list]
            pkgmock.reset_mock()
            pkg = Package.browse('Quilt/nice-name',
                                 top_hash=top_hash,
                                 registry=remote_registry)
            assert '{}/.quilt/packages/{}'.format(remote_registry, top_hash) \
                    in [x[0][0] for x in pkgmock.call_args_list]

            pkgmock.reset_mock()
            with patch('quilt3.packages.get_bytes') as dl_mock:
                dl_mock.return_value = (top_hash.encode('utf-8'), None)
                pkg = Package.browse('Quilt/nice-name',
                                     registry=remote_registry)
            assert '{}/.quilt/packages/{}'.format(remote_registry, top_hash) \
                    in [x[0][0] for x in pkgmock.call_args_list]

            # registry failure case
            with patch('quilt3.packages.get_from_config',
                       return_value=fix_url(os.path.dirname(__file__))):
                with pytest.raises(FileNotFoundError):
                    Package.browse('Quilt/nice-name')
Example #8
def download_test_resources() -> None:
    root = Path(__file__).parent.parent.parent
    resources = (root / "aicsimageio" / "aicsimageio" / "tests" /
                 "resources").resolve()

    # Get the specific hash for test resources
    with open(root / "aicsimageio" / "scripts" / "TEST_RESOURCES_HASH.txt",
              "r") as f:
        top_hash = f.readline().strip()

    # Download test resources
    resources.mkdir(exist_ok=True)
    package = Package.browse(
        "aicsimageio/test_resources",
        "s3://aics-modeling-packages-test-resources",
        top_hash=top_hash,
    )
    package["resources"].fetch(resources)
Example #9
def chart_benchmarks(args: Args):
    # Check save dir exists or create
    args.save_dir.mkdir(parents=True, exist_ok=True)

    # Get file
    if args.benchmark_file is None:
        benchmark_filepath = Path("benchmark_results.json")
        p = Package.browse(
            "aicsimageio/benchmarks", "s3://aics-modeling-packages-test-resources"
        )
        p["results.json"].fetch(benchmark_filepath)
    else:
        benchmark_filepath = args.benchmark_file

    # Read results file
    with open(benchmark_filepath, "r") as read_in:
        all_results = json.load(read_in)

    # Generate charts for each config
    per_cluster_results = []
    selected_cluster_results = []
    for config_name, results in all_results.items():
        results = pd.DataFrame(results)
        results["config"] = config_name

        # Add to all
        per_cluster_results.append(results)

        # Add to primary viz
        if config_name in SELECTED_CLUSTERS_TO_VISUALIZE:
            selected_cluster_results.append(results)

        chart = _generate_chart(results)
        chart.save(str(args.save_dir / f"{config_name}.png"))

    # Generate unified chart
    all_results = pd.concat(per_cluster_results)
    unified_chart = _generate_chart(all_results)
    unified_chart.save(str(args.save_dir / "all.png"))

    # Generate unified primary chart
    primary_results = pd.concat(selected_cluster_results)
    unified_chart = _generate_chart(primary_results, sorted=True)
    unified_chart.save(str(args.save_dir / "primary.png"))
Example #10
import os
import pandas as pd
from tqdm import tqdm
from quilt3distribute import Dataset
from quilt3 import Package

# Download the datasets from Quilt if there is no local copy

ds_folder = "../database/"

if not os.path.exists(os.path.join(ds_folder, "metadata.csv")):

    pkg = Package.browse("matheus/assay_dev_datasets",
                         "s3://allencell-internal-quilt").fetch(ds_folder)

metadata = pd.read_csv(os.path.join(ds_folder, "metadata.csv"))

df_meta = pd.read_csv(os.path.join(ds_folder, metadata.database_path[0]),
                      index_col=0)

# FOVs that could not be read from the server
# We shall come back to these files in the future.
fovs_with_read_problems = [40, 135, 462, 2000]

# Gathering results
df = []

for FOVId in tqdm(df_meta.index):

    if FOVId not in fovs_with_read_problems:
Example #11
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Download the model weights from Quilt if the folder `best_model` is empty

model_weights_path = []
if os.path.exists("../best_model"):
    for f in os.listdir("../best_model"):
        # Search for pth files
        if f.endswith(".pth"):
            model_weights_path.append(os.path.join("..", "best_model", f))
if not model_weights_path:
    # Download from Quilt
    print("No weights were found locally. Downloading from Quilt...")
    pkg = Package.browse(
        "matheus/assay_dev_actn2_classifier", "s3://allencell-internal-quilt"
    ).fetch("../best_model")
    metadata = pd.read_csv(os.path.join("..", "best_model", "metadata.csv"))
    model_weights_path = os.path.join("..", "best_model", metadata.model_path[0])
elif len(model_weights_path) > 1:
    # Use the last one in case more than 1 are found
    model_weights_path = model_weights_path[-1]
    print(f"More than 1 weight file found. Using the last one: {model_weights_path}.")
else:
    # Only one file found
    model_weights_path = model_weights_path[0]

# Load weights
classifier = cardio_cnn(model_path=model_weights_path)

# Segment the images for background calculation
Example #12
    def __init__(self,
                 num_batches,
                 BATCH_SIZE,
                 model_kwargs,
                 shuffle=True,
                 corr=False,
                 train=True,
                 mask=False):
        """
        Args:
            num_batches: Number of batches of synthetic data
            BATCH_SIZE: batchsize of synthetic data
            model_kwargs: dictionary containing "x_dim"
            which indicates input data size
            shuffle:  True sets condition vector in input data to 0
            for all possible permutations
            corr: True sets dependent input dimensions
            via a correlation matrix
        """
        self.num_batches = num_batches
        self.BATCH_SIZE = BATCH_SIZE
        self.corr = corr
        self.shuffle = shuffle
        self.model_kwargs = model_kwargs
        self.train = train

        Batches_C_train, Batches_C_test = torch.empty([0]), torch.empty([0])
        Batches_X_train, Batches_X_test = torch.empty([0]), torch.empty([0])
        Batches_conds_train, Batches_conds_test = torch.empty(
            [0]), torch.empty([0])

        ds = Package.browse("aics/pipeline_integrated_single_cell",
                            "s3://allencell")

        # Specify path to pre-downloaded quilt json files
        try:
            path_to_json = model_kwargs['json_quilt_path']
        except KeyError:
            path_to_json = "/home/ritvik.vasan/test/"

        # json_files = [pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json')]

        meta_to_file_name = []
        for f in ds["cell_features"]:
            meta_to_file_name.append({
                "filename": f,
                **ds["cell_features"][f].meta
            })

        metas = pd.DataFrame(meta_to_file_name)

        # Specify path to config file for FeatureDatabase
        try:
            db = FeatureDatabase(model_kwargs['config_path'])
        except KeyError:
            db = FeatureDatabase("/home/ritvik.vasan/config.json")

        t = db.get_pg_table("featuresets",
                            "aics-mitosis-classifier-four-stage_v1.0.0")

        semi = metas.merge(t,
                           left_on="CellId",
                           right_on="CellId",
                           suffixes=("_meta", "_mito"))

        # Only interphase or no interphase
        semi['Interphase and Mitotic Stages [stage]'] = semi[
            'Interphase and Mitotic Stages [stage]'].apply(lambda x: 0
                                                           if x == 0.0 else 1)

        dd = defaultdict(list)
        for i in range(len(semi['filename'])):
            this_file = semi['filename'][i]
            a = json.loads(open(path_to_json + this_file).read())
            a = dict([(key, value) for key, value in a.items() if key not in [
                'imsize_orig', 'com', 'angle', 'flipdim', 'imsize_registered'
            ]])
            a.update({'CellId': semi['CellId'][i]})
            for key, value in a.items():
                dd[key].append(value)

        features_plus_cellid = pd.DataFrame(dict(dd))

        meta_plus_features = pd.merge(semi, features_plus_cellid, on='CellId')

        i_care_cols = [
            c for c in meta_plus_features.columns if c not in [
                'CellId', 'CellIndex', 'FOVId', 'WellId', 'FeatureExplorerURL',
                'CellLine', 'Workflow', 'associates', 'filename',
                'NucMembSegmentationAlgorithm',
                'NucMembSegmentationAlgorithmVersion', 'PlateId'
            ]
        ]

        meta_plus_features = meta_plus_features[i_care_cols]
        meta_plus_features.dropna(inplace=True)

        categorical_features = [
            'Gene', 'ProteinDisplayName', 'StructureDisplayName'
        ]

        categorical_dataframe = meta_plus_features[categorical_features]

        non_categorical_dataframe = meta_plus_features[[
            c for c in meta_plus_features.columns
            if c not in categorical_features
        ]]

        one_hot_categorical_features = pd.get_dummies(categorical_dataframe,
                                                      prefix=None,
                                                      drop_first=True)

        # num_of_cells = len(non_categorical_dataframe)

        # This is mean, std normalization
        non_categorical_dataframe = non_categorical_dataframe.iloc[:, :]

        # print(non_categorical_dataframe.shape)

        self._feature_names = [
            i for i in non_categorical_dataframe.columns
        ] + [i for i in one_hot_categorical_features.columns]

        num_training_samples = 33000

        x = non_categorical_dataframe.values
        std_scaler = preprocessing.StandardScaler()
        # 0 is binary, don't scale that column
        x_train_and_test_scaled = std_scaler.fit_transform(
            x[:, 1:model_kwargs["x_dim"] + 1])
        x_train_scaled = std_scaler.fit_transform(
            x[:num_training_samples, 1:model_kwargs["x_dim"] + 1])
        x_test_scaled = std_scaler.transform(x[num_training_samples:,
                                               1:model_kwargs["x_dim"] + 1])

        if model_kwargs["x_dim"] > 103:
            non_categorical_train = pd.DataFrame(
                np.concatenate((x[:num_training_samples, 0:1], x_train_scaled),
                               axis=1))
            non_categorical_test = pd.DataFrame(
                np.concatenate((x[num_training_samples:, 0:1], x_test_scaled),
                               axis=1))
            non_categorical_train_and_test = pd.DataFrame(
                np.concatenate((x[:, 0:1], x_train_and_test_scaled), axis=1))
            # print(non_categorical_train.shape, non_categorical_test.shape)
            # print(len(self._feature_names))
            # print(non_categorical_train_and_test.shape)
            non_categorical_train_and_test.columns = self._feature_names[:103]
        else:
            non_categorical_train = pd.DataFrame(x_train_scaled)
            non_categorical_test = pd.DataFrame(x_test_scaled)
            non_categorical_train_and_test = pd.DataFrame(
                x_train_and_test_scaled)
            self._feature_names = self._feature_names[1:model_kwargs['x_dim'] +
                                                      1]
            non_categorical_train_and_test.columns = self._feature_names[:]
        # print(non_categorical_train.shape, non_categorical_test.shape, len(self._feature_names))

        # Convert to torch tensor
        self._non_categorical_dataframe = non_categorical_train_and_test
        self._categorical_dataframe = one_hot_categorical_features

        X_train_whole_batch = torch.from_numpy(
            non_categorical_train.values).float()
        X_test_whole_batch = torch.from_numpy(
            non_categorical_test.values).float()
        all_categorical_X = torch.from_numpy(
            one_hot_categorical_features.values).float()

        if model_kwargs["x_dim"] > 103:
            X_train_whole_batch = torch.cat(
                (X_train_whole_batch,
                 all_categorical_X[:num_training_samples, :]), 1)
            X_test_whole_batch = torch.cat(
                (X_test_whole_batch,
                 all_categorical_X[num_training_samples:, :]), 1)

        for j, i in enumerate(range(self.num_batches)):
            X_train = X_train_whole_batch[i * self.BATCH_SIZE:(i + 1) *
                                          self.BATCH_SIZE, :]
            X_test = X_test_whole_batch[i * self.BATCH_SIZE:(i + 1) *
                                        self.BATCH_SIZE, :]

            if X_train.size()[0] != self.BATCH_SIZE:
                break

            # print(X_train.size(), X_test.size())
            # print(Batches_X_train.size(), Batches_X_test.size())

            self._color = X_train[:, 0]

            C_train = X_train.clone()
            C_test = X_test.clone()

            count = 0
            if self.shuffle is True:
                while count == 0:
                    C_mask_train = torch.zeros(C_train.shape).bernoulli_(0.5)
                    C_mask_test = torch.zeros(C_test.shape).bernoulli_(0.5)
                    count = 1
            else:
                C_mask_train = torch.zeros(C_train.shape).bernoulli_(0)
                C_mask_test = torch.zeros(C_test.shape).bernoulli_(0)

            C_train[C_mask_train.byte()] = 0
            C_train_indicator = C_mask_train == 0

            C_test[C_mask_test.byte()] = 0
            C_test_indicator = C_mask_test == 0

            C_train = torch.cat(
                [C_train.float(), C_train_indicator.float()], 1)
            C_test = torch.cat([C_test.float(), C_test_indicator.float()], 1)

            X_train = X_train.view([1, -1, X_train.size()[-1]])
            X_test = X_test.view([1, -1, X_test.size()[-1]])
            C_train = C_train.view([1, -1, X_train.size()[-1] * 2])
            C_test = C_test.view([1, -1, X_test.size()[-1] * 2])

            # Sum up
            conds_train = C_train[:, :, X_train.size()[-1]:].sum(2)
            conds_test = C_test[:, :, X_test.size()[-1]:].sum(2)

            Batches_X_train = torch.cat([Batches_X_train, X_train], 0)
            Batches_C_train = torch.cat([Batches_C_train, C_train], 0)
            Batches_conds_train = torch.cat([Batches_conds_train, conds_train],
                                            0)
            try:
                Batches_X_test = torch.cat([Batches_X_test, X_test], 0)
                Batches_C_test = torch.cat([Batches_C_test, C_test], 0)
                Batches_conds_test = torch.cat(
                    [Batches_conds_test, conds_test], 0)
            except Exception:
                pass

        self._batches_x_train = Batches_X_train
        self._batches_c_train = Batches_C_train
        self._batches_conds_train = Batches_conds_train

        self._batches_x_test = Batches_X_test
        self._batches_c_test = Batches_C_test
        self._batches_conds_test = Batches_conds_test
Example #13
import torch.utils.data as data
import torchvision.transforms as transforms

import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

from functions import load_data, dataset_training, cardio_cnn_resnet_18, train, validation

from quilt3 import Package
import pandas as pd

os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

# Load data from quilt
p = Package.browse(
    "matheus/assay_dev_classifier_train", "s3://allencell-internal-quilt"
).fetch("data/")

manifest = pd.read_csv("data/metadata.csv", index_col=0)

# model save path
save_model_path = "./models/"  # save Pytorch models

# set model parameters
data_path = f"data/{manifest.DataPath[0]}"
label_lists = (
    f"data/{manifest.AnnotationDiffusePath[0]}",
    f"data/{manifest.AnnotationFibersPath[0]}",
    f"data/{manifest.AnnotationDisorganizedPunctaPath[0]}",
    f"data/{manifest.AnnotationOrganizedPunctaPath[0]}",
    f"data/{manifest.AnnotationOrganizedZDisks[0]}",
Example #14
    def test_remote_browse(self):
        """ Verify loading manifest from s3 """
        registry = 's3://test-bucket'

        top_hash = 'abcdefgh' * 8

        # Make the first request.

        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO(top_hash.encode()),
            },
            expected_params={
                'Bucket': 'test-bucket',
                'Key': '.quilt/named_packages/Quilt/test/latest',
            }
        )

        self.s3_stubber.add_response(
            method='head_object',
            service_response={
                'VersionId': 'v1',
                'ContentLength': REMOTE_MANIFEST.stat().st_size,
            },
            expected_params={
                'Bucket': 'test-bucket',
                'Key': f'.quilt/packages/{top_hash}',
            }
        )

        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO(REMOTE_MANIFEST.read_bytes()),
                'ContentLength': REMOTE_MANIFEST.stat().st_size,
            },
            expected_params={
                'Bucket': 'test-bucket',
                'Key': f'.quilt/packages/{top_hash}',
            }
        )

        pkg = Package.browse('Quilt/test', registry=registry)
        assert 'foo' in pkg

        # Make the second request. Gets "latest" - but the rest should be cached.

        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO(top_hash.encode()),
            },
            expected_params={
                'Bucket': 'test-bucket',
                'Key': '.quilt/named_packages/Quilt/test/latest',
            }
        )

        pkg2 = Package.browse('Quilt/test', registry=registry)
        assert 'foo' in pkg2

        # Make another request with a top hash. Everything should be cached.

        pkg3 = Package.browse('Quilt/test', top_hash=top_hash, registry=registry)
        assert 'foo' in pkg3

        # Make a request with a short hash.

        self.s3_stubber.add_response(
            method='list_objects_v2',
            service_response={
                'Contents': [
                    {
                        'Key': f'.quilt/packages/{top_hash}',
                        'Size': 64,
                    },
                    {
                        'Key': f'.quilt/packages/{"a" * 64}',
                        'Size': 64,
                    }
                ]
            },
            expected_params={
                'Bucket': 'test-bucket',
                'Prefix': '.quilt/packages/',
            }
        )

        pkg3 = Package.browse('Quilt/test', top_hash='abcdef', registry=registry)
        assert 'foo' in pkg3

        # Make a request with a bad short hash.

        with self.assertRaises(QuiltException):
            Package.browse('Quilt/test', top_hash='abcde', registry=registry)
        with self.assertRaises(QuiltException):
            Package.browse('Quilt/test', top_hash='a' * 65, registry=registry)

        # Make a request with a non-existent short hash.

        self.s3_stubber.add_response(
            method='list_objects_v2',
            service_response={
                'Contents': [
                    {
                        'Key': f'.quilt/packages/{top_hash}',
                        'Size': 64,
                    },
                    {
                        'Key': f'.quilt/packages/{"a" * 64}',
                        'Size': 64,
                    }
                ]
            },
            expected_params={
                'Bucket': 'test-bucket',
                'Prefix': '.quilt/packages/',
            }
        )

        with self.assertRaises(QuiltException):
            Package.browse('Quilt/test', top_hash='123456', registry=registry)
Example #15
    def test_install(self):
        # Manifest

        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO(b'abcdef'),
            },
            expected_params={
                'Bucket': 'my-test-bucket',
                'Key': '.quilt/named_packages/Quilt/Foo/latest',
            }
        )

        self.s3_stubber.add_response(
            method='head_object',
            service_response={
                'VersionId': 'v1',
                'ContentLength': REMOTE_MANIFEST.stat().st_size,
            },
            expected_params={
                'Bucket': 'my-test-bucket',
                'Key': '.quilt/packages/abcdef',
            }
        )

        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO(REMOTE_MANIFEST.read_bytes()),
                'ContentLength': REMOTE_MANIFEST.stat().st_size,
            },
            expected_params={
                'Bucket': 'my-test-bucket',
                'Key': '.quilt/packages/abcdef',
            }
        )

        # Objects

        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO(b'a,b,c'),
            },
            expected_params={
                'Bucket': 'my_bucket',
                'Key': 'my_data_pkg/bar.csv',
            }
        )

        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO(b'Hello World!'),
            },
            expected_params={
                'Bucket': 'my_bucket',
                'Key': 'my_data_pkg/baz/bat',
            }
        )

        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO('💩'.encode()),
            },
            expected_params={
                'Bucket': 'my_bucket',
                'Key': 'my_data_pkg/foo',
            }
        )

        with patch('quilt3.data_transfer.s3_transfer_config.max_request_concurrency', 1):
            Package.install('Quilt/Foo', registry='s3://my-test-bucket', dest='package')

        p = Package.browse('Quilt/Foo')

        assert p['foo'].get() == 's3://my_bucket/my_data_pkg/foo'

        # Check that the cache works.
        local_path = pathlib.Path(p['foo'].get_cached_path())
        assert local_path == pathlib.Path.cwd() / 'package/foo'
        assert local_path.read_text('utf8') == '💩'

        # Test that get_bytes and get_as_string work
        assert p['foo'].get_bytes().decode("utf-8") == '💩'
        assert p['foo'].get_as_string() == '💩'

        # Check that moving the file invalidates the cache...
        local_path.rename('foo2')
        assert p['foo'].get_cached_path() is None

        # ...but moving it back fixes it.
        pathlib.Path('foo2').rename(local_path)
        assert p['foo'].get_cached_path() == str(local_path)

        # Check that changing the contents invalidates the cache.
        local_path.write_text('omg')
        assert p['foo'].get_cached_path() is None

        # Check that installing the package again reuses the cached manifest and two objects - but not "foo".
        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO(b'abcdef'),
            },
            expected_params={
                'Bucket': 'my-test-bucket',
                'Key': '.quilt/named_packages/Quilt/Foo/latest',
            }
        )
        self.s3_stubber.add_response(
            method='get_object',
            service_response={
                'VersionId': 'v1',
                'Body': BytesIO('💩'.encode()),
            },
            expected_params={
                'Bucket': 'my_bucket',
                'Key': 'my_data_pkg/foo',
            }
        )

        with patch('quilt3.data_transfer.s3_transfer_config.max_request_concurrency', 1):
            Package.install('Quilt/Foo', registry='s3://my-test-bucket', dest='package/')