from hca.dss import DSSClient

# Example: fetch one specific bundle version from the AWS replica and
# store it under the local ``download_test`` directory.
dss = DSSClient()

dss.download(
    replica="aws",
    bundle_uuid="ffffaf55-f19c-40e3-aa81-a6c69d357265",
    version="2019-08-01T200147.836832Z",
    download_dir="download_test",
)
class DSSClientTestCase(TmpDirTestCase):
    """Shared fixtures and assertions for ``DSSClient`` download tests.

    ``setUp`` writes a three-file manifest to ``manifest.tsv`` in the
    current directory.  The ``_mock_*`` helpers run the client's download
    entry points with ``DownloadContext._download_file`` and
    ``DSSClient.get_bundle`` patched, and the ``_assert_*`` helpers check
    the resulting files and manifest.
    """

    maxDiff = None

    # Declared column by column; ``zip`` transposes the columns into rows,
    # so the first row is the header.
    # NOTE(review): the second hash is upper-case while the expected paths
    # are lower-case -- presumably on purpose, to exercise case handling.
    manifest = list(
        zip(
            ('bundle_uuid', 'a_uuid', 'b_uuid', 'c_uuid'),
            ('bundle_version', '1_version', '1_version', '1_version'),
            ('file_content_type', 'somestuff', 'somestuff', 'somestuff'),
            ('file_name', 'a_file_name', 'b_file_name', 'c_file_name'),
            ('file_sha256',
             'ad3fc1e4898e0bce096be5151964a81929dbd2a92bd5ed56a39a8e133053831d',
             '8F35071EAEEDD9D6F575A8B0F291DAEAC4C1DFDFA133B5C561232A00BF18C4B4',
             '8f3404db04bdede03e9128a4b48599d0ecde5b2e58ed9ce52ce84c3d54a3429c'),
            ('file_size', '12', '2', '41'),
            ('file_uuid', 'af_uuid', 'bf_uuid', 'cf_uuid'),
            ('file_version', 'af_version', 'af_version', 'af_version'),
            ('file_indexed', 'False', 'False', 'False'),
        )
    )

    version_dir = os.path.join('.hca', 'v2', 'files_2_4')

    def setUp(self):
        """Create the client and write the default manifest to disk."""
        super().setUp()
        self.dss = DSSClient()
        self._write_manifest(self.manifest)
        self.manifest_file = 'manifest.tsv'

    def tearDown(self):
        super().tearDown()

    def _write_manifest(self, manifest):
        """Write *manifest* (an iterable of string rows) to ``manifest.tsv`` as TSV."""
        with open('manifest.tsv', 'w') as f:
            f.write('\n'.join('\t'.join(row) for row in manifest))

    def _write_uniform_manifest(self):
        """Create a manifest where all files have the same hash"""
        new_manifest = [self.manifest[0]]
        for row in self.manifest[1:]:
            new_row = list(row)
            new_row[4] = 'fakeHASH'  # index 4 is the ``file_sha256`` column
            new_manifest.append(new_row)
        self._write_manifest(new_manifest)

    def _files_present(self):
        """Return the set of paths of every file below the current directory."""
        return {
            os.path.join(dir_path, f)
            for dir_path, _, files in walk('.')
            for f in files
        }

    def _mock_download_manifest(self, *args, **kwargs):
        """Run ``download_manifest`` with the file download and ``get_bundle`` patched.

        All arguments are forwarded to ``self.dss.download_manifest``.

        :returns: the mock that replaced ``DownloadContext._download_file``,
            so callers can inspect how it was called.
        """
        with patch('hca.dss.DownloadContext._download_file',
                   side_effect=_fake_download_file) as download_func, \
                patch('hca.dss.DSSClient.get_bundle') as mock_get_bundle:
            mock_get_bundle.paginate = _make_fake_paginate()
            self.dss.download_manifest(*args, **kwargs)
        return download_func

    def _mock_download(self, *args, **kwargs):
        """Run ``download`` with the file download and ``get_bundle`` patched.

        All arguments are forwarded to ``self.dss.download``.
        """
        with patch('hca.dss.DownloadContext._download_file',
                   side_effect=_fake_download_file), \
                patch('hca.dss.DSSClient.get_bundle') as mock_get_bundle:
            mock_get_bundle.paginate = _make_fake_paginate()
            self.dss.download(*args, **kwargs)

    def _expected_file_paths(self, prefix):
        """Return the three expected hash-named paths under *prefix*, in manifest row order."""
        return [
            os.path.join(
                prefix, 'ad', '3fc1',
                'ad3fc1e4898e0bce096be5151964a81929dbd2a92bd5ed56a39a8e133053831d'
            ),
            os.path.join(
                prefix, '8f', '3507',
                '8f35071eaeedd9d6f575a8b0f291daeac4c1dfdfa133b5c561232a00bf18c4b4'
            ),
            os.path.join(
                prefix, '8f', '3404',
                '8f3404db04bdede03e9128a4b48599d0ecde5b2e58ed9ce52ce84c3d54a3429c'
            ),
        ]

    def _assert_all_files_downloaded(self, more_files=None, prefix=''):
        """Assert the files on disk are exactly the manifest plus the downloads.

        :param more_files: optional iterable of additional paths that are
            expected to exist.
        :param prefix: directory that is expected to contain ``version_dir``.
        """
        prefix = os.path.join(prefix, self.version_dir)
        files_present = self._files_present()
        # Add dots so that files match what `walk()` returns
        if any(f.startswith('.') for f in files_present):
            prefix = os.path.join('.', prefix)
        files_expected = {
            os.path.join('.', os.path.basename(self.manifest_file)),
            *self._expected_file_paths(prefix),
        }
        if more_files:
            files_expected.update(more_files)
        # Compare against what is actually on disk; comparing the expected
        # set with itself would make this assertion pass unconditionally.
        self.assertEqual(files_expected, files_present)

    def _assert_manifest_updated_with_paths(self, prefix):
        """Assert the manifest on disk gained a ``file_path`` column.

        :param prefix: directory that is expected to contain ``version_dir``;
            it is prepended to every expected ``file_path`` value.
        """
        manifest_name = os.path.basename(self.manifest_file)
        self.assertTrue(os.path.isfile(manifest_name))
        with open(manifest_name, 'r') as f:
            actual_rows = [
                tuple(line.split('\t'))
                for line in f.read().splitlines()
            ]
        # Transpose rows back into columns, append the expected new column,
        # then transpose again to obtain the expected rows.
        columns = list(zip(*self.manifest))
        version_dir = os.path.join(prefix, self.version_dir)
        columns.append(('file_path', *self._expected_file_paths(version_dir)))
        expected_rows = list(zip(*columns))
        self.assertEqual(actual_rows, expected_rows)

    def _assert_manifest_not_updated(self):
        """Assert that no parsed manifest row contains a ``file_path`` entry."""
        for row in ManifestDownloadContext._parse_manifest(self.manifest_file):
            self.assertNotIn('file_path', row)
from hca.dss import DSSClient

dss = DSSClient()

UUID = "ffffaf55-f19c-40e3-aa81-a6c69d357265"
# A bundle version is a timestamp string, not a UUID; the previous value
# was a copy of UUID.
VERSION = "2019-08-01T200147.836832Z"

# Download the metadata only
# NOTE(review): `no_data` / `no_metadata` are assumed to be supported by the
# installed `hca` release -- confirm against DSSClient.download's signature.
dss.download(
    bundle_uuid=UUID,
    version=VERSION,
    replica="aws",
    download_dir=".hca_metadata_only",
    no_data=True,
)

# Download the data only
dss.download(
    bundle_uuid=UUID,
    version=VERSION,
    replica="aws",
    download_dir=".hca_data_only",
    no_metadata=True,
)
# {
#   "checkout_job_id": "6a7438be-3998-4f1b-807c-7848dceaf351"
# }

# NOTE(review): `dss`, `uuid` and `version` are not defined in this snippet;
# they are assumed to be set up earlier (a DSSClient instance, the bundle
# UUID and the bundle version) -- confirm.
report_lines = [f"Bundle: {uuid}.{version}"]

checkout_id = dss.post_bundles_checkout(
    uuid=uuid, replica="aws")["checkout_job_id"]

# A JSON response that displays status and/or location of checkout.
bundle_checkout_status = dss.get_bundles_checkout(
    replica="aws", checkout_job_id=checkout_id)["status"]

print(checkout_id + " " + bundle_checkout_status)

# Download a bundle and save it to the local filesystem as a directory.
dss.download(
    bundle_uuid=uuid,
    replica="aws",
    version=version,
    download_dir="./download_test",
)

# Retrieves a bundle given a UUID and optionally a version.
# `get_bundle` has to be called (iterating over the method object itself
# raises TypeError).
# NOTE(review): the response is assumed to list the files under
# ["bundle"]["files"] -- confirm against the DSS API.
bundle = dss.get_bundle(replica="aws", uuid=uuid, version=version)["bundle"]

files_uuid = []
for file_info in bundle["files"]:
    file_version = file_info["version"]
    file_uuid = file_info["uuid"]
    file_name = file_info["name"]
    file_sha256 = file_info["sha256"]
    files_uuid.append(file_uuid)
    report_lines.append(f" File: {file_name} ")
    report_lines.append(f" Sha_256:{file_sha256} ")
    report_lines.append(f" UUID/Version:{file_uuid}.{file_version} ")

print("\n".join(report_lines))