def decrypt(self, *inputs):
    '''
    Decrypt and decompress every input, which may be a manifest or a blob.

    The inputs are handed to download_list(..., extract=True), which yields
    (filename, fs_info, partials) triples.

    * A filename ending in '.mfn' is loaded with self.load_manifest. Each
      entry (or, when partials is non-empty, only the entries named in
      partials) is recreated below the current directory: its blobs are
      fetched via download_list using the same fs_info, passed to
      self.decrypt_single_blob, and streamed through a zstd decompressor
      into the output file. The entry's atime/mtime are then applied and
      progress is reported through _print_progress.
    * Any other filename is treated as a single blob and is decrypted and
      decompressed into the stream returned by _stdout().
    '''
    for filename, fs_info, partials in download_list(inputs, extract=True):
        decompressor = zstd.ZstdDecompressor()

        if not filename.endswith('.mfn'):
            # not a manifest: decrypt the lone blob straight to stdout
            with decompressor.stream_writer(_stdout()) as decompress_out:
                self.decrypt_single_blob(filename, out=decompress_out)
            continue

        mfn = self.load_manifest(filename)
        for position, (og_filename, info) in enumerate(mfn.items(), start=1):
            # an empty/None partials means "extract everything"
            wanted = not partials or og_filename in partials
            if not wanted:
                continue

            copy_filename = path.normpath('./{}'.format(og_filename))
            parent_dir = path.dirname(copy_filename)
            if parent_dir:
                makedirs(parent_dir, exist_ok=True)

            with open(copy_filename, 'wb') as target:
                with decompressor.stream_writer(target) as decompress_out:
                    for blob in download_list(info['blobs'], fs_info=fs_info):
                        self.decrypt_single_blob(blob, out=decompress_out)

            # restore timestamps only after the file is closed, so the
            # final write cannot bump mtime again
            utime(copy_filename, times=(info['atime'], info['mtime']))

            # print progress to stdout
            _print_progress(position, len(mfn), og_filename)

        # NOTE(review): assumed to run once per manifest, after all of its
        # entries have been processed -- confirm against the original layout
        if self.consume:
            remove(filename)
def test_pass_through(self):
    '''
    With only local paths, iterating download_list(files) yields exactly
    the entries of files, in the same order.
    '''
    local_files = ['a', 'b', 'c', 'd']

    from_list = list(download_list(local_files))
    self.assertEqual(from_list, local_files)

    # the paths may also be given as separate positional arguments
    from_args = list(download_list(*local_files))
    self.assertEqual(from_args, local_files)
def test_download_extract_with_partials(self, mock_s3, mock_b2):
    '''
    With extract=True, download_list yields (path, fs_info, partials)
    triples: one per manifest, with the non-manifest arguments that follow
    a manifest collected as that manifest's set of partials.
    '''
    # calling a mocked class returns the mock itself, so constructor calls
    # and method calls are all recorded on the same object
    mock_b2.return_value = mock_b2
    mock_s3.return_value = mock_s3

    yielded = []
    requests = download_list(
        'boring.mfn', 'file1', 'file2',
        's3://bucket1/file.mfn', 'dir/file',
        'b2://bucket2/another.mfn',
        extract=True)
    for local_path, fs_info, wanted in requests:
        yielded.append((local_path, fs_info, wanted))
        if local_path != 'boring.mfn':
            # no tempfile download for local file
            self.assertTrue(path.exists(local_path))

    local_paths = [item[0] for item in yielded]
    fs_infos = [item[1] for item in yielded]
    partials = [item[2] for item in yielded]

    # local manifest: passed through untouched, followed by two partials
    self.assertEqual(local_paths[0], 'boring.mfn')
    self.assertEqual(fs_infos[0], [])
    self.assertEqual(partials[0], {'file1', 'file2'})

    # s3 manifest followed by a single partial
    mock_s3.assert_called_once_with('bucket1')
    mock_s3.download_file.assert_any_call(local_paths[1], 'file.mfn')
    self.assertEqual(fs_infos[1], ('s3', 'bucket1'))
    self.assertEqual(partials[1], {'dir/file'})

    # b2 manifest with nothing after it
    mock_b2.assert_called_once_with('bucket2')
    mock_b2.download_file.assert_any_call(local_paths[2], 'another.mfn')
    self.assertEqual(fs_infos[2], ('b2', 'bucket2'))
    self.assertEqual(partials[2], None)

    # should clean up
    for leftover in local_paths:
        self.assertFalse(path.exists(leftover))
def dump_manifest(self, *inputs, show_filenames=True):
    '''
    Print the blobs referenced by every manifest in inputs.

    Each input is passed through download_list. For every resulting file
    a '*** <filename>:' banner is written to stderr, the manifest is loaded
    with self.load_manifest, and the blobs of each entry are printed to
    stdout one per line. When show_filenames is true, each entry's blobs
    are preceded by a '* <original filename>:' line.
    '''
    for mfn_path in download_list(inputs):
        # the banner goes to stderr so stdout carries only the listing
        print('*** {}:'.format(mfn_path), file=sys.stderr)

        entries = self.load_manifest(mfn_path)
        for name, entry in entries.items():
            if show_filenames:
                print('* {}:'.format(name))
            for blob in entry['blobs']:
                print(blob)
def dump_manifest_index(self, *inputs):
    '''
    Print the blobs listed in every manifest index in inputs.

    When self.box equals self.index_box the work is delegated to
    self.dump_manifest with show_filenames=False. Otherwise each input is
    passed through download_list; for every resulting file a
    '*** <filename>:' banner is written to stderr and each item of
    self.load_manifest_index(filename) is printed to stdout.
    '''
    if self.box == self.index_box:
        # NOTE(review): presumably the index shares the manifest format in
        # this case -- confirm against the code that writes the index
        self.dump_manifest(*inputs, show_filenames=False)
        return

    for index_path in download_list(inputs):
        print('*** {}:'.format(index_path), file=sys.stderr)
        for blob in self.load_manifest_index(index_path):
            print(blob)
def test_download_blobs_with_fs_info(self, mock_s3):
    '''
    Bare blob names are downloaded from the filesystem given by fs_info.
    '''
    # this is the common use case -- get the fs_info from the mfn download,
    # pass it in with `fs_info`
    mock_s3.return_value = mock_s3

    blob_names = ['abcdef1234', 'fedcba1234']
    downloaded = []
    for tmp_path in download_list(blob_names, fs_info=('s3', 'mybucket')):
        downloaded.append(tmp_path)
        # the temp file must exist while the caller is using it
        self.assertTrue(path.exists(tmp_path))

    mock_s3.assert_any_call('mybucket')
    for tmp_path, blob_name in zip(downloaded, blob_names):
        mock_s3.download_file.assert_any_call(tmp_path, _data_path(blob_name))

    # should clean up
    for tmp_path in downloaded:
        self.assertFalse(path.exists(tmp_path))
def test_download_mfns(self, mock_s3, mock_b2):
    '''
    Remote manifest urls are downloaded to temp files under their own
    names, one bucket object per url.
    '''
    # calling a mocked class returns the mock itself, so constructor calls
    # and method calls are all recorded on the same object
    mock_b2.return_value = mock_b2
    mock_s3.return_value = mock_s3

    downloaded = []
    urls = ('s3://bucket1/file.mfn', 'b2://bucket2/another.mfn')
    for tmp_path in download_list(*urls):
        downloaded.append(tmp_path)
        # the temp file must exist while the caller is using it
        self.assertTrue(path.exists(tmp_path))

    mock_s3.assert_called_once_with('bucket1')
    mock_s3.download_file.assert_any_call(downloaded[0], 'file.mfn')

    mock_b2.assert_called_once_with('bucket2')
    mock_b2.download_file.assert_any_call(downloaded[1], 'another.mfn')

    # should clean up
    for tmp_path in downloaded:
        self.assertFalse(path.exists(tmp_path))
def test_download_blobs(self, mock_s3, mock_b2):
    '''
    Remote blob urls are downloaded from the blob's data path
    ('data/<first two chars>/<blob>') inside the bucket.
    '''
    # calling a mocked class returns the mock itself, so constructor calls
    # and method calls are all recorded on the same object
    mock_b2.return_value = mock_b2
    mock_s3.return_value = mock_s3

    downloaded = []
    urls = ('s3://bucket1/abcdef1234', 'b2://bucket2/fedcba1234')
    for tmp_path in download_list(*urls):
        downloaded.append(tmp_path)
        # the temp file must exist while the caller is using it
        self.assertTrue(path.exists(tmp_path))

    mock_s3.assert_called_once_with('bucket1')
    mock_s3.download_file.assert_any_call(downloaded[0], 'data/ab/abcdef1234')

    mock_b2.assert_called_once_with('bucket2')
    mock_b2.download_file.assert_any_call(downloaded[1], 'data/fe/fedcba1234')

    # should clean up
    for tmp_path in downloaded:
        self.assertFalse(path.exists(tmp_path))
def test_pass_through_tricky(self):
    '''
    A local filename containing colons (a timestamp) can resemble a url,
    but must still be passed through unchanged.
    '''
    tricky = ['2020-01-23T18:02:16.482212.mfn']
    result = list(download_list(tricky))
    self.assertEqual(result, tricky)