def test_lo_chunk_reuse(self):
    """Extracting onto ``self.target_lo`` must pull less than the full image from the remote.

    The destination doubles as the first chunk source, so the amount
    attributed to the 'remote' source is expected to stay below the total
    content length.
    """
    manifest = casync.parse_caibx(self.manifest_fn)

    # Source order matters: the target itself first, the remote store second.
    sources = [
        ('target', casync.FileChunkReader(self.target_lo), casync.build_chunk_dict(manifest)),
        ('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(manifest)),
    ]

    stats = casync.extract(manifest, sources, self.target_lo)

    # Only the first len(contents) bytes of the destination are compared.
    expected_len = len(self.contents)
    with open(self.target_lo, 'rb') as lo_file:
        written = lo_file.read(expected_len)

    self.assertEqual(written, self.contents)
    self.assertLess(stats['remote'], expected_len)
def extract_casync_image(target_slot_number: int, partition: dict, cloudlog):
    """Write a partition image with casync and verify the result.

    Chunk sources are tried in this order: a seed read from the sibling path
    (the partition path with its trailing 'a'/'b' swapped), the target path
    itself (which allows resuming), and finally the remote chunk store.

    Args:
        target_slot_number: slot passed through to ``get_partition_path`` and
            ``verify_partition``.
        partition: partition description; the keys read here are
            'casync_caibx', 'casync_store', 'size', 'name' and 'hash_raw'.
        cloudlog: logger providing ``info``, ``error`` and ``exception``.

    Raises:
        Exception: if ``verify_partition`` reports failure after extraction.
    """
    path = get_partition_path(target_slot_number, partition)

    # The seed lives at the same path with the last character toggled a <-> b.
    sibling_suffix = 'b' if path[-1] == 'a' else 'a'
    seed_path = path[:-1] + sibling_suffix

    target = casync.parse_caibx(partition['casync_caibx'])

    sources: List[Tuple[str, casync.ChunkReader, casync.ChunkDict]] = []

    # Source 1: the current partition, used as a seed. Any failure here is
    # logged and the seed is simply left out.
    try:
        raw_hash = get_raw_hash(seed_path, partition['size'])
        caibx_url = f"{CAIBX_URL}{partition['name']}-{raw_hash}.caibx"

        try:
            cloudlog.info(f"casync fetching {caibx_url}")
            seed_reader = casync.FileChunkReader(seed_path)
            seed_chunks = casync.build_chunk_dict(casync.parse_caibx(caibx_url))
            sources.append(('seed', seed_reader, seed_chunks))
        except requests.RequestException:
            cloudlog.error(f"casync failed to load {caibx_url}")
    except Exception:
        cloudlog.exception("casync failed to hash seed partition")

    # Source 2: the target partition itself, so an interrupted run can resume.
    target_reader = casync.FileChunkReader(path)
    sources.append(('target', target_reader, casync.build_chunk_dict(target)))

    # Source 3: the remote store, for whatever chunks are still missing.
    remote_reader = casync.RemoteChunkReader(partition['casync_store'])
    sources.append(('remote', remote_reader, casync.build_chunk_dict(target)))

    last_p = 0

    def report_progress(cur):
        # Print only when the integer percentage changes.
        nonlocal last_p
        percent = int(cur / partition['size'] * 100)
        if percent == last_p:
            return
        last_p = percent
        print(f"Installing {partition['name']}: {percent}", flush=True)

    stats = casync.extract(target, sources, path, report_progress)
    cloudlog.error(f'casync done {json.dumps(stats)}')

    os.sync()

    verified = verify_partition(target_slot_number, partition, force_full_check=True)
    if not verified:
        raise Exception(f"Raw hash mismatch '{partition['hash_raw'].lower()}'")
def test_already_done(self):
    """A target that already holds the full contents is served entirely from itself."""
    manifest = casync.parse_caibx(self.manifest_fn)

    # Pre-populate the destination with the complete expected contents.
    with open(self.target_fn, 'wb') as prefilled:
        prefilled.write(self.contents)

    sources = [
        ('target', casync.FileChunkReader(self.target_fn), casync.build_chunk_dict(manifest)),
        ('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(manifest)),
    ]

    stats = casync.extract(manifest, sources, self.target_fn)

    with open(self.target_fn, 'rb') as result_file:
        written = result_file.read()

    self.assertEqual(written, self.contents)
    # Every byte should be accounted to the 'target' source.
    self.assertEqual(stats['target'], len(self.contents))
def test_seed(self):
    """A seed holding the first half of the contents reduces what the remote supplies."""
    manifest = casync.parse_caibx(self.manifest_fn)

    # The seed file gets only the first half of the target contents.
    half = len(self.contents) // 2
    with open(self.seed_fn, 'wb') as seed_file:
        seed_file.write(self.contents[:half])

    # The seed is indexed with the chunk dict built from the target manifest.
    sources = [
        ('seed', casync.FileChunkReader(self.seed_fn), casync.build_chunk_dict(manifest)),
        ('remote', casync.RemoteChunkReader(self.store_fn), casync.build_chunk_dict(manifest)),
    ]

    stats = casync.extract(manifest, sources, self.target_fn)

    with open(self.target_fn, 'rb') as result_file:
        written = result_file.read()

    self.assertEqual(written, self.contents)
    self.assertGreater(stats['seed'], 0)
    self.assertLess(stats['remote'], len(self.contents))
def test_simple_extract(self):
    """With the remote as the only source, the whole image is attributed to it."""
    manifest = casync.parse_caibx(self.manifest_fn)

    remote_source = (
        'remote',
        casync.RemoteChunkReader(self.store_fn),
        casync.build_chunk_dict(manifest),
    )
    stats = casync.extract(manifest, [remote_source], self.target_fn)

    with open(self.target_fn, 'rb') as result_file:
        written = result_file.read()

    self.assertEqual(written, self.contents)
    self.assertEqual(stats['remote'], len(self.contents))
if __name__ == "__main__": parser = argparse.ArgumentParser( description='Compute overlap between two casync manifests') parser.add_argument('frm') parser.add_argument('to') args = parser.parse_args() frm = casync.parse_caibx(args.frm) to = casync.parse_caibx(args.to) remote_url = args.to.replace('.caibx', '') most_common = collections.Counter(t.sha for t in to).most_common(1)[0][0] frm_dict = casync.build_chunk_dict(frm) # Get content-length for each chunk with multiprocessing.Pool() as pool: szs = list(tqdm(pool.imap(get_chunk_download_size, to), total=len(to))) chunk_sizes = {t.sha: sz for (t, sz) in zip(to, szs)} sources: Dict[str, List[int]] = { 'seed': [], 'remote_uncompressed': [], 'remote_compressed': [], } for chunk in to: # Assume most common chunk is the zero chunk if chunk.sha == most_common: