Example #1
0
    def test_lo_chunk_reuse(self):
        """Test that chunks that are reused are only downloaded once.

        Extracts the manifest onto ``self.target_lo`` with the target itself
        and the remote store as chunk sources, then checks that the written
        data matches ``self.contents`` and that fewer bytes than the full
        image were attributed to the remote source.
        """
        manifest = casync.parse_caibx(self.manifest_fn)

        # The target is listed ahead of the remote store, mirroring the
        # order in which the sources are handed to extract().
        target_source = ('target', casync.FileChunkReader(self.target_lo),
                         casync.build_chunk_dict(manifest))
        remote_source = ('remote', casync.RemoteChunkReader(self.store_fn),
                         casync.build_chunk_dict(manifest))

        stats = casync.extract(
            manifest, [target_source, remote_source], self.target_lo)

        # Only the first len(contents) bytes are compared; anything the
        # target holds beyond the image is ignored.
        with open(self.target_lo, 'rb') as written:
            data = written.read(len(self.contents))
            self.assertEqual(data, self.contents)

        self.assertLess(stats['remote'], len(self.contents))
Example #2
0
def extract_casync_image(target_slot_number: int, partition: dict, cloudlog):
    """Write a partition image with casync and verify the result.

    Chunk sources are offered to ``casync.extract`` in this order: an
    optional seed (the sibling partition path), the target partition itself,
    and finally the remote chunk store.

    Args:
        target_slot_number: Slot number forwarded to ``get_partition_path``
            and ``verify_partition``.
        partition: Partition description. The keys read here are
            ``'casync_caibx'``, ``'casync_store'``, ``'name'``, ``'size'``
            and (only for the failure message) ``'hash_raw'``.
        cloudlog: Logger object providing ``info``, ``error`` and
            ``exception``.

    Raises:
        Exception: If ``verify_partition`` reports a mismatch after the
            image has been written.
    """
    path = get_partition_path(target_slot_number, partition)

    # The seed path is the target path with its trailing 'a'/'b' swapped
    # (any other final character is replaced by 'a').
    sibling_suffix = 'b' if path[-1] == 'a' else 'a'
    seed_path = path[:-1] + sibling_suffix

    target = casync.parse_caibx(partition['casync_caibx'])

    sources: List[Tuple[str, casync.ChunkReader, casync.ChunkDict]] = []

    # First source is the current partition. It is best effort: any failure
    # while hashing it or fetching its manifest is logged and the seed is
    # simply left out.
    try:
        raw_hash = get_raw_hash(seed_path, partition['size'])
        caibx_url = f"{CAIBX_URL}{partition['name']}-{raw_hash}.caibx"

        try:
            cloudlog.info(f"casync fetching {caibx_url}")
            seed_reader = casync.FileChunkReader(seed_path)
            seed_chunks = casync.build_chunk_dict(
                casync.parse_caibx(caibx_url))
            sources.append(('seed', seed_reader, seed_chunks))
        except requests.RequestException:
            cloudlog.error(f"casync failed to load {caibx_url}")
    except Exception:
        cloudlog.exception("casync failed to hash seed partition")

    # Second source is the target partition, this allows for resuming
    target_reader = casync.FileChunkReader(path)
    sources.append(('target', target_reader, casync.build_chunk_dict(target)))

    # Finally we add the remote source to download any missing chunks
    remote_reader = casync.RemoteChunkReader(partition['casync_store'])
    sources.append(('remote', remote_reader, casync.build_chunk_dict(target)))

    reported_percent = 0

    def progress(cur):
        # Print a line only when the integer percentage changes.
        nonlocal reported_percent
        percent = int(cur / partition['size'] * 100)
        if percent == reported_percent:
            return
        reported_percent = percent
        print(f"Installing {partition['name']}: {percent}", flush=True)

    stats = casync.extract(target, sources, path, progress)
    # NOTE(review): completion stats are logged at error level; this looks
    # deliberate but should be confirmed.
    cloudlog.error(f'casync done {json.dumps(stats)}')

    os.sync()
    verified = verify_partition(
        target_slot_number, partition, force_full_check=True)
    if not verified:
        raise Exception(f"Raw hash mismatch '{partition['hash_raw'].lower()}'")
Example #3
0
    def test_already_done(self):
        """Test that an already flashed target doesn't download any chunks.

        The target file is pre-populated with the complete expected
        contents, then extraction runs with the target and the remote store
        as sources. The test checks that the file is unchanged, that every
        byte was attributed to the ``target`` source, and that nothing was
        attributed to the ``remote`` source.
        """
        target = casync.parse_caibx(self.manifest_fn)

        # Simulate a target that has already been fully written.
        with open(self.target_fn, 'wb') as f:
            f.write(self.contents)

        sources = [
            ('target', casync.FileChunkReader(self.target_fn),
             casync.build_chunk_dict(target)),
            ('remote', casync.RemoteChunkReader(self.store_fn),
             casync.build_chunk_dict(target)),
        ]

        stats = casync.extract(target, sources, self.target_fn)

        with open(self.target_fn, 'rb') as f:
            self.assertEqual(f.read(), self.contents)

        self.assertEqual(stats['target'], len(self.contents))
        # Check the property the docstring names directly. .get() is used so
        # that a stats mapping with no 'remote' entry counts as zero bytes.
        self.assertEqual(stats.get('remote', 0), 0)
Example #4
0
    def test_seed(self):
        """Check that a partially matching seed reduces remote downloads.

        The seed file holds the first half of the expected contents. After
        extraction the target must equal ``self.contents``, some bytes must
        be attributed to the seed, and the remote byte count must be below
        the full image size.
        """
        manifest = casync.parse_caibx(self.manifest_fn)

        # Populate seed with half of the target contents
        half = len(self.contents) // 2
        with open(self.seed_fn, 'wb') as seed_file:
            seed_file.write(self.contents[:half])

        seed_source = ('seed', casync.FileChunkReader(self.seed_fn),
                       casync.build_chunk_dict(manifest))
        remote_source = ('remote', casync.RemoteChunkReader(self.store_fn),
                         casync.build_chunk_dict(manifest))
        stats = casync.extract(
            manifest, [seed_source, remote_source], self.target_fn)

        with open(self.target_fn, 'rb') as result_file:
            written = result_file.read()
            self.assertEqual(written, self.contents)

        self.assertGreater(stats['seed'], 0)
        self.assertLess(stats['remote'], len(self.contents))
Example #5
0
    def test_simple_extract(self):
        """Check extraction when the remote store is the only source.

        The target must end up equal to ``self.contents`` and the whole
        image size must be attributed to the ``remote`` source.
        """
        manifest = casync.parse_caibx(self.manifest_fn)

        remote_reader = casync.RemoteChunkReader(self.store_fn)
        remote_chunks = casync.build_chunk_dict(manifest)
        stats = casync.extract(
            manifest, [('remote', remote_reader, remote_chunks)],
            self.target_fn)

        with open(self.target_fn, 'rb') as result_file:
            written = result_file.read()
            self.assertEqual(written, self.contents)

        self.assertEqual(stats['remote'], len(self.contents))
Example #6
0
if __name__ == "__main__":

    parser = argparse.ArgumentParser(
        description='Compute overlap between two casync manifests')
    parser.add_argument('frm')
    parser.add_argument('to')
    args = parser.parse_args()

    frm = casync.parse_caibx(args.frm)
    to = casync.parse_caibx(args.to)
    remote_url = args.to.replace('.caibx', '')

    most_common = collections.Counter(t.sha for t in to).most_common(1)[0][0]

    frm_dict = casync.build_chunk_dict(frm)

    # Get content-length for each chunk
    with multiprocessing.Pool() as pool:
        szs = list(tqdm(pool.imap(get_chunk_download_size, to), total=len(to)))
    chunk_sizes = {t.sha: sz for (t, sz) in zip(to, szs)}

    sources: Dict[str, List[int]] = {
        'seed': [],
        'remote_uncompressed': [],
        'remote_compressed': [],
    }

    for chunk in to:
        # Assume most common chunk is the zero chunk
        if chunk.sha == most_common: