Example #1
    def get_base_description(self):
        return {
            'precision': '{0:.{1}f}'.format(10**(-self._precision), self._precision),
            'hash256': hash_file(self._file_path),
        }
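This page only shows call sites; the `hash_file` helper itself varies per project. As a rough sketch, a path-based variant consistent with the 'hash256' key above might hash the file in chunks and return a SHA-256 hex digest; the function body, chunk size, and algorithm below are assumptions, not taken from the original source:

import hashlib

def hash_file(file_path, chunk_size=65536):
    """Sketch only: hash a file on disk in chunks and return the SHA-256 hex digest."""
    sha256 = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            sha256.update(chunk)
    return sha256.hexdigest()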
Example #2
    def file_metrics(self, metrics=None):
        """Extracts file information such as name, last update date, number of rows and its hash."""
        if metrics is None:
            metrics = {}
        metrics["filename"] = self.file
        metrics["structured_at"] = time.time()
        metrics["rows"] = len(self.df)
        metrics["hash"] = hash_file(self.file)
        return metrics
Example #3
def check(args: argparse.Namespace) -> None:
    """Verify that index matches files, print out any mismatches

    :param args: must have attr cold_dir: str
    """
    cold_dir = Path(args.cold_dir)
    assert cold_dir.is_dir(), "cold_dir not found!"
    index = Index(cold_dir)
    fail_count = 0

    # Set up progress bar
    total = sum((cold_dir / p).stat().st_size if (cold_dir / p).exists() else 0
                for p in index.keys())
    with tqdm(total=total, unit="B", unit_scale=True) as pbar:
        # Check that index is correct
        for p, h in index.items():
            if h != hash_file(cold_dir / p, pbar):
                print(f"Verification failed: '{p}'.", file=sys.stderr)
                fail_count += 1
        # Additionally check that index is complete
        for file in walk(cold_dir,
                         [PathAwareGitWildMatchPattern('index.txt', cold_dir)]):
            rel_path: PurePath = file.relative_to(cold_dir)
            if rel_path not in index:
                print(f"File missing from index: '{rel_path}'.",
                      file=sys.stderr)
                fail_count += 1

    if fail_count == 0:
        print("OK: Data is intact!")
    else:
        print(f"FAIL: There were {fail_count} failures!")
Example #4
    async def post(self, request):
        form = await request.form()
        filename = form['file'].filename
        file = form['file'].file

        async with get_conn() as conn:
            async with conn.transaction():
                master = await Replica(conn).master()
                if not master:
                    return api_error('No master set')

                ds = parse_dcm(file)
                hsh = hash_file(file)

                file_data = {
                    'name': os.path.basename(filename),
                    'master': master['id'],
                    'hash': hsh,
                }
                file_data.update(ds)
                filedata = await Files(conn).insert_or_select(file_data)

                storage = await Storage.get(master)
                ret = await storage.copy(file, filedata)

                await ReplicaFiles(conn).add(
                    master['id'],
                    [{
                        'id': filedata['id'],
                        **ret
                    }],
                )
        return UJSONResponse({})
Example #5
def execute(file_name: str):
    """
    Given a file name, look up the meta data from the config (where each file
    name is a key), and depending on the file extension call the appropriate
    handler.
    Each handler will ultimately attempt to create a record, either a raw
    accident (from Excel files) or one of the meta information collections
    (from csv files).
    """
    try:
        file_meta = files[file_name]
    except KeyError:
        print(f"Unknown file {file_name}")
        exit(1)

    file_path = path.normpath(f'{data_directory}/{file_name}')

    _, file_extension = path.splitext(file_path)

    file_meta['source_file'] = file_name
    file_meta['import_timestamp'] = import_timestamp
    file_meta['source_file_hash'] = hash_file(file_path)

    if file_extension == '.xlsx':
        import_xlsx(file_path, file_meta)
    elif file_extension == '.xlsb':
        import_xlsb(file_path, file_meta)
    elif file_extension == '.csv':
        import_csv(file_path, file_meta)
    else:
        print(f'Unknown file extension {file_extension}')
        exit(1)
Example #6
def post_videos():
    try:
        video = request.files['video']

        video_hash = hash_file(video)
        video_data = mongo.db.videos.find_one({'hash': video_hash})
        if video_data is not None:
            res = build_video_object(video_data)
            return jsonify(res), 200

        _, ext = os.path.splitext(video.filename)
        ext = ext[1:]

        if ext in ALLOWED_EXTENSIONS:
            results = startProcessing(
                file=video, uploadFolderPath=app.config['UPLOAD_FOLDER'])
        else:
            results = {'message': 'File format not allowed'}
            return jsonify(results), 400
    except KeyError as err:
        return jsonify({
            'title': 'BAD_REQUEST',
            'message': f'Missing key: {str(err)}'
        }), 400
    except Exception as e:
        return jsonify({'title': 'ERROR', 'message': (str(e))}), 400

    results['hash'] = video_hash
    mongo.db.videos.insert_one(build_video_object(results))

    return jsonify(results), 200
Example #7
    def do_pull(self):
        logging.info('Starting pull process')
        since = (utils.read_settings(self._conn, 'last_update')
                      .get('last_update', 0))
        cursor = self._conn.cursor()
        response = self.communicate({'ACTION': 'PULL',
                                     'SINCE': since})
        to_recv = parse.listify(parse.loads(response['CHANGES']))

        logging.info('Adding %d new files from server.' % len(to_recv))

        for x in to_recv:
            from_serv = parse.loads(x)
            sid = int(from_serv['ID'])
            logging.debug('Processing file update. SID: %d, type: %s'
                          % (sid, from_serv['type']))
            if from_serv['type'] == 'NEW':
                cursor.execute('SELECT 1 FROM files WHERE server_id=?', [sid])
                if cursor.fetchone():
                    logging.warning('Server returned a file I already have, '
                                    'ignoring and continuing pull process.')
                    continue

                file_path, file_hash = self.pull_remote(sid)

                with open(file_path, 'rb') as fd:
                    our_hash = utils.hash_file(fd)
                if our_hash.digest() != file_hash:
                    raise Exception('MD5 digests did not match! Transmission '
                                    'error suspected.')

                it_path = self.add_to_itunes(file_path)
                os.remove(file_path)

                record = utils.generate_file_info(it_path)
                record['server_id'] = sid
                utils.insert_file_record(record, self._conn)

                logging.debug('Successfully added file: %s'
                              % (os.path.split(it_path)[-1],))

            elif from_serv['type'] == 'DELETE':
                cursor.execute('SELECT * FROM files WHERE server_id=?', [sid])
                record = cursor.fetchone()

                if not record:
                    logging.warning('Server sent delete directive on file I '
                                    'don\'t have. Ignoring.')
                    continue

                self.remove_from_itunes(sid)
                cursor.execute('DELETE FROM files WHERE server_id=?', [sid])

            self._conn.commit()

        logging.info('...finished pull process')
Example #8
async def index(replica):
    global work

    replica_id = replica['id']
    async with get_conn() as conn:
        await Replica(conn).update_status(replica_id, 'indexing')

        storage = await Storage.get(replica)

        indexing_interrupted = False
        async for d in storage.index():
            if not work:
                indexing_interrupted = True
                break

            loc = None
            if not d.get('hash'):
                loc = await storage.fetch(d)

                if not d.get('hash'):
                    d['hash'] = hash_file(loc)

            f = await Files(conn).get(d)
            if not f:
                if not replica['master']:
                    continue
                if not loc:
                    loc = await storage.fetch(d)
                try:
                    dcm_data = parse_dcm(loc)
                except Exception as e:
                    continue

                d.update(dcm_data)
                d['master'] = replica['id']

                f = await Files(conn).add(d)

            d['id'] = f['id']
            d.pop('meta', None)
            if replica['master']:
                await ReplicaFiles(conn).add(
                    replica_id,
                    [d],
                )
            else:
                await ReplicaFiles(conn).index(replica_id, d)

        files = await ReplicaFiles(conn).get_for_sync(replica)
        if len(files) == 0 and not indexing_interrupted:
            await Replica(conn).update_status(replica_id, 'ok')
Example #9
    def push_command(self, command, session):
        cursor = self._conn.cursor()

        if command['TYPE'] == 'NEW':
            cursor.execute('INSERT INTO files (received) VALUES (?)', 
                           [time.time()])
            sid = cursor.lastrowid
            resp = parse.dumps({'ACTION': 'HSUP',
                                'ID': sid,
                                'DONE': 0})
            logging.debug('CONT -> %s' % resp)
            self._send(resp + '\n')
            file_path = os.path.join( 
                utils.read_settings(self._conn, 'storage_dir')['storage_dir'],
                '%d.mp3' % sid)

            digest = utils.pull_file(file_path, self._socket)
            with open(file_path, 'rb') as fd:
                our_digest = utils.hash_file(fd).digest()

            if our_digest != digest:
                cursor.execute('DELETE FROM files WHERE id=?', [sid])
                resp = {'ACTION': 'ERROR',
                        'REASON': 'Hash mismatch, record revoked, retransmit'}
                self._conn.commit()
                return resp, session

            cursor.execute('UPDATE files SET path=?, hash=? WHERE id=?',
                           [file_path, digest.hex(), sid])
            self._conn.commit()

            resp = {'ACTION': 'HSUP',
                    'DONE': 1}
            return resp, session

        elif command['TYPE'] == 'DELETE':
            sid = int(command['ID'])
            cursor.execute(
                'INSERT INTO deleted (file_id, del_time) VALUES (?, ?)',
                [sid, time.time()])
            cursor.execute('DELETE FROM files WHERE id=?', [sid])

            resp = {'ACTION': 'HSUP',
                    'DONE': 1}
            return resp, session
        else:
            resp = {'ACTION': 'ERROR',
                    'REASON': 'Unknown PUSH type: %s' % command['TYPE']}
            return resp, session
Example #10
def scan(path, conn):
    """
    Walks a directory and compares it to the databse pointed at by `conn`,
    returning a three tuple of files added to or removed from the directory 
    verses its representation in the db respectively. Modified files will be
    included in both added and removed lists. Will update the `last_scan` 
    fields within the db. Note that no effort is made to identify files which 
    have moved.
    """
    cursor = conn.cursor()
    scan_start = time.time()
    added = []
    removed = []

    for dirpath, dirnames, filenames in os.walk(path):
        for filename in filenames:
            file_path = os.path.join(dirpath, filename)
            cursor.execute('SELECT * FROM files WHERE path = ? LIMIT 1',
                           [file_path])

            record = cursor.fetchone()

            if not record:
                added.append(file_path)
            else:
                fs_mtime = os.path.getmtime(file_path)

                if fs_mtime > record['mtime']:
                    # This file is marked as having been modified since we
                    # last saw it, time to hash to check for changes.
                    with open(file_path, 'rb') as fd:
                        hash_value = utils.hash_file(fd).hexdigest()

                    if hash_value != record['hash']:
                        added.append(file_path)
                        removed.append(record['id'])

                cursor.execute('UPDATE files SET last_scan=? WHERE id=?',
                               [scan_start, record['id']])

    conn.commit()

    cursor.execute('SELECT * FROM files WHERE last_scan < ?', [scan_start])
    removed.extend(cursor.fetchall())
    cursor.close()

    return added, removed
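Examples #7, #9 and #10 pass an already-open file object to `utils.hash_file` and then call `.digest()` or `.hexdigest()` on the result, so that variant apparently returns a hash object rather than a string. A minimal sketch under that assumption (MD5 is only inferred from the error message in Example #7, and the chunk size is a guess):

import hashlib

def hash_file(fd, chunk_size=65536):
    """Sketch only: consume an open binary file object and return the hashlib
    object, leaving the choice of .digest() or .hexdigest() to the caller."""
    md5 = hashlib.md5()
    for chunk in iter(lambda: fd.read(chunk_size), b''):
        md5.update(chunk)
    return md5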
Example #11
    def open_file(self):
        if self.hashFile != "":
            self.save_transits_to_file()
        self.clear_graph(rebuild=False)
        self.detrended_all_flux = []
        self.folded_all_time = []
        fileName, _ = QFileDialog.getOpenFileName(
            self, "Open light curve file", "c:\\",
            "Fits kepler files (*.fits)")
        if fileName:
            with fits.open(fileName) as hdu_list:
                light_curve = hdu_list["LIGHTCURVE"].data
            match = re.search("kplr[0-9]{9}", fileName)
            self.kepID = ""
            if match:
                self.kepID = match[0][4:]
            self.all_time = light_curve.TIME
            self.all_flux = light_curve.PDCSAP_FLUX

            flux_and_time_finite = np.logical_and(np.isfinite(self.all_flux),
                                                  np.isfinite(self.all_time))
            self.all_time = self.all_time[flux_and_time_finite]
            self.all_flux = self.all_flux[flux_and_time_finite]

            self.all_flux /= np.median(self.all_flux)

            self.hashFile = utils.hash_file(fileName)
            try:
                with open(self.hashDir + self.hashFile, "r") as transitFile:
                    for line in transitFile.readlines():
                        item = QListWidgetItem()
                        item.setText(line)
                        self.listOfTransits.addItem(item)
            except OSError:
                pass

            self.rebuild_plot()
            self.fileLoaded = True
            self.saveButton.setVisible(True)
            self.clearButton.setVisible(True)
            self.detrendCheck.setVisible(True)
            self.actionDetrend.setEnabled(True)
Example #12
async def store(ds, data):
    global initialized

    if not initialized:
        await setup()
        initialized = True

    async with get_conn() as conn:
        try:
            ds = get_meta(ds)
            async with conn.transaction():
                master = await Replica(conn).master()

                hsh = hash_file(data)

                file_data = {
                    'name': str(uuid.uuid4()) + '.dcm',
                    'master': master['id'],
                    'hash': hsh,
                }
                file_data.update(ds)
                f = await Files(conn).insert_or_select(file_data)

                storage = await Storage.get(master)
                ret = await storage.copy(data, f)

                await ReplicaFiles(conn).add(
                    master['id'],
                    [{
                        'id': f['id'],
                        **ret
                    }],
                )
        except Exception as e:
            print(traceback.format_exc())
            await Log(conn).add(str(e))
            return False
    return True
Example #13
    def hash(self, location):
        with open(location, 'rb') as dcmf:
            return hash_file(dcmf)
Example #14
    def __init__(self, name, f, user):
        self.name = name
        self.user = user
        self.digest = hash_file(f)

        self.output = Image.analyze(f)
Example #15
def test_train(params):
    # Get the token for authorizing with the serve endpoint
    config = ConfigObj(CONFIG_PATH)
    try:
        token = config["MINIKUBE"]["backend"]["token"]
    except KeyError:
        token = config["DOCKER"]["backend"]["token"]

    print(token)

    subprocess.Popen(["kaos workspace list"],
                     shell=True,
                     stdout=subprocess.PIPE).stdout.read()

    workspace_name = get_rand_str()
    code, stdout, stderr = run_cmd(
        f"kaos workspace create -n {workspace_name}")
    print(stdout.read())

    code, stdout, stderr = run_cmd(f"kaos template get -n property-val")
    print(stdout.read())

    print("###############################################################")
    print("# train model and assert results")
    print("###############################################################")

    old_job_id, old_model_id, old_model_checksum = train_and_assert(
        workspace_name, 0)

    print("###############################################################")
    print("# deploy inference with the trained model")
    print("###############################################################")

    code, stdout, stderr = run_cmd(f"kaos train info -i 0")
    data = stdout.read().decode('utf-8')
    model_id = parse_train_info(data)[0][3]

    code, stdout, stderr = run_cmd(
        f"kaos serve deploy -m {model_id} -s templates/property-val/model-serve"
    )
    print(stdout.read())

    serve_and_assert(
        deploy_command=
        f"kaos serve deploy -m {model_id} -s templates/property-val/model-serve",
        list_command="kaos serve list")

    code, stdout, stderr = run_cmd("kaos serve list")
    data = stdout.read().decode('utf-8')
    building_table, serving_table = parse_serve_list(data)

    print("###############################################################")
    print("# curl the running model")
    print("###############################################################")

    with open("templates/property-val/test_payload.json") as payload_file:
        data = payload_file.read()

    endpoint_name = serving_table[0][2]
    print(f"endpoing name: {endpoint_name}")
    r = requests.post(
        f"http://localhost:{params['k8s_port']}/{endpoint_name}/invocations",
        headers={
            "Content-Type": "application/json",
            "X-Token": token
        },
        data=data)

    assert r.status_code == 200
    assert "result" in r.json()

    print("###############################################################")
    print("# check all the serving artifacts")
    print("###############################################################")

    serve_artifacts_dir = f"serve_artifacts-{workspace_name}"
    os.mkdir(serve_artifacts_dir)
    code, stdout, stderr = run_cmd(
        f"kaos serve get -e {endpoint_name} -o {serve_artifacts_dir}")
    print(stdout.read())

    serve_code_path_matches = glob.glob(
        f"{serve_artifacts_dir}/*/*/code/property-val:*", recursive=True)
    assert len(serve_code_path_matches) == 1
    serve_code_path = serve_code_path_matches[0]
    assert checksumdir.dirhash(serve_code_path, excluded_files=["__init__.py", "model.pkl"]) == \
           checksumdir.dirhash("templates/property-val/model-serve/property-val",
                               excluded_files=["__init__.py", "model.pkl"])

    model_path_matches = glob.glob(
        f"{serve_artifacts_dir}/*/*/code/property-val:*/model/model.pkl",
        recursive=True)
    assert len(model_path_matches) == 1

    model_path = model_path_matches[0]
    assert os.path.getsize(model_path) // 100000 == 4

    _, stdout, _ = run_cmd(
        f"kaos serve provenance -e {endpoint_name} -o {serve_artifacts_dir}")
    print(stdout.read())

    serve_provenance_matches = glob.glob(
        f"{serve_artifacts_dir}/{workspace_name.lower()}/provenance/serve-*.pdf",
        recursive=True)
    assert len(serve_provenance_matches) == 1

    serve_provenance_path = serve_provenance_matches[0]
    assert os.path.exists(serve_provenance_path)
    assert os.path.isfile(serve_provenance_path)

    with open(serve_provenance_path, "rb") as prov_file:
        prov = PdfFileReader(prov_file, strict=False)
        print(prov.documentInfo)

    print("###############################################################")
    print("# modify code dir")
    print("###############################################################")

    with open(
            f"templates/property-val/model-train/property-val/model/{uuid.uuid4().hex}",
            'w') as f:
        f.write(uuid.uuid4().hex)

    print("###############################################################")
    print("# RE-train model and assert results")
    print("###############################################################")

    train_and_assert(workspace_name, 1)

    # ###############################################################
    # # modify data dir
    # ###############################################################
    #
    # with open(f"templates/property-val/data/features{uuid.uuid4().hex}", 'w') as f:
    #     f.write(uuid.uuid4().hex)
    #
    # ###############################################################
    # # RE-train model and assert results
    # ###############################################################
    #
    # train_and_assert(workspace_name, 2)

    print("# ##############################################################")
    print("# Check that we can still get the actual old model")
    print("# ##############################################################")

    old_artifacts_dir = f"old-artifacts-{workspace_name}"
    os.mkdir(old_artifacts_dir)
    code, stdout, stderr = run_cmd(
        f"kaos train get -cdm --job_id {old_job_id} -o {old_artifacts_dir}")
    print(stdout.read())
    old_model_path_matches = glob.glob(
        f"{old_artifacts_dir}/*/*/models/*/model/model.pkl", recursive=True)
    assert len(old_model_path_matches) == 1

    old_model_path = old_model_path_matches[0]
    old_model_checksum_now = hash_file(old_model_path)
    assert old_model_checksum == old_model_checksum_now
Example #16
def train_and_assert(workspace_name, expected_pretrained_jobs):
    code, stdout, stderr = run_cmd(
        f"kaos train deploy -s templates/property-val/model-train/ "
        f"-d templates/property-val/data/")
    print(stdout.read())

    print("###############################################################")
    print("# wait until the submitted job appears in BUILDING list")
    print("###############################################################")

    building_table = []
    training_table = []
    i = 0
    while len(building_table) == 0 and i < TIMEOUT:
        code, stdout, stderr = run_cmd(f"kaos train list")
        data = stdout.read().decode('utf-8')
        building_table, training_table = parse_train_list(data)
        time.sleep(10)
        print(f"building -> {building_table}")
        print(f"training -> {training_table}")
        i += 1

    if i == TIMEOUT:
        raise Exception("timeout")

    print("###############################################################")
    print("# check that the status is JOB_RUNNING")
    print("###############################################################")

    print(building_table)
    print(training_table)
    assert len(building_table) == 1
    assert len(training_table) == expected_pretrained_jobs
    assert building_table[0][3] == 'JOB_RUNNING'

    print("###############################################################")
    print("# wait until the submitted job appears in TRAINING list")
    print("###############################################################")

    building_table = []
    training_table = []
    i = 0
    while len(training_table) <= expected_pretrained_jobs and i < TIMEOUT:
        code, stdout, stderr = run_cmd(f"kaos train list")
        data = stdout.read().decode('utf-8')
        building_table, training_table = parse_train_list(data)
        print(f"building -> {building_table}")
        print(f"training -> {training_table}")
        time.sleep(10)
        i += 1

    if i == TIMEOUT:
        raise Exception("timeout")

    print("###############################################################")
    print("# check that the job is either running or has succeeded")
    print("###############################################################")

    print(building_table)
    print(training_table)
    assert len(building_table) == 0
    assert len(training_table) == 1 + expected_pretrained_jobs
    assert training_table[0][5] in ('JOB_RUNNING', 'JOB_SUCCESS',
                                    'JOB_MERGING')

    print("###############################################################")
    print("# wait if any training job is still running or merging")
    print("###############################################################")

    i = 0
    while any(
            map(lambda row: row[5] in ('JOB_RUNNING', 'JOB_MERGING'),
                training_table)) and i < TIMEOUT:
        code, stdout, stderr = run_cmd(f"kaos train list")
        data = stdout.read().decode('utf-8')
        building_table, training_table = parse_train_list(data)
        print(f"building -> {building_table}")
        print(f"training -> {training_table}")
        time.sleep(10)
        i += 1

    if i == TIMEOUT:
        raise Exception("timeout")

    print("###############################################################")
    print("# check that job finished with JOB_SUCCESS status")
    print("###############################################################")

    print(building_table)
    print(training_table)
    assert len(building_table) == 0
    assert len(training_table) == 1 + expected_pretrained_jobs
    assert training_table[0][5] == 'JOB_SUCCESS'

    print("###############################################################")
    print("# check all the training artifacts")
    print("###############################################################")

    artifacts_dir = f"artifacts-{workspace_name}-{expected_pretrained_jobs}"
    os.mkdir(artifacts_dir)
    job_id = training_table[0][3]
    train_get_cmd = f"kaos train get -cdm --job_id {job_id} -o {artifacts_dir}"
    print(train_get_cmd)
    code, stdout, stderr = run_cmd(train_get_cmd)
    print(stdout.read())
    model_path_matches = glob.glob(
        f"{artifacts_dir}/*/*/models/*/model/model.pkl", recursive=True)
    assert len(model_path_matches) == 1

    model_path = model_path_matches[0]
    model_checksum = hash_file(model_path)
    assert os.path.getsize(model_path) // 100000 == 4

    data_path_matches = glob.glob(f"{artifacts_dir}/*/*/data", recursive=True)
    assert len(data_path_matches) == 1

    data_path = data_path_matches[0]
    assert checksumdir.dirhash(data_path) == checksumdir.dirhash(
        "templates/property-val/data/")

    code_path_matches = glob.glob(f"{artifacts_dir}/*/*/code/property-val:*",
                                  recursive=True)
    assert len(code_path_matches) == 1

    code_path = code_path_matches[0]
    print(f"code_path -> {code_path}")
    print(f"job id -> {job_id}")
    print(f"{training_table}")

    assert checksumdir.dirhash(code_path, excluded_files=["__init__.py"]) == \
           checksumdir.dirhash("templates/property-val/model-train/property-val", excluded_files=["__init__.py"])

    code, stdout, stderr = run_cmd(f"kaos train info -i 0")
    data = stdout.read().decode('utf-8')
    train_info = parse_train_info(data)
    assert len(train_info) > 1

    model_id = train_info[0][3]

    print("###############################################################")
    print("# check provenance")
    print("###############################################################")

    _, stdout, _ = run_cmd(
        f"kaos train provenance -m {model_id} -o {artifacts_dir}")
    print(stdout.read())
    prov_path = f"{artifacts_dir}/{workspace_name.lower()}/provenance/model-{model_id}.pdf"
    assert os.path.exists(prov_path)
    assert os.path.isfile(prov_path)

    with open(prov_path, "rb") as prov_file:
        prov = PdfFileReader(prov_file, strict=False)
        print(prov.documentInfo)

    return job_id, model_id, model_checksum
Example #17
remote_checksum = None
local_checksum = None
try:
    print("retrieving remote planet file checksum from:\n%s" % constants.PLANET_MD5_URL)
    remote = urllib.urlopen(constants.PLANET_MD5_URL)
    remote_checksum = remote.read().split(" ")[0]
    print("remote checksum retrieved:")
    print(remote_checksum)
except Exception as e:
    print("ERROR: retrieving remote md5 checksum failed")
    print(e)

# it only makes sense to compute the local checksum if we have the remote checksum
if remote_checksum:
    try:
        print("computing local md5 checksum")
        local_checksum = utils.hash_file(constants.PLANET_PATH)
        print("local md5 checksum done:")
        print(local_checksum)
    except Exception as e:
        print("local md5 checksum computation failed")
        print(e)

if remote_checksum is None or local_checksum is None:
    print("skipping the md5 check - can't get remote or local checksum")
else:
    if remote_checksum == local_checksum:
        print("* md5 checksum: OK")
    else:
        print("* md5 checksum: NOK")
        sane = False
Example #18
    def test_hash_file(self):
        for example in TestHashFile.xxhash_examples:
            with mock.patch('builtins.open', mock.mock_open(read_data=example['data'])) as mock_open:
                pbar = mock.MagicMock()
                self.assertEqual(example['hexdigest'], hash_file(Path('foo_filename'), pbar))
                mock_open.assert_called_once_with(Path('foo_filename'), 'rb')
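Examples #3 and #18 call a variant that takes a `Path` plus a progress bar and returns a hex digest string; the test class name hints at xxhash. A minimal sketch under those assumptions (the xxh64 choice, chunk size, and tqdm-style progress interface are guesses, not confirmed by the source):

import xxhash

def hash_file(path, pbar=None, chunk_size=65536):
    """Sketch only: chunked xxhash of the file at `path`, optionally reporting
    progress to a tqdm-style progress bar, returning the hex digest string."""
    digest = xxhash.xxh64()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
            if pbar is not None:
                pbar.update(len(chunk))
    return digest.hexdigest()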