def checksum(self):
    ''' Checksum the file contents, used as a proxy for comparing the actual content.

        Returns the cached digest bytes if already computed, otherwise
        reads the whole file at `self.path` (with a progress bar),
        caches the digest in `self._checksum` and returns it.
    '''
    csum = self._checksum
    if csum is None:
        path = self.path
        U = Upd()
        # Columns left over for the path in the progress bar label.
        pathspace = U.columns - 64
        # Truncate long paths from the left so the tail (filename) stays visible.
        label = "scan " + (
            path
            if len(path) < pathspace
            else '...' + path[-(pathspace - 3):]  # pylint: disable=unsubscriptable-object
        )
        with Pfx("checksum %r", path):
            csum = hashfunc()
            with open(path, 'rb') as fp:
                # Total size from the open descriptor, used for progress scaling.
                length = os.fstat(fp.fileno()).st_size
                read_len = 0
                for data in progressbar(
                    read_from(fp, rsize=1024*1024),
                    label=label,
                    total=length,
                    units_scale=BINARY_BYTES_SCALE,
                    itemlenfunc=len,
                    update_frequency=128,
                    upd=U,
                ):
                    csum.update(data)
                    read_len += len(data)
                # Sanity check: we should have read exactly the recorded size.
                assert read_len == self.size
            csum = csum.digest()
            self._checksum = csum
    return csum
def hash_fp(fp, hashname, h=None, rsize=16384):
    ''' Compute or update a hashcode for data read from a file.

        Note: the previous docstring claimed sha256; the algorithm is
        actually selected by `hashname`.

        Parameters:
        * `fp`: an open binary file, or the name of a file to open and read
        * `hashname`: the name of the hash algorithm to use
        * `h`: optional existing hash object to update;
          if omitted, a new one is created by `hash_byteses`
        * `rsize`: read block size, default 16384

        Returns the hash object.
    '''
    if isinstance(fp, str):
        # Accept a filename: open it and recurse with the open file,
        # ensuring the file gets closed.
        filename = fp
        with open(filename, 'rb') as fp:
            return hash_fp(fp, hashname, h=h, rsize=rsize)
    return hash_byteses(read_from(fp, rsize=rsize), hashname, h=h)
def scanfrom(filepath, offset=0):
    ''' Scan the specified `filepath` from `offset`, yielding data `(pre_offset, data, post_offset)`. '''
    chunk_scanner = scanner_from_filename(filepath)
    with open(filepath, 'rb') as f:
        f.seek(offset)
        chunks = blocked_chunks_of2(
            read_from(f, DEFAULT_SCAN_SIZE), scanner=chunk_scanner
        )
        for chunk in chunks:
            end_offset = offset + len(chunk)
            yield offset, chunk, end_offset
            offset = end_offset
def test02blocked_chunks_of(self):
    ''' Blockify some input sources. '''
    parsers = [None]
    parsers.extend(PARSERS)
    for parser in parsers:
        testfilename = scanner_testfile(parser) if parser is not None else None
        if testfilename is not None:
            # This parser has a sample file: scan that.
            with open(testfilename, 'rb') as f:
                self._test_blocked_chunks_of(
                    parser, testfilename, read_from(f, DEFAULT_SCAN_SIZE)
                )
        else:
            # No sample file: exercise the parser on synthetic inputs.
            repeated_code = [self.mycode] * 100
            self._test_blocked_chunks_of(parser, '100 x ' + __file__, repeated_code)
            self._test_blocked_chunks_of(parser, 'random data', self.random_data)
def test01scanners(self):
    ''' Test some domain specific data parsers.

        For each parser, feed it either its sample test file or random
        data and check that the offsets it yields never decrease.

        Fix: the opened sample file previously leaked if an assertion
        raised before the trailing `f.close()`; the close now happens
        in a `finally` clause.
    '''
    for parser in PARSERS:
        with self.subTest(parser.__name__):
            f = None
            testfilename = scanner_testfile(parser)
            if testfilename is None:
                # No sample file for this parser: use random data.
                input_chunks = self.random_data
            else:
                f = open(testfilename, 'rb')
                input_chunks = read_from(f)
            try:
                last_offset = 0
                for offset in parser(CornuCopyBuffer(input_chunks)):
                    self.assertTrue(
                        last_offset <= offset,
                        "offset %d <= last_offset %d" % (offset, last_offset)
                    )
                    last_offset = offset
            finally:
                # Close the sample file even if an assertion fails.
                if f is not None:
                    f.close()