Example #1
0
 def checksum(self):
   ''' Return the checksum of the file contents,
       used as a proxy for comparing the actual content.

       The digest is computed on first use by reading the file at
       `self.path` via `read_from` (with an `rsize` of 1024*1024)
       behind a progress bar, and is then cached in `self._checksum`
       so that later calls return the cached value without rereading.

       The number of bytes read is asserted to equal `self.size`.
   '''
   csum = self._checksum
   if csum is None:
     path = self.path
     U = Upd()
     # Room left for the path in the progress bar label.
     # Clamp it to at least 4 so that a narrow display cannot produce
     # a zero or positive slice start below, which would keep the whole
     # path or drop its head instead of keeping its tail.
     pathspace = max(U.columns - 64, 4)
     label = "scan " + (
         path if len(path) < pathspace else '...' + path[-(pathspace - 3):]  # pylint: disable=unsubscriptable-object
     )
     with Pfx("checksum %r", path):
       csum = hashfunc()
       with open(path, 'rb') as fp:
         # total for the progress bar: the file size as of opening
         length = os.fstat(fp.fileno()).st_size
         read_len = 0
         for data in progressbar(
             read_from(fp, rsize=1024*1024),
             label=label,
             total=length,
             units_scale=BINARY_BYTES_SCALE,
             itemlenfunc=len,
             update_frequency=128,
             upd=U,
         ):
           csum.update(data)
           read_len += len(data)
         # sanity check: we hashed as many bytes as the recorded size
         assert read_len == self.size, (
             "read_len %r != self.size %r" % (read_len, self.size)
         )
     # keep only the digest, not the hash object
     csum = csum.digest()
     self._checksum = csum
   return csum
Example #2
0
def hash_fp(fp, hashname, h=None, rsize=16384):
    ''' Hash the data read from `fp`, passing `hashname` and `h`
        through to `hash_byteses` and returning its result.

        `fp` may be a filename (`str`), in which case the file is opened
        in binary mode for the duration of the call;
        otherwise it is handed directly to `read_from`.
        Data are read with a read size of `rsize`.
        `h` is presumably an existing hash state to update rather than
        starting afresh - confirm against `hash_byteses`.
    '''
    if not isinstance(fp, str):
        return hash_byteses(read_from(fp, rsize=rsize), hashname, h=h)
    # A filename: open it and hash the resulting file object.
    with open(fp, 'rb') as opened:
        return hash_fp(opened, hashname, h=h, rsize=rsize)
Example #3
0
 def scanfrom(filepath, offset=0):
   ''' Generator scanning the file `filepath` starting at `offset`,
       yielding `(pre_offset, data, post_offset)` for each chunk
       produced by `blocked_chunks_of2`,
       where the offsets bracket `data` within the file.

       The scanner is chosen from the filename via `scanner_from_filename`.
   '''
   scanner = scanner_from_filename(filepath)
   with open(filepath, 'rb') as fp:
     fp.seek(offset)
     chunks = blocked_chunks_of2(
         read_from(fp, DEFAULT_SCAN_SIZE), scanner=scanner
     )
     pre_offset = offset
     for chunk in chunks:
       end_offset = pre_offset + len(chunk)
       yield pre_offset, chunk, end_offset
       # the next chunk starts where this one ended
       pre_offset = end_offset
Example #4
0
 def test02blocked_chunks_of(self):
     ''' Blockify some input sources:
         with no parser, and with each parser in `PARSERS`.

         A parser with an associated test file is run against that file;
         otherwise the source code and the random data are used.
     '''
     candidates = [None]
     candidates.extend(PARSERS)
     for parser in candidates:
         if parser is None:
             testfilename = None
         else:
             testfilename = scanner_testfile(parser)
         if testfilename is not None:
             # parser specific sample data
             with open(testfilename, 'rb') as f:
                 chunks = read_from(f, DEFAULT_SCAN_SIZE)
                 self._test_blocked_chunks_of(parser, testfilename, chunks)
             continue
         # no sample file: fall back to the generic inputs
         self._test_blocked_chunks_of(
             parser, '100 x ' + __file__, [self.mycode for _ in range(100)]
         )
         self._test_blocked_chunks_of(parser, 'random data', self.random_data)
Example #5
0
 def test01scanners(self):
     ''' Test some domain specific data parsers.

         Each parser in `PARSERS` is run over its test file if
         `scanner_testfile` supplies one, otherwise over `self.random_data`,
         and the offsets it yields are checked to be nondecreasing.
     '''
     for parser in PARSERS:
         with self.subTest(parser.__name__):
             testfilename = scanner_testfile(parser)
             f = None if testfilename is None else open(testfilename, 'rb')
             # Close the test file even if the parser raises or an
             # assertion fails part way through the scan.
             try:
                 if f is None:
                     input_chunks = self.random_data
                 else:
                     input_chunks = read_from(f)
                 last_offset = 0
                 for offset in parser(CornuCopyBuffer(input_chunks)):
                     self.assertLessEqual(
                         last_offset, offset,
                         "offset %d < last_offset %d" % (offset, last_offset))
                     last_offset = offset
             finally:
                 if f is not None:
                     f.close()