def _hash_parts(self, path, size, mtime):
    '''Hash the file in fixed-size parts and record the results.'''
    _logger.info('Hashing file %s', path)

    whole_file_hasher = hashlib.sha1()
    hashes = []

    with open(path, 'rb') as f:
        while True:
            # Bail out early if the task has been stopped.
            if not self.is_running:
                return

            data = f.read(self._part_size)

            if not data:
                break

            self.progress = (path, f.tell())
            whole_file_hasher.update(data)
            # Each part gets its own SHA-1 digest in addition to the
            # running digest of the whole file.
            part_hasher = hashlib.sha1(data)
            hashes.append(part_hasher.digest())

    file_hash = whole_file_hasher.digest()
    file_hash_info = FileInfo(file_hash, hashes)
    index = hashlib.sha1(file_hash_info.to_bytes()).digest()

    with self._table.connection() as con:
        cur = con.execute(
            'INSERT INTO files '
            '(key, `index`, size, mtime, part_size, filename, '
            'file_hash_info) '
            'VALUES (?, ?, ?, ?, ?, ?, ?)',
            (file_hash, index, size, mtime, self._part_size, path,
                file_hash_info.to_bytes()))
        row_id = cur.lastrowid

        for i, hash_bytes in enumerate(hashes):
            offset = i * self._part_size
            self.progress = (path, offset)

            try:
                con.execute(
                    'INSERT INTO parts '
                    '(hash_id, file_id, file_offset) '
                    'VALUES (?, ?, ?)',
                    (hash_bytes, row_id, offset))
            except sqlite3.IntegrityError:
                # An identical part may already be indexed from
                # another file.
                _logger.exception('Possible duplicate')

        collection_type = self._get_collection_type(path)

        if collection_type:
            con.execute(
                'INSERT INTO collections (file_id, type) '
                'VALUES (?, ?)',
                (row_id, collection_type))
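
# A minimal, self-contained sketch of the chunked-hashing scheme used by
# _hash_parts above, for illustration only. The hash_file_parts name and
# the 2 MiB default are assumptions; the real method reads the part size
# from self._part_size and writes the digests to the database instead of
# returning them.
import hashlib

def hash_file_parts(path, part_size=2 * 1024 * 1024):
    whole_file_hasher = hashlib.sha1()
    part_hashes = []

    with open(path, 'rb') as f:
        while True:
            data = f.read(part_size)

            if not data:
                break

            whole_file_hasher.update(data)
            part_hashes.append(hashlib.sha1(data).digest())

    # Part i covers byte offsets [i * part_size, (i + 1) * part_size).
    return whole_file_hasher.digest(), part_hashes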
def test_read_json(self):
    '''It should read JSON with basic info.'''
    s = (b'{'
        b'"!":"BytestagFileInfo",'
        b'"hash":"jbip9t8iC9lEz3jndkm5I2fTWV0=",'
        b'"parts":["jbip9t8iC9lEz3jndkm5I2fTWV0="]'
        b'}')
    info = FileInfo.from_bytes(s)

    self.assertEqual(info.file_hash,
        KeyBytes('jbip9t8iC9lEz3jndkm5I2fTWV0='))
    self.assertEqual(info.part_hashes,
        [KeyBytes('jbip9t8iC9lEz3jndkm5I2fTWV0=')])

    result_bytes = info.to_bytes()

    self.assertEqual(s, result_bytes)
def test_read_json_extended(self):
    '''It should read JSON with extended info.'''
    s = (b'{'
        b'"!":"BytestagFileInfo",'
        b'"filename":["my_file.txt"],'
        b'"hash":"jbip9t8iC9lEz3jndkm5I2fTWV0=",'
        b'"parts":["jbip9t8iC9lEz3jndkm5I2fTWV0="],'
        b'"size":123'
        b'}')
    info = FileInfo.from_bytes(s)

    self.assertEqual(info.file_hash,
        KeyBytes('jbip9t8iC9lEz3jndkm5I2fTWV0='))
    self.assertEqual(info.part_hashes,
        [KeyBytes('jbip9t8iC9lEz3jndkm5I2fTWV0=')])
    self.assertEqual(info.size, 123)
    self.assertEqual(info.filename, ['my_file.txt'])

    result_bytes = info.to_bytes()

    self.assertEqual(s, result_bytes)
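
# The byte-for-byte round-trip assertions above only hold if to_bytes()
# produces a canonical encoding. A minimal sketch of one way to get that
# with the standard library (an assumption about the implementation, not
# taken from it): sort the keys and strip whitespace so equal objects
# always serialize to identical bytes.
import json

def to_canonical_bytes(mapping):
    # Keys come out in sorted order ('!', 'filename', 'hash', ...) with
    # compact separators, matching the literal bytes in the tests above.
    return json.dumps(mapping, sort_keys=True,
        separators=(',', ':')).encode('utf-8')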
def file_hash_info(self, kvpid):
    '''Return the FileInfo deserialized from the stored bytes for kvpid.'''
    return FileInfo.from_bytes(self._get_file_hash_info(kvpid))