Example #1
    def _hash_parts(self, path, size, mtime):
        '''Hash the file as a whole and in fixed-size parts, then store
        the results in the database.'''
        _logger.info('Hashing file %s', path)

        whole_file_hasher = hashlib.sha1()
        hashes = []

        with open(path, 'rb') as f:
            while True:
                # Stop early if this task has been cancelled.
                if not self.is_running:
                    return

                data = f.read(self._part_size)

                if not data:
                    break

                # Report progress as (path, number of bytes read so far).
                self.progress = (path, f.tell())

                whole_file_hasher.update(data)
                part_hasher = hashlib.sha1(data)
                hashes.append(part_hasher.digest())

        file_hash = whole_file_hasher.digest()
        file_hash_info = FileInfo(file_hash, hashes)
        # The index is the SHA-1 digest of the serialized file info.
        index = hashlib.sha1(file_hash_info.to_bytes()).digest()

        with self._table.connection() as con:
            cur = con.execute('INSERT INTO files '
                '(key, `index`, size, mtime, part_size, filename, '
                'file_hash_info) '
                'VALUES (?, ?, ?, ?, ?, ?, ?)', (file_hash, index,
                    size, mtime, self._part_size, path,
                    file_hash_info.to_bytes()))

            row_id = cur.lastrowid

            # Record each part hash along with its byte offset in the file.
            for i, hash_bytes in enumerate(hashes):
                offset = i * self._part_size
                self.progress = (path, offset)

                try:
                    con.execute('INSERT INTO parts '
                        '(hash_id, file_id, file_offset) VALUES '
                        '(?, ?, ?)', (hash_bytes, row_id, offset))
                except sqlite3.IntegrityError:
                    _logger.exception('Possible duplicate')

            collection_type = self._get_collection_type(path)

            # If the file belongs to a collection, record the membership.
            if collection_type:
                con.execute('INSERT INTO collections '
                    '(file_id, type) VALUES '
                    '(?, ?)', (row_id, collection_type))
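
The INSERT statements above imply a small SQLite schema with files, parts, and collections tables. The column names in the sketch below are taken directly from those statements, but the column types, the primary key, and the uniqueness constraint on parts (suggested by the sqlite3.IntegrityError handler) are assumptions, not the project's actual schema:

import sqlite3

con = sqlite3.connect(':memory:')
con.executescript('''
    CREATE TABLE files (
        id INTEGER PRIMARY KEY,
        key BLOB,
        `index` BLOB,
        size INTEGER,
        mtime INTEGER,
        part_size INTEGER,
        filename TEXT,
        file_hash_info BLOB
    );
    CREATE TABLE parts (
        hash_id BLOB,
        file_id INTEGER REFERENCES files (id),
        file_offset INTEGER,
        UNIQUE (hash_id, file_id, file_offset)
    );
    CREATE TABLE collections (
        file_id INTEGER REFERENCES files (id),
        type TEXT
    );
''')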
Example #2
    def test_read_json(self):
        '''It should read a JSON document with basic info'''

        s = (b'{'
            b'"!":"BytestagFileInfo",'
            b'"hash":"jbip9t8iC9lEz3jndkm5I2fTWV0=",'
            b'"parts":["jbip9t8iC9lEz3jndkm5I2fTWV0="]'
        b'}')

        info = FileInfo.from_bytes(s)

        self.assertEqual(info.file_hash,
            KeyBytes('jbip9t8iC9lEz3jndkm5I2fTWV0='))
        self.assertEqual(info.part_hashes,
            [KeyBytes('jbip9t8iC9lEz3jndkm5I2fTWV0=')])

        result_bytes = info.to_bytes()

        self.assertEqual(s, result_bytes)
Example #3
    def test_read_json_extended(self):
        '''It should read a JSON document with extended info'''

        s = (b'{'
            b'"!":"BytestagFileInfo",'
            b'"filename":["my_file.txt"],'
            b'"hash":"jbip9t8iC9lEz3jndkm5I2fTWV0=",'
            b'"parts":["jbip9t8iC9lEz3jndkm5I2fTWV0="],'
            b'"size":123'
        b'}')

        info = FileInfo.from_bytes(s)

        self.assertEqual(info.file_hash,
            KeyBytes('jbip9t8iC9lEz3jndkm5I2fTWV0='))
        self.assertEqual(info.part_hashes,
            [KeyBytes('jbip9t8iC9lEz3jndkm5I2fTWV0=')])
        self.assertEqual(info.size, 123)
        self.assertEqual(info.filename, ['my_file.txt'])

        result_bytes = info.to_bytes()

        self.assertEqual(s, result_bytes)
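
The hash strings embedded in these test fixtures look like standard base64 encodings of raw SHA-1 digests: the example value decodes to exactly 20 bytes, the size of a SHA-1 digest. A small standard-library check of that assumption (the encoding scheme is inferred from the fixtures, not stated by the project):

import base64
import hashlib

encoded = 'jbip9t8iC9lEz3jndkm5I2fTWV0='
digest = base64.b64decode(encoded)

# A SHA-1 digest is 20 bytes, which matches the decoded fixture value.
assert len(digest) == hashlib.sha1().digest_size == 20

# Encoding a fresh SHA-1 digest the same way produces a string of the
# same shape as the ones used in the JSON above.
print(base64.b64encode(hashlib.sha1(b'example data').digest()).decode())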
Example #4
    def file_hash_info(self, kvpid):
        '''Return the FileInfo deserialized from the stored info for kvpid.'''
        return FileInfo.from_bytes(self._get_file_hash_info(kvpid))