Example #1
0
 def process_file(self, path, st, cache):
     """Archive the file at ``path`` as a single item.

     A file with more than one link whose (inode, device) pair is already
     recorded in ``self.hard_links`` is added as an item that only points
     at the path recorded first; its contents are not read.  Otherwise the
     chunk list is reused from ``cache`` when the file is known and
     unchanged and every one of its chunk ids is still seen by the cache.
     Failing that, the file is opened, chunked, and memorized in the cache.

     :param path: path of the file to open and archive
     :param st: stat-like object for ``path`` (``st_nlink``, ``st_ino`` and
         ``st_dev`` are read here)
     :param cache: object providing the file/chunk lookups and chunk storage
     """
     archived_path = make_path_safe(path)

     # Hard link handling: only the first path seen for an inode gets data.
     if st.st_nlink > 1:
         link_key = (st.st_ino, st.st_dev)
         if link_key not in self.hard_links:
             self.hard_links[link_key] = archived_path
         else:
             first_path = self.hard_links[link_key]
             link_item = self.stat_attrs(st, path)
             link_item.update({b'path': archived_path, b'source': first_path})
             self.add_item(link_item)
             return

     full_path = os.path.join(self.cwd, path)
     path_hash = self.key.id_hash(full_path.encode('utf-8', 'surrogateescape'))
     known_ids = cache.file_known_and_unchanged(path_hash, st)

     chunks = None
     # Reuse the cached chunk list only if every chunk id is still available.
     if known_ids is not None and all(cache.seen_chunk(cid) for cid in known_ids):
         chunks = [cache.chunk_incref(cid, self.stats) for cid in known_ids]

     # Fall back to reading and chunking the file contents.
     if chunks is None:
         with open(path, 'rb') as fd:
             chunks = [
                 cache.add_chunk(self.key.id_hash(piece), piece, self.stats)
                 for piece in chunkify(fd, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)
             ]
         cache.memorize_file(path_hash, st, [entry[0] for entry in chunks])

     item = {b'path': archived_path, b'chunks': chunks}
     item.update(self.stat_attrs(st, path))
     self.stats.nfiles += 1
     self.add_item(item)
Example #2
0
 def process_file(self, path, st, cache):
     """Add one item describing the file at ``path`` to the archive.

     Steps, in order:

     1. If ``st`` reports more than one link and the (inode, device) pair
        is already in ``self.hard_links``, add an item carrying the stat
        attributes plus a ``b'source'`` entry naming the earlier path, and
        stop.  A pair not seen before is recorded with this file's path.
     2. Ask ``cache`` whether the file is known and unchanged; if so, and
        all of its chunk ids are seen by the cache, take a reference on
        each chunk instead of reading the file.
     3. Otherwise read the file, add each chunk to the cache and memorize
        the resulting chunk ids for the file.

     :param path: path of the file to open and archive
     :param st: stat-like object for ``path`` (``st_nlink``, ``st_ino`` and
         ``st_dev`` are read here)
     :param cache: object providing the file/chunk lookups and chunk storage
     """
     stored_path = make_path_safe(path)

     if st.st_nlink > 1:
         inode_key = (st.st_ino, st.st_dev)
         if inode_key in self.hard_links:
             original_path = self.hard_links[inode_key]
             attrs = self.stat_attrs(st, path)
             attrs.update({b'path': stored_path, b'source': original_path})
             self.add_item(attrs)
             return
         # First time this inode is seen: remember where it was stored.
         self.hard_links[inode_key] = stored_path

     encoded_path = os.path.join(self.cwd, path).encode('utf-8', 'surrogateescape')
     path_hash = self.key.id_hash(encoded_path)
     cached_ids = cache.file_known_and_unchanged(path_hash, st)

     chunks = None
     if cached_ids is not None:
         # A single missing chunk invalidates the cached chunk list.
         any_missing = any(not cache.seen_chunk(chunk_id) for chunk_id in cached_ids)
         if not any_missing:
             chunks = [cache.chunk_incref(chunk_id, self.stats) for chunk_id in cached_ids]

     if chunks is None:
         # Nothing reusable: read the file and store its chunks.
         with open(path, 'rb') as fd:
             chunks = [
                 cache.add_chunk(self.key.id_hash(data), data, self.stats)
                 for data in chunkify(fd, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)
             ]
         cache.memorize_file(path_hash, st, [added[0] for added in chunks])

     item = {b'path': stored_path, b'chunks': chunks}
     item.update(self.stat_attrs(st, path))
     self.stats.nfiles += 1
     self.add_item(item)
Example #3
0
 def flush(self, flush=False):
     """Chunk the buffered bytes and write the resulting chunks.

     Returns immediately when the buffer position is 0.  Otherwise the
     whole buffer is chunked and emptied, and each chunk is passed to
     ``self.write_chunk`` with the result appended to ``self.chunks``.

     :param flush: when true, every chunk is written.  When false and more
         than one chunk was produced, the last (partial) chunk is put back
         into the buffer instead of being written.
     """
     if self.buffer.tell() == 0:
         return

     self.buffer.seek(0)
     pieces = [bytes(raw) for raw in chunkify(self.buffer, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)]
     self.buffer.seek(0)
     self.buffer.truncate(0)

     # Keep the last partial chunk buffered unless flushing, or unless it
     # is the only chunk there is.
     keep_tail = not flush and len(pieces) != 1
     ready = pieces[:-1] if keep_tail else pieces
     for piece in ready:
         self.chunks.append(self.write_chunk(piece))
     if keep_tail:
         self.buffer.write(pieces[-1])
Example #4
0
 def flush(self, flush=False):
     """Write out the chunks currently held in ``self.buffer``.

     Nothing happens when the buffer position is 0.  Otherwise the buffer
     content is split with ``chunkify`` and the buffer is emptied.  Each
     chunk that is written goes through ``self.write_chunk`` and the
     result is appended to ``self.chunks``.

     :param flush: true to write all chunks; false to write all but the
         last one, which goes back into the buffer (a lone chunk is always
         written)
     """
     if self.buffer.tell() == 0:
         return

     self.buffer.seek(0)
     split = [bytes(view) for view in chunkify(self.buffer, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)]
     self.buffer.seek(0)
     self.buffer.truncate(0)

     if flush or len(split) == 1:
         # Everything goes out; the buffer stays empty.
         for data in split:
             self.chunks.append(self.write_chunk(data))
     else:
         # The last partial chunk stays in the buffer for the next call.
         for data in split[:-1]:
             self.chunks.append(self.write_chunk(data))
         self.buffer.write(split[-1])
Example #5
0
 def flush_items(self, flush=False):
     """Chunk the buffered item data and store the chunks in the cache.

     Returns immediately when the ``self.items`` position is 0.  Otherwise
     the buffer is chunked and emptied.  Every chunk except the last is
     added to ``self.cache`` and its id appended to ``self.items_ids``.

     :param flush: when true, or when only one chunk was produced, the
         last chunk is stored as well; otherwise it is written back into
         ``self.items``
     """
     if self.items.tell() == 0:
         return

     def store(piece):
         # Add one chunk to the cache and remember the id it was given.
         chunk_id, _, _ = self.cache.add_chunk(self.key.id_hash(piece), piece, self.stats)
         self.items_ids.append(chunk_id)

     self.items.seek(0)
     pieces = [bytes(raw) for raw in chunkify(self.items, WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, self.key.chunk_seed)]
     self.items.seek(0)
     self.items.truncate()

     for piece in pieces[:-1]:
         store(piece)

     if not flush and len(pieces) != 1:
         # Keep the trailing chunk buffered for a later call.
         self.items.write(pieces[-1])
     else:
         store(pieces[-1])
Example #6
0
 def test_chunkify(self):
     """Check ``chunkify`` output against fixed expected chunk lists.

     Covers a large input that must split into exactly two chunks which
     join back to the original, an empty input, and a short repeated
     sample run with several combinations of the four numeric arguments.
     """
     def split(raw, *params):
         # Run chunkify over an in-memory stream and materialize each chunk.
         return [bytes(piece) for piece in chunkify(BytesIO(raw), *params)]

     big = b'0' * 1024 * 1024 * 15 + b'Y'
     big_parts = split(big, 2, 0x3, 2, 0)
     self.assert_equal(len(big_parts), 2)
     self.assert_equal(b''.join(big_parts), big)

     self.assert_equal(split(b'', 2, 0x3, 2, 0), [])

     sample = b'foobarboobaz' * 3
     # (numeric chunkify arguments, expected chunks) in the order checked.
     expectations = [
         ((2, 0x3, 2, 0),
          [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']),
         ((2, 0x3, 2, 1),
          [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf',
           b'oobarb', b'oobaz']),
         ((2, 0x3, 2, 2),
          [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar',
           b'boobaz']),
         ((3, 0x3, 3, 0),
          [sample]),
         ((3, 0x3, 3, 1),
          [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar',
           b'boobaz']),
         ((3, 0x3, 3, 2),
          [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz']),
         ((3, 0x3, 4, 0),
          [sample]),
         ((3, 0x3, 4, 1),
          [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']),
         ((3, 0x3, 4, 2),
          [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']),
     ]
     for params, expected in expectations:
         self.assert_equal(split(sample, *params), expected)
Example #7
0
 def test_chunkify(self):
     """Verify ``chunkify`` against known-good chunk boundaries.

     First a large input is checked to split into two chunks that join
     back to the original, then an empty input is checked to yield no
     chunks, and finally a table of argument combinations is run over a
     short repeated sample.
     """
     def chunks_of(payload, a, b, c, d):
         # Collect the chunks of ``payload`` as a list of bytes objects.
         return [bytes(part) for part in chunkify(BytesIO(payload), a, b, c, d)]

     data = b'0' * 1024 * 1024 * 15 + b'Y'
     parts = chunks_of(data, 2, 0x3, 2, 0)
     self.assert_equal(len(parts), 2)
     self.assert_equal(b''.join(parts), data)

     self.assert_equal(chunks_of(b'', 2, 0x3, 2, 0), [])

     text = b'foobarboobaz' * 3
     # Each row: the four numeric chunkify arguments, then the expected chunks.
     table = (
         (2, 0x3, 2, 0, [b'fooba', b'rboobaz', b'fooba', b'rboobaz', b'fooba', b'rboobaz']),
         (2, 0x3, 2, 1, [b'fo', b'obarb', b'oob', b'azf', b'oobarb', b'oob', b'azf', b'oobarb', b'oobaz']),
         (2, 0x3, 2, 2, [b'foob', b'ar', b'boobazfoob', b'ar', b'boobazfoob', b'ar', b'boobaz']),
         (3, 0x3, 3, 0, [text]),
         (3, 0x3, 3, 1, [b'foobar', b'boo', b'bazfo', b'obar', b'boo', b'bazfo', b'obar', b'boobaz']),
         (3, 0x3, 3, 2, [b'foo', b'barboobaz', b'foo', b'barboobaz', b'foo', b'barboobaz']),
         (3, 0x3, 4, 0, [text]),
         (3, 0x3, 4, 1, [b'foobar', b'boobazfo', b'obar', b'boobazfo', b'obar', b'boobaz']),
         (3, 0x3, 4, 2, [b'foob', b'arboobaz', b'foob', b'arboobaz', b'foob', b'arboobaz']),
     )
     for first, second, third, fourth, wanted in table:
         self.assert_equal(chunks_of(text, first, second, third, fourth), wanted)