def test_replicate_handoff_overwrites_old_version(self):
    ts = (server.diskfile.Timestamp(t)
          for t in itertools.count(int(time.time())))
    source_device = '127.0.0.1:%s' % self.ports[0]
    target_port = self.ports[1]
    target_device = '127.0.0.1:%s' % target_port
    other_port = self.ports[2]
    other_device = '127.0.0.1:%s' % other_port
    # put an old copy on the source
    old_ts = next(ts).internal
    self.put_object(source_device, 'obj1', body='old', timestamp=old_ts)
    # put a newer copy on the handoff
    expected = self.put_object(other_device, 'obj1', body='new',
                               timestamp=next(ts).internal)
    # replicate to the other servers
    self.daemon._replicate(other_device, policy=self.policy)
    result = self.get_object(source_device, 'obj1')
    self.assertEqual(expected, result)
    result = self.get_object(target_device, 'obj1')
    self.assertEqual(expected, result)
    # and now it's gone from the handoff
    self.assertRaises(server.diskfile.DiskFileNotExist, self.get_object,
                      other_device, 'obj1')
    # immediately after a push to the new node, the old version is still
    # on the drive
    head_markers = key_range_markers(
        server.diskfile.get_data_dir(self.policy))
    self.assertEqual(2, len(self.client_map[self.ports[0]].getKeyRange(
        *head_markers).wait()))
    chunk_markers = key_range_markers('chunks')
    self.assertEqual(2, len(self.client_map[self.ports[0]].getKeyRange(
        *chunk_markers).wait()))
    # but after another replication pass the old copy is gone
    self.daemon._replicate(source_device, policy=self.policy)
    self.assertEqual(1, len(self.client_map[self.ports[0]].getKeyRange(
        *head_markers).wait()))
    self.assertEqual(1, len(self.client_map[self.ports[0]].getKeyRange(
        *chunk_markers).wait()))
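# NOTE: key_range_markers() is used throughout these tests but defined
# elsewhere in the tree. Judging from the literal bounds used in
# test_cleanup_aborted_uploads below ('chunks.', 'chunks/'), a minimal
# sketch of the assumed convention looks like this -- an illustration of
# the dot-delimited key scheme, not the canonical helper:
def _example_key_range_markers(prefix):
    # '/' is the ASCII character immediately after '.', so this pair of
    # markers bounds every key that starts with '<prefix>.'
    return '%s.' % prefix, '%s/' % prefix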
def test_put_with_body(self):
    num_chunks = 10
    # PUT
    headers = {
        'x-timestamp': Timestamp(time.time()).internal,
        'content-type': 'application/octet-stream',
        'content-length': self.disk_chunk_size * num_chunks,
        'x-backend-storage-policy-index': int(self.policy),
    }
    conn = http_connect(method='PUT', path='/a/c/o', headers=headers,
                        **self.node)
    for i in range(num_chunks):
        conn.send(self.disk_chunk_size * chr(97 + i))
    resp = conn.getresponse()
    self.assertEqual(resp.status, 201)
    resp.read()
    # GET
    path = quote('/%(device)s/%(partition)s' % self.node) + '/a/c/o'
    conn.putrequest('GET', path)
    conn.putheader('x-backend-storage-policy-index', int(self.policy))
    conn.endheaders()
    resp = conn.getresponse()
    self.assertEqual(resp.status, 200)
    expected_body = ''.join(self.disk_chunk_size * chr(97 + i)
                            for i in range(num_chunks))
    self.assertEqual(resp.read(), expected_body)
    # sanity check the chunk keys
    object_dir = server.diskfile.get_data_dir(self.policy)
    keys = self.client.getKeyRange(*key_range_markers(object_dir)).wait()
    self.assertEqual(len(keys), 1)
    keys = self.client.getKeyRange(*key_range_markers('chunks')).wait()
    self.assertEqual(len(keys), num_chunks)
    tmp_dir = server.diskfile.get_tmp_dir(self.policy)
    keys = self.client.getKeyRange(*key_range_markers(tmp_dir)).wait()
    self.assertEqual(len(keys), 0)
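# http_connect() above is presumably swift's
# swift.common.bufferedhttp.http_connect; expanding **self.node supplies
# its ipaddr, port, device, and partition arguments by keyword. A
# hypothetical node dict of the assumed shape (the real fixture is built
# in setUp; all values here are placeholders for illustration):
_EXAMPLE_NODE = {
    'ipaddr': '127.0.0.1',
    'port': 6010,
    'device': '127.0.0.1:6010',
    'partition': '0',
}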
def _cleanup_old_chunks(conn, policy):
    """
    Look for expired temp markers and remove any orphaned chunks they
    point to.
    """
    temp_range = key_range_markers(diskfile.get_tmp_dir(policy))
    for temp_marker in conn.iterKeyRange(*temp_range):
        # tmp.<hash>.<nonce>.<time>.<stamp>
        parts = temp_marker.split('.')
        timeout = float('.'.join(parts[3:5])) + CLEANUP_ABORT_UPLOAD_SECONDS
        if time.time() < timeout:
            continue
        # see if a head key exists for this nonce
        hash_range = key_range_markers('%s.%s' % (
            diskfile.get_data_dir(policy), parts[1]))
        for key in conn.iterKeyRange(*hash_range):
            if split_key(key)['nonce'] == parts[2]:
                break
        else:
            # no matching head key was found; the upload was aborted, so
            # remove its orphaned chunks
            chunk_marker = 'chunks.{1}.{2}'.format(*parts)
            chunk_range = key_range_markers(chunk_marker)
            conn.delete_keys(conn.iterKeyRange(*chunk_range))
        conn.delete(temp_marker, force=True).wait()
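# split_key() is defined elsewhere; _cleanup_old_chunks only relies on the
# returned mapping having a 'nonce' field. A minimal sketch, assuming head
# keys are dot-delimited like the temp markers, with the hash second and
# the nonce in the final field -- the real key schema may order its fields
# differently:
def _example_split_key(key):
    parts = key.split('.')
    return {'hash': parts[1], 'nonce': parts[-1]}  # assumed positions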
def test_put_disconnect(self):
    num_chunks = 10
    # PUT
    headers = {
        'x-timestamp': Timestamp(time.time()).internal,
        'content-type': 'application/octet-stream',
        'content-length': self.disk_chunk_size * num_chunks,
        'x-backend-storage-policy-index': int(self.policy),
    }
    conn = http_connect(method='PUT', path='/a/c/o', headers=headers,
                        **self.node)
    for i in range(num_chunks - 1):
        conn.send(self.disk_chunk_size * chr(97 + i))
    conn.close()
    # GET
    headers = {
        'x-backend-storage-policy-index': int(self.policy),
    }
    conn = http_connect(method='GET', path='/a/c/o', headers=headers,
                        **self.node)
    resp = conn.getresponse()
    self.assertEqual(resp.status, 404)
    # per-policy dirs
    object_dir = server.diskfile.get_data_dir(self.policy)
    tmp_dir = server.diskfile.get_tmp_dir(self.policy)
    # find old chunks
    keys = self.client.getKeyRange(*key_range_markers(object_dir)).wait()
    self.assertEqual(len(keys), 0)
    keys = self.client.getKeyRange(*key_range_markers('chunks')).wait()
    self.assertLessEqual(len(keys), num_chunks - 1)
    keys = self.client.getKeyRange(*key_range_markers(tmp_dir)).wait()
    self.assertEqual(len(keys), 1)
    # make sure the replicator won't clean up too quickly
    replicator._cleanup_old_chunks(self.client, self.policy)
    # still have old chunks
    keys = self.client.getKeyRange(*key_range_markers(object_dir)).wait()
    self.assertEqual(len(keys), 0)
    keys = self.client.getKeyRange(*key_range_markers('chunks')).wait()
    self.assertLessEqual(len(keys), num_chunks - 1)
    keys = self.client.getKeyRange(*key_range_markers(tmp_dir)).wait()
    self.assertEqual(len(keys), 1)
    # ... but after a while
    the_future = time.time() + (9 * 60 * 60)
    with mock.patch('time.time') as mock_time:
        mock_time.return_value = the_future
        replicator._cleanup_old_chunks(self.client, self.policy)
    # no more old chunks!
    keys = self.client.getKeyRange(*key_range_markers(object_dir)).wait()
    self.assertEqual(len(keys), 0)
    keys = self.client.getKeyRange(*key_range_markers('chunks')).wait()
    self.assertEqual(len(keys), 0)
    keys = self.client.getKeyRange(*key_range_markers(tmp_dir)).wait()
    self.assertEqual(len(keys), 0)
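# The nine hour jump above only needs to exceed the abort-upload grace
# period. Given the temp marker layout noted in _cleanup_old_chunks
# (tmp.<hash>.<nonce>.<time>.<stamp>), the expiry check reduces to the
# arithmetic below -- a sketch assuming the timeout constant is exported
# by the replicator module:
def _example_temp_marker_expired(temp_marker, now):
    parts = temp_marker.split('.')
    timeout = float('.'.join(parts[3:5])) + \
        replicator.CLEANUP_ABORT_UPLOAD_SECONDS
    # cleanup only touches this marker once the clock passes the timeout
    return now >= timeout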
def test_cleanup_orphaned_temp_markers(self):
    # use the first primary to keep the object from being replicated off
    port = self.ports[0]
    conn = self.client_map[port]
    dev = '127.0.0.1:%s' % port
    num_chunks = 3
    # we're going to sniff for temp_markers during upload
    tmp = replicator.diskfile.get_tmp_dir(self.policy)
    temp_markers = []

    def find_temp_markers():
        for key in conn.getKeyRange(*key_range_markers(tmp)).wait():
            temp_markers.append(key)

    # sanity check: no temp markers yet
    find_temp_markers()
    self.assertFalse(temp_markers)

    # this is our test body
    def good_body():
        for i in range(num_chunks):
            yield chr(97 + i) * self.mgr.disk_chunk_size

    # make a new object, and sniff for temp_markers as we go
    def sniffing_body():
        for chunk in good_body():
            yield chunk
            find_temp_markers()
        yield ''

    self.put_object(dev, 'obj1', body=sniffing_body())
    # sanity: the object is fine
    metadata, body = self.get_object(dev, 'obj1')
    self.assertEqual(body, ''.join(good_body()))
    # the temp_marker is of course cleaned up
    self.assertEqual(len(temp_markers), 1)
    temp_marker = temp_markers.pop(0)
    self.assertFalse(conn.get(temp_marker).wait())
    # ... but even if we put it back to simulate a failure to remove the
    # temp_marker after a successful upload
    conn.put(temp_marker, '').wait()
    # replication will ignore it
    self.daemon._replicate(dev, policy=self.policy)
    expected = {
        'chunks': 3,
        tmp: 1,
    }
    for marker, count in expected.items():
        keys = conn.getKeyRange(*key_range_markers(marker)).wait()
        self.assertEqual(count, len(keys))
    # ... and the object is unaffected
    self.assertEqual(self.get_object(dev, 'obj1'), (metadata, body))
    # ... until sometime later, when replication will just clean it up
    the_future = time.time() + (9 * 60 * 60)
    with mock.patch('time.time') as mock_time:
        mock_time.return_value = the_future
        self.daemon._replicate(dev, policy=self.policy)
    expected = {
        'chunks': 3,
        tmp: 0,
    }
    for marker, count in expected.items():
        keys = conn.getKeyRange(*key_range_markers(marker)).wait()
        self.assertEqual(count, len(keys))
    # ... and the object is *still* unaffected
    self.assertEqual(self.get_object(dev, 'obj1'), (metadata, body))
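# The sniffing_body() trick above -- wrapping the upload generator so the
# test can probe server-side keys between chunks -- generalizes to any
# mid-upload observation. A minimal sketch, with a hypothetical probe
# callable standing in for find_temp_markers:
def _example_sniffing_body(body_iter, probe):
    for chunk in body_iter:
        yield chunk
        probe()  # runs after each chunk has been handed to the server
    yield ''  # terminating empty chunk, matching the test above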
def test_cleanup_aborted_uploads(self):
    port = random.choice(self.ports)
    conn = self.client_map[port]
    dev = '127.0.0.1:%s' % port
    num_chunks = 3

    # first make a good obj1 diskfile
    def good_body():
        for i in range(num_chunks):
            yield chr(97 + i) * self.mgr.disk_chunk_size

    self.put_object(dev, 'obj1', body=good_body())
    # sanity
    metadata, body = self.get_object(dev, 'obj1')
    self.assertEqual(body, ''.join(good_body()))
    # sanity check the chunk keys
    keys = conn.getKeyRange('chunks.', 'chunks/').wait()
    self.assertEqual(3, len(keys))

    # now upload another copy, but blow up at the end
    def exploding_body():
        for chunk in good_body():
            yield chunk
        raise Exception('KABOOM!')

    try:
        self.put_object(dev, 'obj1', body=exploding_body())
    except Exception:
        pass
    else:
        self.fail('exploding_body did not explode!')
    # we can still read the old version
    self.assertEqual(self.get_object(dev, 'obj1'), (metadata, body))
    # ... but there are a few extra chunks
    extra_chunks = []
    for key in conn.getKeyRange('chunks.', 'chunks/').wait():
        if key in keys:
            continue
        extra_chunks.append(key)
    self.assertTrue(extra_chunks)
    hash_, nonce = extra_chunks[0].split('.')[1:3]
    # ... and a temp_marker
    tmp = replicator.diskfile.get_tmp_dir(self.policy)
    temp_markers = []
    for key in conn.getKeyRange(*key_range_markers(tmp)).wait():
        temp_markers.append(key)
    self.assertEqual(1, len(temp_markers))  # sanity
    # which points to the extra chunk keys by hash and nonce
    temp_hash, temp_nonce = temp_markers[0].split('.')[1:3]
    self.assertEqual(hash_, temp_hash)
    self.assertEqual(nonce, temp_nonce)
    extra_chunk_marker = 'chunks.%s.%s' % (hash_, nonce)
    # ... it'll stay like this until the aborted upload times out
    self.daemon._replicate(dev, policy=self.policy)
    expected = {
        extra_chunk_marker: len(extra_chunks),
        tmp: 1,
    }
    for marker, count in expected.items():
        keys = conn.getKeyRange(*key_range_markers(marker)).wait()
        msg = 'expected %s %s keys, found %r' % (count, marker, keys)
        self.assertEqual(count, len(keys), msg)
    # ... but after a while, the next time it runs
    the_future = time.time() + (9 * 60 * 60)
    with mock.patch('time.time') as mock_time:
        mock_time.return_value = the_future
        self.daemon._replicate(dev, policy=self.policy)
    expected = {
        extra_chunk_marker: 0,
        tmp: 0,
    }
    for marker, count in expected.items():
        keys = conn.getKeyRange(*key_range_markers(marker)).wait()
        msg = 'expected %s %s keys, found %r' % (count, marker, keys)
        self.assertEqual(count, len(keys), msg)
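# test_cleanup_aborted_uploads pairs a temp marker with its orphaned chunk
# keys purely by hash and nonce, exactly as the test computes
# extra_chunk_marker. A minimal helper capturing that mapping, assuming
# the tmp.<hash>.<nonce>.<time>.<stamp> layout from _cleanup_old_chunks:
def _example_chunk_marker_for(temp_marker):
    hash_, nonce = temp_marker.split('.')[1:3]
    return 'chunks.%s.%s' % (hash_, nonce)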