def test_rebuild_partner_down(self):
    """Verify the reconstructor rebuilds a missing primary fragment even
    while one of the rebuilding primary's partner devices is 507'd.

    NOTE(review): this file contains several definitions of this method;
    only the last one in the class body takes effect — confirm which
    revision is intended.
    """
    # create EC container
    headers = {'X-Storage-Policy': self.policy.name}
    client.put_container(self.url, self.token, self.container_name,
                         headers=headers)

    # PUT object
    contents = Body()
    client.put_object(self.url, self.token, self.container_name,
                      self.object_name, contents=contents)

    opart, onodes = self.object_ring.get_nodes(
        self.account, self.container_name, self.object_name)

    # find a primary server that only has one of its devices in the
    # primary node list
    group_nodes_by_config = defaultdict(list)
    for n in onodes:
        group_nodes_by_config[self.config_number(n)].append(n)
    for config_number, node_list in group_nodes_by_config.items():
        if len(node_list) == 1:
            break
    else:
        self.fail('ring balancing did not use all available nodes')
    primary_node = node_list[0]

    # pick one of its partners to fail randomly
    partner_node = random.choice(
        _get_partners(primary_node['index'], onodes))

    # 507 the partner device
    device_path = self.device_dir('object', partner_node)
    self.kill_drive(device_path)

    # select another primary sync_to node to fail
    failed_primary = [
        n for n in onodes
        if n['id'] not in (primary_node['id'], partner_node['id'])
    ][0]
    # ... capture its fragment etag
    failed_primary_etag = self.direct_get(failed_primary, opart)
    # ... and delete it
    part_dir = self.storage_dir('object', failed_primary, part=opart)
    shutil.rmtree(part_dir, True)

    # reconstruct from the primary, while one of its partners is 507'd
    self.reconstructor.once(number=self.config_number(primary_node))

    # the other failed primary will get its fragment rebuilt instead
    self.assertEqual(failed_primary_etag,
                     self.direct_get(failed_primary, opart))

    # just to be nice
    self.revive_drive(device_path)
def test_rebuild_partner_down(self):
    """Verify the reconstructor rebuilds a missing primary fragment even
    while one of the rebuilding primary's partner devices is 507'd.

    NOTE(review): duplicate definition of this method exists in this
    file; only the last one in the class body takes effect.
    """
    # create EC container
    headers = {'X-Storage-Policy': self.policy.name}
    client.put_container(self.url, self.token, self.container_name,
                         headers=headers)

    # PUT object
    contents = Body()
    client.put_object(self.url, self.token, self.container_name,
                      self.object_name, contents=contents)

    opart, onodes = self.object_ring.get_nodes(
        self.account, self.container_name, self.object_name)

    # find a primary server that only has one of its devices in the
    # primary node list
    group_nodes_by_config = defaultdict(list)
    for n in onodes:
        group_nodes_by_config[self.config_number(n)].append(n)
    for config_number, node_list in group_nodes_by_config.items():
        if len(node_list) == 1:
            break
    else:
        self.fail('ring balancing did not use all available nodes')
    primary_node = node_list[0]

    # pick one of its partners to fail randomly
    partner_node = random.choice(_get_partners(
        primary_node['index'], onodes))

    # 507 the partner device
    device_path = self.device_dir('object', partner_node)
    self.kill_drive(device_path)

    # select another primary sync_to node to fail
    failed_primary = [n for n in onodes
                      if n['id'] not in (primary_node['id'],
                                         partner_node['id'])][0]
    # ... capture its fragment etag
    failed_primary_etag = self.direct_get(failed_primary, opart)
    # ... and delete it
    part_dir = self.storage_dir('object', failed_primary, part=opart)
    shutil.rmtree(part_dir, True)

    # reconstruct from the primary, while one of its partners is 507'd
    self.reconstructor.once(number=self.config_number(primary_node))

    # the other failed primary will get its fragment rebuilt instead
    self.assertEqual(failed_primary_etag,
                     self.direct_get(failed_primary, opart))

    # just to be nice
    self.revive_drive(device_path)
def test_rebuild_partner_down(self):
    """Verify a missing primary fragment is rebuilt (etag and metadata,
    modulo Date) while one of the rebuilding primary's partner devices
    is 507'd.

    Relies on the fixture state set up elsewhere in the class
    (``self.opart`` / ``self.onodes``).  NOTE(review): duplicate
    definitions of this method exist in this file; only the last one in
    the class body takes effect.
    """
    # find a primary server that only has one of its devices in the
    # primary node list
    group_nodes_by_config = defaultdict(list)
    for n in self.onodes:
        group_nodes_by_config[self.config_number(n)].append(n)
    for config_number, node_list in group_nodes_by_config.items():
        if len(node_list) == 1:
            break
    else:
        self.fail('ring balancing did not use all available nodes')
    primary_node = node_list[0]

    # pick one of its partners to fail randomly
    partner_node = random.choice(
        _get_partners(primary_node['index'], self.onodes))

    # 507 the partner device
    device_path = self.device_dir('object', partner_node)
    self.kill_drive(device_path)

    # select another primary sync_to node to fail
    failed_primary = [
        n for n in self.onodes
        if n['id'] not in (primary_node['id'], partner_node['id'])
    ][0]
    # ... capture its fragment etag
    failed_primary_meta, failed_primary_etag = self.direct_get(
        failed_primary, self.opart)
    # ... and delete it
    part_dir = self.storage_dir('object', failed_primary, part=self.opart)
    shutil.rmtree(part_dir, True)

    # reconstruct from the primary, while one of its partners is 507'd
    self.reconstructor.once(number=self.config_number(primary_node))

    # the other failed primary will get its fragment rebuilt instead
    failed_primary_meta_new, failed_primary_etag_new = self.direct_get(
        failed_primary, self.opart)
    # the rebuilt fragment gets a fresh timestamp, so ignore Date when
    # comparing metadata
    del failed_primary_meta['Date']
    del failed_primary_meta_new['Date']
    self.assertEqual(failed_primary_etag, failed_primary_etag_new)
    self.assertEqual(failed_primary_meta, failed_primary_meta_new)

    # just to be nice
    self.revive_drive(device_path)
def test_rebuild_partner_down(self):
    """Verify a missing primary fragment is rebuilt (etag and metadata,
    modulo Date) while one of the rebuilding primary's partner devices
    is 507'd.

    Relies on the fixture state set up elsewhere in the class
    (``self.opart`` / ``self.onodes``).
    """
    # find a primary server that only has one of its devices in the
    # primary node list
    group_nodes_by_config = defaultdict(list)
    for n in self.onodes:
        group_nodes_by_config[self.config_number(n)].append(n)
    for config_number, node_list in group_nodes_by_config.items():
        if len(node_list) == 1:
            break
    else:
        self.fail('ring balancing did not use all available nodes')
    primary_node = node_list[0]

    # pick one of its partners to fail randomly
    partner_node = random.choice(_get_partners(
        primary_node['index'], self.onodes))

    # 507 the partner device
    device_path = self.device_dir('object', partner_node)
    self.kill_drive(device_path)

    # select another primary sync_to node to fail
    failed_primary = [n for n in self.onodes
                      if n['id'] not in (primary_node['id'],
                                         partner_node['id'])][0]
    # ... capture its fragment etag
    failed_primary_meta, failed_primary_etag = self.direct_get(
        failed_primary, self.opart)
    # ... and delete it
    part_dir = self.storage_dir('object', failed_primary, part=self.opart)
    shutil.rmtree(part_dir, True)

    # reconstruct from the primary, while one of its partners is 507'd
    self.reconstructor.once(number=self.config_number(primary_node))

    # the other failed primary will get its fragment rebuilt instead
    failed_primary_meta_new, failed_primary_etag_new = self.direct_get(
        failed_primary, self.opart)
    # the rebuilt fragment gets a fresh timestamp, so ignore Date when
    # comparing metadata
    del failed_primary_meta['Date']
    del failed_primary_meta_new['Date']
    self.assertEqual(failed_primary_etag, failed_primary_etag_new)
    self.assertEqual(failed_primary_meta, failed_primary_meta_new)

    # just to be nice
    self.revive_drive(device_path)
def test_reconstruct_from_reverted_fragment_archive(self):
    """PUT while a primary is down, let a handoff revert its fragment to
    the primary, then verify a partner's deleted fragment can be rebuilt
    from the reverted archive.

    NOTE(review): a duplicate definition of this method exists later in
    this file; only the last one in the class body takes effect.
    """
    headers = {'X-Storage-Policy': self.policy.name}
    client.put_container(self.url, self.token, self.container_name,
                         headers=headers)

    # get our node lists
    opart, onodes = self.object_ring.get_nodes(
        self.account, self.container_name, self.object_name)

    # find a primary server that only has one of its devices in the
    # primary node list
    group_nodes_by_config = defaultdict(list)
    for n in onodes:
        group_nodes_by_config[self.config_number(n)].append(n)
    for config_number, node_list in group_nodes_by_config.items():
        if len(node_list) == 1:
            break
    else:
        self.fail('ring balancing did not use all available nodes')
    primary_node = node_list[0]

    # ... and 507 its device
    primary_device = self.device_dir(primary_node)
    self.kill_drive(primary_device)

    # PUT object
    contents = Body()
    etag = client.put_object(self.url, self.token, self.container_name,
                             self.object_name, contents=contents)
    self.assertEqual(contents.etag, etag)

    # fix the primary device and sanity GET
    self.revive_drive(primary_device)
    self.assertEqual(etag, self.proxy_get())

    # find a handoff holding the fragment
    for hnode in self.object_ring.get_more_nodes(opart):
        try:
            reverted_fragment_etag = self.direct_get(hnode, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
        else:
            break
    else:
        self.fail('Unable to find handoff fragment!')

    # we'll force the handoff device to revert instead of potentially
    # racing with rebuild by deleting any other fragments that may be on
    # the same server
    handoff_fragment_etag = None
    for node in onodes:
        if self.is_local_to(node, hnode):
            # we'll keep track of the etag of this fragment we're
            # removing in case we need it later (cue foreshadowing
            # music)...
            try:
                handoff_fragment_etag = self.direct_get(node, opart)
            except direct_client.DirectClientException as err:
                if err.http_status != 404:
                    raise
                # this just means our handoff device was on the same
                # machine as the primary!
                continue
            # use the primary nodes device - not the hnode device
            part_dir = self.storage_dir(node, part=opart)
            shutil.rmtree(part_dir, True)

    # revert from handoff device with reconstructor
    self.reconstructor.once(number=self.config_number(hnode))

    # verify fragment reverted to primary server
    self.assertEqual(reverted_fragment_etag,
                     self.direct_get(primary_node, opart))

    # now we'll remove some data on one of the primary node's partners
    partner = random.choice(
        reconstructor._get_partners(primary_node['index'], onodes))

    try:
        rebuilt_fragment_etag = self.direct_get(partner, opart)
    except direct_client.DirectClientException as err:
        if err.http_status != 404:
            raise
        # partner already had its fragment removed
        if (handoff_fragment_etag is not None and
                self.is_local_to(hnode, partner)):
            # oh, well that makes sense then...
            rebuilt_fragment_etag = handoff_fragment_etag
        else:
            # I wonder what happened?
            self.fail('Partner inexplicably missing fragment!')

    part_dir = self.storage_dir(partner, part=opart)
    shutil.rmtree(part_dir, True)

    # sanity, it's gone
    try:
        self.direct_get(partner, opart)
    except direct_client.DirectClientException as err:
        if err.http_status != 404:
            raise
    else:
        self.fail('successful GET of removed partner fragment archive!?')

    # and force the primary node to do a rebuild
    self.reconstructor.once(number=self.config_number(primary_node))

    # and validate the partners rebuilt_fragment_etag
    try:
        self.assertEqual(rebuilt_fragment_etag,
                         self.direct_get(partner, opart))
    except direct_client.DirectClientException as err:
        if err.http_status != 404:
            raise
    else:
        self.fail('Did not find rebuilt fragment on partner node')
def test_reconstruct_from_reverted_fragment_archive(self):
    """PUT while a primary is down, let a handoff revert its fragment to
    the primary, then verify a partner's deleted fragment can be rebuilt
    from the reverted archive.
    """
    headers = {"X-Storage-Policy": self.policy.name}
    client.put_container(self.url, self.token, self.container_name,
                         headers=headers)

    # get our node lists
    opart, onodes = self.object_ring.get_nodes(
        self.account, self.container_name, self.object_name)

    # find a primary server that only has one of its devices in the
    # primary node list
    group_nodes_by_config = defaultdict(list)
    for n in onodes:
        group_nodes_by_config[self.config_number(n)].append(n)
    for config_number, node_list in group_nodes_by_config.items():
        if len(node_list) == 1:
            break
    else:
        self.fail("ring balancing did not use all available nodes")
    primary_node = node_list[0]

    # ... and 507 its device
    primary_device = self.device_dir("object", primary_node)
    self.kill_drive(primary_device)

    # PUT object
    contents = Body()
    etag = client.put_object(self.url, self.token, self.container_name,
                             self.object_name, contents=contents)
    self.assertEqual(contents.etag, etag)

    # fix the primary device and sanity GET
    self.revive_drive(primary_device)
    self.assertEqual(etag, self.proxy_get())

    # find a handoff holding the fragment
    for hnode in self.object_ring.get_more_nodes(opart):
        try:
            reverted_fragment_etag = self.direct_get(hnode, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
        else:
            break
    else:
        self.fail("Unable to find handoff fragment!")

    # we'll force the handoff device to revert instead of potentially
    # racing with rebuild by deleting any other fragments that may be on
    # the same server
    handoff_fragment_etag = None
    for node in onodes:
        if self.is_local_to(node, hnode):
            # we'll keep track of the etag of this fragment we're
            # removing in case we need it later (cue foreshadowing
            # music)...
            try:
                handoff_fragment_etag = self.direct_get(node, opart)
            except direct_client.DirectClientException as err:
                if err.http_status != 404:
                    raise
                # this just means our handoff device was on the same
                # machine as the primary!
                continue
            # use the primary nodes device - not the hnode device
            part_dir = self.storage_dir("object", node, part=opart)
            shutil.rmtree(part_dir, True)

    # revert from handoff device with reconstructor
    self.reconstructor.once(number=self.config_number(hnode))

    # verify fragment reverted to primary server
    self.assertEqual(reverted_fragment_etag,
                     self.direct_get(primary_node, opart))

    # now we'll remove some data on one of the primary node's partners
    partner = random.choice(
        reconstructor._get_partners(primary_node["index"], onodes))

    try:
        rebuilt_fragment_etag = self.direct_get(partner, opart)
    except direct_client.DirectClientException as err:
        if err.http_status != 404:
            raise
        # partner already had its fragment removed
        if (handoff_fragment_etag is not None and
                self.is_local_to(hnode, partner)):
            # oh, well that makes sense then...
            rebuilt_fragment_etag = handoff_fragment_etag
        else:
            # I wonder what happened?
            self.fail("Partner inexplicably missing fragment!")

    part_dir = self.storage_dir("object", partner, part=opart)
    shutil.rmtree(part_dir, True)

    # sanity, it's gone
    try:
        self.direct_get(partner, opart)
    except direct_client.DirectClientException as err:
        if err.http_status != 404:
            raise
    else:
        self.fail("successful GET of removed partner fragment archive!?")

    # and force the primary node to do a rebuild
    self.reconstructor.once(number=self.config_number(primary_node))

    # and validate the partners rebuilt_fragment_etag
    try:
        self.assertEqual(rebuilt_fragment_etag,
                         self.direct_get(partner, opart))
    except direct_client.DirectClientException as err:
        if err.http_status != 404:
            raise
    else:
        self.fail("Did not find rebuilt fragment on partner node")