Example #1
    def test_rebuild_partner_down(self):
        # create EC container
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # PUT object
        contents = Body()
        client.put_object(self.url,
                          self.token,
                          self.container_name,
                          self.object_name,
                          contents=contents)

        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)

        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # pick one of its partners to fail at random
        partner_node = random.choice(
            _get_partners(primary_node['index'], onodes))

        # 507 the partner device
        device_path = self.device_dir('object', partner_node)
        self.kill_drive(device_path)

        # select another primary sync_to node to fail
        failed_primary = [
            n for n in onodes
            if n['id'] not in (primary_node['id'], partner_node['id'])
        ][0]
        # ... capture its fragment etag
        failed_primary_etag = self.direct_get(failed_primary, opart)
        # ... and delete it
        part_dir = self.storage_dir('object', failed_primary, part=opart)
        shutil.rmtree(part_dir, True)

        # reconstruct from the primary, while one of its partners is 507'd
        self.reconstructor.once(number=self.config_number(primary_node))

        # the other failed primary will get its fragment rebuilt instead
        self.assertEqual(failed_primary_etag,
                         self.direct_get(failed_primary, opart))

        # just to be nice
        self.revive_drive(device_path)
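For context, the _get_partners helper these examples exercise picks the primaries that a given node syncs its fragment archives with. Below is a minimal illustrative sketch of that ring-neighbour idea; the index arithmetic and return value are assumptions for illustration, not a copy of swift.obj.reconstructor._get_partners.

def _get_partners_sketch(node_index, part_nodes):
    # Illustrative only (assumed behaviour): return the ring neighbours of
    # the primary at node_index, i.e. the nodes whose fragment archives the
    # reconstructor on that primary is responsible for syncing or rebuilding.
    num_nodes = len(part_nodes)
    return [
        part_nodes[(node_index - 1) % num_nodes],  # left-hand partner
        part_nodes[(node_index + 1) % num_nodes],  # right-hand partner
    ]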
Example #2
    def test_rebuild_partner_down(self):
        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in self.onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # pick one of its partners to fail at random
        partner_node = random.choice(
            _get_partners(primary_node['index'], self.onodes))

        # 507 the partner device
        device_path = self.device_dir('object', partner_node)
        self.kill_drive(device_path)

        # select another primary sync_to node to fail
        failed_primary = [
            n for n in self.onodes
            if n['id'] not in (primary_node['id'], partner_node['id'])
        ][0]
        # ... capture its fragment etag
        failed_primary_meta, failed_primary_etag = self.direct_get(
            failed_primary, self.opart)
        # ... and delete it
        part_dir = self.storage_dir('object', failed_primary, part=self.opart)
        shutil.rmtree(part_dir, True)

        # reconstruct from the primary, while one of its partners is 507'd
        self.reconstructor.once(number=self.config_number(primary_node))

        # the other failed primary will get its fragment rebuilt instead
        failed_primary_meta_new, failed_primary_etag_new = self.direct_get(
            failed_primary, self.opart)
        del failed_primary_meta['Date']
        del failed_primary_meta_new['Date']
        self.assertEqual(failed_primary_etag, failed_primary_etag_new)
        self.assertEqual(failed_primary_meta, failed_primary_meta_new)

        # just to be nice
        self.revive_drive(device_path)
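In this variant direct_get returns a (headers, etag) pair, so the test can assert that both the fragment's checksum and its metadata (minus the Date header) survive the rebuild. A plausible sketch of such a helper is shown below; the exact probe-test implementation may differ, but direct_client.direct_get_object and the X-Backend-Storage-Policy-Index header are standard Swift pieces.

from hashlib import md5

from swift.common import direct_client


def direct_get_sketch(node, part, account, container, obj, policy_index):
    # Illustrative sketch: fetch the fragment archive straight from one
    # object server and return its response headers plus an MD5 of the body,
    # which is what the assertions above treat as the "fragment etag".
    backend_headers = {'X-Backend-Storage-Policy-Index': int(policy_index)}
    resp_headers, body = direct_client.direct_get_object(
        node, part, account, container, obj,
        headers=backend_headers, resp_chunk_size=64 * 2 ** 10)
    hasher = md5()
    for chunk in body:
        hasher.update(chunk)
    return resp_headers, hasher.hexdigest()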
Example #3
    def test_reconstruct_from_reverted_fragment_archive(self):
        headers = {'X-Storage-Policy': self.policy.name}
        client.put_container(self.url,
                             self.token,
                             self.container_name,
                             headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(self.account,
                                                   self.container_name,
                                                   self.object_name)

        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail('ring balancing did not use all available nodes')
        primary_node = node_list[0]

        # ... and 507 its device
        primary_device = self.device_dir(primary_node)
        self.kill_drive(primary_device)

        # PUT object
        contents = Body()
        etag = client.put_object(self.url,
                                 self.token,
                                 self.container_name,
                                 self.object_name,
                                 contents=contents)
        self.assertEqual(contents.etag, etag)

        # fix the primary device and sanity GET
        self.revive_drive(primary_device)
        self.assertEqual(etag, self.proxy_get())

        # find a handoff holding the fragment
        for hnode in self.object_ring.get_more_nodes(opart):
            try:
                reverted_fragment_etag = self.direct_get(hnode, opart)
            except direct_client.DirectClientException as err:
                if err.http_status != 404:
                    raise
            else:
                break
        else:
            self.fail('Unable to find handoff fragment!')

        # we'll force the handoff device to revert instead of potentially
        # racing with rebuild by deleting any other fragments that may be on
        # the same server
        handoff_fragment_etag = None
        for node in onodes:
            if self.is_local_to(node, hnode):
                # we'll keep track of the etag of this fragment we're removing
                # in case we need it later (cue foreshadowing music)...
                try:
                    handoff_fragment_etag = self.direct_get(node, opart)
                except direct_client.DirectClientException as err:
                    if err.http_status != 404:
                        raise
                    # this just means our handoff device was on the same
                    # machine as the primary!
                    continue
                # use the primary node's device - not the hnode device
                part_dir = self.storage_dir(node, part=opart)
                shutil.rmtree(part_dir, True)

        # revert from handoff device with reconstructor
        self.reconstructor.once(number=self.config_number(hnode))

        # verify fragment reverted to primary server
        self.assertEqual(reverted_fragment_etag,
                         self.direct_get(primary_node, opart))

        # now we'll remove some data on one of the primary node's partners
        partner = random.choice(
            reconstructor._get_partners(primary_node['index'], onodes))

        try:
            rebuilt_fragment_etag = self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            # partner already had its fragment removed
            if (handoff_fragment_etag is not None
                    and self.is_local_to(hnode, partner)):
                # oh, well that makes sense then...
                rebuilt_fragment_etag = handoff_fragment_etag
            else:
                # I wonder what happened?
                self.fail('Partner inexplicably missing fragment!')
        part_dir = self.storage_dir(partner, part=opart)
        shutil.rmtree(part_dir, True)

        # sanity, it's gone
        try:
            self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
        else:
            self.fail('successful GET of removed partner fragment archive!?')

        # and force the primary node to do a rebuild
        self.reconstructor.once(number=self.config_number(primary_node))

        # and validate the partner's rebuilt_fragment_etag
        try:
            self.assertEqual(rebuilt_fragment_etag,
                             self.direct_get(partner, opart))
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            else:
                self.fail('Did not find rebuilt fragment on partner node')
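The handoff search above walks object_ring.get_more_nodes(opart) until a direct GET stops returning 404. A small hedged sketch of that pattern, generalised to list every node currently holding a fragment for a partition, is below; probe(node, part) stands in for the tests' direct_get helper and max_handoffs is an arbitrary cut-off.

import itertools

from swift.common import direct_client


def find_fragment_holders_sketch(ring, part, probe, max_handoffs=8):
    # Illustrative only: check the partition's primaries and then a few
    # handoffs, returning the nodes where probe(node, part) does not 404.
    holders = []
    handoffs = itertools.islice(ring.get_more_nodes(part), max_handoffs)
    for node in itertools.chain(ring.get_part_nodes(part), handoffs):
        try:
            probe(node, part)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
        else:
            holders.append(node)
    return holders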
Example #4
    def test_reconstruct_from_reverted_fragment_archive(self):
        headers = {"X-Storage-Policy": self.policy.name}
        client.put_container(self.url, self.token, self.container_name, headers=headers)

        # get our node lists
        opart, onodes = self.object_ring.get_nodes(self.account, self.container_name, self.object_name)

        # find a primary server that only has one of its devices in the
        # primary node list
        group_nodes_by_config = defaultdict(list)
        for n in onodes:
            group_nodes_by_config[self.config_number(n)].append(n)
        for config_number, node_list in group_nodes_by_config.items():
            if len(node_list) == 1:
                break
        else:
            self.fail("ring balancing did not use all available nodes")
        primary_node = node_list[0]

        # ... and 507 its device
        primary_device = self.device_dir("object", primary_node)
        self.kill_drive(primary_device)

        # PUT object
        contents = Body()
        etag = client.put_object(self.url, self.token, self.container_name, self.object_name, contents=contents)
        self.assertEqual(contents.etag, etag)

        # fix the primary device and sanity GET
        self.revive_drive(primary_device)
        self.assertEqual(etag, self.proxy_get())

        # find a handoff holding the fragment
        for hnode in self.object_ring.get_more_nodes(opart):
            try:
                reverted_fragment_etag = self.direct_get(hnode, opart)
            except direct_client.DirectClientException as err:
                if err.http_status != 404:
                    raise
            else:
                break
        else:
            self.fail("Unable to find handoff fragment!")

        # we'll force the handoff device to revert instead of potentially
        # racing with rebuild by deleting any other fragments that may be on
        # the same server
        handoff_fragment_etag = None
        for node in onodes:
            if self.is_local_to(node, hnode):
                # we'll keep track of the etag of this fragment we're removing
                # in case we need it later (cue foreshadowing music)...
                try:
                    handoff_fragment_etag = self.direct_get(node, opart)
                except direct_client.DirectClientException as err:
                    if err.http_status != 404:
                        raise
                    # this just means our handoff device was on the same
                    # machine as the primary!
                    continue
                # use the primary node's device - not the hnode device
                part_dir = self.storage_dir("object", node, part=opart)
                shutil.rmtree(part_dir, True)

        # revert from handoff device with reconstructor
        self.reconstructor.once(number=self.config_number(hnode))

        # verify fragment reverted to primary server
        self.assertEqual(reverted_fragment_etag, self.direct_get(primary_node, opart))

        # now we'll remove some data on one of the primary node's partners
        partner = random.choice(reconstructor._get_partners(primary_node["index"], onodes))

        try:
            rebuilt_fragment_etag = self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            # partner already had its fragment removed
            if handoff_fragment_etag is not None and self.is_local_to(hnode, partner):
                # oh, well that makes sense then...
                rebuilt_fragment_etag = handoff_fragment_etag
            else:
                # I wonder what happened?
                self.fail("Partner inexplicably missing fragment!")
        part_dir = self.storage_dir("object", partner, part=opart)
        shutil.rmtree(part_dir, True)

        # sanity, it's gone
        try:
            self.direct_get(partner, opart)
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
        else:
            self.fail("successful GET of removed partner fragment archive!?")

        # and force the primary node to do a rebuild
        self.reconstructor.once(number=self.config_number(primary_node))

        # and validate the partner's rebuilt_fragment_etag
        try:
            self.assertEqual(rebuilt_fragment_etag, self.direct_get(partner, opart))
        except direct_client.DirectClientException as err:
            if err.http_status != 404:
                raise
            else:
                self.fail("Did not find rebuilt fragment on partner node")
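The kill_drive / revive_drive calls in these examples simulate a disk answering 507 Insufficient Storage. One way such helpers can work on an unmounted test device is to hide and then restore the device directory so the object server's drive check fails; the sketch below is an assumption about that approach, not the actual probe-test code.

import os


def kill_drive_sketch(device_path):
    # Illustrative only: make the device directory disappear so the object
    # server's drive check fails and requests for that disk get a 507.
    os.rename(device_path, device_path + 'X')


def revive_drive_sketch(device_path):
    # Undo kill_drive_sketch by restoring the original directory name.
    os.rename(device_path + 'X', device_path)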