def wait_for_claim(ocp_node, pvc_name, timeout=60, interval=2):
    """Poll a PVC until it reports the 'Bound' phase.

    Args:
        ocp_node: node handle forwarded to ``oc_get_pvc``.
        pvc_name: name of the claim to look up.
        timeout: first argument handed to ``Waiter``
            (presumably seconds -- confirm against ``Waiter``).
        interval: second argument handed to ``Waiter``
            (presumably seconds between polls -- confirm).

    Returns:
        The object returned by ``oc_get_pvc`` on the first poll where its
        ``status.phase`` equals 'Bound'.

    Raises:
        AssertionError: if the waiter is exhausted before the claim
            is seen as bound.
    """
    for _ in Waiter(timeout, interval):
        pvc = oc_get_pvc(ocp_node, pvc_name)
        if not pvc:
            # Nothing usable came back on this poll; try again.
            continue
        phase = pvc.get('status', {}).get('phase')
        if phase == 'Bound':
            return pvc
    raise AssertionError('wait_for_claim on pvc %s timed out' % (pvc_name, ))
def wait_for_sc_unused(ocp_node, sc_name, timeout=60, interval=1):
    """Poll the PV list until no PV references the given storage class.

    Args:
        ocp_node: node handle forwarded to ``oc_get_all_pvs``.
        sc_name: storage class name compared against each PV's
            ``spec.storageClassName``.
        timeout: first argument handed to ``Waiter``.
        interval: second argument handed to ``Waiter``.

    Raises:
        AssertionError: if the waiter is exhausted while some PV still
            references ``sc_name``.
    """
    for _ in Waiter(timeout, interval):
        pv_list = oc_get_all_pvs(ocp_node)
        # A falsy result or a missing/empty 'items' key counts as "no PVs".
        pvs = (pv_list and pv_list.get('items')) or []
        for pv in pvs:
            if pv.get('spec', {}).get('storageClassName') == sc_name:
                # Still in use; stop scanning and poll again.
                break
        else:
            return
    raise AssertionError('wait_for_sc_unused on %s timed out' % (sc_name, ))
Ejemplo n.º 3
0
    def test_dynamic_provisioning_glusterfile_glusterpod_failure(self):
        """Validate dynamic provisioning for gluster file when gluster pod down

        Starts background IO from an app pod onto a freshly created PVC,
        deletes one of the gluster pods reported for that PVC, waits for a
        replacement pod to show up on the same host and finally checks
        that the IO command exited with 0.
        """
        mnt_dir = "/mnt"
        target_file = '%s/fake_file_for_%s' % (mnt_dir, self.id())

        # Storage class (and its secret) first, then a PVC.
        self.create_storage_class()
        pvc_name = self.create_and_wait_for_pvc()

        # App pod with the PVC mounted. Two cleanups are registered: the
        # pod deletion and a wait for the pod to be gone.
        # NOTE(review): assuming unittest's last-in, first-out cleanup
        # order, the delete runs before the absence wait.
        app_pod = oc_create_tiny_pod_with_volume(
            self.node, pvc_name, "test-pvc-mount-on-app-pod",
            mount_path=mnt_dir)
        self.addCleanup(
            wait_for_resource_absence, self.node, 'pod', app_pod)
        self.addCleanup(oc_delete, self.node, 'pod', app_pod)
        wait_for_pod_be_ready(
            self.node, app_pod, timeout=60, wait_step=2)

        # Kick off the IO asynchronously so it overlaps the pod deletion.
        io_cmd = "oc rsh %s dd if=/dev/urandom of=%s bs=1000K count=900" % (
            app_pod, target_file)
        async_io = g.run_async(self.node, io_cmd, "root")

        # Take the first gluster pod reported for this PVC
        # (one of the hosts storing a PV brick; 4+ nodes case).
        victim = get_gluster_pod_names_by_pvc_name(self.node, pvc_name)[0]

        # Delete that pod, then poll for a non-terminating glusterfs pod
        # on the same host.
        oc_delete(self.node, 'pod', victim["pod_name"])
        cmd = ("oc get pods -o wide | grep glusterfs | grep %s | "
               "grep -v Terminating | awk '{print $1}'") % (
                   victim["host_name"])
        for w in Waiter(600, 30):
            out = self.cmd_run(cmd)
            # First line of the output is taken as the new pod's name.
            new_gluster_pod_name = out.strip().split("\n")[0].strip()
            if new_gluster_pod_name:
                break
        if w.expired:
            error_msg = "exceeded timeout, new gluster pod not created"
            g.log.error(error_msg)
            raise ExecutionError(error_msg)
        g.log.info("new gluster pod name is %s" % new_gluster_pod_name)
        wait_for_pod_be_ready(self.node, new_gluster_pod_name)

        # The background IO must have finished with exit code 0.
        io_rc, _io_out, _io_err = async_io.async_communicate()
        self.assertEqual(
            io_rc, 0, "IO %s failed on %s" % (io_cmd, self.node))
 def wait_to_settle(self, timeout=120, interval=1):
     """Wait until ``self._count_vols()`` equals ``self.volcount``.

     Args:
         timeout: first argument handed to ``Waiter``.
         interval: second argument handed to ``Waiter``; it is now
             actually forwarded instead of being silently ignored.

     Raises:
         AssertionError: if the waiter is exhausted before the counts match.
     """
     # This was originally going to be a tearDown, but oddly enough
     # tearDown is called *before* the cleanup functions, so it
     # could never succeed. This needs to be added as a cleanup
     # function first so that we run after our test's other cleanup
     # functions but before we go on to the next test in order
     # to prevent the async cleanups in kubernetes from stepping
     # on the next test's "toes".
     for _ in Waiter(timeout, interval):
         if self._count_vols() == self.volcount:
             return
     raise AssertionError('wait for volume count to settle timed out')
Ejemplo n.º 5
0
    def _node_reboot(self):
        """Reboot the first gluster server and wait until it is usable again.

        Sequence: issue a delayed shutdown, close the ssh connection, poll
        for an rpyc connection to the host, wait for the gluster pods and
        then check the gluster services inside every pod.

        Raises:
            AssertionError: if the shutdown command's return code is not 255.
            ExecutionError: if the host is not reachable before the waiter
                expires.
        """
        host = (
            g.config["gluster_servers"][self.gluster_servers[0]]["storage"])

        reboot_cmd = (
            "sleep 3; /sbin/shutdown -r now 'Reboot triggered by Glusto'")
        ret, _out, err = g.run(host, reboot_cmd)

        # Registered right after the reboot command so the pods are waited
        # for at cleanup time even if a later step raises.
        self.addCleanup(self._wait_for_gluster_pod_to_be_ready)

        # 255 is the only accepted return code.
        # NOTE(review): presumably because the reboot cuts the ssh
        # session -- confirm.
        if ret != 255:
            err_msg = "failed to reboot host %s error: %s" % (host, err)
            g.log.error(err_msg)
            raise AssertionError(err_msg)

        try:
            g.ssh_close_connection(host)
        except Exception as exc:
            g.log.error(
                "failed to close connection with host %s with error: %s"
                % (host, exc))
            raise

        # added sleep as node will restart after 3 sec
        time.sleep(3)

        # Poll until an rpyc connection to the host can be opened.
        for w in Waiter(timeout=600, interval=10):
            try:
                conn = g.rpyc_get_connection(host, user="******")
                if not conn:
                    continue
                g.rpyc_close_connection(host, user="******")
                break
            except Exception as exc:
                g.log.info("exception while getting connection: '%s'" % exc)

        if w.expired:
            error_msg = (
                "exceeded timeout 600 sec, node '%s' is not reachable" % host)
            g.log.error(error_msg)
            raise ExecutionError(error_msg)

        # wait for the gluster pod to be in 'Running' state
        self._wait_for_gluster_pod_to_be_ready()

        # glusterd, gluster-blockd and tcmu-runner should all be running
        # in every gluster pod
        for pod in self.gluster_pod_list:
            for svc in ("glusterd", "gluster-blockd", "tcmu-runner"):
                g.log.info(
                    "gluster_pod - '%s' : gluster_service '%s'" % (pod, svc))
                check_service_status(self.oc_node, pod, svc, "running")
 def background_ops():
     """Create and delete PVCs in a loop until ``done`` is set.

     Each pass creates a uniquely named claim, refreshes its PVC and PV
     info, deletes it and then checks ``done``. The loop is also bounded
     by ``Waiter(60 * 60)``. ``short_tc_name``, ``tname``, ``ocp_node``
     and ``done`` come from the enclosing scope.
     NOTE(review): ``done`` looks like a ``threading.Event`` -- confirm.
     """
     subname = make_unique_label(short_tc_name)
     for idx, _ in enumerate(Waiter(60 * 60)):
         # Random 0.1-1.0 s pause before creating the claim.
         time.sleep(random.randint(1, 10) * 0.1)
         claim = ClaimInfo(
             name='{}-{}'.format(subname, idx),
             storageclass=tname,
             size=2)
         claim.create_pvc(ocp_node)
         time.sleep(1)
         claim.update_pvc_info(ocp_node, timeout=120)
         claim.update_pv_info(ocp_node)
         # Random 0.1-1.0 s pause before deleting the claim.
         time.sleep(random.randint(1, 10) * 0.1)
         claim.delete_pvc(ocp_node)
         # Checked only at the end, so a pass in progress always completes.
         if done.is_set():
             break
Ejemplo n.º 7
0
    def _wait_for_gluster_pod_to_be_ready(self):
        """Wait for every pod in ``self.gluster_pod_list`` to become ready.

        Each pod is polled with short ``wait_for_pod_be_ready`` calls inside
        a ``Waiter(timeout=600, interval=10)`` loop; an ``ExecutionError``
        from a single poll is logged and the poll is retried.

        Raises:
            ExecutionError: if the waiter expires for any pod.
        """
        for pod in self.gluster_pod_list:
            for w in Waiter(timeout=600, interval=10):
                try:
                    ready = wait_for_pod_be_ready(
                        self.oc_node, pod, timeout=1, wait_step=1)
                except ExecutionError as exc:
                    # A failed poll is not fatal; log it and try again.
                    g.log.info("exception %s while validating gluster "
                               "pod %s" % (exc, pod))
                else:
                    if ready:
                        break

            if w.expired:
                error_msg = ("exceeded timeout 600 sec, pod '%s' is "
                             "not in 'running' state" % pod)
                g.log.error(error_msg)
                raise ExecutionError(error_msg)