Example #1
    def revert_snapshot(self,
                        name,
                        skip_timesync=False,
                        skip_slaves_check=False):
        if not self.d_env.has_snapshot(name):
            return False

        logger.info('We have snapshot with such name: {:s}'.format(name))

        logger.info("Reverting the snapshot '{0}' ....".format(name))
        self.d_env.revert(name)

        logger.info("Resuming the snapshot '{0}' ....".format(name))
        self.resume_environment()

        if not skip_timesync:
            self.sync_time()
        try:
            _wait(self.fuel_web.client.get_releases,
                  expected=EnvironmentError,
                  timeout=300)
        except exceptions.Unauthorized:
            self.set_admin_keystone_password()
            self.fuel_web.get_nailgun_version()

        if not skip_slaves_check:
            _wait(lambda: self.check_slaves_are_ready(), timeout=60 * 6)
        return True
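
All of the examples on this page pass `_wait` a callable plus a `timeout`, and sometimes an `expected` exception class (e.g. `expected=EnvironmentError` above), which suggests a retry helper that keeps calling the function while the expected exception is raised. The sketch below only illustrates that pattern; the signature, the default `interval`, and the exact behaviour are assumptions, not the actual devops/fuel-qa implementation.

import time


def _wait(raising_predicate, expected=Exception, interval=5, timeout=None):
    # Hypothetical sketch: call the predicate repeatedly, swallowing the
    # expected exception type, until it succeeds or the timeout expires.
    start = time.time()
    while True:
        try:
            return raising_predicate()
        except expected:
            if timeout is not None and time.time() - start > timeout:
                raise
            time.sleep(interval)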
Example #2
    def revert_snapshot(self, name, skip_timesync=False,
                        skip_slaves_check=False):
        if not self.d_env.has_snapshot(name):
            return False

        logger.info('We have snapshot with such name: {:s}'.format(name))

        logger.info("Reverting the snapshot '{0}' ....".format(name))
        self.d_env.revert(name)

        logger.info("Resuming the snapshot '{0}' ....".format(name))
        self.resume_environment()

        if not skip_timesync:
            self.sync_time()
        try:
            _wait(self.fuel_web.client.get_releases,
                  expected=EnvironmentError, timeout=300)
        except exceptions.Unauthorized:
            self.set_admin_keystone_password()
            self.fuel_web.get_nailgun_version()

        if not skip_slaves_check:
            _wait(lambda: self.check_slaves_are_ready(), timeout=60 * 6)
        return True
Example #3
def check_mysql(remote, node_name):
    check_cmd = 'pkill -0 -x mysqld'
    check_crm_cmd = ('crm resource status clone_p_mysql |'
                     ' grep -q "is running on: $HOSTNAME"')
    check_galera_cmd = ("mysql --connect_timeout=5 -sse \"SELECT"
                        " VARIABLE_VALUE FROM"
                        " information_schema.GLOBAL_STATUS"
                        " WHERE VARIABLE_NAME"
                        " = 'wsrep_local_state_comment';\"")
    try:
        wait(lambda: remote.execute(check_cmd)['exit_code'] == 0,
             timeout=300)
        logger.info('MySQL daemon is started on {0}'.format(node_name))
    except TimeoutError:
        logger.error('MySQL daemon is down on {0}'.format(node_name))
        raise
    _wait(lambda: assert_equal(remote.execute(check_crm_cmd)['exit_code'], 0,
                               'MySQL resource is NOT running on {0}'.format(
                                   node_name)), timeout=60)
    try:
        wait(lambda: ''.join(remote.execute(
            check_galera_cmd)['stdout']).rstrip() == 'Synced', timeout=600)
    except TimeoutError:
        logger.error('galera status is {0}'.format(''.join(remote.execute(
            check_galera_cmd)['stdout']).rstrip()))
        raise
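
`check_mysql` combines both helpers: `wait` polls a plain boolean condition (an SSH exit code, or the Galera `wsrep_local_state_comment` value) and raises `TimeoutError` when the condition never becomes true, while `_wait` retries a raising assertion. A minimal predicate-polling `wait` consistent with these call sites could look like the following; the names and defaults are assumptions for illustration rather than the real devops helper.

import time


def wait(predicate, interval=5, timeout=None):
    # Hypothetical sketch: poll the predicate until it returns a truthy
    # value; raise TimeoutError if the timeout elapses first.
    start = time.time()
    while not predicate():
        if timeout is not None and time.time() - start > timeout:
            raise TimeoutError(
                'Waited {0}s, condition is still not met'.format(timeout))
        time.sleep(interval)
    return True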
Example #4
def check_mysql(remote, node_name):
    check_cmd = 'pkill -0 -x mysqld'
    check_crm_cmd = ('crm resource status clone_p_mysql |'
                     ' grep -q "is running on: $HOSTNAME"')
    check_galera_cmd = ("mysql --connect_timeout=5 -sse \"SELECT"
                        " VARIABLE_VALUE FROM"
                        " information_schema.GLOBAL_STATUS"
                        " WHERE VARIABLE_NAME"
                        " = 'wsrep_local_state_comment';\"")
    try:
        wait(lambda: remote.execute(check_cmd)['exit_code'] == 0,
             timeout=10 * 60)
        logger.info('MySQL daemon is started on {0}'.format(node_name))
    except TimeoutError:
        logger.error('MySQL daemon is down on {0}'.format(node_name))
        raise
    _wait(lambda: assert_equal(
        remote.execute(check_crm_cmd)['exit_code'], 0,
        'MySQL resource is NOT running on {0}'.format(node_name)),
          timeout=60)
    try:
        wait(lambda: ''.join(remote.execute(check_galera_cmd)['stdout']).
             rstrip() == 'Synced',
             timeout=600)
    except TimeoutError:
        logger.error('galera status is {0}'.format(''.join(
            remote.execute(check_galera_cmd)['stdout']).rstrip()))
        raise
Example #5
    def rollback_automatically_ha_env(self):
        """Rollback manually simple deployed cluster

        Scenario:
            1. Revert snapshot with simple neutron gre ha env
            2. Add raise exception to openstack.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Add 1 cinder node and re-deploy cluster
            7. Run OSTF

        """
        if not self.env.get_virtual_environment().has_snapshot(
                'deploy_neutron_gre_ha'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre_ha")
        cluster_id = self.fuel_web.get_last_created_cluster()
        checkers.upload_tarball(self.env.get_admin_remote(),
                                hlp_data.TARBALL_PATH, '/var')
        checkers.check_tarball_exists(self.env.get_admin_remote(),
                                      os.path.basename(hlp_data.
                                                       TARBALL_PATH),
                                      '/var')
        checkers.untar(self.env.get_admin_remote(),
                       os.path.basename(hlp_data.
                                        TARBALL_PATH), '/var')
        self.fuel_web.modify_python_file(self.env.get_admin_remote(),
                                         "61i \ \ \ \ \ \ \ \ raise errors."
                                         "ExecutedErrorNonZeroExitCode('{0}')"
                                         .format('Some bad error'),
                                         '/var/upgrade/site-packages/'
                                         'fuel_upgrade/engines/'
                                         'openstack.py')
        checkers.run_script(self.env.get_admin_remote(), '/var', 'upgrade.sh',
                            password=hlp_data.KEYSTONE_CREDS['password'],
                            rollback=True, exit_code=255)
        checkers.wait_rollback_is_done(self.env.get_admin_remote(), 3000)
        checkers.check_upgraded_containers(self.env.get_admin_remote(),
                                           hlp_data.UPGRADE_FUEL_TO,
                                           hlp_data.UPGRADE_FUEL_FROM)
        logger.debug("all containers are ok")
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.nodes().slaves[0]), timeout=120)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(self.env.nodes().slaves[:5])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)

        self.env.bootstrap_nodes(self.env.nodes().slaves[5:6])
        self.fuel_web.update_nodes(
            cluster_id, {'slave-06': ['cinder']},
            True, False
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.env.make_snapshot("rollback_automatic_ha")
Example #6
    def ha_pacemaker_restart_heat_engine(self):
        if not self.env.d_env.has_snapshot(self.snapshot_name):
            raise SkipTest()

        self.env.revert_snapshot(self.snapshot_name)
        ocf_success = "DEBUG: OpenStack Orchestration Engine" \
                      " (heat-engine) monitor succeeded"
        ocf_error = "ERROR: OpenStack Heat Engine is not connected to the" \
                    " AMQP server: AMQP connection test returned 1"

        heat_name = 'heat-engine'

        ocf_status = \
            'script -q -c "OCF_ROOT=/usr/lib/ocf' \
            ' /usr/lib/ocf/resource.d/fuel/{0}' \
            ' monitor 2>&1"'.format(heat_name)

        remote = self.fuel_web.get_ssh_for_node(
            self.env.d_env.nodes().slaves[0].name)
        pid = ''.join(remote.execute('pgrep heat-engine')['stdout'])
        get_ocf_status = ''.join(
            remote.execute(ocf_status)['stdout']).rstrip()
        assert_true(ocf_success in get_ocf_status,
                    "heat engine is not succeeded, status is {0}".format(
                        get_ocf_status))
        assert_true(len(remote.execute(
            "netstat -nap | grep {0} | grep :5673".
            format(pid))['stdout']) > 0, 'There is no amqp connections')
        remote.execute("iptables -I OUTPUT 1 -m owner --uid-owner heat -m"
                       " state --state NEW,ESTABLISHED,RELATED -j DROP")

        cmd = "netstat -nap | grep {0} | grep :5673".format(pid)
        wait(lambda: len(remote.execute(cmd)['stdout']) == 0, timeout=300)

        get_ocf_status = ''.join(
            remote.execute(ocf_status)['stdout']).rstrip()
        logger.info('ocf status after blocking is {0}'.format(
            get_ocf_status))
        assert_true(ocf_error in get_ocf_status,
                    "heat engine is running, status is {0}".format(
                        get_ocf_status))

        remote.execute("iptables -D OUTPUT 1 -m owner --uid-owner heat -m"
                       " state --state NEW,ESTABLISHED,RELATED")
        _wait(lambda: assert_true(ocf_success in ''.join(
            remote.execute(ocf_status)['stdout']).rstrip()), timeout=240)
        newpid = ''.join(remote.execute('pgrep heat-engine')['stdout'])
        assert_true(pid != newpid, "heat pid is still the same")
        get_ocf_status = ''.join(remote.execute(
            ocf_status)['stdout']).rstrip()
        assert_true(ocf_success in get_ocf_status,
                    "heat engine is not succeeded, status is {0}".format(
                        get_ocf_status))
        assert_true(len(
            remote.execute("netstat -nap | grep {0} | grep :5673".format(
                newpid))['stdout']) > 0)
        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.run_ostf(cluster_id=cluster_id)
Example #7
 def assertClusterReady(self, node_name, smiles_count,
                        networks_count=1, timeout=300):
     _wait(
         lambda: self.get_cluster_status(
             self.get_node_by_devops_node(
                 self.ci().environment().node_by_name(node_name))['ip'],
             smiles_count=smiles_count,
             networks_count=networks_count),
         timeout=timeout)
Example #8
    def rollback_automatically_ha_env(self):
        """Rollback manually simple deployed cluster

        Scenario:
            1. Revert snapshot with simple neutron gre ha env
            2. Add raise exception to openstack.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Add 1 cinder node and re-deploy cluster
            7. Run OSTF

        """
        if not self.env.get_virtual_environment().has_snapshot(
                'deploy_neutron_gre_ha'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre_ha")
        cluster_id = self.fuel_web.get_last_created_cluster()
        checkers.upload_tarball(self.env.get_admin_remote(),
                                hlp_data.TARBALL_PATH, '/var')
        checkers.check_tarball_exists(self.env.get_admin_remote(),
                                      os.path.basename(hlp_data.
                                                       TARBALL_PATH),
                                      '/var')
        checkers.untar(self.env.get_admin_remote(),
                       os.path.basename(hlp_data.
                                        TARBALL_PATH), '/var')
        self.fuel_web.modify_python_file(self.env.get_admin_remote(),
                                         "2i \ \ \ \ 2014.2-6.0: blah-blah",
                                         '/var/upgrade/releases/'
                                         'metadata.yaml')
        checkers.run_script(self.env.get_admin_remote(), '/var', 'upgrade.sh',
                            password=hlp_data.KEYSTONE_CREDS['password'],
                            rollback=True, exit_code=255)
        checkers.wait_rollback_is_done(self.env.get_admin_remote(), 3000)
        checkers.check_upgraded_containers(self.env.get_admin_remote(),
                                           hlp_data.UPGRADE_FUEL_TO,
                                           hlp_data.UPGRADE_FUEL_FROM)
        logger.debug("all containers are ok")
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.nodes().slaves[0]), timeout=120)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(self.env.nodes().slaves[:5])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)

        self.env.bootstrap_nodes(self.env.nodes().slaves[5:6])
        self.fuel_web.update_nodes(
            cluster_id, {'slave-06': ['cinder']},
            True, False
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.env.make_snapshot("rollback_automatic_ha")
Example #9
 def assert_cluster_ready(self,
                          node_name,
                          smiles_count,
                          networks_count=1,
                          timeout=300):
     logger.info('Assert cluster services are UP')
     remote = self.environment.get_ssh_to_remote_by_name(node_name)
     _wait(lambda: self.get_cluster_status(
         remote, smiles_count=smiles_count, networks_count=networks_count),
           timeout=timeout)
Example #10
 def assert_cluster_ready(self, node_name, smiles_count,
                          networks_count=1, timeout=300):
     logger.info('Assert cluster services are UP')
     remote = self.environment.get_ssh_to_remote_by_name(node_name)
     _wait(
         lambda: self.get_cluster_status(
             remote,
             smiles_count=smiles_count,
             networks_count=networks_count),
         timeout=timeout)
Example #11
 def assertClusterReady(self,
                        node_name,
                        smiles_count,
                        networks_count=1,
                        timeout=300):
     _wait(lambda: self.get_cluster_status(self.get_node_by_devops_node(
         self.ci().environment().node_by_name(node_name))['ip'],
                                           smiles_count=smiles_count,
                                           networks_count=networks_count),
           timeout=timeout)
Example #12
 def assert_cluster_ready(self, node_name, smiles_count, networks_count=1, timeout=300):
     remote = self.environment.get_ssh_to_remote(
         self.get_nailgun_node_by_devops_node(self.environment.get_virtual_environment().node_by_name(node_name))[
             "ip"
         ]
     )
     _wait(
         lambda: self.get_cluster_status(remote, smiles_count=smiles_count, networks_count=networks_count),
         timeout=timeout,
     )
Example #13
    def rollback_automatically_delete_node(self):
        """Rollback automatically ha one controller deployed cluster
           and delete node from cluster

        Scenario:
            1. Revert snapshot with deploy Neutron GRE 6.1 env
            2. Add raise exception to docker_engine.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Delete 1 node and re-deploy cluster
            9. Run OSTF

        """
        # TODO(ddmitriev): change snapshot name to actual when reverting 7.0
        if not self.env.d_env.has_snapshot('deploy_neutron_gre'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre")
        cluster_id = self.fuel_web.get_last_created_cluster()

        self.env.admin_actions.upgrade_master_node(rollback=True)

        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]),
              timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
        nailgun_nodes = self.fuel_web.update_nodes(
            cluster_id, {'slave-03': ['compute', 'cinder']}, False, True)
        task = self.fuel_web.deploy_cluster(cluster_id)
        self.fuel_web.assert_task_success(task)
        nodes = filter(lambda x: x["pending_deletion"] is True, nailgun_nodes)
        try:
            wait(lambda: len(self.fuel_web.client.list_nodes()) == 3,
                 timeout=5 * 60)
        except TimeoutError:
            assert_true(
                len(self.fuel_web.client.list_nodes()) == 3,
                'Node {0} is not discovered in timeout 5 * 60'.format(
                    nodes[0]))
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'],
                               should_fail=1)

        self.env.make_snapshot("rollback_automatically_delete_node")
Example #14
    def rollback_automatically_ha_one_controller(self):
        """Rollback automatically ha one controller deployed cluster

        Scenario:
            1. Revert snapshot with deploy Neutron VXLAN env
            2. Add raise exception to docker_engine.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Add 1 ceph node and re-deploy cluster
            9. Run OSTF

        """
        if not self.env.d_env.has_snapshot('ceph_ha_one_controller_compact'):
            raise SkipTest()

        self.env.revert_snapshot('ceph_ha_one_controller_compact')
        cluster_id = self.fuel_web.get_last_created_cluster()

        _ip = self.fuel_web.get_nailgun_node_by_name('slave-01')['ip']
        with self.env.d_env.get_ssh_to_remote(_ip) as remote:
            expected_kernel = UpgradeFuelMaster.get_slave_kernel(remote)

        self.env.admin_actions.upgrade_master_node(rollback=True)

        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[3:4])
        self.fuel_web.update_nodes(
            cluster_id, {'slave-04': ['ceph-osd']},
            True, False
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
        if hlp_data.OPENSTACK_RELEASE_UBUNTU in hlp_data.OPENSTACK_RELEASE:
            _ip = self.fuel_web.get_nailgun_node_by_name('slave-04')['ip']
            with self.env.d_env.get_ssh_to_remote(_ip) as remote:
                kernel = UpgradeFuelMaster.get_slave_kernel(remote)
            checkers.check_kernel(kernel, expected_kernel)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.make_snapshot("rollback_automatically_ha_one_controller")
Example #15
    def rollback_automatically_ha_one_controller(self):
        """Rollback automatically ha one controller deployed cluster

        Scenario:
            1. Revert snapshot with deploy Neutron VXLAN env
            2. Add raise exception to docker_engine.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Add 1 ceph node and re-deploy cluster
            9. Run OSTF

        """
        if not self.env.d_env.has_snapshot('ceph_ha_one_controller_compact'):
            raise SkipTest()

        self.env.revert_snapshot('ceph_ha_one_controller_compact')
        cluster_id = self.fuel_web.get_last_created_cluster()

        _ip = self.fuel_web.get_nailgun_node_by_name('slave-01')['ip']
        with self.env.d_env.get_ssh_to_remote(_ip) as remote:
            expected_kernel = UpgradeFuelMaster.get_slave_kernel(remote)

        self.env.admin_actions.upgrade_master_node(rollback=True)

        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[3:4])
        self.fuel_web.update_nodes(
            cluster_id, {'slave-04': ['ceph-osd']},
            True, False
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
        if hlp_data.OPENSTACK_RELEASE_UBUNTU in hlp_data.OPENSTACK_RELEASE:
            _ip = self.fuel_web.get_nailgun_node_by_name('slave-04')['ip']
            with self.env.d_env.get_ssh_to_remote(_ip) as remote:
                kernel = UpgradeFuelMaster.get_slave_kernel(remote)
            checkers.check_kernel(kernel, expected_kernel)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.make_snapshot("rollback_automatically_ha_one_controller")
Example #16
    def rollback_automatically_delete_node(self):
        """Rollback automatically ha one controller deployed cluster
           and delete node from cluster

        Scenario:
            1. Revert snapshot with deploy Neutron GRE 6.1 env
            2. Add raise exception to docker_engine.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Delete 1 node and re-deploy cluster
            9. Run OSTF

        """
        # TODO(ddmitriev): change snapshot name to actual when reverting 7.0
        if not self.env.d_env.has_snapshot('deploy_neutron_gre'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre")
        cluster_id = self.fuel_web.get_last_created_cluster()

        self.env.admin_actions.upgrade_master_node(rollback=True)

        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
        nailgun_nodes = self.fuel_web.update_nodes(
            cluster_id, {'slave-03': ['compute', 'cinder']}, False, True)
        task = self.fuel_web.deploy_cluster(cluster_id)
        self.fuel_web.assert_task_success(task)
        nodes = filter(lambda x: x["pending_deletion"] is True, nailgun_nodes)
        try:
            wait(lambda: len(self.fuel_web.client.list_nodes()) == 3,
                 timeout=5 * 60)
        except TimeoutError:
            assert_true(len(self.fuel_web.client.list_nodes()) == 3,
                        'Node {0} is not discovered in timeout 5 * 60'.format(
                            nodes[0]))
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'],
                               should_fail=1)

        self.env.make_snapshot("rollback_automatically_delete_node")
Example #17
    def revert_snapshot(self, name):
        if self.get_virtual_environment().has_snapshot(name):
            logger.info('We have snapshot with such name %s' % name)

            self.get_virtual_environment().revert(name)
            logger.info('Starting snapshot reverting ....')

            self.get_virtual_environment().resume()
            logger.info('Starting snapshot resuming ...')

            admin = self.nodes().admin

            try:
                admin.await(self.admin_net, timeout=10 * 60, by_port=8000)
            except Exception as e:
                logger.warning("From first time admin isn't reverted: "
                               "{0}".format(e))
                admin.destroy()
                logger.info('Admin node was destroyed. Wait 10 sec.')
                time.sleep(10)
                self.get_virtual_environment().start(self.nodes().admins)
                logger.info('Admin node started second time.')
                self.nodes().admin.await(self.admin_net,
                                         timeout=10 * 60,
                                         by_port=8000)

            self.set_admin_ssh_password()
            try:
                _wait(self._fuel_web.client.get_releases,
                      expected=EnvironmentError,
                      timeout=300)
            except exceptions.Unauthorized:
                self.set_admin_keystone_password()
                self._fuel_web.get_nailgun_version()

            self.sync_time_admin_node()

            for node in self.nodes().slaves:
                if not node.driver.node_active(node):
                    continue
                try:
                    logger.info("Sync time on revert for node %s" % node.name)
                    self.sync_node_time(
                        self.get_ssh_to_remote_by_name(node.name))
                except Exception as e:
                    logger.warning(
                        'Exception caught while trying to sync time on {0}:'
                        ' {1}'.format(node.name, e))
                self.run_nailgun_agent(
                    self.get_ssh_to_remote_by_name(node.name))
            return True
        return False
Example #18
    def revert_snapshot(self, name):
        if self.get_virtual_environment().has_snapshot(name):
            logger.info('We have snapshot with such name %s' % name)

            self.get_virtual_environment().revert(name)
            logger.info('Starting snapshot reverting ....')

            self.get_virtual_environment().resume()
            logger.info('Starting snapshot resuming ...')

            admin = self.nodes().admin

            try:
                admin.await(
                    self.admin_net, timeout=10 * 60, by_port=8000)
            except Exception as e:
                logger.warning("From first time admin isn't reverted: "
                               "{0}".format(e))
                admin.destroy()
                logger.info('Admin node was destroyed. Wait 10 sec.')
                time.sleep(10)
                self.get_virtual_environment().start(self.nodes().admins)
                logger.info('Admin node started second time.')
                self.nodes().admin.await(
                    self.admin_net, timeout=10 * 60, by_port=8000)

            self.set_admin_ssh_password()
            try:
                _wait(self._fuel_web.client.get_releases,
                      expected=EnvironmentError, timeout=300)
            except exceptions.Unauthorized:
                self.set_admin_keystone_password()
                self._fuel_web.get_nailgun_version()

            self.sync_time_admin_node()

            for node in self.nodes().slaves:
                if not node.driver.node_active(node):
                    continue
                try:
                    logger.info("Sync time on revert for node %s" % node.name)
                    self.sync_node_time(
                        self.get_ssh_to_remote_by_name(node.name))
                except Exception as e:
                    logger.warning(
                        'Exception caught while trying to sync time on {0}:'
                        ' {1}'.format(node.name, e))
                self.run_nailgun_agent(
                    self.get_ssh_to_remote_by_name(node.name))
            return True
        return False
Example #19
def check_mysql(remote, node_name):
    check_cmd = 'pkill -0 -x mysqld'
    check_crm_cmd = ('crm resource status clone_p_mysql |'
                     ' grep -q "is running on: $HOSTNAME"')
    try:
        wait(lambda: remote.execute(check_cmd)['exit_code'] == 0,
             timeout=300)
        logger.info('MySQL daemon is started on {0}'.format(node_name))
    except TimeoutError:
        logger.error('MySQL daemon is down on {0}'.format(node_name))
        raise
    _wait(lambda: assert_equal(remote.execute(check_crm_cmd)['exit_code'], 0,
                               'MySQL resource is NOT running on {0}'.format(
                                   node_name)), timeout=60)
Example #20
    def rollback_automatically_ha(self):
        """Rollback manually ha deployed cluster

        Scenario:
            1. Revert snapshot with Neutron GRE HA 6.1 env
            2. Add raise exception to openstack.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Add 1 cinder node and re-deploy cluster
            9. Run OSTF

        """
        # TODO(ddmitriev): change snapshot name to actual when reverting 7.0
        if not self.env.d_env.has_snapshot('deploy_neutron_gre_ha'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre_ha")
        cluster_id = self.fuel_web.get_last_created_cluster()

        self.env.admin_actions.upgrade_master_node(rollback=True)

        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:5])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[5:6])
        self.fuel_web.update_nodes(
            cluster_id, {'slave-06': ['cinder']},
            True, False
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.make_snapshot("rollback_automatically_ha")
Example #21
    def rollback_automatically_ha(self):
        """Rollback manually ha deployed cluster

        Scenario:
            1. Revert snapshot with Neutron GRE HA 6.1 env
            2. Add raise exception to openstack.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Add 1 cinder node and re-deploy cluster
            9. Run OSTF

        """
        # TODO(ddmitriev): change snapshot name to actual when reverting 7.0
        if not self.env.d_env.has_snapshot('deploy_neutron_gre_ha'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre_ha")
        cluster_id = self.fuel_web.get_last_created_cluster()

        self.env.admin_actions.upgrade_master_node(rollback=True)

        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:5])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[5:6])
        self.fuel_web.update_nodes(
            cluster_id, {'slave-06': ['cinder']},
            True, False
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.make_snapshot("rollback_automatically_ha")
Example #22
def check_mysql(remote, node_name):
    if OPENSTACK_RELEASE_UBUNTU in OPENSTACK_RELEASE:
        mysql_pidfile = '/var/run/mysqld/mysqld.pid'
    else:
        mysql_pidfile = '/var/run/mysql/mysqld.pid'
    check_cmd = '[ -r {0} ] && pkill -0 -F {0}'.format(mysql_pidfile)
    check_crm_cmd = ('crm resource status clone_p_mysql |'
                     ' grep -q "is running on: $HOSTNAME"')
    try:
        wait(lambda: remote.execute(check_cmd)['exit_code'] == 0, timeout=300)
        logger.info('MySQL daemon is started on {0}'.format(node_name))
    except TimeoutError:
        logger.error('MySQL daemon is down on {0}'.format(node_name))
        raise
    _wait(lambda: assert_equal(
        remote.execute(check_crm_cmd)['exit_code'], 0,
        'MySQL resource is NOT running on {0}'.format(node_name)),
          timeout=60)
Example #23
def check_mysql(remote, node_name):
    if OPENSTACK_RELEASE_UBUNTU in OPENSTACK_RELEASE:
        mysql_pidfile = '/var/run/mysqld/mysqld.pid'
    else:
        mysql_pidfile = '/var/run/mysql/mysqld.pid'
    check_cmd = '[ -r {0} ] && pkill -0 -F {0}'.format(mysql_pidfile)
    check_crm_cmd = ('crm resource status clone_p_mysql |'
                     ' grep -q "is running on: $HOSTNAME"')
    try:
        wait(lambda: remote.execute(check_cmd)['exit_code'] == 0,
             timeout=300)
        logger.info('MySQL daemon is started on {0}'.format(node_name))
    except TimeoutError:
        logger.error('MySQL daemon is down on {0}'.format(node_name))
        raise
    _wait(lambda: assert_equal(remote.execute(check_crm_cmd)['exit_code'], 0,
                               'MySQL resource is NOT running on {0}'.format(
                                   node_name)), timeout=60)
Example #24
    def rollback_automatically_ha_one_controller_env(self):
        """Rollback automatically ha one controller deployed cluster

        Scenario:
            1. Revert snapshot with deploy neutron gre env
            2. Add raise exception to docker_engine.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Add 1 ceph node and re-deploy cluster
            9. Run OSTF

        """
        if not self.env.d_env.has_snapshot('ceph_multinode_compact'):
            raise SkipTest()

        self.env.revert_snapshot("ceph_multinode_compact")
        cluster_id = self.fuel_web.get_last_created_cluster()

        _ip = self.fuel_web.get_nailgun_node_by_name('slave-01')['ip']
        remote = self.env.d_env.get_ssh_to_remote(_ip)
        expected_kernel = UpgradeFuelMaster.get_slave_kernel(remote)

        checkers.upload_tarball(self.env.d_env.get_admin_remote(),
                                hlp_data.TARBALL_PATH, '/var')
        checkers.check_tarball_exists(self.env.d_env.get_admin_remote(),
                                      os.path.basename(hlp_data.
                                                       TARBALL_PATH),
                                      '/var')
        checkers.untar(self.env.d_env.get_admin_remote(),
                       os.path.basename(hlp_data.
                                        TARBALL_PATH), '/var')
        # we expect 255 exit code here because upgrade failed
        # and exit status is 255
        checkers.run_script(self.env.d_env.get_admin_remote(),
                            '/var',
                            'upgrade.sh',
                            password=hlp_data.KEYSTONE_CREDS['password'],
                            rollback=True, exit_code=255)
        checkers.wait_rollback_is_done(self.env.d_env.get_admin_remote(), 3000)
        checkers.check_upgraded_containers(self.env.d_env.get_admin_remote(),
                                           hlp_data.UPGRADE_FUEL_TO,
                                           hlp_data.UPGRADE_FUEL_FROM)
        logger.debug("all containers are ok")
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id)
        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[3:4])
        self.fuel_web.update_nodes(
            cluster_id, {'slave-04': ['ceph-osd']},
            True, False
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
        if hlp_data.OPENSTACK_RELEASE_UBUNTU in hlp_data.OPENSTACK_RELEASE:
            _ip = self.fuel_web.get_nailgun_node_by_name('slave-04')['ip']
            remote = self.env.d_env.get_ssh_to_remote(_ip)
            kernel = UpgradeFuelMaster.get_slave_kernel(remote)
            checkers.check_kernel(kernel, expected_kernel)
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.env.make_snapshot("rollback_automatic_ha_one_controller")
Example #25
    def ha_pacemaker_restart_heat_engine(self):
        """Verify heat engine service is restarted
         by pacemaker on amqp connection loss

        Scenario:
            1. SSH to any controller
            2. Check heat-engine status
            3. Block heat-engine amqp connections
            4. Check heat-engine was stopped on current controller
            5. Unblock heat-engine amqp connections
            6. Check heat-engine process is running with new pid
            7. Check amqp connection re-appears for heat-engine

        Snapshot ha_pacemaker_restart_heat_engine

        """
        self.env.revert_snapshot("deploy_ha")
        ocf_success = "DEBUG: OpenStack Orchestration Engine" \
                      " (heat-engine) monitor succeeded"
        ocf_error = "ERROR: OpenStack Heat Engine is not connected to the" \
                    " AMQP server: AMQP connection test returned 1"

        heat_name = 'heat-engine'

        ocf_status = \
            'script -q -c "OCF_ROOT=/usr/lib/ocf' \
            ' /usr/lib/ocf/resource.d/mirantis/{0}' \
            ' monitor 2>&1"'.format(heat_name)

        remote = self.fuel_web.get_ssh_for_node(
            self.env.nodes().slaves[0].name)
        pid = ''.join(remote.execute('pgrep heat-engine')['stdout'])
        get_ocf_status = ''.join(
            remote.execute(ocf_status)['stdout']).rstrip()
        assert_true(ocf_success in get_ocf_status,
                    "heat engine is not succeeded, status is {0}".format(
                        get_ocf_status))
        assert_true(len(remote.execute(
            "netstat -nap | grep {0} | grep :5673".
            format(pid))['stdout']) > 0, 'There is no amqp connections')
        remote.execute("iptables -I OUTPUT 1 -m owner --uid-owner heat -m"
                       " state --state NEW,ESTABLISHED,RELATED -j DROP")

        wait(lambda: len(remote.execute(
            "netstat -nap | grep {0} | grep :5673".format(pid)
        )['stdout']) == 0, timeout=300)

        get_ocf_status = ''.join(
            remote.execute(ocf_status)['stdout']).rstrip()
        logger.info('ocf status after blocking is {0}'.format(
            get_ocf_status))
        assert_true(ocf_error in get_ocf_status,
                    "heat engine is running, status is {0}".format(
                        get_ocf_status))

        remote.execute("iptables -D OUTPUT 1 -m owner --uid-owner heat -m"
                       " state --state NEW,ESTABLISHED,RELATED")
        _wait(lambda: assert_true(ocf_success in ''.join(
            remote.execute(ocf_status)['stdout']).rstrip()), timeout=240)
        newpid = ''.join(remote.execute('pgrep heat-engine')['stdout'])
        assert_true(pid != newpid, "heat pid is still the same")
        get_ocf_status = ''.join(remote.execute(
            ocf_status)['stdout']).rstrip()
        assert_true(ocf_success in get_ocf_status,
                    "heat engine is not succeeded, status is {0}".format(
                        get_ocf_status))
        assert_true(len(
            remote.execute("netstat -nap | grep {0} | grep :5673".format(
                newpid))['stdout']) > 0)
        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.run_ostf(cluster_id=cluster_id)
Example #26
    def ha_corosync_stability_check(self):
        @logwrap
        def _get_pcm_nodes(remote, pure=False):
            nodes = {}
            pcs_status = remote.execute('pcs status nodes')['stdout']
            pcm_nodes = yaml.load(''.join(pcs_status).strip())
            for status in ('Online', 'Offline', 'Standby'):
                list_nodes = (pcm_nodes['Pacemaker Nodes'][status]
                              or '').split()
                if not pure:
                    nodes[status] = [
                        self.fuel_web.get_fqdn_by_hostname(x)
                        for x in list_nodes
                    ]
                else:
                    nodes[status] = list_nodes
            return nodes

        def _check_all_pcs_nodes_status(ctrl_remotes, pcs_nodes_online,
                                        status):
            for remote in ctrl_remotes:
                pcs_nodes = _get_pcm_nodes(remote)
                logger.debug(
                    "Status of pacemaker nodes on node {0}: {1}".format(
                        node['name'], pcs_nodes))
                if set(pcs_nodes_online) != set(pcs_nodes[status]):
                    return False
            return True

        if not self.env.d_env.has_snapshot(self.snapshot_name):
            raise SkipTest()
        self.env.revert_snapshot(self.snapshot_name)
        devops_name = self.env.d_env.nodes().slaves[0].name
        controller_node = self.fuel_web.get_nailgun_node_by_name(devops_name)
        with self.fuel_web.get_ssh_for_node(devops_name) as remote_controller:
            pcs_nodes = self.fuel_web.get_pcm_nodes(devops_name)
            assert_true(
                not pcs_nodes['Offline'],
                "There are offline nodes: {0}".format(pcs_nodes['Offline']))
            pcs_nodes_online = pcs_nodes['Online']
            cluster_id = self.fuel_web.get_last_created_cluster()
            ctrl_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
                cluster_id, ['controller'])
            alive_corosync_nodes = [
                node for node in ctrl_nodes
                if node['mac'] != controller_node['mac']
            ]
            ctrl_remotes = [
                self.env.d_env.get_ssh_to_remote(node['ip'])
                for node in ctrl_nodes
            ]
            live_remotes = [
                self.env.d_env.get_ssh_to_remote(node['ip'])
                for node in alive_corosync_nodes
            ]
            for count in xrange(500):
                logger.debug('Checking splitbrain in the loop, '
                             'count number: {0}'.format(count))
                _wait(lambda: assert_equal(
                    remote_controller.execute('killall -TERM corosync')
                    ['exit_code'], 0, 'Corosync was not killed on controller, '
                    'see debug log, count-{0}'.format(count)),
                      timeout=20)
                _wait(lambda: assert_true(
                    _check_all_pcs_nodes_status(live_remotes, [
                        controller_node['fqdn']
                    ], 'Offline'), 'Caught splitbrain, see debug log, '
                    'count-{0}'.format(count)),
                      timeout=20)
                _wait(lambda: assert_equal(
                    remote_controller.execute(
                        'service corosync start && service pacemaker '
                        'restart')['exit_code'], 0,
                    'Corosync was not started, see debug log,'
                    ' count-{0}'.format(count)),
                      timeout=20)
                _wait(lambda: assert_true(
                    _check_all_pcs_nodes_status(ctrl_remotes, pcs_nodes_online,
                                                'Online'),
                    'Corosync was not started on controller, see debug '
                    'log, count: {0}'.format(count)),
                      timeout=20)
            for remote in ctrl_remotes:
                remote.clear()
            for remote in live_remotes:
                remote.clear()
Example #27
    def fuel_migration(self):
        """Fuel master migration to VM

        Scenario:

            1. Create cluster
            2. Run OSTF tests
            3. Run Network check
            4. Migrate fuel-master to VM
            5. Run OSTF tests
            6. Run Network check
            7. Check statuses for master services

        Duration 210m
        """
        self.env.revert_snapshot("ready_with_3_slaves")
        data = {
            'net_provider': 'neutron',
            'net_segment_type': settings.NEUTRON_SEGMENT_TYPE
        }

        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE_HA,
            settings=data)

        self.fuel_web.update_nodes(cluster_id, {
            'slave-01': ['controller'],
            'slave-02': ['compute']
        })

        # Check network
        self.fuel_web.verify_network(cluster_id)

        # Cluster deploy
        self.fuel_web.deploy_cluster_wait(cluster_id)

        # Check network
        self.fuel_web.verify_network(cluster_id)

        # Fuel migration
        remote = self.env.d_env.get_admin_remote()
        logger.info('Fuel migration on compute slave-02')

        result = remote.execute(
            'fuel-migrate ' +
            self.fuel_web.get_nailgun_node_by_name('slave-02')['ip'] +
            ' >/dev/null &')
        assert_equal(
            result['exit_code'], 0,
            'Failed to execute "{0}" on remote host: {1}'.format(
                'fuel-migrate' + self.env.d_env.nodes().slaves[0].name,
                result))
        checkers.wait_phrase_in_log(remote,
                                    60 * 60,
                                    interval=0.2,
                                    phrase='Rebooting to begin '
                                    'the data sync process',
                                    log_path='/var/log/fuel-migrate.log')
        remote.clear()
        logger.info('Rebooting to begin the data sync process for fuel '
                    'migrate')

        wait(lambda: not icmp_ping(self.env.get_admin_node_ip()),
             timeout=60 * 15,
             timeout_msg='Master node has not become offline '
             'after rebooting')
        wait(lambda: icmp_ping(self.env.get_admin_node_ip()),
             timeout=60 * 15,
             timeout_msg='Master node has not become online '
             'after rebooting')
        self.env.d_env.nodes().admin.await(network_name=self.d_env.admin_net,
                                           timeout=60 * 15)
        with self.env.d_env.get_admin_remote() as remote:
            checkers.wait_phrase_in_log(remote,
                                        60 * 90,
                                        interval=0.1,
                                        phrase='Stop network and up with '
                                        'new settings',
                                        log_path='/var/log/fuel-migrate.log')
        logger.info('Shutting down network')

        wait(lambda: not icmp_ping(self.env.get_admin_node_ip()),
             timeout=60 * 15,
             interval=0.1,
             timeout_msg='Master node has not become offline shutting network')
        wait(lambda: icmp_ping(self.env.get_admin_node_ip()),
             timeout=60 * 15,
             timeout_msg='Master node has not become online shutting network')

        self.env.d_env.nodes().admin.await(network_name=self.d_env.admin_net,
                                           timeout=60 * 10)

        logger.info("Check containers")
        self.env.docker_actions.wait_for_ready_containers(timeout=60 * 30)

        logger.info("Check services")
        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.assert_ha_services_ready(cluster_id)
        self.fuel_web.assert_os_services_ready(cluster_id)

        # Check network
        self.fuel_web.verify_network(cluster_id)

        # Run ostf
        _wait(lambda: self.fuel_web.run_ostf(cluster_id,
                                             test_sets=['smoke', 'sanity']),
              timeout=1500)
        logger.debug("OSTF tests are pass now")
Example #28
    def upgrade_fuel_after_rollback(self):
        """Upgrade Fuel after rollback and deploy new cluster

        Scenario:
            1. Revert deploy_neutron_gre snapshot with 6.1 env
            2. Upgrade with rollback
            3. Run OSTF
            4. Run network verification
            5. Upgrade fuel master
            6. Check upgrading was successful
            7. Deploy 6.1 cluster with 3 nodes and neutron vlan
            8. Run OSTF for new cluster
            9. Run network verification
        """
        # TODO(ddmitriev): change snapshot name to actual when reverting 7.0
        if not self.env.d_env.has_snapshot('deploy_neutron_gre'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre")

        available_releases_before = self.fuel_web.get_releases_list_for_os(
            release_name=hlp_data.OPENSTACK_RELEASE)

        cluster_id = self.fuel_web.get_last_created_cluster()

        self.env.admin_actions.upgrade_master_node(rollback=True)
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.admin_actions.upgrade_master_node(file_upload=False)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_TO)
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nailgun_upgrade_migration()

        # Deploy new cluster
        available_releases_after = self.fuel_web.get_releases_list_for_os(
            release_name=hlp_data.OPENSTACK_RELEASE)
        added_release = [id for id in available_releases_after
                         if id not in available_releases_before]

        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[3:6])

        new_cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            release_id=added_release[0],
            mode=hlp_data.DEPLOYMENT_MODE,
            settings={
                'net_provider': 'neutron',
                'net_segment_type': hlp_data.NEUTRON_SEGMENT['vlan']
            }
        )
        self.fuel_web.update_nodes(
            new_cluster_id, {
                'slave-04': ['controller'],
                'slave-05': ['compute'],
                'slave-06': ['cinder']
            }
        )
        self.fuel_web.run_network_verify(new_cluster_id)
        self.fuel_web.deploy_cluster_wait(new_cluster_id)
        self.fuel_web.run_ostf(new_cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
        self.fuel_web.run_network_verify(new_cluster_id)

        self.env.make_snapshot("upgrade_fuel_after_rollback")
Example #29
    def negative_auto_cic_maintenance_mode(self):
        """Check negative scenario for auto maintenance mode

        Scenario:
            1. Revert snapshot
            2. Disable UMM
            3. Change UMM.CONF
            4. Unexpected reboot
            5. Check the controller not switching in maintenance mode
            6. Check the controller become available

        Duration 85m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        n_ctrls = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['controller'])
        d_ctrls = self.fuel_web.get_devops_nodes_by_nailgun_nodes(n_ctrls)

        for devops_node in d_ctrls:
            _ip = self.fuel_web.get_nailgun_node_by_name(
                devops_node.name)['ip']
            asserts.assert_true('True' in checkers.check_available_mode(_ip),
                                "Maintenance mode is not available")
            logger.info('Change UMM.CONF on node {0}'.format(devops_node.name))

            command1 = ("echo -e 'UMM=yes\nREBOOT_COUNT=0\n"
                        "COUNTER_RESET_TIME=10' > /etc/umm.conf")
            self.ssh_manager.execute_on_remote(ip=_ip, cmd=command1)

            self.ssh_manager.execute_on_remote(ip=_ip, cmd="umm disable")

            asserts.assert_false('True' in checkers.check_available_mode(_ip),
                                 "Maintenance mode should not be available")

            command2 = 'reboot --force >/dev/null & '

            logger.info('Unexpected reboot on node {0}'.format(
                devops_node.name))

            self.ssh_manager.execute_on_remote(ip=_ip, cmd=command2)

            wait(lambda: not checkers.check_ping(self.env.get_admin_node_ip(),
                                                 _ip),
                 timeout=60 * 10)

            # Node don't have enough time for set offline status
            # after reboot --force
            # Just waiting

            asserts.assert_true(
                checkers.check_ping(self.env.get_admin_node_ip(),
                                    _ip,
                                    deadline=600),
                "Host {0} is not reachable by ping during 600 sec".format(_ip))
            logger.info('Wait a {0} node online status after unexpected '
                        'reboot'.format(devops_node.name))

            self.fuel_web.wait_nodes_get_online_state([devops_node])

            logger.info('Check that {0} node not in maintenance mode after'
                        ' unexpected reboot'.format(devops_node.name))

            asserts.assert_false('True' in checkers.check_auto_mode(_ip),
                                 "Maintenance mode should not switched")

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up([n.name for n in d_ctrls])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up([n.name for n in d_ctrls])

            _wait(lambda: self.fuel_web.run_single_ostf_test(
                cluster_id,
                test_sets=['sanity'],
                test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                    'Check that required services are running')),
                  timeout=1500)
            logger.debug("Required services are running")

            _wait(lambda: self.fuel_web.run_ostf(cluster_id, test_sets=['ha']),
                  timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 600 second try one more time"
                             " and if it fails again - test will fails ")
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
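
The try/except block that closes this example (run OSTF once, sleep 600
seconds on AssertionError, then retry exactly once) recurs in nearly every
maintenance-mode test on this page. A minimal helper that captures the same
behaviour could look like the sketch below; the name run_ostf_with_retry, its
parameters and the default delay are illustrative assumptions, not part of
fuel-qa.

import logging
import time

logger = logging.getLogger(__name__)


def run_ostf_with_retry(fuel_web, cluster_id, test_sets, retry_delay=600):
    """Run OSTF once; on AssertionError sleep and retry a single time."""
    try:
        fuel_web.run_ostf(cluster_id, test_sets=test_sets)
    except AssertionError:
        logger.debug("OSTF failed on the first probe; sleeping %s seconds "
                     "before the second and final attempt", retry_delay)
        time.sleep(retry_delay)
        fuel_web.run_ostf(cluster_id, test_sets=test_sets)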
Beispiel #30
0
    def negative_auto_cic_maintenance_mode(self):
        """Check negative scenario for auto maintenance mode

        Scenario:
            1. Revert snapshot
            2. Disable UMM
            3. Change UMM.CONF
            4. Unexpected reboot
            5. Check the controller not switching in maintenance mode
            6. Check the controller become available

        Duration 85m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        for nailgun_node in self.env.d_env.nodes().slaves[0:3]:
            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_available_mode(remote),
                            "Maintenance mode is not available")

                logger.info('Change UMM.CONF on node %s', nailgun_node.name)
                command1 = ("echo -e 'UMM=yes\nREBOOT_COUNT=0\n"
                            "COUNTER_RESET_TIME=10' > /etc/umm.conf")

                result = remote.execute(command1)
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        command1, result))

                result = remote.execute('umm disable')
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        'umm disable', result))

                assert_false('True' in check_available_mode(remote),
                             "Maintenance mode should not be available")

                logger.info('Unexpected reboot on node %s', nailgun_node.name)
                command2 = ('reboot --force >/dev/null & ')
                result = remote.execute(command2)
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        command2, result))

            # The node may not have enough time to get the offline status
            # after 'reboot --force', so just wait until SSH answers again

            _ip = self.fuel_web.get_nailgun_node_by_name(
                nailgun_node.name)['ip']
            _wait(lambda: _tcp_ping(_ip, 22), timeout=120)

            logger.info(
                'Wait a %s node online status after unexpected '
                'reboot', nailgun_node.name)
            self.fuel_web.wait_nodes_get_online_state([nailgun_node])

            logger.info(
                'Check that %s node not in maintenance mode after'
                ' unexpected reboot', nailgun_node.name)

            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_false('True' in check_auto_mode(remote),
                             "Maintenance mode should not switched")

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            _wait(lambda: self.fuel_web.run_single_ostf_test(
                cluster_id,
                test_sets=['sanity'],
                test_name=map_ostf.OSTF_TEST_MAPPING.get(
                    'Check that required services are running')),
                  timeout=1500)
            logger.debug("Required services are running")

            _wait(lambda: self.fuel_web.run_ostf(cluster_id, test_sets=['ha']),
                  timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 600 second try one more time"
                             " and if it fails again - test will fails ")
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
    def manual_cic_maintenance_mode(self):
        """Check manual maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Switch in maintenance mode
            3. Wait until controller is rebooting
            4. Exit maintenance mode
            5. Check the controller become available

        Duration 155m
        """
        self.env.revert_snapshot("cic_maintenance_mode")

        cluster_id = self.fuel_web.get_last_created_cluster()

        n_ctrls = self.fuel_web.get_nailgun_cluster_nodes_by_roles(cluster_id, ["controller"])
        d_ctrls = self.fuel_web.get_devops_nodes_by_nailgun_nodes(n_ctrls)

        for devops_node in d_ctrls:
            with self.fuel_web.get_ssh_for_node(devops_node.name) as remote:
                assert_true("True" in check_available_mode(remote), "Maintenance mode is not available")

                logger.info("Maintenance mode for node %s", devops_node.name)
                result = remote.execute("umm on")
                assert_equal(
                    result["exit_code"], 0, 'Failed to execute "{0}" on remote host: {1}'.format("umm on", result)
                )
            logger.info("Wait a %s node offline status after switching " "maintenance mode ", devops_node.name)
            try:
                wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(devops_node)["online"], timeout=60 * 10)
            except TimeoutError:
                assert_false(
                    self.fuel_web.get_nailgun_node_by_devops_node(devops_node)["online"],
                    "Node {0} has not become offline after" "switching maintenance mode".format(devops_node.name),
                )

            logger.info("Check that %s node in maintenance mode after " "switching", devops_node.name)

            _ip = self.fuel_web.get_nailgun_node_by_name(devops_node.name)["ip"]
            wait(lambda: tcp_ping(_ip, 22), timeout=60 * 10)
            with self.fuel_web.get_ssh_for_node(devops_node.name) as remote:
                assert_true("True" in check_auto_mode(remote), "Maintenance mode is not switch")

                result = remote.execute("umm off")
                assert_equal(
                    result["exit_code"], 0, 'Failed to execute "{0}" on remote host: {1}'.format("umm off", result)
                )

            logger.info("Wait a %s node online status", devops_node.name)
            try:
                wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(devops_node)["online"], timeout=60 * 10)
            except TimeoutError:
                assert_true(
                    self.fuel_web.get_nailgun_node_by_devops_node(devops_node)["online"],
                    "Node {0} has not become online after " "exiting maintenance mode".format(devops_node.name),
                )

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up([n.name for n in d_ctrls])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up([n.name for n in d_ctrls])

            _wait(
                lambda: self.fuel_web.run_single_ostf_test(
                    cluster_id,
                    test_sets=["sanity"],
                    test_name=map_ostf.OSTF_TEST_MAPPING.get("Check that required services are running"),
                ),
                timeout=1500,
            )
            logger.debug("Required services are running")

            _wait(lambda: self.fuel_web.run_ostf(cluster_id, test_sets=["ha"]), timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id, test_sets=["smoke", "sanity"])
            except AssertionError:
                logger.debug(
                    "Test failed from first probe,"
                    " we sleep 600 second try one more time"
                    " and if it fails again - test will fails "
                )
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id, test_sets=["smoke", "sanity"])
Beispiel #32
0
    def wait_for_provisioning(self):
        _wait(lambda: _tcp_ping(
            self.nodes().admin.get_ip_address_by_network_name(self.admin_net),
            22),
              timeout=5 * 60)
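
_tcp_ping in the example above simply checks whether a TCP port (here SSH,
port 22) accepts connections. A minimal stand-in with the same behaviour
could be written like this; the function name and the 5-second connect
timeout are assumptions made for illustration.

import socket


def tcp_port_open(host, port, timeout=5):
    """Return True if a TCP connection to host:port can be established."""
    try:
        sock = socket.create_connection((host, port), timeout)
    except socket.error:
        return False
    sock.close()
    return True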
Beispiel #33
0
def await_node_deploy(ip, name):
    client = CobblerClient(ip)
    token = client.login('cobbler', 'cobbler')
    _wait(lambda: check_node_ready(client, token, name), timeout=30 * 60)
Beispiel #34
0
    def ha_corosync_stability_check(self):

        @logwrap
        def _get_pcm_nodes(remote, pure=False):
            nodes = {}
            pcs_status = remote.execute('pcs status nodes')['stdout']
            pcm_nodes = yaml.load(''.join(pcs_status).strip())
            for status in ('Online', 'Offline', 'Standby'):
                list_nodes = (pcm_nodes['Pacemaker Nodes']
                              [status] or '').split()
                if not pure:
                    nodes[status] = [self.fuel_web.get_fqdn_by_hostname(x)
                                     for x in list_nodes]
                else:
                    nodes[status] = list_nodes
            return nodes

        def _check_all_pcs_nodes_status(ctrl_remotes, pcs_nodes_online,
                                        status):
            for remote in ctrl_remotes:
                pcs_nodes = _get_pcm_nodes(remote)
                logger.debug(
                    "Status of pacemaker nodes: {0}".format(pcs_nodes))
                if set(pcs_nodes_online) != set(pcs_nodes[status]):
                    return False
            return True

        if not self.env.d_env.has_snapshot(self.snapshot_name):
            raise SkipTest()
        self.env.revert_snapshot(self.snapshot_name)
        devops_name = self.env.d_env.nodes().slaves[0].name
        controller_node = self.fuel_web.get_nailgun_node_by_name(devops_name)
        with self.fuel_web.get_ssh_for_node(
                devops_name) as remote_controller:
            pcs_nodes = self.fuel_web.get_pcm_nodes(devops_name)
            assert_true(
                not pcs_nodes['Offline'], "There are offline nodes: {0}".
                format(pcs_nodes['Offline']))
            pcs_nodes_online = pcs_nodes['Online']
            cluster_id = self.fuel_web.get_last_created_cluster()
            ctrl_nodes = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
                cluster_id, ['controller'])
            alive_corosync_nodes = [node for node in ctrl_nodes
                                    if node['mac'] != controller_node['mac']]
            ctrl_remotes = [self.env.d_env.get_ssh_to_remote(node['ip'])
                            for node in ctrl_nodes]
            live_remotes = [self.env.d_env.get_ssh_to_remote(node['ip'])
                            for node in alive_corosync_nodes]
            for count in xrange(500):
                logger.debug('Checking splitbrain in the loop, '
                             'count number: {0}'.format(count))
                _wait(
                    lambda: assert_equal(
                        remote_controller.execute(
                            'killall -TERM corosync')['exit_code'], 0,
                        'Corosync was not killed on controller, '
                        'see debug log, count-{0}'.format(count)), timeout=20)
                _wait(
                    lambda: assert_true(
                        _check_all_pcs_nodes_status(
                            live_remotes, [controller_node['fqdn']],
                            'Offline'),
                        'Caught splitbrain, see debug log, '
                        'count-{0}'.format(count)), timeout=20)
                _wait(
                    lambda: assert_equal(
                        remote_controller.execute(
                            'service corosync start && service pacemaker '
                            'restart')['exit_code'], 0,
                        'Corosync was not started, see debug log,'
                        ' count-{0}'.format(count)), timeout=20)
                _wait(
                    lambda: assert_true(
                        _check_all_pcs_nodes_status(
                            ctrl_remotes, pcs_nodes_online, 'Online'),
                        'Corosync was not started on controller, see debug '
                        'log, count: {0}'.format(count)), timeout=20)
            for remote in ctrl_remotes:
                remote.clear()
            for remote in live_remotes:
                remote.clear()
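
Unlike the boolean checks elsewhere, the _wait() calls in this example are
given callables that raise (assert_equal / assert_true) instead of returning
a value, so the helper has to retry while the call keeps raising and re-raise
once the timeout is exceeded. A sketch of that idiom, under the assumption
that _wait behaves this way (the real helper may differ in details):

import time


def wait_until_passes(raising_predicate, expected=AssertionError,
                      interval=2, timeout=60):
    """Call raising_predicate() until it stops raising `expected`.

    Re-raises the last exception once the timeout is exceeded.
    """
    deadline = time.time() + timeout
    while True:
        try:
            return raising_predicate()
        except expected:
            if time.time() >= deadline:
                raise
            time.sleep(interval)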
Beispiel #35
0
    def upgrade_fuel_after_rollback(self):
        """Upgrade Fuel after rollback and deploy new cluster

        Scenario:
            1. Revert deploy_neutron_gre snapshot with 6.1 env
            2. Upgrade with rollback
            3. Run OSTF
            4. Run network verification
            5. Upgrade fuel master
            6. Check upgrading was successful
            7. Deploy 6.1 cluster with 3 nodes and neutron vlan
            8. Run OSTF for new cluster
            9. Run network verification
        """
        # TODO(ddmitriev): change snapshot name to actual when reverting 7.0
        if not self.env.d_env.has_snapshot('deploy_neutron_gre'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre")

        available_releases_before = self.fuel_web.get_releases_list_for_os(
            release_name=hlp_data.OPENSTACK_RELEASE)

        cluster_id = self.fuel_web.get_last_created_cluster()

        self.env.admin_actions.upgrade_master_node(rollback=True)
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]),
              timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id, test_sets=['ha', 'smoke', 'sanity'])

        self.env.admin_actions.upgrade_master_node(file_upload=False)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_TO)
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nailgun_upgrade_migration()

        # Deploy new cluster
        available_releases_after = self.fuel_web.get_releases_list_for_os(
            release_name=hlp_data.OPENSTACK_RELEASE)
        added_release = [
            release_id for release_id in available_releases_after
            if release_id not in available_releases_before
        ]

        self.env.bootstrap_nodes(self.env.d_env.nodes().slaves[3:6])

        new_cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            release_id=added_release[0],
            mode=hlp_data.DEPLOYMENT_MODE,
            settings={
                'net_provider': 'neutron',
                'net_segment_type': hlp_data.NEUTRON_SEGMENT['vlan']
            })
        self.fuel_web.update_nodes(
            new_cluster_id, {
                'slave-04': ['controller'],
                'slave-05': ['compute'],
                'slave-06': ['cinder']
            })
        self.fuel_web.run_network_verify(new_cluster_id)
        self.fuel_web.deploy_cluster_wait(new_cluster_id)
        self.fuel_web.run_ostf(new_cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
        self.fuel_web.run_network_verify(new_cluster_id)

        self.env.make_snapshot("upgrade_fuel_after_rollback")
Beispiel #36
0
    def upgrade_fuel_after_rollback(self):
        """Upgrade Fuel after rollback and deploy new cluster

        Scenario:
            1. Revert deploy_neutron_gre snapshot with 6.1 env
            2. Upgrade with rollback
            3. Run OSTF
            4. Run network verification
            5. Upgrade fuel master
            6. Check upgrading was successful
            7. Deploy 6.1 cluster with 3 nodes and neutron vlan
            8. Run OSTF for new cluster
            9. Run network verification
        """
        # TODO(ddmitriev): change snapshot name to actual when reverting 7.0
        if not self.env.d_env.has_snapshot('deploy_neutron_gre'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre")

        available_releases_before = self.fuel_web.get_releases_list_for_os(
            release_name=hlp_data.OPENSTACK_RELEASE)

        remote = self.env.d_env.get_admin_remote

        cluster_id = self.fuel_web.get_last_created_cluster()
        checkers.upload_tarball(remote(), hlp_data.TARBALL_PATH, '/var')
        checkers.check_file_exists(remote(),
                                   os.path.join('/var',
                                                os.path.basename(
                                                    hlp_data.TARBALL_PATH)))
        checkers.untar(remote(), os.path.basename(hlp_data.TARBALL_PATH),
                       '/var')

        # Upgrade with rollback
        keystone_pass = hlp_data.KEYSTONE_CREDS['password']
        checkers.run_script(remote(), '/var', 'upgrade.sh',
                            password=keystone_pass, rollback=True,
                            exit_code=255)
        checkers.wait_rollback_is_done(remote(), 3000)
        checkers.check_upgraded_containers(remote(), hlp_data.UPGRADE_FUEL_TO,
                                           hlp_data.UPGRADE_FUEL_FROM)
        logger.debug("all containers are ok")
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        # Upgrade fuel master
        checkers.run_script(remote(), '/var', 'upgrade.sh',
                            password=keystone_pass)
        checkers.wait_upgrade_is_done(remote(), 3000,
                                      phrase='*** UPGRADING MASTER NODE'
                                             ' DONE SUCCESSFULLY')
        checkers.check_upgraded_containers(remote(),
                                           hlp_data.UPGRADE_FUEL_FROM,
                                           hlp_data.UPGRADE_FUEL_TO)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_TO)
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nailgun_upgrade_migration()

        # Deploy new cluster
        available_releases_after = self.fuel_web.get_releases_list_for_os(
            release_name=hlp_data.OPENSTACK_RELEASE)
        added_release = [
            release_id for release_id in available_releases_after
            if release_id not in available_releases_before
        ]

        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[3:6])

        new_cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            release_id=added_release[0],
            mode=hlp_data.DEPLOYMENT_MODE,
            settings={
                'net_provider': 'neutron',
                'net_segment_type': hlp_data.NEUTRON_SEGMENT['vlan']
            }
        )
        self.fuel_web.update_nodes(
            new_cluster_id, {
                'slave-04': ['controller'],
                'slave-05': ['compute'],
                'slave-06': ['cinder']
            }
        )
        self.fuel_web.run_network_verify(new_cluster_id)
        self.fuel_web.deploy_cluster_wait(new_cluster_id)
        self.fuel_web.run_ostf(new_cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])
        self.fuel_web.run_network_verify(new_cluster_id)

        self.env.make_snapshot("upgrade_fuel_after_rollback")
Beispiel #37
0
    def wait_for_provisioning(self,
                              timeout=settings.WAIT_FOR_PROVISIONING_TIMEOUT):
        _wait(lambda: _tcp_ping(
            self.d_env.nodes().admin.get_ip_address_by_network_name(
                self.d_env.admin_net), 22),
              timeout=timeout)
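
The only difference from the earlier wait_for_provisioning variant is that
the timeout now comes from settings. Such a settings entry is usually just an
environment-variable override with a default; the default below is an assumed
value, not the one used by fuel-qa.

import os

WAIT_FOR_PROVISIONING_TIMEOUT = int(
    os.environ.get('WAIT_FOR_PROVISIONING_TIMEOUT', 20 * 60))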
Beispiel #38
0
    def create_component_and_env_configdb(self):
        """ Install and check ConfigDB

        Scenario:
            1. Revert snapshot empty
            2. Install configDB extension
            3. Create components
            4. Create environment with component
            5. Get and check created data
            6. Make snapshot

        Duration: 5 min
        Snapshot: create_component_and_env_configdb
        """

        self.check_run('create_component_and_env_configdb')
        self.show_step(1)
        self.env.revert_snapshot('empty')
        self.show_step(2)
        install_configdb(master_node_ip=self.ssh_manager.admin_ip)

        logger.debug('Waiting for ConfigDB')
        _wait(lambda: self.fuel_web.client.get_components(), timeout=45)

        logger.debug('Get env and component data')
        components = self.fuel_web.client.get_components()
        envs = self.fuel_web.client.get_environments()

        assert_false(components,
                     "Components is not empty after tuningbox installation")
        assert_false(envs,
                     "Environments is not empty after tuningbox installation")

        # Uploaded data
        component = {
            "name":
            "comp1",
            "resource_definitions": [{
                "name": self.RESOURCE_NAME_1,
                "content": {}
            }, {
                "name": self.SLASHED_RESOURCE,
                "content": {}
            }]
        }

        environment = {
            "name": "env1",
            "components": ["comp1"],
            "hierarchy_levels": ["nodes"]
        }
        self.show_step(3)
        self.fuel_web.client.create_component(component)
        self.show_step(4)
        self.fuel_web.client.create_environment(environment)
        self.show_step(5)
        comp = self.fuel_web.client.get_components(comp_id=1)
        env = self.fuel_web.client.get_environments(env_id=1)

        expected_comp = {
            'resource_definitions': [{
                'content': {},
                'component_id': 1,
                'id': 1,
                'name': self.RESOURCE_NAME_1
            }, {
                'content': {},
                'component_id': 1,
                'id': 2,
                'name': self.SLASHED_RESOURCE
            }],
            'id':
            1,
            'name':
            "comp1"
        }
        expected_env = {
            'hierarchy_levels': ["nodes"],
            'id': 1,
            'components': [1]
        }
        logger.debug('Compare original component with '
                     'received component from API')
        assert_equal(comp, expected_comp)
        logger.debug('Compare original env with received env from API')
        assert_equal(env, expected_env)
        self.show_step(6)
        self.env.make_snapshot('create_component_and_env_configdb',
                               is_make=True)

    def negative_auto_cic_maintenance_mode(self):
        """Check negative scenario for auto maintenance mode

        Scenario:
            1. Revert snapshot
            2. Disable UMM
            3. Change UMM.CONF
            4. Unexpected reboot
            5. Check the controller not switching in maintenance mode
            6. Check the controller become available

        Duration 85m
        """
        self.env.revert_snapshot("cic_maintenance_mode")

        cluster_id = self.fuel_web.get_last_created_cluster()

        n_ctrls = self.fuel_web.get_nailgun_cluster_nodes_by_roles(cluster_id, ["controller"])
        d_ctrls = self.fuel_web.get_devops_nodes_by_nailgun_nodes(n_ctrls)

        for devops_node in d_ctrls:
            _ip = self.fuel_web.get_nailgun_node_by_name(devops_node.name)["ip"]
            with self.fuel_web.get_ssh_for_node(devops_node.name) as remote:
                assert_true("True" in check_available_mode(remote), "Maintenance mode is not available")

                logger.info("Change UMM.CONF on node %s", devops_node.name)
                command1 = "echo -e 'UMM=yes\nREBOOT_COUNT=0\n" "COUNTER_RESET_TIME=10' > /etc/umm.conf"

                result = remote.execute(command1)
                assert_equal(
                    result["exit_code"], 0, 'Failed to execute "{0}" on remote host: {1}'.format(command1, result)
                )

                result = remote.execute("umm disable")
                assert_equal(
                    result["exit_code"], 0, 'Failed to execute "{0}" on remote host: {1}'.format("umm disable", result)
                )

                assert_false("True" in check_available_mode(remote), "Maintenance mode should not be available")

                logger.info("Unexpected reboot on node %s", devops_node.name)
                command2 = "reboot --force >/dev/null & "
                result = remote.execute(command2)
                assert_equal(
                    result["exit_code"], 0, 'Failed to execute "{0}" on remote host: {1}'.format(command2, result)
                )
                wait(lambda: not tcp_ping(_ip, 22), timeout=60 * 10)

            # The node may not have enough time to get the offline status
            # after 'reboot --force', so just wait until SSH answers again

            wait(lambda: tcp_ping(_ip, 22), timeout=60 * 10)

            logger.info("Wait a %s node online status after unexpected " "reboot", devops_node.name)
            self.fuel_web.wait_nodes_get_online_state([devops_node])

            logger.info("Check that %s node not in maintenance mode after" " unexpected reboot", devops_node.name)

            with self.fuel_web.get_ssh_for_node(devops_node.name) as remote:
                assert_false("True" in check_auto_mode(remote), "Maintenance mode should not switched")

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up([n.name for n in d_ctrls])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up([n.name for n in d_ctrls])

            _wait(
                lambda: self.fuel_web.run_single_ostf_test(
                    cluster_id,
                    test_sets=["sanity"],
                    test_name=map_ostf.OSTF_TEST_MAPPING.get("Check that required services are running"),
                ),
                timeout=1500,
            )
            logger.debug("Required services are running")

            _wait(lambda: self.fuel_web.run_ostf(cluster_id, test_sets=["ha"]), timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id, test_sets=["smoke", "sanity"])
            except AssertionError:
                logger.debug(
                    "Test failed from first probe,"
                    " we sleep 600 second try one more time"
                    " and if it fails again - test will fails "
                )
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id, test_sets=["smoke", "sanity"])
Beispiel #40
0
    def rollback_automatically_delete_node(self):
        """Rollback automatically ha one controller deployed cluster
           and delete node from cluster

        Scenario:
            1. Revert snapshot with deploy neutron gre env
            2. Add raise exception to docker_engine.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Delete 1 node and re-deploy cluster
            9. Run OSTF

        """
        if not self.env.d_env.has_snapshot('deploy_neutron_gre'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre")
        cluster_id = self.fuel_web.get_last_created_cluster()

        checkers.upload_tarball(self.env.d_env.get_admin_remote(),
                                hlp_data.TARBALL_PATH, '/var')
        checkers.check_tarball_exists(self.env.d_env.get_admin_remote(),
                                      os.path.basename(
                                          hlp_data.TARBALL_PATH),
                                      '/var')
        checkers.untar(self.env.d_env.get_admin_remote(),
                       os.path.basename(hlp_data.TARBALL_PATH), '/var')
        # we expect 255 exit code here because upgrade failed
        # and exit status is 255
        checkers.run_script(self.env.d_env.get_admin_remote(),
                            '/var',
                            'upgrade.sh',
                            password=hlp_data.KEYSTONE_CREDS['password'],
                            rollback=True, exit_code=255)
        checkers.wait_rollback_is_done(self.env.d_env.get_admin_remote(), 3000)
        checkers.check_upgraded_containers(self.env.d_env.get_admin_remote(),
                                           hlp_data.UPGRADE_FUEL_TO,
                                           hlp_data.UPGRADE_FUEL_FROM)
        logger.debug("all containers are ok")
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id)
        nailgun_nodes = self.fuel_web.update_nodes(
            cluster_id, {'slave-03': ['compute', 'cinder']}, False, True)
        task = self.fuel_web.deploy_cluster(cluster_id)
        self.fuel_web.assert_task_success(task)
        nodes = filter(lambda x: x["pending_deletion"] is True, nailgun_nodes)
        try:
            wait(lambda: len(self.fuel_web.client.list_nodes()) == 3,
                 timeout=5 * 60)
        except TimeoutError:
            assert_true(len(self.fuel_web.client.list_nodes()) == 3,
                        'Node {0} was not discovered in 5 * 60 sec'.format(
                            nodes[0]))
        self.fuel_web.run_ostf(cluster_id=cluster_id, should_fail=1)

        self.env.make_snapshot("rollback_automatically_delete_mode")
Beispiel #41
0
    def auto_cic_maintenance_mode(self):
        """Check auto maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Unexpected reboot
            3. Wait until controller is switching in maintenance mode
            4. Exit maintenance mode
            5. Check the controller become available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        for nailgun_node in self.env.d_env.nodes().slaves[0:3]:
            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_available_mode(remote),
                            "Maintenance mode is not available")

                logger.info('Change UMM.CONF on node %s', nailgun_node.name)
                command1 = ("echo -e 'UMM=yes\nREBOOT_COUNT=0\n"
                            "COUNTER_RESET_TIME=10' > /etc/umm.conf")

                result = remote.execute(command1)
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        command1, result))

                logger.info('Unexpected reboot on node %s', nailgun_node.name)
                command2 = ('reboot --force >/dev/null & ')
                result = remote.execute(command2)
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        command2, result))

            logger.info(
                'Wait a %s node offline status after unexpected '
                'reboot', nailgun_node.name)
            try:
                wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
                    nailgun_node)['online'],
                     timeout=60 * 10)
            except TimeoutError:
                assert_false(
                    self.fuel_web.get_nailgun_node_by_devops_node(
                        nailgun_node)['online'],
                    'Node {0} has not become offline after unexpected '
                    'reboot'.format(nailgun_node.name))

            logger.info(
                'Check that %s node in maintenance mode after'
                ' unexpected reboot', nailgun_node.name)

            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_auto_mode(remote),
                            "Maintenance mode is not switch")

                result = remote.execute('umm off')
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        'umm off', result))
                # Wait umm stops
                time.sleep(30)
                command3 = ("echo -e 'UMM=yes\nREBOOT_COUNT=2\n"
                            "COUNTER_RESET_TIME=10' > /etc/umm.conf")
                result = remote.execute(command3)
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        command3, result))

            logger.info('Wait a %s node online status', nailgun_node.name)
            try:
                wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
                    nailgun_node)['online'],
                     timeout=90 * 10)
            except TimeoutError:
                assert_true(
                    self.fuel_web.get_nailgun_node_by_devops_node(
                        nailgun_node)['online'],
                    'Node {0} has not become online after umm off'.format(
                        nailgun_node.name))

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            _wait(lambda: self.fuel_web.run_single_ostf_test(
                cluster_id,
                test_sets=['sanity'],
                test_name=map_ostf.OSTF_TEST_MAPPING.get(
                    'Check that required services are running')),
                  timeout=1500)
            logger.debug("Required services are running")

            _wait(lambda: self.fuel_web.run_ostf(cluster_id, test_sets=['ha']),
                  timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 600 second try one more time"
                             " and if it fails again - test will fails ")
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
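
Both auto-maintenance tests rewrite /etc/umm.conf twice, changing only
REBOOT_COUNT. A tiny command builder would remove the duplicated echo
strings (a sketch; the helper is not part of fuel-qa):

def umm_conf_cmd(reboot_count, umm='yes', counter_reset_time=10):
    """Build the shell command that rewrites /etc/umm.conf."""
    return ("echo -e 'UMM={0}\nREBOOT_COUNT={1}\n"
            "COUNTER_RESET_TIME={2}' > /etc/umm.conf").format(
                umm, reboot_count, counter_reset_time)

# command1 = umm_conf_cmd(reboot_count=0)   # before the unexpected reboot
# command3 = umm_conf_cmd(reboot_count=2)   # after 'umm off'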
Beispiel #42
0
        """Create SSH-connection to the network

        :rtype : SSHClient
        """
        return SSHClient(
            self.get_ip_address_by_network_name(network_name),
            username=login,
            password=password,
            private_keys=private_keys,
        )

    def send_keys(self, keys):
        self.driver.node_send_keys(self, keys)

    def await(self, network_name, timeout=120, by_port=22):
        _wait(lambda: _tcp_ping(self.get_ip_address_by_network_name(network_name), by_port), timeout=timeout)

    def define(self):
        self.driver.node_define(self)
        self.save()

    def start(self):
        self.create(verbose=False)

    def create(self, verbose=False):
        if verbose or not self.driver.node_active(self):
            self.driver.node_create(self)

    def destroy(self, verbose=False):
        if verbose or self.driver.node_active(self):
            self.driver.node_destroy(self)
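
Most tests on this page open the SSH connection through a with block so the
session is always closed. If a client object only exposes close(), the same
guarantee can be added with a small context manager; ssh_session and the
get_ssh_client factory argument are hypothetical names used only for this
sketch.

from contextlib import contextmanager


@contextmanager
def ssh_session(get_ssh_client, *args, **kwargs):
    """Yield an SSH client and make sure it is closed afterwards."""
    client = get_ssh_client(*args, **kwargs)
    try:
        yield client
    finally:
        client.close()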
Beispiel #43
0
    def manual_cic_maintenance_mode(self):
        """Check manual maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Switch in maintenance mode
            3. Wait until controller is rebooting
            4. Exit maintenance mode
            5. Check the controller become available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        for nailgun_node in self.env.d_env.nodes().slaves[0:3]:
            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_available_mode(remote),
                            "Maintenance mode is not available")

                logger.info('Maintenance mode for node %s', nailgun_node.name)
                result = remote.execute('umm on')
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        'umm on', result))
            logger.info(
                'Wait a %s node offline status after switching '
                'maintenance mode ', nailgun_node.name)
            try:
                wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
                    nailgun_node)['online'],
                     timeout=60 * 10)
            except TimeoutError:
                assert_false(
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'], 'Node {0} has not become offline after '
                    'switching maintenance mode'.format(nailgun_node.name))

            logger.info(
                'Check that %s node in maintenance mode after '
                'switching', nailgun_node.name)

            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_auto_mode(remote),
                            "Maintenance mode is not switch")

                result = remote.execute('umm off')
                assert_equal(
                    result['exit_code'], 0,
                    'Failed to execute "{0}" on remote host: {1}'.format(
                        'umm off', result))

            logger.info('Wait a %s node online status', nailgun_node.name)
            try:
                wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
                    nailgun_node)['online'],
                     timeout=60 * 10)
            except TimeoutError:
                assert_true(
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'], 'Node {0} has not become online after '
                    'exiting maintenance mode'.format(nailgun_node.name))

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            _wait(lambda: self.fuel_web.run_single_ostf_test(
                cluster_id,
                test_sets=['sanity'],
                test_name=map_ostf.OSTF_TEST_MAPPING.get(
                    'Check that required services are running')),
                  timeout=1500)
            logger.debug("Required services are running")

            _wait(lambda: self.fuel_web.run_ostf(cluster_id, test_sets=['ha']),
                  timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 600 second try one more time"
                             " and if it fails again - test will fails ")
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
    def auto_cic_maintenance_mode(self):
        """Check auto maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Unexpected reboot
            3. Wait until controller is switching in maintenance mode
            4. Exit maintenance mode
            5. Check the controller become available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        for nailgun_node in self.env.d_env.nodes().slaves[0:3]:
            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_available_mode(remote),
                            "Maintenance mode is not available")

                logger.info('Change UMM.CONF on node %s', nailgun_node.name)
                command1 = ("echo -e 'UMM=yes\nREBOOT_COUNT=0\n"
                            "COUNTER_RESET_TIME=10' > /etc/umm.conf")

                result = remote.execute(command1)
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format(command1, result))

                logger.info('Unexpected reboot on node %s', nailgun_node.name)
                command2 = ('reboot --force >/dev/null & ')
                result = remote.execute(command2)
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format(command2, result))

            logger.info('Wait a %s node offline status after unexpected '
                        'reboot', nailgun_node.name)
            try:
                wait(
                    lambda: not
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'], timeout=60 * 10)
            except TimeoutError:
                assert_false(
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'],
                    'Node {0} has not become offline after unexpected '
                    'reboot'.format(nailgun_node.name))

            logger.info('Check that %s node in maintenance mode after'
                        ' unexpected reboot', nailgun_node.name)

            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_auto_mode(remote),
                            "Maintenance mode is not switch")

                result = remote.execute('umm off')
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format('umm off', result))
                # Wait umm stops
                time.sleep(30)
                command3 = ("echo -e 'UMM=yes\nREBOOT_COUNT=2\n"
                            "COUNTER_RESET_TIME=10' > /etc/umm.conf")
                result = remote.execute(command3)
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format(command3, result))

            logger.info('Wait a %s node online status', nailgun_node.name)
            try:
                wait(
                    lambda:
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'], timeout=90 * 10)
            except TimeoutError:
                assert_true(
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'],
                    'Node {0} has not become online after umm off'.format(
                        nailgun_node.name))

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            _wait(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['sanity'],
                      test_name=map_ostf.OSTF_TEST_MAPPING.get(
                          'Check that required services are running')),
                  timeout=1500)
            logger.debug("Required services are running")

            _wait(lambda:
                  self.fuel_web.run_ostf(cluster_id, test_sets=['ha']),
                  timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 600 second try one more time"
                             " and if it fails again - test will fails ")
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
    def manual_cic_maintenance_mode(self):
        """Check manual maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Switch in maintenance mode
            3. Wait until controller is rebooting
            4. Exit maintenance mode
            5. Check the controller become available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        n_ctrls = self.fuel_web.get_nailgun_cluster_nodes_by_roles(
            cluster_id, ['controller'])
        d_ctrls = self.fuel_web.get_devops_nodes_by_nailgun_nodes(n_ctrls)

        for devops_node in d_ctrls:
            _ip = self.fuel_web.get_nailgun_node_by_name(
                devops_node.name)['ip']
            logger.info('Maintenance mode for node {0}'.format(
                devops_node.name))
            asserts.assert_true('True' in checkers.check_available_mode(_ip),
                                "Maintenance mode is not available")
            self.ssh_manager.execute_on_remote(ip=_ip, cmd="umm on")

            logger.info('Wait a {0} node offline status after switching '
                        'maintenance mode '.format(devops_node.name))
            try:
                wait(lambda: not self.fuel_web.get_nailgun_node_by_devops_node(
                    devops_node)['online'],
                     timeout=60 * 10)
            except TimeoutError:
                asserts.assert_false(
                    self.fuel_web.get_nailgun_node_by_devops_node(devops_node)
                    ['online'], 'Node {0} has not become offline after '
                    'switching maintenance mode'.format(devops_node.name))

            logger.info('Check that {0} node in maintenance mode after '
                        'switching'.format(devops_node.name))

            _ip = self.fuel_web.get_nailgun_node_by_name(
                devops_node.name)['ip']
            asserts.assert_true(
                checkers.check_ping(self.env.get_admin_node_ip(),
                                    _ip,
                                    deadline=600),
                "Host {0} is not reachable by ping during 600 sec".format(_ip))

            asserts.assert_true('True' in checkers.check_auto_mode(_ip),
                                "Maintenance mode is not switch")

            self.ssh_manager.execute_on_remote(ip=_ip, cmd="umm off")

            logger.info('Wait a {0} node online status'.format(
                devops_node.name))
            try:
                wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
                    devops_node)['online'],
                     timeout=60 * 10)
            except TimeoutError:
                asserts.assert_true(
                    self.fuel_web.get_nailgun_node_by_devops_node(devops_node)
                    ['online'], 'Node {0} has not become online after '
                    'exiting maintenance mode'.format(devops_node.name))

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up([n.name for n in d_ctrls])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up([n.name for n in d_ctrls])

            _wait(lambda: self.fuel_web.run_single_ostf_test(
                cluster_id,
                test_sets=['sanity'],
                test_name=ostf_test_mapping.OSTF_TEST_MAPPING.get(
                    'Check that required services are running')),
                  timeout=1500)
            logger.debug("Required services are running")

            _wait(lambda: self.fuel_web.run_ostf(cluster_id, test_sets=['ha']),
                  timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 600 second try one more time"
                             " and if it fails again - test will fails ")
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
    def negative_auto_cic_maintenance_mode(self):
        """Check negative scenario for auto maintenance mode

        Scenario:
            1. Revert snapshot
            2. Disable UMM
            3. Change UMM.CONF
            4. Unexpected reboot
            5. Check the controller not switching in maintenance mode
            6. Check the controller become available

        Duration 85m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        for nailgun_node in self.env.d_env.nodes().slaves[0:3]:
            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_available_mode(remote),
                            "Maintenance mode is not available")

                logger.info('Change UMM.CONF on node %s', nailgun_node.name)
                command1 = ("echo -e 'UMM=yes\nREBOOT_COUNT=0\n"
                            "COUNTER_RESET_TIME=10' > /etc/umm.conf")

                result = remote.execute(command1)
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format(command1, result))

                result = remote.execute('umm disable')
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format('umm disable', result))

                assert_false('True' in check_available_mode(remote),
                             "Maintenance mode should not be available")

                logger.info('Unexpected reboot on node %s', nailgun_node.name)
                command2 = ('reboot --force >/dev/null & ')
                result = remote.execute(command2)
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format(command2, result))

            # The node may not have enough time to get the offline status
            # after 'reboot --force', so just wait until SSH answers again

            _ip = self.fuel_web.get_nailgun_node_by_name(
                nailgun_node.name)['ip']
            _wait(lambda: _tcp_ping(_ip, 22), timeout=120)

            logger.info('Wait a %s node online status after unexpected '
                        'reboot', nailgun_node.name)
            self.fuel_web.wait_nodes_get_online_state([nailgun_node])

            logger.info('Check that %s node not in maintenance mode after'
                        ' unexpected reboot', nailgun_node.name)

            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_false('True' in check_auto_mode(remote),
                             "Maintenance mode should not switched")

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            _wait(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['sanity'],
                      test_name=map_ostf.OSTF_TEST_MAPPING.get(
                          'Check that required services are running')),
                  timeout=1500)
            logger.debug("Required services are running")

            _wait(lambda:
                  self.fuel_web.run_ostf(cluster_id, test_sets=['ha']),
                  timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 600 second try one more time"
                             " and if it fails again - test will fails ")
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])

    def manual_cic_maintenance_mode(self):
        """Check manual maintenance mode for controller

        Scenario:
            1. Revert snapshot
            2. Switch the controller into maintenance mode
            3. Wait until the controller is rebooted
            4. Exit maintenance mode
            5. Check that the controller becomes available

        Duration 155m
        """
        self.env.revert_snapshot('cic_maintenance_mode')

        cluster_id = self.fuel_web.get_last_created_cluster()

        for nailgun_node in self.env.d_env.nodes().slaves[0:3]:
            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_available_mode(remote),
                            "Maintenance mode is not available")

                logger.info('Switching on maintenance mode for node %s',
                            nailgun_node.name)
                result = remote.execute('umm on')
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format('umm on', result))
            logger.info('Wait for the %s node to go offline after switching '
                        'to maintenance mode', nailgun_node.name)
            try:
                wait(
                    lambda: not
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'], timeout=60 * 10)
            except TimeoutError:
                assert_false(
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'],
                    'Node {0} has not become offline after '
                    'switching to maintenance mode'.format(nailgun_node.name))

            logger.info('Check that the %s node is in maintenance mode after '
                        'switching', nailgun_node.name)

            with self.fuel_web.get_ssh_for_node(nailgun_node.name) as remote:
                assert_true('True' in check_auto_mode(remote),
                            "Maintenance mode is not switch")

                result = remote.execute('umm off')
                assert_equal(result['exit_code'], 0,
                             'Failed to execute "{0}" on remote host: {1}'.
                             format('umm off', result))

            logger.info('Wait for the %s node to come back online',
                        nailgun_node.name)
            try:
                wait(
                    lambda:
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'], timeout=60 * 10)
            except TimeoutError:
                assert_true(
                    self.fuel_web.get_nailgun_node_by_devops_node(nailgun_node)
                    ['online'],
                    'Node {0} has not become online after '
                    'exiting maintenance mode'.format(nailgun_node.name))

            # Wait until MySQL Galera is UP on some controller
            self.fuel_web.wait_mysql_galera_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            # Wait until Cinder services UP on a controller
            self.fuel_web.wait_cinder_is_up(
                [n.name for n in self.env.d_env.nodes().slaves[0:3]])

            _wait(lambda:
                  self.fuel_web.run_single_ostf_test(
                      cluster_id, test_sets=['sanity'],
                      test_name=map_ostf.OSTF_TEST_MAPPING.get(
                          'Check that required services are running')),
                  timeout=1500)
            logger.debug("Required services are running")

            _wait(lambda:
                  self.fuel_web.run_ostf(cluster_id, test_sets=['ha']),
                  timeout=1500)
            logger.debug("HA tests are pass now")

            try:
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
            except AssertionError:
                logger.debug("Test failed from first probe,"
                             " we sleep 600 second try one more time"
                             " and if it fails again - test will fails ")
                time.sleep(600)
                self.fuel_web.run_ostf(cluster_id,
                                       test_sets=['smoke', 'sanity'])
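Both maintenance-mode tests above finish with the same pattern: run the smoke/sanity OSTF suite and, if it fails, sleep 600 seconds and retry once. A minimal, hypothetical helper for that pattern is sketched below; the name run_with_single_retry and its default delay are my own, not part of fuel-qa.

import logging
import time

logger = logging.getLogger(__name__)


def run_with_single_retry(func, delay=600):
    """Run ``func``; retry once after ``delay`` seconds on AssertionError."""
    try:
        return func()
    except AssertionError:
        logger.debug('First attempt failed, retrying in %s seconds', delay)
        time.sleep(delay)
        return func()


# Hypothetical usage inside one of the tests above:
# run_with_single_retry(lambda: self.fuel_web.run_ostf(
#     cluster_id, test_sets=['smoke', 'sanity']))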
Beispiel #48
0
    def await(self, network_name, timeout=120):
        _wait(lambda: _tcp_ping(
            self.get_ip_address_by_network_name(network_name), 22),
            timeout=timeout)
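The await helper above presumably reduces to polling until TCP port 22 on the node accepts connections. A self-contained approximation using only the standard library (the names tcp_port_open and wait_for_ssh are illustrative, not the actual devops implementation):

import socket
import time


def tcp_port_open(host, port=22, conn_timeout=5):
    """Return True if a TCP connection to host:port succeeds."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.settimeout(conn_timeout)
    try:
        sock.connect((host, port))
        return True
    except socket.error:
        return False
    finally:
        sock.close()


def wait_for_ssh(host, timeout=120, interval=5):
    """Poll until port 22 on ``host`` opens or ``timeout`` seconds pass."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if tcp_port_open(host):
            return True
        time.sleep(interval)
    raise RuntimeError('Port 22 on {0} did not open in {1}s'.format(
        host, timeout))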
Beispiel #49
0
    def ha_pacemaker_restart_heat_engine(self):
        if not self.env.d_env.has_snapshot(self.snapshot_name):
            raise SkipTest()

        self.env.revert_snapshot(self.snapshot_name)
        ocf_success = "DEBUG: OpenStack Orchestration Engine" \
                      " (heat-engine) monitor succeeded"
        ocf_error = "ERROR: OpenStack Heat Engine is not connected to the" \
                    " AMQP server: AMQP connection test returned 1"

        heat_name = 'heat-engine'
        ocf_status = \
            'script -q -c "OCF_ROOT=/usr/lib/ocf' \
            ' /usr/lib/ocf/resource.d/fuel/{0}' \
            ' monitor 2>&1"'.format(heat_name)

        node_name = self.env.d_env.nodes().slaves[0].name

        with self.fuel_web.get_ssh_for_node(node_name) as remote:
            pid = ''.join(
                remote.execute('pgrep {0}'.format(heat_name))['stdout'])
            get_ocf_status = ''.join(
                remote.execute(ocf_status)['stdout']).rstrip()
        assert_true(
            ocf_success in get_ocf_status,
            "heat engine is not succeeded, status is {0}".format(
                get_ocf_status))

        with self.fuel_web.get_ssh_for_node(node_name) as remote:
            amqp_con = len(
                remote.execute("netstat -nap | grep {0} | grep :5673".format(
                    pid))['stdout'])
        assert_true(amqp_con > 0, 'There are no AMQP connections')

        with self.fuel_web.get_ssh_for_node(node_name) as remote:
            remote.execute("iptables -I OUTPUT 1 -m owner --uid-owner heat -m"
                           " state --state NEW,ESTABLISHED,RELATED -j DROP")
            cmd = "netstat -nap | grep {0} | grep :5673".format(pid)
            wait(lambda: len(remote.execute(cmd)['stdout']) == 0, timeout=300)

            get_ocf_status = ''.join(
                remote.execute(ocf_status)['stdout']).rstrip()
        logger.info('ocf status after blocking is {0}'.format(get_ocf_status))
        assert_true(
            ocf_error in get_ocf_status,
            "heat engine is running, status is {0}".format(get_ocf_status))

        with self.fuel_web.get_ssh_for_node(node_name) as remote:
            remote.execute("iptables -D OUTPUT 1 -m owner --uid-owner heat -m"
                           " state --state NEW,ESTABLISHED,RELATED")
            _wait(lambda: assert_true(ocf_success in ''.join(
                remote.execute(ocf_status)['stdout']).rstrip()),
                  timeout=240)
            newpid = ''.join(
                remote.execute('pgrep {0}'.format(heat_name))['stdout'])
            assert_true(pid != newpid, "heat pid is still the same")
            get_ocf_status = ''.join(
                remote.execute(ocf_status)['stdout']).rstrip()

        assert_true(
            ocf_success in get_ocf_status,
            "heat engine is not succeeded, status is {0}".format(
                get_ocf_status))

        with self.fuel_web.get_ssh_for_node(node_name) as remote:
            heat = len(
                remote.execute("netstat -nap | grep {0} | grep :5673".format(
                    newpid))['stdout'])
        assert_true(heat > 0, 'There are no AMQP connections for new pid')
        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.run_ostf(cluster_id=cluster_id)
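The wait and _wait calls used throughout this test appear to be generic polling helpers: wait polls a predicate until it becomes truthy, while _wait keeps calling a function until it stops raising. A rough sketch of those semantics under that assumption (names, defaults and exception types here are guesses, not the real devops/fuel-qa code):

import time


def wait_for(predicate, timeout=300, interval=5):
    """Poll ``predicate`` until it returns a truthy value or time runs out."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        result = predicate()
        if result:
            return result
        time.sleep(interval)
    raise AssertionError(
        'Condition not met within {0} seconds'.format(timeout))


def wait_pass(func, timeout=300, interval=5):
    """Call ``func`` repeatedly until it stops raising or time runs out."""
    deadline = time.time() + timeout
    while True:
        try:
            return func()
        except Exception:
            if time.time() >= deadline:
                raise
            time.sleep(interval)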
Beispiel #50
0
def await_node_deploy(ip, name):
    client = CobblerClient(ip)
    token = client.login('cobbler', 'cobbler')
    _wait(lambda: check_node_ready(client, token, name), timeout=30 * 60)
Beispiel #51
0
    def rollback_automatically_ha(self):
        """Rollback manually ha deployed cluster

        Scenario:
            1. Revert snapshot with Neutron GRE HA 6.1 env
            2. Add raise exception to openstack.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that cluster was not upgraded
            6. Run network verification
            7. Run OSTF
            8. Add 1 cinder node and re-deploy cluster
            9. Run OSTF

        """
        # (ddmitriev) TODO: change the snapshot name to the actual one
        # when reverting 7.0
        if not self.env.d_env.has_snapshot('deploy_neutron_gre_ha'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre_ha")
        cluster_id = self.fuel_web.get_last_created_cluster()
        checkers.upload_tarball(self.env.d_env.get_admin_remote(),
                                hlp_data.TARBALL_PATH, '/var')
        checkers.check_file_exists(self.env.d_env.get_admin_remote(),
                                   os.path.join('/var',
                                                os.path.basename(
                                                    hlp_data.TARBALL_PATH)))
        checkers.untar(self.env.d_env.get_admin_remote(),
                       os.path.basename(hlp_data.TARBALL_PATH), '/var')
        checkers.run_script(self.env.d_env.get_admin_remote(),
                            '/var',
                            'upgrade.sh',
                            password=hlp_data.KEYSTONE_CREDS['password'],
                            rollback=True, exit_code=255)
        checkers.wait_rollback_is_done(self.env.d_env.get_admin_remote(), 3000)
        checkers.check_upgraded_containers(self.env.d_env.get_admin_remote(),
                                           hlp_data.UPGRADE_FUEL_TO,
                                           hlp_data.UPGRADE_FUEL_FROM)
        logger.debug("all containers are ok")
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.d_env.nodes().slaves[0]), timeout=8 * 60)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(
            self.env.d_env.nodes().slaves[:5])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.verify_network(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.bootstrap_nodes(
            self.env.d_env.nodes().slaves[5:6])
        self.fuel_web.update_nodes(
            cluster_id, {'slave-06': ['cinder']},
            True, False
        )
        self.fuel_web.deploy_cluster_wait(cluster_id)
        self.fuel_web.run_ostf(cluster_id=cluster_id,
                               test_sets=['ha', 'smoke', 'sanity'])

        self.env.make_snapshot("rollback_automatically_ha")
Beispiel #52
0
    def fuel_migration(self):
        """Fuel master migration to VM

        Scenario:

            1. Create cluster
            2. Run OSTF tests
            3. Run Network check
            4. Migrate fuel-master to VM
            5. Run OSTF tests
            6. Run Network check
            7. Check statuses for master services

        Duration 210m
        """
        self.env.revert_snapshot("ready_with_3_slaves")
        data = {
            'net_provider': 'neutron',
            'net_segment_type': settings.NEUTRON_SEGMENT_TYPE
        }

        cluster_id = self.fuel_web.create_cluster(
            name=self.__class__.__name__,
            mode=settings.DEPLOYMENT_MODE_HA,
            settings=data)

        self.fuel_web.update_nodes(
            cluster_id,
            {
                'slave-01': ['controller'],
                'slave-02': ['compute']
            }
        )

        # Check network
        self.fuel_web.verify_network(cluster_id)

        # Cluster deploy
        self.fuel_web.deploy_cluster_wait(cluster_id)

        # Check network
        self.fuel_web.verify_network(cluster_id)

        # Fuel migration
        remote = self.env.d_env.get_admin_remote()
        logger.info('Fuel migration on compute slave-02')

        result = remote.execute('fuel-migrate ' + self.fuel_web.
                                get_nailgun_node_by_name('slave-02')['ip'] +
                                ' >/dev/null &')
        assert_equal(result['exit_code'], 0,
                     'Failed to execute "{0}" on remote host: {1}'.
                     format('fuel-migrate ' + self.fuel_web.
                            get_nailgun_node_by_name('slave-02')['ip'],
                            result))
        checkers.wait_phrase_in_log(remote, 60 * 60, interval=0.2,
                                    phrase='Rebooting to begin '
                                           'the data sync process',
                                    log_path='/var/log/fuel-migrate.log')
        remote.clear()
        logger.info('Rebooting to begin the data sync process for fuel '
                    'migrate')

        wait(lambda: not icmp_ping(self.env.get_admin_node_ip()),
             timeout=60 * 15, timeout_msg='Master node has not become offline '
                                          'after rebooting')
        wait(lambda: icmp_ping(self.env.get_admin_node_ip()),
             timeout=60 * 15, timeout_msg='Master node has not become online '
                                          'after rebooting')
        self.env.d_env.nodes().admin.await(
            network_name=self.env.d_env.admin_net, timeout=60 * 15)
        with self.env.d_env.get_admin_remote() as remote:
            checkers.wait_phrase_in_log(remote,
                                        60 * 90, interval=0.1,
                                        phrase='Stop network and up with '
                                               'new settings',
                                        log_path='/var/log/fuel-migrate.log')
        logger.info('Shutting down network')

        wait(lambda: not icmp_ping(self.env.get_admin_node_ip()),
             timeout=60 * 15, interval=0.1,
             timeout_msg='Master node has not gone offline after '
                         'the network was shut down')
        wait(lambda: icmp_ping(self.env.get_admin_node_ip()),
             timeout=60 * 15,
             timeout_msg='Master node has not come back online after '
                         'the network was shut down')

        self.env.d_env.nodes().admin.await(
            network_name=self.env.d_env.admin_net, timeout=60 * 10)

        logger.info("Check containers")
        self.env.docker_actions.wait_for_ready_containers(timeout=60 * 30)

        logger.info("Check services")
        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.assert_ha_services_ready(cluster_id)
        self.fuel_web.assert_os_services_ready(cluster_id)

        # Check network
        self.fuel_web.verify_network(cluster_id)

        # Run ostf
        _wait(lambda:
              self.fuel_web.run_ostf(cluster_id,
                                     test_sets=['smoke', 'sanity']),
              timeout=1500)
        logger.debug("OSTF tests are pass now")
Beispiel #53
0
    def wait_for_provisioning(self):
        _wait(lambda: _tcp_ping(
            self.nodes().admin.get_ip_address_by_network_name(
                self.admin_net), 22), timeout=5 * 60)
Beispiel #54
0
    def rollback_automatically_simple_env(self):
        """Rollback automatically simple deployed cluster

        Scenario:
            1. Revert snapshot with simple neutron gre env
            2. Add raise exception to docker_engine.py file
            3. Run upgrade on master
            4. Check that rollback starts automatically
            5. Check that the cluster was not upgraded and run OSTF
            6. Add 1 cinder node and re-deploy cluster
            7. Run OSTF

        """
        if not self.env.get_virtual_environment().has_snapshot(
                'deploy_neutron_gre'):
            raise SkipTest()

        self.env.revert_snapshot("deploy_neutron_gre")
        cluster_id = self.fuel_web.get_last_created_cluster()
        remote = self.env.get_ssh_to_remote_by_name('slave-01')
        expected_kernel = UpgradeFuelMaster.get_slave_kernel(remote)

        checkers.upload_tarball(self.env.get_admin_remote(),
                                hlp_data.TARBALL_PATH, '/var')
        checkers.check_tarball_exists(self.env.get_admin_remote(),
                                      os.path.basename(hlp_data.TARBALL_PATH),
                                      '/var')
        checkers.untar(self.env.get_admin_remote(),
                       os.path.basename(hlp_data.TARBALL_PATH), '/var')
        # We expect exit code 255 here because the upgrade fails
        # and the script exits with status 255
        checkers.run_script(self.env.get_admin_remote(),
                            '/var',
                            'upgrade.sh',
                            password=hlp_data.KEYSTONE_CREDS['password'],
                            rollback=True,
                            exit_code=255)
        checkers.wait_rollback_is_done(self.env.get_admin_remote(), 3000)
        checkers.check_upgraded_containers(self.env.get_admin_remote(),
                                           hlp_data.UPGRADE_FUEL_TO,
                                           hlp_data.UPGRADE_FUEL_FROM)
        logger.debug("all containers are ok")
        _wait(lambda: self.fuel_web.get_nailgun_node_by_devops_node(
            self.env.nodes().slaves[0]),
              timeout=120)
        logger.debug("all services are up now")
        self.fuel_web.wait_nodes_get_online_state(self.env.nodes().slaves[:3])
        self.fuel_web.assert_nodes_in_ready_state(cluster_id)
        self.fuel_web.assert_fuel_version(hlp_data.UPGRADE_FUEL_FROM)
        self.fuel_web.run_ostf(cluster_id=cluster_id)
        self.env.bootstrap_nodes(self.env.nodes().slaves[3:4])
        self.fuel_web.update_nodes(cluster_id, {'slave-04': ['cinder']}, True,
                                   False)
        self.fuel_web.deploy_cluster_wait(cluster_id)
        if hlp_data.OPENSTACK_RELEASE_UBUNTU in hlp_data.OPENSTACK_RELEASE:
            remote = self.env.get_ssh_to_remote_by_name('slave-04')
            kernel = UpgradeFuelMaster.get_slave_kernel(remote)
            checkers.check_kernel(kernel, expected_kernel)
        self.fuel_web.run_ostf(cluster_id=cluster_id)

        self.env.make_snapshot("rollback_automatic_simple")
Beispiel #55
0
    def wait_for_provisioning(self,
                              timeout=settings.WAIT_FOR_PROVISIONING_TIMEOUT):
        _wait(lambda: _tcp_ping(
            self.d_env.nodes().admin.get_ip_address_by_network_name(
                self.d_env.admin_net), 22), timeout=timeout)
Beispiel #56
0
    def ha_pacemaker_restart_heat_engine(self):
        """Verify heat engine service is restarted
         by pacemaker on amqp connection loss

        Scenario:
            1. SSH to any controller
            2. Check heat-engine status
            3. Block heat-engine amqp connections
            4. Check heat-engine was stopped on current controller
            5. Unblock heat-engine amqp connections
            6. Check heat-engine process is running with new pid
            7. Check amqp connection re-appears for heat-engine

        Snapshot ha_pacemaker_restart_heat_engine

        """
        self.env.revert_snapshot("deploy_ha")
        ocf_success = "DEBUG: OpenStack Orchestration Engine" \
                      " (heat-engine) monitor succeeded"
        ocf_error = "ERROR: OpenStack Heat Engine is not connected to the" \
                    " AMQP server: AMQP connection test returned 1"

        heat_name = 'heat-engine'

        ocf_status = \
            'script -q -c "OCF_ROOT=/usr/lib/ocf' \
            ' /usr/lib/ocf/resource.d/fuel/{0}' \
            ' monitor 2>&1"'.format(heat_name)

        remote = self.fuel_web.get_ssh_for_node(
            self.env.nodes().slaves[0].name)
        pid = ''.join(remote.execute('pgrep heat-engine')['stdout'])
        get_ocf_status = ''.join(remote.execute(ocf_status)['stdout']).rstrip()
        assert_true(
            ocf_success in get_ocf_status,
            "heat engine is not succeeded, status is {0}".format(
                get_ocf_status))
        assert_true(
            len(
                remote.execute("netstat -nap | grep {0} | grep :5673".format(
                    pid))['stdout']) > 0, 'There are no AMQP connections')
        remote.execute("iptables -I OUTPUT 1 -m owner --uid-owner heat -m"
                       " state --state NEW,ESTABLISHED,RELATED -j DROP")

        wait(lambda: len(
            remote.execute("netstat -nap | grep {0} | grep :5673".format(pid))[
                'stdout']) == 0,
             timeout=300)

        get_ocf_status = ''.join(remote.execute(ocf_status)['stdout']).rstrip()
        logger.info('ocf status after blocking is {0}'.format(get_ocf_status))
        assert_true(
            ocf_error in get_ocf_status,
            "heat engine is running, status is {0}".format(get_ocf_status))

        remote.execute("iptables -D OUTPUT 1 -m owner --uid-owner heat -m"
                       " state --state NEW,ESTABLISHED,RELATED")
        _wait(lambda: assert_true(ocf_success in ''.join(
            remote.execute(ocf_status)['stdout']).rstrip()),
              timeout=240)
        newpid = ''.join(remote.execute('pgrep heat-engine')['stdout'])
        assert_true(pid != newpid, "heat pid is still the same")
        get_ocf_status = ''.join(remote.execute(ocf_status)['stdout']).rstrip()
        assert_true(
            ocf_success in get_ocf_status,
            "heat engine is not succeeded, status is {0}".format(
                get_ocf_status))
        assert_true(
            len(
                remote.execute("netstat -nap | grep {0} | grep :5673".format(
                    newpid))['stdout']) > 0)
        cluster_id = self.fuel_web.get_last_created_cluster()
        self.fuel_web.run_ostf(cluster_id=cluster_id)