Beispiel #1
0
    def start(self, sync=None, finish_execution=None, mode="basic"):
        infra.display_on_terminal(self, 'Starting Endpoint Health Check')
        input_args = self.get_input_arguments()
        self.finish_execution = finish_execution

        if 'openrc_file' in input_args['openstack_api']:
            openrc = input_args['openstack_api']['openrc_file']
        else:
            openrc = None
        if 'password' in input_args['openstack_api']:

            password = input_args['openstack_api']['password']
        else:
            password = None

        if openrc and password:
            noenv = True
        else:
            noenv = False

        self.cred = openstack_api.credentials.Credentials(
            openrc, password, noenv)
        self.frequency = input_args['openstack_api']['frequency']
        max_entries = input_args['openstack_api']['max_entries']

        if sync:
            infra.display_on_terminal(self, "Waiting for Runner Notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self,
                                      "Received notification from Runner")

        self.add_to_graph_file("starttime##" + utils.utils.get_timestamp(
            complete_timestamp=True))
        self.health_check_start()
        self.add_to_graph_file("endtime##" + utils.utils.get_timestamp(
            complete_timestamp=True))
        infra.display_on_terminal(self, "Finished Monitoring")

        # Generate downtime range of all the endpoints
        self.generate_downtime_table(self.endpoint_downtime_dict,
                                     "Endpoints Downtime")

        # Generate downtime range table for all the agents
        self.generate_downtime_table(self.agents_downtime_dict,
                                     "Agent Downtime")

        # Display the final report
        tables_list = ['Endpoints Downtime', 'Agent Downtime']
Beispiel #2
0
    def start(self, sync=None, finish_execution=None, mode="basic"):
        infra.display_on_terminal(self, 'Starting Endpoint Health Check')
        input_args = self.get_input_arguments()
        self.finish_execution = finish_execution

        if 'openrc_file' in input_args['openstack_api']:
            openrc = input_args['openstack_api']['openrc_file']
        else:
            openrc = None
        if 'password' in input_args['openstack_api']:
            
            password = input_args['openstack_api']['password']
        else:
            password = None
        
        if openrc and password:
            noenv = True
        else:
            noenv = False

        self.cred = openstack_api.credentials.Credentials(openrc, password,
                                                          noenv)
        self.frequency = input_args['openstack_api']['frequency']
        max_entries = input_args['openstack_api']['max_entries']
 
        if sync:
            infra.display_on_terminal(self, "Waiting for Runner Notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification from Runner")

        self.add_to_graph_file("starttime##"+utils.utils.get_timestamp
        (complete_timestamp=True))
        self.health_check_start()
        self.add_to_graph_file("endtime##"+utils.utils.get_timestamp
        (complete_timestamp=True))
        infra.display_on_terminal(self, "Finished Monitoring")

        # Generate downtime range of all the endpoints
        self.generate_downtime_table(self.endpoint_downtime_dict,
                                     "Endpoints Downtime")

        # Generate downtime range table for all the agents
        self.generate_downtime_table(self.agents_downtime_dict,
                                     "Agent Downtime")

        # Display the final report
        tables_list = ['Endpoints Downtime', 'Agent Downtime']
Beispiel #3
0
    def start(self, sync=None, finish_execution=None):

        self.finish_execution = finish_execution
        ip_address = self.get_config('nagios_ip')
        input_args = self.get_input_arguments()
        filter_type = str(input_args['nagios']['type'])

        host_config = infra.get_openstack_config()
        host_filter = []
        if filter_type == "node":
            host_filter = self.generate_filter_list(host_config,filter_type)

        # Execution starts here
        if sync:
            infra.display_on_terminal(self, "Waiting for Runner Notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification from Runner")

        start_time = datetime.datetime.\
            fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')
        
        while infra.is_execution_completed(self.finish_execution) is False:
            data = self.get_nagios_data(self.url, ip_address)
            self.process_and_report(data, self.reportDict, host_filter,
                                    filter_type)
            time.sleep(20)

        end_time = datetime.\
            datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H:%M:%S')
        end_seconds = time.time()

        NagiosMonitor.write_to_file(self.reportDict, start_time, end_time)
        NagiosMonitor.calSummaryReport(self.summaryDict,
                                       self.reportDict, end_seconds)

        self.health_display_report()
Beispiel #4
0
    def node_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution
        infra.display_on_terminal(self, "Entering  Node Disruption plugin")

        table_name = "Node Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(self, table_name,
                                ["Node", "IP", "TimeStamp",
                                 "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")


        input_args_dict = self.get_input_arguments()
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        if input_args:
            print "Inpt " + str(input_args)
            role = input_args.get('role', None)


        nodes_to_be_disrupted = []
        for node in host_config:
            if role in host_config[node].get('role', None):
                infra.display_on_terminal(self, node, " will be disrupted ")
                nodes_to_be_disrupted.append(node)

        node_reboot_command = "reboot -f "

        if self.sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        ha_interval = self.get_ha_interval()
        for i in range(1):
        #while infra.is_execution_completed(self.finish_execution) is False:
           # for node in nodes_to_be_disrupted:
                node = nodes_to_be_disrupted[0]
                ip = host_config.get(node, None).get('ip', None)
                user = host_config.get(node, None).get('user', None)
                password = host_config.get(node, None).get('password', None)
                infra.display_on_terminal(self, "IP: ", ip, " User: "******" Pwd: ", password)
                infra.display_on_terminal(self, "Executing ",
                                          node_reboot_command)
                code, out, error = infra.ssh_and_execute_command(ip, user,
                                                                 password,
                                                            node_reboot_command)
                if error:
                    infra.display_on_terminal(self, "Error ", error,
                                              "color=red")

                infra.display_on_terminal(self, "waiting for ", ip, " to "
                                                                    "come "
                                                                    "online")
                if infra.wait_for_ping(ip, 240, 10):
                    infra.display_on_terminal(self, "Node ", ip,
                                              " is online", "color=green")

                infra.display_on_terminal(self, "Will sleep for interval ",
                                          str(ha_interval))
                #time.sleep(ha_interval)
                infra.add_table_rows(self, table_name, [[node,
                                                         ip,
                                                         utils.get_timestamp(),
                                                         HAConstants.OKGREEN +
                                                         'Rebooted' +
                                                         HAConstants.ENDC]])

        # bring it back to stable state
        infra.display_on_terminal(self, "Waiting for the node to become stable")
        if infra.wait_for_ping(ip, 240, 10):
            infra.display_on_terminal(self, "Node ", ip, " is in stable state",
                                      "color=green")

        infra.display_on_terminal(self, "Finishing Node Disruption")
Beispiel #5
0
    def vm_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution
        infra.display_on_terminal(self, "Entering  VM Disruption plugin")

        table_name = "VM Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(self, table_name,
                                ["VM", "IP", "TimeStamp",
                                 "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")


        input_args_dict = self.get_input_arguments()
        print '========',input_args_dict
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        if input_args:
            print "Inpt " + str(input_args)
            role = input_args.get('role', None)

        print '*'*20
        print 'host_config ==',host_config
        print 'input_args ==',input_args
        print '*'*20 

        
        # nodes_to_be_disrupted = []
        for node in host_config:
            if role == host_config[node].get('role', None):
                jump_host = node   

        print jump_host
        nodes_to_be_disrupted = input_args.get('name')
        print nodes_to_be_disrupted

        node_reboot_command = "reboot"

        if self.sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        openrc = host_config.get(jump_host, None).get('openrc', None)
        password = host_config.get(jump_host, None).get('password', None)

        print openrc
        ha_interval = self.get_ha_interval()
        # for i in range(1):
        while infra.is_execution_completed(self.finish_execution) is False:
           for node in nodes_to_be_disrupted:
                # node = nodes_to_be_disrupted[0]
                # ip = host_config.get(node, None).get('ip', None)
                # user = host_config.get(node, None).get('user', None)
                # password = host_config.get(node, None).get('password', None)
                ip = node
                # openrc = host_config.get(node, None).get('openrc', None)
                # password = host_config.get(node, None).get('password', None)
                infra.display_on_terminal(self, "IP: ", ip, " openrc: ",
                                          openrc)

                infra.display_on_terminal(self, "Executing ",
                                          node_reboot_command)


                # Using nova api performing the vm stop operation
                cred = credentials.Credentials(openrc, password,'no_env')

                try:
                    nova = nova_api.NovaHealth(cred.get_nova_credentials_v2())
                    ret = nova.nova_stop_server(ip)
                    time.sleep(ha_interval)
                    infra.display_on_terminal(self, "Rebooting ",ip)
                    nova.nova_start_server(ip)
                    error = []
                except Exception,error:
                    pass

                if error:
                    infra.display_on_terminal(self, "Error ", error,
                                              "color=red")

                infra.display_on_terminal(self, "waiting for ", ip, " to "
                                                                    "come "
                                                                    "online")
                if infra.wait_for_ping(ip, 240, 10):
                    infra.display_on_terminal(self, "Node ", ip,
                                              " is online", "color=green")

                infra.display_on_terminal(self, "Will sleep for interval ",
                                          str(ha_interval))
                #time.sleep(ha_interval)
                infra.add_table_rows(self, table_name, [[node,
                                                         ip,
                                                         utils.get_timestamp(),
                                                         HAConstants.OKGREEN +
                                                         'Rebooted' +
                                                         HAConstants.ENDC]])
    def start(self, sync=None, finish_execution=None, args=None):
        '''
        Required start method to implement for the class.
        '''
        # Parse user data and Initialize.
        self.finish_execution = finish_execution
        data = self.get_input_arguments()
        self.loglevel = data['ansible'].get("loglevel", "DEBUG")
        self.frequency = data['ansible'].get('frequency', 5)
        self.max_hist_size = data['ansible'].get('max_hist', 25)
        self.dockerized = data['ansible'].get('dockerized', False)

        global LOG
        LOG = infra.ha_logging(__name__, level=self.loglevel)
        print "ANSIBLE LOG LEVEL: ", self.loglevel

        LOG.debug("User data: %s", data)

        # Get MariaDB Username/pass
        self.mariadb_user = None
        self.mariadb_password = None
        mariadb_info = data['ansible'].get('mariadb', None)
        if mariadb_info is not None:
            self.mariadb_user = data['ansible']['mariadb'].get('user', None)
            self.mariadb_password = data['ansible']['mariadb'].get('password',
                                                                   None)

        self.ansirunner = None
        setup_file = "../../configs/openstack_config.yaml"
        self.ansiresults = collections.deque(maxlen=self.max_hist_size)

        self.inventory = ConfigHelper(host_file=setup_file)
        LOG.debug("parsed data: ", self.inventory.parsed_data)

        host_list = self.inventory.get_host_list()
        host_ip_list = self.inventory.get_host_ip_list()
        control_ip_list = self.inventory.get_host_ip_list(role='controller')
        compute_ip_list = self.inventory.get_host_ip_list(role='compute')
        remote_user = self.inventory.get_host_username(host_list[0])
        LOG.debug("Inventory: [all: %s], [control: %s] [compute: %s]",
                  host_ip_list, control_ip_list, compute_ip_list)
        LOG.debug("Remote user: "******"Waiting for Runner Notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification from Runner")
        while infra.is_execution_completed(self.finish_execution) is False:
            ####################################################
            # Ansible Monitoring Loop.
            ####################################################
            ts_results = []
            ts = utils.get_timestamp(complete_timestamp=True)
            ts_results.append({'name': 'ts', 'ts': ts})
            msg = "=" * 50 + "\n" + "Timestamp: " + ts
            infra.display_on_terminal(self, msg)

            # Ping and SSH Check.
            host_ip_list = self.inventory.get_host_ip_list()
            ansi_results = self.ansible_ssh_ping_check(host_ip_list,
                                                       remote_user)
            ts_results.append(ansi_results)

            # Process check.
            for service in SERVICE_LIST:
                host_ip_list = self.inventory.get_host_ip_list(role=service['role'])
                ansi_results = self.ansible_check_process(host_ip_list,
                                                          remote_user,
                                                          service['service'])
                ts_results.append(ansi_results)

            # RabbitMQ Check.
            host_ip_list = self.inventory.get_host_ip_list(role='controller')
            ansi_results = self.ansible_check_rabbitmq(host_ip_list,
                                                       remote_user)
            ts_results.append(ansi_results)

            # MariaDB Check.
            ansi_results = self.ansible_check_mariadb(host_ip_list,
                                                      remote_user)
            ts_results.append(ansi_results)

            # Add the ts results to main result list.
            self.ansiresults.append(ts_results)

            time.sleep(self.frequency)


        # Generate Summary Reports
        self.display_ansible_summary_report()
        self.display_asible_process_report()
        infra.display_infra_report()
        self.generate_graphs_output()
Beispiel #7
0
    def container_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution
        infra.display_on_terminal(self, "Entering  Container Disruption plugin")

        table_name = "Container Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(self, table_name,
                                ["Host", "Container Process",
                                 "Timestamp",
                                 "Status of Disruption"])
        input_args_dict = self.get_input_arguments()
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        if input_args:
            print "Inpt " + str(input_args)
            container_name = input_args.get('container_name', None)
            role = input_args.get('role', None)
            disruption_type = input_args.get('disruption', None)
        infra.display_on_terminal(self, "Container ", container_name,
                                  " will be disrupted")

        nodes_to_be_disrupted = []
        for node in host_config:
            if 'controller' in host_config[node].get('role', None):
                infra.display_on_terminal(self, node, " will be disrupted ")
                nodes_to_be_disrupted.append(node)
                # For now disrupt on only one node
                break
	#  Deprecate process disruptor and converge on this for both cases later
        container_stop_command = "systemctl stop " + container_name
        container_start_command = "systemctl start " + container_name
	ha_start_delay = self.get_ha_start_delay()
        if sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")
            # Start the actual disruption after 45 seconds
            time.sleep(ha_start_delay)

        ha_interval = self.get_ha_interval()
        disruption_count = self.get_disruption_count()
        if disruption_type == 'infinite':
            #Override the disruption count in executor.yaml
            disruption_count = 1
        while infra.is_execution_completed(self.finish_execution) is False:
            if disruption_count:
                disruption_count = disruption_count - 1
                for node in nodes_to_be_disrupted:
                  ip = host_config.get(node, None).get('ip', None)
                  user = host_config.get(node, None).get('user', None)
                  password = host_config.get(node, None).get('password', None)
                  infra.display_on_terminal(self, "Stopping ", container_name)
                  infra.display_on_terminal(self, "Executing ", container_stop_command)
                  code, out, error = infra.ssh_and_execute_command(ip, user,
                                                                 password,
                                                            container_stop_command)
                  infra.add_table_rows(self, table_name, [[ip,
                                                         container_name,
                                                         utils.get_timestamp(),
                                                         HAConstants.WARNING +
                                                         'Stopped' +
                                                         HAConstants.ENDC]])
                  if disruption_type == 'infinite':
                      infra.display_on_terminal(self, "Infinite disruption chosen bring up container manually")
                      break
                  infra.display_on_terminal(self, "Sleeping for interval ",
                                      str(ha_interval), " seconds")
                  time.sleep(ha_interval)
                  infra.display_on_terminal(self, "Starting ", container_name)
                  infra.display_on_terminal(self, "Executing ", container_start_command)
                  code, out, error = infra.ssh_and_execute_command(ip, user,
                                                            password,
                                                         container_start_command)
                  time.sleep(ha_interval)
                  infra.add_table_rows(self, table_name, [[ip,
                                                         container_name,
                                                         utils.get_timestamp(),
                                                         HAConstants.OKGREEN +
                                                         'Started' +
                                                         HAConstants.ENDC]])

        # bring it back to stable state
	if disruption_type != 'infinite':
            infra.display_on_terminal(self, "Bringing the container to stable state")
            infra.display_on_terminal(self, "Executing ", container_start_command)
            code, out, error = infra.ssh_and_execute_command(ip, user, password,
                                                         container_start_command)

        infra.display_on_terminal(self, "Finishing Container Disruption")
Beispiel #8
0
    def process_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution

        table_name = "Process Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(
            self, table_name,
            ["Host", "Process", "TimeStamp", "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")

        input_args_dict = self.get_input_arguments()
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        if input_args:
            print "Inpt " + str(input_args)
            process_name = input_args.get('process_name', None)
            role = input_args.get('role', None)
            type = input_args.get('type', None)

        infra.display_on_terminal(self, "Process ", process_name,
                                  " will be disrupted")

        nodes_to_be_disrupted = []
        for node in host_config:
            if 'controller' in host_config[node].get('role', None):
                infra.display_on_terminal(self, node, " will be disrupted ")
                nodes_to_be_disrupted.append(node)
                self.expected_failures.append(node + "::" + process_name)

        self.set_expected_failures(self.expected_failures)
        rhel_stop_command = "systemctl stop " + process_name
        rhel_start_command = "systemctl start " + process_name

        if sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        ha_interval = self.get_ha_interval()
        disruption_count = self.get_disruption_count()
        infra.display_on_terminal(self, "Process will be disrupted ",
                                  str(disruption_count))
        while infra.is_execution_completed(self.finish_execution) is False:
            if disruption_count:
                disruption_count = disruption_count - 1
                for node in nodes_to_be_disrupted:
                    ip = host_config.get(node, None).get('ip', None)
                    user = host_config.get(node, None).get('user', None)
                    password = host_config.get(node,
                                               None).get('password', None)
                    infra.display_on_terminal(self, "IP: ", ip, " User: "******" Pwd: ", password)
                    infra.display_on_terminal(self, "Stopping ", process_name)
                    infra.display_on_terminal(self, "Executing ",
                                              rhel_stop_command)
                    code, out, error = infra.ssh_and_execute_command(
                        ip, user, password, rhel_stop_command)
                    infra.add_table_rows(self, table_name, [[
                        ip, process_name,
                        utils.get_timestamp(),
                        HAConstants.WARNING + 'Stopped' + HAConstants.ENDC
                    ]])
                    infra.display_on_terminal(self, "Sleeping for interval ",
                                              str(ha_interval), " seconds")
                    time.sleep(ha_interval)
                    infra.display_on_terminal(self, "Starting ", process_name)
                    infra.display_on_terminal(self, "Executing ",
                                              rhel_start_command)
                    code, out, error = infra.ssh_and_execute_command(
                        ip, user, password, rhel_start_command)
                    time.sleep(ha_interval)
                    infra.add_table_rows(self, table_name, [[
                        ip, process_name,
                        utils.get_timestamp(),
                        HAConstants.OKGREEN + 'Started' + HAConstants.ENDC
                    ]])

        # bring it back to stable state
        infra.display_on_terminal(self,
                                  "Bringing the process  to stable state")
        infra.display_on_terminal(self, "Executing ", rhel_start_command)
        code, out, error = infra.ssh_and_execute_command(
            ip, user, password, rhel_start_command)

        infra.display_on_terminal(self, "Finishing Process Disruption")
    def jump_host_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution
        infra.display_on_terminal(self, "Entering Jump Host Disruption plugin")

        table_name = "Jump host Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(self, table_name,
                                ["VM", "IP", "TimeStamp",
                                 "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")


        input_args_dict = self.get_input_arguments()
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        print "*"*20
        print input_args_dict
        print "input_args ==>",input_args
        print "host_config ==>",host_config


        nodes_to_be_disrupted = input_args.get('name',[])
        
        if input_args:
            print "Inpt " + str(input_args)
            role = input_args.get('role', None)

        # jump_hosts = []
        for node in host_config:
            if role in host_config[node].get('role', None):
                jump_host = node
                # jump_hosts.append(node)

        print "###############",jump_host

        node_reboot_command = "reboot -f"

        if self.sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        ha_interval = self.get_ha_interval()

        # jump host details
        jump_host_ip = host_config.get(node, None).get('ip', None)
        user = host_config.get(node,None).get('user',None)
        password = host_config.get(node,None).get('password',None)

        #TODO - if its more than one jump host

        # Write into txt file to pass via ansible playbook
        f = open('/tmp/remote_ips','w+')
        for ip in nodes_to_be_disrupted:
            f.write(ip+'\n')
        f.close()


        while infra.is_execution_completed(self.finish_execution) is False:
           # for node in nodes_to_be_disrupted:
                # node = nodes_to_be_disrupted[0]
                # ip = host_config.get(node, None).get('ip', None)
                # user = host_config.get(node, None).get('user', None)
                # password = host_config.get(node, None).get('password', None)
                ip = node
                # openrc = host_config.get(node, None).get('openrc', None)
                # password = host_config.get(node, None).get('password', None)
                infra.display_on_terminal(self, "Nodes to be disrupted: ", str(nodes_to_be_disrupted), " Jump host: ",
                                          jump_host_ip)

                infra.display_on_terminal(self, "Executing ",
                                          node_reboot_command)
                print "*"*50
                print "user :"******"password :"******"jump_host_ip :",jump_host_ip
                ret = AnsibleRunner(jump_host_ip,user,password).execute_on_remote()
                print ret
                # parse the output for report

                # node_list = os.walk('/tmp/hainfra').next()[1]
                # output_objs = eval(open('/tmp/hainfra/'+node+'/tmp/output','r').read())
                output_objs = eval(open('/tmp/hainfra/output','r').read())
                print output_objs

                for results in output_objs:
                    error = []
                    for (hostname, result) in results['contacted'].items():
                        if 'failed' in result:
                            print "%s >>> %s" % (hostname, result['msg'])
                            error = result['msg']
                    
                    if error:
                        infra.display_on_terminal(self, "Error ", error,
                                                  "color=red")

                    infra.display_on_terminal(self, "waiting for ", hostname, " to "
                                                                        "come "
                                                                        "online")
                    if infra.wait_for_ping(hostname, 240, 5):
                        infra.display_on_terminal(self, "Node ", hostname,
                                                  " is online", "color=green")

                    infra.display_on_terminal(self, "Will sleep for interval ",
                                              str(ha_interval))
                    #time.sleep(ha_interval)
                    if not error:
                        infra.add_table_rows(self, table_name, [[jump_host_ip,
                                                                 hostname,
                                                                 utils.get_timestamp(),
                                                                 HAConstants.OKGREEN +
                                                                 'Rebooted' +
                                                                 HAConstants.ENDC]])

                    else:

                        infra.add_table_rows(self, table_name, [[jump_host_ip,
                                                                 hostname,
                                                                 utils.get_timestamp(),
                                                                 HAConstants.FAIL +
                                                                 str(error)+
                                                                 HAConstants.ENDC]])

                    # bring it back to stable state
                    '''
                    infra.display_on_terminal(self, "Waiting for the node to become stable")
                    if infra.wait_for_ping(hostname, 240, 10):
                        infra.display_on_terminal(self, "Node ", hostname, " is in stable state",
                                                  "color=green")
                    '''
        infra.display_on_terminal(self, "Finishing Node Disruption")
Beispiel #10
0
    def vm_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution
        infra.display_on_terminal(self, "Entering  VM Disruption plugin")

        table_name = "VM Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(
            self, table_name,
            ["VM", "IP", "TimeStamp", "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")

        input_args_dict = self.get_input_arguments()
        print '========', input_args_dict
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        if input_args:
            print "Inpt " + str(input_args)
            role = input_args.get('role', None)

        print '*' * 20
        print 'host_config ==', host_config
        print 'input_args ==', input_args
        print '*' * 20

        # nodes_to_be_disrupted = []
        for node in host_config:
            if role == host_config[node].get('role', None):
                jump_host = node

        print jump_host
        nodes_to_be_disrupted = input_args.get('name')
        print nodes_to_be_disrupted

        node_reboot_command = "reboot"

        if self.sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        openrc = host_config.get(jump_host, None).get('openrc', None)
        password = host_config.get(jump_host, None).get('password', None)

        print openrc
        ha_interval = self.get_ha_interval()
        # for i in range(1):
        while infra.is_execution_completed(self.finish_execution) is False:
            for node in nodes_to_be_disrupted:
                # node = nodes_to_be_disrupted[0]
                # ip = host_config.get(node, None).get('ip', None)
                # user = host_config.get(node, None).get('user', None)
                # password = host_config.get(node, None).get('password', None)
                ip = node
                # openrc = host_config.get(node, None).get('openrc', None)
                # password = host_config.get(node, None).get('password', None)
                infra.display_on_terminal(self, "IP: ", ip, " openrc: ",
                                          openrc)

                infra.display_on_terminal(self, "Executing ",
                                          node_reboot_command)

                # Using nova api performing the vm stop operation
                cred = credentials.Credentials(openrc, password, 'no_env')

                try:
                    nova = nova_api.NovaHealth(cred.get_nova_credentials_v2())
                    ret = nova.nova_stop_server(ip)
                    time.sleep(ha_interval)
                    infra.display_on_terminal(self, "Rebooting ", ip)
                    nova.nova_start_server(ip)
                    error = []
                except Exception, error:
                    pass

                if error:
                    infra.display_on_terminal(self, "Error ", error,
                                              "color=red")

                infra.display_on_terminal(self, "waiting for ", ip, " to "
                                          "come "
                                          "online")
                if infra.wait_for_ping(ip, 240, 10):
                    infra.display_on_terminal(self, "Node ", ip, " is online",
                                              "color=green")

                infra.display_on_terminal(self, "Will sleep for interval ",
                                          str(ha_interval))
                #time.sleep(ha_interval)
                infra.add_table_rows(self, table_name, [[
                    node, ip,
                    utils.get_timestamp(),
                    HAConstants.OKGREEN + 'Rebooted' + HAConstants.ENDC
                ]])
Beispiel #11
0
    def process_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution

        table_name = "Process Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(self, table_name,
                                ["Host", "Process", "TimeStamp", "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")

        input_args_dict = self.get_input_arguments()
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        if input_args:
            print "Inpt " + str(input_args)
            process_name = input_args.get('process_name', None)
            role = input_args.get('role', None)
            type = input_args.get('type', None)

        infra.display_on_terminal(self, "Process ", process_name,
                                  " will be disrupted")

        nodes_to_be_disrupted = []
        for node in host_config:
            if 'controller' in host_config[node].get('role', None):
                infra.display_on_terminal(self, node, " will be disrupted ")
                nodes_to_be_disrupted.append(node)
                self.expected_failures.append(node + "::" + process_name)

        self.set_expected_failures(self.expected_failures)
        rhel_stop_command = "systemctl stop " + process_name
        rhel_start_command = "systemctl start " + process_name

        if sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        ha_interval = self.get_ha_interval()
        disruption_count = self.get_disruption_count()
        infra.display_on_terminal(self, "Process will be disrupted " , str(disruption_count))
        while infra.is_execution_completed(self.finish_execution) is False:
            if disruption_count:
              disruption_count = disruption_count - 1
              for node in nodes_to_be_disrupted:
                  ip = host_config.get(node, None).get('ip', None)
                  user = host_config.get(node, None).get('user', None)
                  password = host_config.get(node, None).get('password', None)
                  infra.display_on_terminal(self, "IP: ", ip, " User: "******" Pwd: ", password)
                  infra.display_on_terminal(self, "Stopping ", process_name)
                  infra.display_on_terminal(self, "Executing ", rhel_stop_command)
                  code, out, error = infra.ssh_and_execute_command(ip, user,
                                                                 password,
                                                            rhel_stop_command)
                  infra.add_table_rows(self, table_name,
                                     [[ip, process_name,
                                       utils.get_timestamp(),
                                       HAConstants.WARNING +
                                       'Stopped' + HAConstants.ENDC]])
                  infra.display_on_terminal(self, "Sleeping for interval ",
                                      str(ha_interval), " seconds")
                  time.sleep(ha_interval)
                  infra.display_on_terminal(self, "Starting ", process_name)
                  infra.display_on_terminal(self, "Executing ", rhel_start_command)
                  code, out, error = infra.ssh_and_execute_command(ip, user,
                                                            password,
                                                         rhel_start_command)
                  time.sleep(ha_interval)
                  infra.add_table_rows(self, table_name,
                                     [[ip, process_name,
                                       utils.get_timestamp(),
                                       HAConstants.OKGREEN +
                                       'Started' + HAConstants.ENDC]])

        # bring it back to stable state
        infra.display_on_terminal(self, "Bringing the process  to stable state")
        infra.display_on_terminal(self, "Executing ", rhel_start_command)
        code, out, error = infra.ssh_and_execute_command(ip, user, password,
                                                         rhel_start_command)

        infra.display_on_terminal(self, "Finishing Process Disruption")
    def jump_host_process_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution
        infra.display_on_terminal(self, "Entering Jump Host Process Disruption plugin")

        table_name = "Jump host Process Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(self, table_name,
                                ["VM", "Process", "TimeStamp",
                                 "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")


        input_args_dict = self.get_input_arguments()
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        print "*"*20
        print input_args_dict
        print "input_args ==>",input_args
        print "host_config ==>",host_config


        nodes_to_be_disrupted = input_args.get('node',[])
        process_name = input_args.get('process_name',[])
        
        if input_args:
            print "Inpt " + str(input_args)
            role = input_args.get('role', None)

        # jump_hosts = []
        for node in host_config:
            if role in host_config[node].get('role', None):
                jump_host = node
                # jump_hosts.append(node)

        print "###############",process_name

        # node_reboot_command = "reboot -f"
        # process_start_command = 
        rhel_stop_command = "systemctl stop " + process_name
        rhel_start_command = "systemctl start " + process_name


        # jump host details
        jump_host_ip = host_config.get(node, None).get('ip', None)
        user = host_config.get(node,None).get('user',None)
        password = host_config.get(node,None).get('password',None)
        # copy necessary file to jump host
        runner = AnsibleRunner(jump_host_ip,user,password)
        infra.display_on_terminal(self, "Copying to ",
                                   jump_host_ip)        
        runner.copy('jump_host_executor.py','scripts/','/tmp/')
        
        infra.display_on_terminal(self, "Copied to ",
                                   jump_host_ip)        


        if self.sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        ha_interval = self.get_ha_interval()


        #TODO - if its more than one jump host

        # Write into txt file to pass via ansible playbook
        '''
        f = open('/tmp/remote_ips','w+')
        for ip in nodes_to_be_disrupted:
            f.write(ip+'\n')
        f.close()
        '''

        while infra.is_execution_completed(self.finish_execution) is False:
                ip = node
                # openrc = host_config.get(node, None).get('openrc', None)
                # password = host_config.get(node, None).get('password', None)
                infra.display_on_terminal(self, "Nodes to be disrupted: ", str(nodes_to_be_disrupted), " Jump host: ",
                                          jump_host_ip)

                infra.display_on_terminal(self, "Executing ",
                                          rhel_stop_command)
                # ret = AnsibleRunner(jump_host_ip,user,password).execute_on_remote()
                infra.display_on_terminal(self, "Stopping ", process_name)
                # replacing the playbook logic with ansible runner
                # Execute the script on jump host
                ret = runner.shell('python /tmp/jump_host_executor.py "%s" "%s" >>/tmp/output'%(nodes_to_be_disrupted,rhel_stop_command))
                print ret
                # Fetching the result to local
                runner.fetch('output','/tmp/','/tmp/hainfra/output')
                # Deleting the output file
                runner.shell('rm /tmp/output')
                # parse the output for report
                output_objs = eval(open('/tmp/hainfra/output','r').read())
                print output_objs

                for results in output_objs:
                    error = []
                    for (hostname, result) in results['contacted'].items():
                        if 'failed' in result:
                            print "%s >>> %s" % (hostname, result['msg'])
                            error = result['msg']
                    
                    if error:
                        infra.display_on_terminal(self, "Error ", error,
                                                  "color=red")


                    if not error:
                        infra.add_table_rows(self, table_name, [[hostname,
                                                                 process_name,
                                                                 utils.get_timestamp(),
                                                                 HAConstants.OKGREEN +
                                                                 'Stopped' +
                                                                 HAConstants.ENDC]])

                    else:

                        infra.add_table_rows(self, table_name, [[hostname,
                                                                 process_name,
                                                                 utils.get_timestamp(),
                                                                 HAConstants.FAIL +
                                                                 str(error)+
                                                                 HAConstants.ENDC]])

                    infra.display_on_terminal(self, "Will sleep for interval ",
                                              str(ha_interval))
                    time.sleep(ha_interval)

                infra.display_on_terminal(self, "Starting ", process_name)
                infra.display_on_terminal(self, "Executing ",
                                          rhel_start_command)
                ret = runner.shell('python /tmp/jump_host_executor.py "%s" "%s" >>/tmp/output'%(nodes_to_be_disrupted,rhel_start_command))
                print ret
                runner.fetch('output','/tmp/','/tmp/hainfra/output')
                runner.shell('rm /tmp/output')
                # parse the output for report
                output_objs = eval(open('/tmp/hainfra/output','r').read())
                for results in output_objs:
                    hostname = results['hostname']
                    error = results['error']
                    if error:
                        infra.display_on_terminal(self, "Error ", error,
                                                  "color=red")


                    if not error:
                        infra.add_table_rows(self, table_name, [[hostname,
                                                                 process_name,
                                                                 utils.get_timestamp(),
                                                                 HAConstants.OKGREEN +
                                                                 'Started' +
                                                                 HAConstants.ENDC]])

                    else:

                        infra.add_table_rows(self, table_name, [[hostname,
                                                                 process_name,
                                                                 utils.get_timestamp(),
                                                                 HAConstants.FAIL +
                                                                 str(error)+
                                                                 HAConstants.ENDC]])


        infra.display_on_terminal(self, "Finishing Process Disruption")
Beispiel #13
0
    def jump_host_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution
        infra.display_on_terminal(self, "Entering Jump Host Disruption plugin")

        table_name = "Jump host Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(
            self, table_name,
            ["VM", "IP", "TimeStamp", "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")

        input_args_dict = self.get_input_arguments()
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        print "*" * 20
        print input_args_dict
        print "input_args ==>", input_args
        print "host_config ==>", host_config

        nodes_to_be_disrupted = input_args.get('name', [])

        if input_args:
            print "Inpt " + str(input_args)
            role = input_args.get('role', None)

        # jump_hosts = []
        for node in host_config:
            if role in host_config[node].get('role', None):
                jump_host = node
                # jump_hosts.append(node)

        print "###############", jump_host

        node_reboot_command = "reboot -f"

        if self.sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        ha_interval = self.get_ha_interval()

        # jump host details
        jump_host_ip = host_config.get(node, None).get('ip', None)
        user = host_config.get(node, None).get('user', None)
        password = host_config.get(node, None).get('password', None)

        #TODO - if its more than one jump host

        # Write into txt file to pass via ansible playbook
        f = open('/tmp/remote_ips', 'w+')
        for ip in nodes_to_be_disrupted:
            f.write(ip + '\n')
        f.close()

        while infra.is_execution_completed(self.finish_execution) is False:
            # for node in nodes_to_be_disrupted:
            # node = nodes_to_be_disrupted[0]
            # ip = host_config.get(node, None).get('ip', None)
            # user = host_config.get(node, None).get('user', None)
            # password = host_config.get(node, None).get('password', None)
            ip = node
            # openrc = host_config.get(node, None).get('openrc', None)
            # password = host_config.get(node, None).get('password', None)
            infra.display_on_terminal(self, "Nodes to be disrupted: ",
                                      str(nodes_to_be_disrupted),
                                      " Jump host: ", jump_host_ip)

            infra.display_on_terminal(self, "Executing ", node_reboot_command)
            print "*" * 50
            print "user :"******"password :"******"jump_host_ip :", jump_host_ip
            ret = AnsibleRunner(jump_host_ip, user,
                                password).execute_on_remote()
            print ret
            # parse the output for report

            # node_list = os.walk('/tmp/hainfra').next()[1]
            # output_objs = eval(open('/tmp/hainfra/'+node+'/tmp/output','r').read())
            output_objs = eval(open('/tmp/hainfra/output', 'r').read())
            print output_objs

            for results in output_objs:
                error = []
                for (hostname, result) in results['contacted'].items():
                    if 'failed' in result:
                        print "%s >>> %s" % (hostname, result['msg'])
                        error = result['msg']

                if error:
                    infra.display_on_terminal(self, "Error ", error,
                                              "color=red")

                infra.display_on_terminal(self, "waiting for ", hostname,
                                          " to "
                                          "come "
                                          "online")
                if infra.wait_for_ping(hostname, 240, 5):
                    infra.display_on_terminal(self, "Node ", hostname,
                                              " is online", "color=green")

                infra.display_on_terminal(self, "Will sleep for interval ",
                                          str(ha_interval))
                #time.sleep(ha_interval)
                if not error:
                    infra.add_table_rows(self, table_name, [[
                        jump_host_ip, hostname,
                        utils.get_timestamp(),
                        HAConstants.OKGREEN + 'Rebooted' + HAConstants.ENDC
                    ]])

                else:

                    infra.add_table_rows(self, table_name, [[
                        jump_host_ip, hostname,
                        utils.get_timestamp(),
                        HAConstants.FAIL + str(error) + HAConstants.ENDC
                    ]])

                # bring it back to stable state
                '''
                    infra.display_on_terminal(self, "Waiting for the node to become stable")
                    if infra.wait_for_ping(hostname, 240, 10):
                        infra.display_on_terminal(self, "Node ", hostname, " is in stable state",
                                                  "color=green")
                    '''
        infra.display_on_terminal(self, "Finishing Node Disruption")
    def jump_host_process_disruption(self, sync=None, finish_execution=None):
        self.sync = sync
        self.finish_execution = finish_execution
        infra.display_on_terminal(
            self, "Entering Jump Host Process Disruption plugin")

        table_name = "Jump host Process Disruption"
        infra.create_report_table(self, table_name)
        infra.add_table_headers(
            self, table_name,
            ["VM", "Process", "TimeStamp", "Status of Disruption"])

        infra.display_on_terminal(self, "Entering  Process Disruption plugin")

        input_args_dict = self.get_input_arguments()
        node_name = input_args_dict.keys()[0]
        input_args = input_args_dict.get(node_name, None)
        host_config = infra.get_openstack_config()

        print "*" * 20
        print input_args_dict
        print "input_args ==>", input_args
        print "host_config ==>", host_config

        nodes_to_be_disrupted = input_args.get('node', [])
        process_name = input_args.get('process_name', [])

        if input_args:
            print "Inpt " + str(input_args)
            role = input_args.get('role', None)

        # jump_hosts = []
        for node in host_config:
            if role in host_config[node].get('role', None):
                jump_host = node
                # jump_hosts.append(node)

        print "###############", process_name

        # node_reboot_command = "reboot -f"
        # process_start_command =
        rhel_stop_command = "systemctl stop " + process_name
        rhel_start_command = "systemctl start " + process_name

        # jump host details
        jump_host_ip = host_config.get(node, None).get('ip', None)
        user = host_config.get(node, None).get('user', None)
        password = host_config.get(node, None).get('password', None)
        # copy necessary file to jump host
        runner = AnsibleRunner(jump_host_ip, user, password)
        infra.display_on_terminal(self, "Copying to ", jump_host_ip)
        runner.copy('jump_host_executor.py', 'scripts/', '/tmp/')

        infra.display_on_terminal(self, "Copied to ", jump_host_ip)

        if self.sync:
            infra.display_on_terminal(self, "Waiting for notification")
            infra.wait_for_notification(sync)
            infra.display_on_terminal(self, "Received notification, Starting")

        ha_interval = self.get_ha_interval()

        #TODO - if its more than one jump host

        # Write into txt file to pass via ansible playbook
        '''
        f = open('/tmp/remote_ips','w+')
        for ip in nodes_to_be_disrupted:
            f.write(ip+'\n')
        f.close()
        '''

        while infra.is_execution_completed(self.finish_execution) is False:
            ip = node
            # openrc = host_config.get(node, None).get('openrc', None)
            # password = host_config.get(node, None).get('password', None)
            infra.display_on_terminal(self, "Nodes to be disrupted: ",
                                      str(nodes_to_be_disrupted),
                                      " Jump host: ", jump_host_ip)

            infra.display_on_terminal(self, "Executing ", rhel_stop_command)
            # ret = AnsibleRunner(jump_host_ip,user,password).execute_on_remote()
            infra.display_on_terminal(self, "Stopping ", process_name)
            # replacing the playbook logic with ansible runner
            # Execute the script on jump host
            ret = runner.shell(
                'python /tmp/jump_host_executor.py "%s" "%s" >>/tmp/output' %
                (nodes_to_be_disrupted, rhel_stop_command))
            print ret
            # Fetching the result to local
            runner.fetch('output', '/tmp/', '/tmp/hainfra/output')
            # Deleting the output file
            runner.shell('rm /tmp/output')
            # parse the output for report
            output_objs = eval(open('/tmp/hainfra/output', 'r').read())
            print output_objs

            for results in output_objs:
                error = []
                for (hostname, result) in results['contacted'].items():
                    if 'failed' in result:
                        print "%s >>> %s" % (hostname, result['msg'])
                        error = result['msg']

                if error:
                    infra.display_on_terminal(self, "Error ", error,
                                              "color=red")

                if not error:
                    infra.add_table_rows(self, table_name, [[
                        hostname, process_name,
                        utils.get_timestamp(),
                        HAConstants.OKGREEN + 'Stopped' + HAConstants.ENDC
                    ]])

                else:

                    infra.add_table_rows(self, table_name, [[
                        hostname, process_name,
                        utils.get_timestamp(),
                        HAConstants.FAIL + str(error) + HAConstants.ENDC
                    ]])

                infra.display_on_terminal(self, "Will sleep for interval ",
                                          str(ha_interval))
                time.sleep(ha_interval)

            infra.display_on_terminal(self, "Starting ", process_name)
            infra.display_on_terminal(self, "Executing ", rhel_start_command)
            ret = runner.shell(
                'python /tmp/jump_host_executor.py "%s" "%s" >>/tmp/output' %
                (nodes_to_be_disrupted, rhel_start_command))
            print ret
            runner.fetch('output', '/tmp/', '/tmp/hainfra/output')
            runner.shell('rm /tmp/output')
            # parse the output for report
            output_objs = eval(open('/tmp/hainfra/output', 'r').read())
            for results in output_objs:
                hostname = results['hostname']
                error = results['error']
                if error:
                    infra.display_on_terminal(self, "Error ", error,
                                              "color=red")

                if not error:
                    infra.add_table_rows(self, table_name, [[
                        hostname, process_name,
                        utils.get_timestamp(),
                        HAConstants.OKGREEN + 'Started' + HAConstants.ENDC
                    ]])

                else:

                    infra.add_table_rows(self, table_name, [[
                        hostname, process_name,
                        utils.get_timestamp(),
                        HAConstants.FAIL + str(error) + HAConstants.ENDC
                    ]])

        infra.display_on_terminal(self, "Finishing Process Disruption")