Example #1
0
 def _manage_ssh_failure(self, kwargs):
     """
         Reacts to an SSH failure reported through ``kwargs``.

         Stores kwargs['stage_class'] on self.stage_class, then asks
         FailureDetection.node_terminated (with kwargs['settings'] and
         kwargs['vm_id']) whether the VM has terminated.

         If it has, kwargs['vm_ip'] is handed to self._flag_failed_vm, and
         the stage's current_procs/all_procs lists are handed to
         self.flag_all_processes and self.decrease_max_retry. Otherwise the
         call is delegated to self._manage_process_terminated_error.

         Any KeyError raised anywhere in this sequence (for example a key
         missing from kwargs) is logged at debug level and swallowed.
     """
     try:
         self.stage_class = kwargs['stage_class']
         detector = FailureDetection()
         if not detector.node_terminated(kwargs['settings'], kwargs['vm_id']):
             # Not reported as terminated: delegate to the process-level
             # handler and stop here.
             self._manage_process_terminated_error(kwargs)
             return
         self._flag_failed_vm(kwargs['vm_ip'], self.stage_class.created_nodes)
         proc_lists = [self.stage_class.current_procs, self.stage_class.all_procs]
         self.flag_all_processes(proc_lists, kwargs['vm_ip'])
         self.decrease_max_retry(proc_lists, kwargs['vm_ip'], kwargs['process_id'])
     except KeyError as e:
         logger.debug('key_error=%s' % e)
Example #2
0
 def manage_failed_process(self, settings, process_id, host_node, host_node_id,
                           host_node_ip, failed_nodes, executed_procs, current_procs, all_procs,
                           procs_2b_rescheduled):
     """
         Handles a process failure reported for a host node.

         Steps, in order:
           1. self.decrease_max_retry is called for (host_node_ip,
              process_id) over the executed, current and all process lists.
           2. FailureDetection.node_terminated(settings, host_node_id)
              decides the flagging: when it is true, host_node is appended
              to failed_nodes (unless recorded_failed_node says it is
              already there) and self.flag_all_processes is called for the
              node's IP; when it is false, only self.flag_this_process is
              called for this process.
           3. self.collect_failed_processes is called with current_procs
              and procs_2b_rescheduled.

         failed_nodes is mutated in place.

         Returns whatever self.get_total_failed_processes(current_procs)
         returned (evaluated before step 3).
     """
     detector = FailureDetection()
     proc_lists = [executed_procs, current_procs, all_procs]
     self.decrease_max_retry(proc_lists, host_node_ip, process_id)

     if not detector.node_terminated(settings, host_node_id):
         self.flag_this_process(proc_lists, host_node_ip, process_id)
     else:
         already_recorded = detector.recorded_failed_node(
             failed_nodes, host_node_ip)
         if not already_recorded:
             failed_nodes.append(host_node)
         self.flag_all_processes(proc_lists, host_node_ip)

     total_failed = self.get_total_failed_processes(current_procs)
     self.collect_failed_processes(current_procs, procs_2b_rescheduled)
     return total_failed
Example #3
0
 def manage_failed_process(self, settings, process_id, host_node,
                           host_node_id, host_node_ip, failed_nodes,
                           executed_procs, current_procs, all_procs,
                           procs_2b_rescheduled):
     """
         Processes a failure of ``process_id`` on the node at
         ``host_node_ip``.

         self.decrease_max_retry is always called first, over the executed,
         current and all process lists. FailureDetection.node_terminated is
         then consulted for host_node_id:

           * true  -> host_node is appended to failed_nodes when
                      recorded_failed_node reports it is not yet recorded,
                      and self.flag_all_processes is called for the node;
           * false -> self.flag_this_process is called for this process
                      only.

         Finally self.get_total_failed_processes(current_procs) is
         evaluated, self.collect_failed_processes(current_procs,
         procs_2b_rescheduled) is called, and the former's result is
         returned. failed_nodes is modified in place.
     """
     detector = FailureDetection()
     tracked_lists = [executed_procs, current_procs, all_procs]
     self.decrease_max_retry(tracked_lists, host_node_ip, process_id)

     node_down = detector.node_terminated(settings, host_node_id)

     # Record the node only when it is down and not already recorded.
     if node_down and not detector.recorded_failed_node(failed_nodes,
                                                        host_node_ip):
         failed_nodes.append(host_node)

     # Flag every process on the node, or just the one that failed.
     if node_down:
         self.flag_all_processes(tracked_lists, host_node_ip)
     else:
         self.flag_this_process(tracked_lists, host_node_ip, process_id)

     failed_count = self.get_total_failed_processes(current_procs)
     self.collect_failed_processes(current_procs, procs_2b_rescheduled)
     return failed_count
Example #4
0
 def _manage_ssh_failure(self, kwargs):
     """
         Handles an SSH failure described by the ``kwargs`` dict.

         Keys read from kwargs: 'stage_class' (stored on self.stage_class),
         'settings' and 'vm_id' (passed to
         FailureDetection.node_terminated), and, when the node is reported
         terminated, 'vm_ip' and 'process_id'.

         Terminated node: self._flag_failed_vm is called with the VM IP and
         the stage's created_nodes, then self.flag_all_processes and
         self.decrease_max_retry are called over the stage's current_procs
         and all_procs lists.
         Otherwise: self._manage_process_terminated_error(kwargs) is
         called.

         A KeyError raised at any point inside the try block is logged at
         debug level and not re-raised.
     """
     try:
         self.stage_class = kwargs['stage_class']
         node_gone = FailureDetection().node_terminated(
             kwargs['settings'], kwargs['vm_id'])
         if not node_gone:
             self._manage_process_terminated_error(kwargs)
         else:
             vm_ip = kwargs['vm_ip']
             self._flag_failed_vm(vm_ip, self.stage_class.created_nodes)
             affected_lists = [
                 self.stage_class.current_procs,
                 self.stage_class.all_procs,
             ]
             self.flag_all_processes(affected_lists, vm_ip)
             # 'process_id' is looked up only now, after the flagging
             # calls above have already run.
             self.decrease_max_retry(affected_lists, vm_ip,
                                     kwargs['process_id'])
     except KeyError as e:
         logger.debug('key_error=%s' % e)
Example #5
0
 def is_triggered(self, run_settings):
     """
         Checks whether there is a non-zero number of runs still going.
     """
     self.ftmanager = FTManager()
     self.failure_detector = FailureDetection()
     #self.cleanup_nodes = self.ftmanager.get_cleanup_nodes(run_settings, smartconnectorscheduler)
     try:
         failed_str = getval(run_settings, '%s/stages/create/failed_nodes' % RMIT_SCHEMA)
         self.failed_nodes = ast.literal_eval(failed_str)
     except SettingNotFoundException, e:
         logger.debug(e)
         self.failed_nodes = []