def ev_timer(self, timer): """Schedule the current command on the next node or the next command on the first batch of nodes. This callback is triggered by `ClusterShell` when a scheduled `Task.timer()` goes off. :Parameters: according to parent :py:meth:`ClusterShell.Event.EventHandler.ev_timer`. """ success_ratio = 1 - ((self.counters['failed'] + self.counters['timeout']) / self.counters['total']) node = None if success_ratio >= self.success_threshold: # Success ratio is still good, looking for the next node with self.lock: # Avoid modifications of the same data from other callbacks triggered by ClusterShell for new_node in self.nodes.values(): if new_node.state.is_pending: # Found the next node where to execute all the commands node = new_node node.state.update(State.scheduled) break if node is not None: # Schedule the exeuction of the first command to the next node with ClusterShell command = node.commands[0] self.logger.debug("next_node=%s, timeout=%s, command='%s'", node.name, command.command, command.timeout) Task.task_self().shell( command.command, handler=timer.eh, timeout=command.timeout, nodes=nodeset(node.name)) else: self.logger.debug('No more nodes left')
def start_command(self, schedule=False):
    """Initialize progress bars and variables for this command execution.

    Executed at the start of each command.

    Arguments:
        schedule (bool, optional): whether the next command should be sent to ClusterShell for execution or not.
    """
    self.counters['success'] = 0
    self.progress.init(self.counters['total'])

    # Schedule the next command; the first one was already scheduled by ClusterShellWorker.execute()
    if schedule:
        with self.lock:  # Avoid modifications of the same data from other callbacks triggered by ClusterShell
            # Nodes available for the next command execution were already updated back to the pending state
            remaining_nodes = [node.name for node in self.nodes.values() if node.state.is_pending]
            first_batch = remaining_nodes[:self.target.batch_size]
            first_batch_set = nodeset_fromlist(first_batch)
            for node_name in first_batch:
                self.nodes[node_name].state.update(State.scheduled)

        command = self.commands[self.current_command_index]
        self.logger.debug(
            "command='%s', timeout=%s, first_batch=%s", command.command, command.timeout, first_batch_set)

        # Schedule the command for execution in ClusterShell
        Task.task_self().flush_buffers()
        Task.task_self().shell(command.command, nodes=first_batch_set, handler=self, timeout=command.timeout)
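# Sketch of the batching step above, using ClusterShell's NodeSet directly
# (cumin's nodeset_fromlist() is presumably a thin helper around it); the node
# names and batch size are illustrative.
from ClusterShell.NodeSet import NodeSet

remaining_nodes = ['db1001', 'db1002', 'db1003', 'db1004', 'db1005']
batch_size = 2
first_batch = remaining_nodes[:batch_size]  # Only the first batch_size pending nodes
first_batch_set = NodeSet.fromlist(first_batch)
print(first_batch_set)  # db[1001-1002]: the folded node set handed to task.shell()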
def end_command(self): """Command terminated, print the result and schedule the next command if criteria are met. Executed at the end of each command inside a lock. Returns: bool: :py:data:`True` if the next command should be scheduled, :py:data:`False` otherwise. """ self._commands_output_report(Task.task_self(), command=self.commands[self.current_command_index].command) self.progress.close() self._failed_commands_report(filter_command_index=self.current_command_index) self._success_nodes_report(command=self.commands[self.current_command_index].command) success_ratio = self.counters['success'] / self.counters['total'] # Abort on failure if success_ratio < self.success_threshold: self.return_value = 2 self.aborted = True # Tells other timers that might trigger after that the abort is already in progress return False if success_ratio == 1: self.return_value = 0 else: self.return_value = 1 if self.current_command_index == (len(self.commands) - 1): self.logger.debug('This was the last command') return False # This was the last command return True
def __init__(self, config, target):
    """Worker ClusterShell constructor.

    :Parameters:
        according to parent :py:meth:`cumin.transports.BaseWorker.__init__`.
    """
    super().__init__(config, target)
    self.task = Task.task_self()  # Initialize a ClusterShell task
    self._handler_instance = None

    # Set any ClusterShell task options
    for key, value in config.get('clustershell', {}).items():
        if isinstance(value, list):
            self.task.set_info(key, ' '.join(value))
        else:
            self.task.set_info(key, value)
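# Hypothetical example of the config dict consumed by the constructor above:
# list values are joined with spaces before being passed to Task.set_info().
# 'fanout' and 'ssh_options' are standard ClusterShell task info keys; the
# values shown are illustrative.
from ClusterShell.Task import task_self

config = {
    'clustershell': {
        'fanout': 16,  # Max number of concurrent connections
        'ssh_options': ['-o StrictHostKeyChecking=no', '-o BatchMode=yes'],
    },
}

task = task_self()
for key, value in config.get('clustershell', {}).items():
    if isinstance(value, list):
        task.set_info(key, ' '.join(value))
    else:
        task.set_info(key, value)

print(task.info('ssh_options'))  # '-o StrictHostKeyChecking=no -o BatchMode=yes'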
def tasks_run(self, arg_tasks, action):
    """Prepare and run tasks."""
    print("TASKS RUN =>", action)
    workers = []
    task = Task.task_self()

    for script in arg_tasks:
        grouped_nodes = Node.Node.group_by_manager(arg_tasks[script])
        for manager in grouped_nodes:
            nodes_list = ",".join([node.name for node in grouped_nodes[manager]])
            command = managers.get_command(manager=manager, service=script, action=action)
            print("Task run: " + command + ", nodes: " + nodes_list)
            worker = task.shell(command, nodes=nodes_list)
            workers.append((worker, script))

    task.run()  # Blocks until all the scheduled shell commands complete
    task.join()
    return workers
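# Hedged follow-up sketch: once task.run() has returned, the output gathered by
# workers like those created above could be read back via iter_buffers().
# The command and the 'localhost' target are illustrative assumptions.
from ClusterShell.NodeSet import NodeSet
from ClusterShell.Task import task_self

task = task_self()
task.shell('uname -r', nodes='localhost')
task.run()
for buf, nodes in task.iter_buffers():
    # buf is a message tree element; nodes is the list of nodes sharing that output
    print('%s: %s' % (NodeSet.fromlist(nodes), buf.message().decode()))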
def ev_timer(self, timer):  # noqa, mccabe: MC0001 too complex (15) FIXME
    """Schedule the current command on the next node or the next command on the first batch of nodes.

    This callback is triggered by `ClusterShell` when a scheduled `Task.timer()` goes off.

    :Parameters:
        according to parent :py:meth:`ClusterShell.Event.EventHandler.ev_timer`.
    """
    success_ratio = 1 - ((self.counters['failed'] + self.counters['timeout']) / self.counters['total'])

    node = None
    if success_ratio >= self.success_threshold:
        # Success ratio is still good, look for the next node
        with self.lock:  # Avoid modifications of the same data from other callbacks triggered by ClusterShell
            for new_node in self.nodes.values():
                if new_node.state.is_pending:
                    # Found the next node on which to execute the command
                    node = new_node
                    node.state.update(State.scheduled)
                    break

    if node is not None:
        # Schedule the execution with ClusterShell of the current command on the next node found above
        command = self.nodes[node.name].commands[self.nodes[node.name].running_command_index + 1]
        self.logger.debug("next_node=%s, timeout=%s, command='%s'", node.name, command.timeout, command.command)
        Task.task_self().shell(
            command.command, handler=timer.eh, timeout=command.timeout, nodes=nodeset(node.name))
        return

    # No more nodes are left for the execution of the current command
    with self.lock:  # Avoid modifications of the same data from other callbacks triggered by ClusterShell
        try:
            command = self.commands[self.current_command_index].command
        except IndexError:
            command = None  # Last command reached

        # Get a list of the nodes still in pending state
        pending = [pending_node.name for pending_node in self.nodes.values() if pending_node.state.is_pending]
        # Nodes in running state are still executing the command and nodes in scheduled state will execute it
        # anyway, as they were already offloaded to ClusterShell
        accounted = len(pending) + self.counters['failed'] + self.counters['success'] + self.counters['timeout']

        # Avoid race conditions
        if self.aborted or accounted != self.counters['total'] or command is None or self.global_timedout:
            self.logger.debug('Skipped timer')
            return

        if pending:  # This usually happens when executing in batches
            self.logger.warning("Command '%s' was not executed on: %s", command, nodeset_fromlist(pending))

        self.logger.info("Completed command '%s'", command)
        restart = self.end_command()
        self.current_command_index += 1  # Move the global pointer of the command in execution
        if restart:
            # Only nodes in pending state will be scheduled for the next command,
            # so move the successful ones back to pending
            for node in self.nodes.values():
                if node.state.is_success:
                    node.state.update(State.pending)

    if restart:
        self.start_command(schedule=True)
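# Minimal hypothetical sketch of the node state machine the two ev_timer()
# callbacks rely on: pending -> scheduled -> running -> success/failed/timeout,
# with success fed back to pending between commands. Names mirror the State
# class used above, but this is an illustrative reimplementation, not cumin's.
class NodeState:
    pending, scheduled, running, success, failed, timeout = range(6)
    transitions = {
        pending: {scheduled},
        scheduled: {running},
        running: {success, failed, timeout},
        success: {pending},  # Reset between commands, as done after end_command()
    }

    def __init__(self):
        self.current = self.pending

    @property
    def is_pending(self):
        return self.current == self.pending

    def update(self, new):
        if new not in self.transitions.get(self.current, set()):
            raise ValueError('invalid transition {} -> {}'.format(self.current, new))
        self.current = new


state = NodeState()
state.update(NodeState.scheduled)
state.update(NodeState.running)
state.update(NodeState.success)
state.update(NodeState.pending)  # Ready for the next command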