def __api_cmd_list(self, arg):
    """
    Build the serialized CmdListRet response summarizing every command in
    self.__cmd_map.

    For each known command, the on-disk "status.bin" in the command's
    directory is parsed to obtain the command's current state.

    Args:
      arg: RPC request payload. Not read by this handler.

    Returns:
      The CmdListRet protobuf serialized to a string, with one cmd_summary
      entry (cmd_id, cmd, state) per command.
    """
    with self.__lock:
        ret = charon_agent_interface_pb2.CmdListRet()
        for cmd_id, cmd_state in self.__cmd_map.iteritems():
            status_path = os.path.join(self.cmd_dir(cmd_id), "status.bin")
            cmd_status = CmdStatus()
            # The status file holds serialized protobuf bytes: read it in binary
            # mode and close the handle deterministically rather than relying on
            # garbage collection.
            with open(status_path, "rb") as status_file:
                cmd_status.ParseFromString(status_file.read())
            cmd_summary = ret.cmd_summary_list.add()
            cmd_summary.cmd_id = cmd_id
            cmd_summary.cmd = cmd_state.cmd
            cmd_summary.state = cmd_status.state
        return ret.SerializeToString()
def test_wait_CmdStatus_kFailed(self):
    """
    When check_cmd reports kFailed with exit status 1 for every VM, every
    result returned by wait() must be unsuccessful.
    """
    # The scenario never requests a stop during the wait.
    self.scenario.should_stop = lambda: False
    for vm in self.vms:
        vm.check_cmd.return_value = CmdStatus(state=CmdStatus.kFailed,
                                              exit_status=1)

    wait_results = self.iogen.wait(self.vms)

    for wait_result in wait_results:
        self.assertFalse(wait_result.success)
def test_start_no_runtime(self):
    """
    start() without a runtime launches the generated command on every VM and
    leaves _expected_workload_finish_secs unset (None).
    """
    for vm in self.vms:
        vm.check_cmd.return_value = CmdStatus(state=CmdStatus.kSucceeded,
                                              exit_status=0)

    # Replace the command builder so the launched command line is predictable.
    with mock.patch.object(self.iogen, "_cmd") as patched_cmd:
        patched_cmd.return_value = "go_make_io_happen"
        self.scenario.should_stop = lambda: False
        self.iogen.start(self.vms)

    for vm in self.vms:
        vm.execute_async.assert_called_once_with(
            self.iogen.get_cmd_id(), "go_make_io_happen", user="******")
    self.assertIsNone(self.iogen._expected_workload_finish_secs)
def test_wait_vm_inaccessible(self):
    """
    If the first VM reports that it is not accessible, wait() marks only that
    VM's result as failed, the remaining results succeed, and at least one
    warning is logged.
    """
    for vm in self.vms:
        vm.check_cmd.return_value = CmdStatus(state=CmdStatus.kSucceeded,
                                              exit_status=0)
    self.scenario.should_stop = lambda: False
    unreachable_vm = self.vms[0]
    unreachable_vm.is_accessible.return_value = False

    # Wrap (rather than replace) the logger so warnings are still emitted while
    # their count is recorded.
    with mock.patch.object(iogen.log, "warning",
                           wraps=iogen.log.warning) as warning_spy:
        wait_results = self.iogen.wait(self.vms)

    self.assertFalse(wait_results[0].success)
    for wait_result in wait_results[1:]:
        self.assertTrue(wait_result.success)
    self.assertGreater(warning_spy.call_count, 0)
def test_start_sets_expected_finish_time_no_stagger(self, mock_time):
    """
    start() with runtime_secs=900 launches the command on every VM and records
    the expected finish time as the (mocked) current time plus the runtime.
    """
    # Freeze the clock seen by the code under test.
    mock_time.time.return_value = 12345
    for vm in self.vms:
        vm.check_cmd.return_value = CmdStatus(state=CmdStatus.kSucceeded,
                                              exit_status=0)

    # Replace the command builder so the launched command line is predictable.
    with mock.patch.object(self.iogen, "_cmd") as patched_cmd:
        patched_cmd.return_value = "go_make_io_happen"
        self.scenario.should_stop = lambda: False
        self.iogen.start(self.vms, runtime_secs=900)

    for vm in self.vms:
        vm.execute_async.assert_called_once_with(
            self.iogen.get_cmd_id(), "go_make_io_happen", user="******")
    self.assertEqual(12345 + 900,
                     self.iogen._expected_workload_finish_secs)
def test_wait_should_stop_during(self):
    """
    If should_stop() flips to True part-way through wait(), some results are
    successful, some are not, and IOGen.stop is invoked at least once.
    """
    for vm in self.vms:
        vm.check_cmd.return_value = CmdStatus(state=CmdStatus.kSucceeded,
                                              exit_status=0)

    with mock.patch.object(self.scenario, "should_stop") as should_stop_mock:
        # The first two polls say "keep going"; every later poll says "stop".
        should_stop_mock.side_effect = chain([False, False], cycle([True]))
        with mock.patch("curie.iogen.iogen.IOGen.stop") as stop_mock:
            wait_results = self.iogen.wait(self.vms)

    succeeded = [entry for entry in wait_results if entry.success]
    failed = [entry for entry in wait_results if not entry.success]
    self.assertGreater(len(succeeded), 0)
    self.assertGreater(len(failed), 0)
    self.assertGreater(stop_mock.call_count, 0)
def __execute_cmd(self, cmd_id, cmd, cmd_uid):
    """
    Start asynchronously executing the command 'cmd' with command ID 'cmd_id'
    as user 'cmd_uid'.

    Creates the command's directory, launches the command through the command
    wrapper, persists the RPC request ("arg.bin") and the initial running
    status ("status.bin") in that directory, and registers the command in
    self.__cmd_map.

    Assumes self.__lock is held.

    Args:
      cmd_id: ID of the command; also selects the command directory.
      cmd: Command to execute (passed to the wrapper as a single argument).
      cmd_uid: UID to run the command as.

    Returns:
      The CurieCmdState created for the command.

    Raises:
      OSError: if the command directory cannot be created (e.g. it already
        exists) or the wrapper cannot be launched.
    """
    cmd_dir = self.cmd_dir(cmd_id)
    os.mkdir(cmd_dir)
    if cmd_uid != self.__agent_uid:
        # Running as another user is only checked to be valid when the agent
        # itself runs as UID 0.
        CHECK_EQ(self.__agent_uid, 0)
        # Change the ownership of 'cmd_dir' so the command can write to its
        # stdout and stderr files.
        pwd_entry = pwd.getpwuid(cmd_uid)
        os.chown(cmd_dir, pwd_entry.pw_uid, pwd_entry.pw_gid)

    # Run the command using the command wrapper. We run the command using the
    # wrapper so we can identify commands run by the agent by matching against
    # process command lines should the agent crash and restart.
    wrapper_path = "/usr/local/bin/curie_cmd_wrapper"
    # Build a real list (not a lazy map object) so both Popen and the log line
    # below always see the full argument vector.
    wrapped_cmd = [str(part)
                   for part in (wrapper_path, cmd_id, cmd_dir, cmd_uid, cmd)]
    wrapper_stdout_path = os.path.join(cmd_dir, "wrapper_stdout.txt")
    wrapper_stderr_path = os.path.join(cmd_dir, "wrapper_stderr.txt")
    log.info("Executing command %s (%s)", cmd_id, cmd)
    log.info("Wrapped command %s (%s)", cmd_id, wrapped_cmd)
    # The child receives its own copies of these descriptors, so the agent's
    # handles are closed as soon as Popen returns instead of being left for the
    # garbage collector.
    with open(wrapper_stdout_path, "w", 0) as wrapper_stdout:
        with open(wrapper_stderr_path, "w", 0) as wrapper_stderr:
            proc = subprocess.Popen(wrapped_cmd,
                                    stdout=wrapper_stdout,
                                    stderr=wrapper_stderr,
                                    close_fds=True)

    # Fill in 'cmd_dir' with the RPC request and the command's initial status.
    arg_path = os.path.join(cmd_dir, "arg.bin")
    OsUtil.write_and_rename(arg_path, flask.request.data)
    cmd_status = CmdStatus()
    cmd_status.state = CmdStatus.kRunning
    cmd_status.pid = proc.pid
    cmd_status.stdout_path = os.path.join(cmd_dir, "stdout.txt")
    cmd_status.stderr_path = os.path.join(cmd_dir, "stderr.txt")
    status_path = os.path.join(cmd_dir, "status.bin")
    status_data = cmd_status.SerializeToString()
    OsUtil.write_and_rename(status_path, status_data)

    # Add command's state to the agent's state.
    cmd_state = CurieCmdState(cmd_id, proc=proc, pid=proc.pid)
    self.__cmd_map[cmd_id] = cmd_state
    return cmd_state
def __maybe_finalize_cmd_state(self, cmd_state, exit_status=None):
    """
    Finalize the command state in memory/disk if the command isn't already in
    a terminal state.

    An 'exit_status' value of -1 indicates the command was stopped. An
    'exit_status' value of -2 indicates that the command had a non-normal exit
    (e.g. was terminated by a signal), which we just classify as failed. An
    'exit_status' of None means the exit status is not known; the command is
    then recorded as kUnknown and no exit status is stored.

    Assumes self.__lock is held.

    Args:
      cmd_state: In-memory state of the command. Its 'proc' and 'pid' are
        reset to None once the command has been finalized.
      exit_status: Exit status of the command, or None if unknown.
    """
    status_path = os.path.join(self.cmd_dir(cmd_state.cmd_id), "status.bin")
    cmd_status = CmdStatus()
    # The status file holds serialized protobuf bytes: read it in binary mode
    # and close the handle deterministically rather than relying on garbage
    # collection.
    with open(status_path, "rb") as status_file:
        cmd_status.ParseFromString(status_file.read())
    if cmd_status.state != CmdStatus.kRunning:
        # Command may have already been stopped.
        return

    log.info("Finalizing status for command %s: exit_status %s",
             cmd_state.cmd_id, exit_status)
    # A running command must still have a PID recorded and no exit status yet.
    CHECK(cmd_status.HasField("pid"), msg=cmd_state.cmd_id)
    CHECK(not cmd_status.HasField("exit_status"), msg=cmd_state.cmd_id)
    if exit_status is not None:
        if exit_status == -1:
            cmd_status.state = CmdStatus.kStopped
        elif exit_status == 0:
            cmd_status.state = CmdStatus.kSucceeded
        else:
            cmd_status.state = CmdStatus.kFailed
        cmd_status.exit_status = exit_status
    else:
        cmd_status.state = CmdStatus.kUnknown
    cmd_status.ClearField("pid")

    # Persist the terminal status before dropping the in-memory process handle.
    status_data = cmd_status.SerializeToString()
    OsUtil.write_and_rename(status_path, status_data)
    cmd_state.proc = None
    cmd_state.pid = None
cmd_id = match.group(1) cmd_id_pid_map[cmd_id] = pid # Kill any unknown commands (ones with no entry in the commands directory). cmd_ids = set(os.listdir(self.cmds_dir())) for cmd_id, pid in cmd_id_pid_map.iteritems(): if NameUtil.sanitize_filename(cmd_id) not in cmd_ids: log.warning("Killing process group %d for unknown command %s", pid, cmd_id) try: os.killpg(pid, signal.SIGKILL) except OSError, ex: CHECK_EQ(ex.errno, errno.ESRCH, msg=str(ex)) # Reconstruct self.__cmd_map. for cmd_id in cmd_ids: status_path = os.path.join(self.cmd_dir(cmd_id), "status.bin") cmd_status = CmdStatus() cmd_status.ParseFromString(open(status_path).read()) if cmd_status.HasField("pid"): pid = cmd_status.pid else: pid = None cmd_state = CurieCmdState(cmd_id, proc=None, pid=pid) if pid is not None and cmd_id not in cmd_id_pid_map: log.warning("Command %s exited while the agent was down", cmd_id) self.__maybe_finalize_cmd_state(cmd_state, exit_status=None) self.__cmd_map[cmd_id] = cmd_state log.info("Recovered state for command %s (state %s)", cmd_id, CmdStatus.Type.Name(cmd_status.state)) return cmd_id_pid_map