def update_testset_list(self, available_tests):
    """Rebuild the testset cache and send the usable testsets out.

    Every testset returned by ``self.testset_class.list()`` is parsed
    against ``available_tests``; the ones that fail to parse are skipped.
    The remaining ones are stored in ``self._testsets`` (keyed by name) and
    described in a ``Signal`` message that is handed to ``send_msg()``.
    """
    signal = controllerd_pb2.Signal()
    advertised = signal.testsets

    with self._testsets_lock:
        self._testsets = {}
        candidates = self.testset_class.list(self.ezbench_dir)

        # The count is maintained by hand, one increment per accepted testset
        advertised.testsets_count = 0
        for candidate in candidates:
            if not candidate.parse(available_tests, silent=True):
                continue

            # Remember the testset so it can be looked up by name later on
            self._testsets[candidate.name] = candidate

            advertised.testsets_count += 1
            entry = advertised.testsets.add()
            entry.name = candidate.name
            entry.description = candidate.description

            # Iterating a testset yields test names, indexing yields rounds
            for test_name in candidate:
                test_entry = entry.tests.add()
                test_entry.name = test_name
                test_entry.rounds = candidate[test_name]

    self.send_msg(signal)
def serve_forever(self):
    """Keep a connection to the controller alive and receive its commands.

    The method loops until ``self._exit_now`` becomes true:

    1. connect to the controller, retrying after a delay on failure;
    2. read ``Cmd`` messages until the link drops (``recv_msg()`` returning
       ``None`` or raising); pings are acknowledged right away, every other
       command is queued in ``self._cmd_queue`` and
       ``self._run_reports_event`` is set;
    3. tear the socket down and, unless an exit was requested, wait
       ``Configuration.controllerd_reconnection_period`` seconds before
       reconnecting.
    """
    while not self._exit_now:
        try:
            self.connect()
        except ConnectionRefusedError:
            print(
                "The controller is not available, re-trying in {} seconds..."
                .format(Configuration.controllerd_reconnection_period))
            # Release the socket before sleeping rather than after
            self.socket.close()
            time.sleep(Configuration.controllerd_reconnection_period)
            continue
        except IOError:
            # BrokenPipeError is a subclass of IOError/OSError, so it lands here
            print(
                "The controller got disconnected, re-trying in {} seconds..."
                .format(Configuration.controllerd_reconnection_period))
            self.socket.close()
            time.sleep(Configuration.controllerd_reconnection_period)
            continue
        except Exception:
            traceback.print_exc()
            self.socket.close()
            time.sleep(1)
            continue

        try:
            while not self._exit_now:
                cmd = self.recv_msg(controllerd_pb2.Cmd())
                if cmd is None:
                    break

                # Answer ping commands immediately
                if cmd.HasField("ping"):
                    sig = controllerd_pb2.Signal()
                    sig.cmd_status.id = cmd.id
                    sig.cmd_status.err_code = controllerd_pb2.CmdStatus.OK
                    self.send_msg(sig)
                else:
                    self._cmd_queue.append(cmd)
                    self._run_reports_event.set()
        except IOError:
            # The link went away, fall through to the reconnection logic
            pass
        except Exception:
            traceback.print_exc()

        try:
            self.socket.shutdown(socket.SHUT_RDWR)
        except IOError:
            # Expected when the peer already dropped the connection
            pass
        except Exception as e:
            print(e)
        finally:
            # Always release the descriptor: shutdown() failing (e.g. socket
            # not connected anymore) must not prevent the close, otherwise
            # one socket is leaked per reconnection.
            try:
                self.socket.close()
            except IOError:
                pass
            except Exception as e:
                print(e)

        # Exit immediately if it was requested, wait before trying to
        # reconnect otherwise
        if self._exit_now:
            return
        time.sleep(Configuration.controllerd_reconnection_period)
def refresh_reports_state(self):
    """Re-scan the reports on disk and send their state to the controller.

    A smartezbench object is created for every report returned by
    ``self.smartezbench_class.list_reports()``. Only the reports whose
    ``controller_name`` user data matches ``self.controller_name`` are added
    to the outgoing ``Signal`` message, along with their profile, running
    modes and the median build/deploy times found in ``self._timingsdb``
    (5 and 2 minutes are used when no sample exists).

    Errors on a single report are printed to stderr and do not prevent the
    others from being processed; a failure to send the message is ignored.

    Returns:
        A dict mapping every report for which a smartezbench object could
        be created to that object, including the reports that were not
        advertised.
    """
    reports = self.smartezbench_class.list_reports(self.ezbench_dir)

    sbenches = dict()
    msg_sig = controllerd_pb2.Signal()
    msg_avail_reports = msg_sig.reports

    for report in reports:
        try:
            sbenches[report] = sbench = self.__create_smartezbench__(report)

            # Skip the reports that do not belong to our controller
            if (self.controller_name is None or
                    sbench.user_data("controller_name", None) != self.controller_name):
                continue

            # Gather every value *before* adding the entry to the message,
            # so that an exception cannot leave a half-filled report behind
            profile = sbench.profile()
            if profile is None:
                profile = ""
            name = sbench.user_data("report_name")
            state = sbench.running_mode().name
            state_disk = sbench.running_mode(False).name

            samples = self._timingsdb.data("build", profile)
            if len(samples) > 0:
                build_time = statistics.median(samples)
            else:
                build_time = 5 * 60  # default to 5 minutes

            samples = self._timingsdb.data("deploy", profile)
            if len(samples) > 0:
                deploy_time = statistics.median(samples)
            else:
                deploy_time = 2 * 60  # default to 2 minutes

            msg_report = msg_avail_reports.reports.add()
            msg_report.name = name
            msg_report.profile = profile
            msg_report.state = state
            msg_report.state_disk = state_disk
            msg_report.build_time = build_time
            msg_report.deploy_time = deploy_time
        except Exception:
            # Best effort: report the problem and move on to the next report
            traceback.print_exc(file=sys.stderr)
            sys.stderr.write("\n")

    # Work around a bug in protobuf
    msg_avail_reports.report_count = len(msg_avail_reports.reports)

    # Try sending the list of reports to the controller. This is deliberately
    # best-effort, but must not swallow KeyboardInterrupt/SystemExit.
    try:
        self.send_msg(msg_sig)
    except Exception:
        pass

    return sbenches
def gen_msg_reports(self, reports):
    """Build a ``Signal`` message listing the given reports.

    Args:
        reports: a sized iterable of mappings; every key of a mapping is
            set as an attribute of the corresponding report entry.

    Returns:
        The populated ``Signal`` message.
    """
    signal = controllerd_pb2.Signal()
    listing = signal.reports
    listing.report_count = len(reports)

    for fields in reports:
        entry = listing.reports.add()
        for field_name in fields:
            setattr(entry, field_name, fields[field_name])

    return signal
def gen_msg_tests(self, tests):
    """Build a ``Signal`` message listing the given tests.

    Args:
        tests: a sized iterable of mappings; every key of a mapping is set
            as an attribute of the corresponding test entry.

    Returns:
        The populated ``Signal`` message.
    """
    signal = controllerd_pb2.Signal()
    listing = signal.tests
    listing.tests_count = len(tests)

    for fields in tests:
        entry = listing.tests.add()
        for field_name in fields:
            setattr(entry, field_name, fields[field_name])

    return signal
def send_logs(self, sbench, msg):
    """Send a log line attached to the report of ``sbench``.

    Nothing is sent when ``sbench`` has no ``report_name`` user data, since
    the report is then not a valid report created by dutd.
    """
    report_name = sbench.user_data("report_name", None)
    if report_name is None:
        return

    signal = controllerd_pb2.Signal()
    signal.log.report = report_name
    signal.log.msg = msg
    self.send_msg(signal)
def __push_reference__(self, sbench, repo, reference):
    """Push ``reference`` to the report remote and signal that it happened.

    Any exception raised along the way (remote lookup, push, sending the
    signal) is caught and logged with the ``Criticality.EE`` level.
    """
    try:
        remote_name = self.__report_remote_name__()
        report_remote = repo.remotes[remote_name]
        callbacks = pygit2.RemoteCallbacks(
            credentials=self.report_upload_credential)
        report_remote.push([reference], callbacks)

        # Let the other side know that the report got pushed
        pushed = controllerd_pb2.Signal()
        pushed.report_pushed.report = sbench.user_data("report_name")
        self.send_msg(pushed)
    except Exception:
        self.log(Criticality.EE, traceback.format_exc())
def gen_msg_testsets(self, testsets):
    """Build a ``Signal`` message listing the given testsets.

    Args:
        testsets: a sized iterable of mappings. The ``"tests"`` key holds
            an iterable of mappings with ``'name'`` and ``'rounds'`` keys;
            every other key is set as an attribute of the testset entry.

    Returns:
        The populated ``Signal`` message.
    """
    signal = controllerd_pb2.Signal()
    listing = signal.testsets
    listing.testsets_count = len(testsets)

    for fields in testsets:
        entry = listing.testsets.add()
        for field_name in fields:
            if field_name != "tests":
                setattr(entry, field_name, fields[field_name])
                continue

            # The tests are a repeated sub-message, fill them one by one
            for test in fields["tests"]:
                test_entry = entry.tests.add()
                test_entry.name = test['name']
                test_entry.rounds = test['rounds']

    return signal
def sbench_hooks_callback(self, state):
    """React to the hook notifications, dispatching on ``state.action``.

    - ``start_running_tests`` / ``done_running_tests``: refresh the state of
      the reports.
    - ``done_running_test``: commit the report for the task found in
      ``state.hook_parameters['task']``, then process the queued commands.
    - ``reboot_needed``: send a reboot signal carrying the current time.
    - ``mode_changed``: refresh the state of the reports, unless an exit is
      in progress.

    Any other action is ignored.
    """
    action = state.action

    if action in ("start_running_tests", "done_running_tests"):
        self.refresh_reports_state()
    elif action == "done_running_test":
        # Commit (and push, if possible) all the changes first...
        self.__commit_report__(state.sbench, state.hook_parameters['task'])

        # ... then go through the received commands, as one of them may
        # require us to stop what we are doing
        self.handle_command_queue()
    elif action == "reboot_needed":
        # NOTE(review): utcnow() returns a naive datetime, which timestamp()
        # interprets as local time - confirm this is what the receiver expects
        reboot_sig = controllerd_pb2.Signal()
        reboot_sig.reboot.timestamp = datetime.utcnow().timestamp()
        self.send_msg(reboot_sig)
    elif action == "mode_changed" and not self._exit_now:
        # When exiting, the mode change is only a way of stopping the
        # execution, so it is not worth advertising
        self.refresh_reports_state()
def handle_command_queue(self):
    """Execute the queued commands and send a status back for each one.

    Commands are popped from the left of ``self._cmd_queue`` until the queue
    is empty or ``self._exit_now`` is set. ``delete_report`` and
    ``set_work`` are the supported commands; anything else, as well as any
    exception raised while executing a command, is reported as an error in
    the command status.
    """
    while not self._exit_now:
        try:
            cmd = self._cmd_queue.popleft()
        except IndexError:
            # The queue has been drained, we are done
            return

        # Prepare the ACK: success is assumed until something goes wrong
        ack = controllerd_pb2.Signal()
        status = ack.cmd_status
        status.id = cmd.id
        status.err_code = controllerd_pb2.CmdStatus.OK

        try:
            if cmd.HasField("delete_report"):
                with self._sbenches_lock:
                    full_name = "{}/{}".format(self.controller_name,
                                               cmd.delete_report.name)
                    target = self._sbenches.get(full_name)
                    if target is not None:
                        target.delete()
                    else:
                        status.err_code = controllerd_pb2.CmdStatus.ERROR
                        status.err_msg = "Could not find the report " + cmd.delete_report.name

                # Only refresh the list when something actually got deleted
                if status.err_code == controllerd_pb2.CmdStatus.OK:
                    self.update_reports_list()
            elif cmd.HasField("set_work"):
                self.__set_work__(cmd.set_work, ack)
            else:
                status.err_code = controllerd_pb2.CmdStatus.ERROR
                status.err_msg = "Could not find any action to do. Software needs to be updated?"
        except Exception:
            traceback.print_exc()
            status.err_code = controllerd_pb2.CmdStatus.ERROR
            status.err_msg = "Caught an unexpected exception"

        # Whatever happened, the command gets its status
        self.send_msg(ack)
def update_test_list(self):
    """Send the known tests, with their execution times, and return them.

    For every test of ``self._tests``, the median and maximum execution
    times come from the samples stored in ``self._timingsdb``; when there is
    no sample, the test's own ``time_estimation`` is used for both.

    Returns:
        The set of the test names that were put in the message.
    """
    advertised_names = set()

    signal = controllerd_pb2.Signal()
    listing = signal.tests

    with self._tests_lock:
        listing.tests_count = len(self._tests)
        for name in self._tests:
            entry = listing.tests.add()
            entry.name = name

            timings = self._timingsdb.data("test", name)
            if len(timings) > 0:
                median_time = statistics.median(timings)
                longest_time = max(timings)
            else:
                # No measurement yet: fall back to the test's estimation
                median_time = longest_time = self._tests[name].time_estimation

            entry.exec_time_median = median_time
            entry.exec_time_max = longest_time
            advertised_names.add(name)

    self.send_msg(signal)

    return advertised_names
def read_cmd(self, machine):
    """Receive, check and acknowledge the next pending command of a machine.

    The REST API is queried for the first queued command that has not been
    acknowledged. If there is one, the command is read from ``machine``,
    acknowledged with an ``OK`` status, and the REST state is verified
    before and after the acknowledgment.

    Returns:
        The received command, or ``None`` when no command is pending.
    """
    response = requests.get(url("/machines/{}".format(machine.name)))
    self.assertEqual(response.status_code, 200)
    machine_state = response.json()

    # Make sure the machine is online
    self.assertTrue(machine_state['online'])

    # Look for a command that has not been acknowledged
    for pending in machine_state['queued_cmds']:
        self.assertIsInstance(pending['acknowledged'], float)
        if pending['acknowledged']:
            self.assertNotEqual(pending['err_code'], "NON_ACK")
            continue

        # This command needs an ACK: wait for the controller to send it
        # before trying to receive it
        self.wait_until(0.5, lambda: self.command_sent(pending['url']),
                        "The command was not sent by the controller")
        break
    else:
        # Every queued command is already acknowledged
        return None

    # The REST API says we have a message, verify that the timings are OK
    # then check it out
    response = requests.get(url(pending['url']))
    cmd_state = response.json()
    self.assertEqual(response.status_code, 200)
    self.assertLessEqual(
        datetime.utcnow().timestamp() - cmd_state['last_sent'], 0.5)
    self.assertEqual(cmd_state['err_code'], "NON_ACK")
    self.assertNotIn("err_msg", cmd_state)

    # Read the actual command and verify that the ID matches
    cmd = machine.recv_msg(controllerd_pb2.Cmd())
    self.assertIsNot(cmd, None)
    self.assertEqual(pending['id'], cmd.id)

    # Acknowledge right away, nothing else is done with the command
    ack = controllerd_pb2.Signal()
    ack.cmd_status.id = cmd.id
    ack.cmd_status.err_msg = "Debug test"
    ack.cmd_status.err_code = controllerd_pb2.CmdStatus.OK
    self.send_sig(machine, ack)

    # Wait for the controller to receive our ACK
    self.wait_until(
        0.5, lambda: self.command_acknowledged(pending['url']),
        "The command did not get acknowledged by the controller")

    # Check that the acknowledgment date matches and that all the other
    # fields match what we sent
    response = requests.get(url(pending['url']))
    cmd_state = response.json()
    self.assertEqual(response.status_code, 200)
    self.assertEqual(cmd_state['id'], cmd.id)
    self.assertIsInstance(cmd_state['description'], str)
    self.assertEqual(cmd_state['err_code'], "OK")
    self.assertEqual(cmd_state['err_msg'], ack.cmd_status.err_msg)
    self.assertLessEqual(
        datetime.utcnow().timestamp() - cmd_state['acknowledged'], 0.5)

    return cmd