def insert_job(self, tag, job, commit=None):
    """Store a job, its keyvals and its tests in the TKO database.

    The job's machine is looked up first. A machine without an index is
    inserted; otherwise its stored information is updated. The tko_jobs
    row is updated when the job already has an ``index`` attribute and
    inserted otherwise, in which case the last autonumber value is
    recorded on ``job.index``.

    @param tag: The job tag; the afe_job_id is derived from it.
    @param job: The job object to store. Its machine_idx attribute (and,
            on insert, its index attribute) is set as a side effect.
    @param commit: Passed through unchanged to every database call.
    """
    job.machine_idx = self.lookup_machine(job.machine)
    if job.machine_idx:
        self.update_machine_information(job, commit=commit)
    else:
        job.machine_idx = self.insert_machine(job, commit=commit)

    job_id = utils.get_afe_job_id(tag)
    row = {
        "tag": tag,
        "label": job.label,
        "username": job.user,
        "machine_idx": job.machine_idx,
        "queued_time": job.queued_time,
        "started_time": job.started_time,
        "finished_time": job.finished_time,
        "afe_job_id": job_id,
    }

    if not hasattr(job, "index"):
        # First time this job is written: insert and remember the new key.
        self.insert("tko_jobs", row, commit=commit)
        job.index = self.get_last_autonumber_value()
    else:
        self.update("tko_jobs", row, {"job_idx": job.index}, commit=commit)

    self.update_job_keyvals(job, commit=commit)
    for job_test in job.tests:
        self.insert_test(job, job_test, commit=commit)
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided builds, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    The id of the currently running suite job is derived from the tag of
    dargs['job']. If it cannot be determined, a warning is logged and the
    suite runs with suite_job_id=None.

    @param dargs: Dictionary containing the arguments passed to _SuiteSpec().
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises MalformedDependenciesException: if the dependency_info file for
                                            the required build fails to parse.
    """
    suite_spec = _SuiteSpec(**dargs)

    afe = frontend_wrappers.RetryingAFE(timeout_min=30, delay_sec=10,
                                        user=suite_spec.job.user, debug=False)
    tko = frontend_wrappers.RetryingTKO(timeout_min=30, delay_sec=10,
                                        user=suite_spec.job.user, debug=False)

    try:
        my_job_id = int(tko_utils.get_afe_job_id(dargs['job'].tag))
    except (TypeError, ValueError):
        # int() raises ValueError for a non-numeric string and TypeError
        # for None, so both "no id found" shapes fall back gracefully.
        my_job_id = None
        logging.warning('Could not determine own job id.')
    else:
        logging.debug('Determined own job id: %d', my_job_id)

    _perform_reimage_and_run(suite_spec, afe, tko, suite_job_id=my_job_id)

    logging.debug('Returning from dynamic_suite.reimage_and_run.')
def insert_job(self, tag, job, commit=None):
    """Insert or update a job row, then write its keyvals and tests.

    @param tag: The job tag, stored in the row and used to derive the
            afe_job_id.
    @param job: The job object. Its machine_idx attribute is always set;
            its index attribute is set when a new row is inserted.
    @param commit: Forwarded as-is to each database helper.
    """
    job.machine_idx = self.lookup_machine(job.machine)
    if job.machine_idx:
        # Known machine: refresh what is stored about it.
        self.update_machine_information(job, commit=commit)
    else:
        job.machine_idx = self.insert_machine(job, commit=commit)

    derived_job_id = utils.get_afe_job_id(tag)

    fields = {
        'tag': tag,
        'label': job.label,
        'username': job.user,
        'machine_idx': job.machine_idx,
        'queued_time': job.queued_time,
        'started_time': job.started_time,
        'finished_time': job.finished_time,
        'afe_job_id': derived_job_id,
    }

    already_stored = hasattr(job, 'index')
    if not already_stored:
        self.insert('tko_jobs', fields, commit=commit)
        job.index = self.get_last_autonumber_value()
    else:
        where = {'job_idx': job.index}
        self.update('tko_jobs', fields, where, commit=commit)

    self.update_job_keyvals(job, commit=commit)

    for current_test in job.tests:
        self.insert_test(job, current_test, commit=commit)
def run_provision_suite(**dargs):
    """
    Run a provision suite.

    Will re-image a number of devices (of the specified board) with the
    provided builds by scheduling dummy_Pass.

    The id of the currently running suite job is derived from the tag of
    the spec's job. If it cannot be determined, a warning is logged and
    the suite is created with suite_job_id=None.

    @param dargs: Dictionary containing the arguments passed to _SuiteSpec().

    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises MalformedDependenciesException: if the dependency_info file for
                                            the required build fails to parse.
    """
    spec = _SuiteSpec(**dargs)

    afe = frontend_wrappers.RetryingAFE(timeout_min=30, delay_sec=10,
                                        user=spec.job.user, debug=False)
    tko = frontend_wrappers.RetryingTKO(timeout_min=30, delay_sec=10,
                                        user=spec.job.user, debug=False)

    try:
        my_job_id = int(tko_utils.get_afe_job_id(spec.job.tag))
    except (TypeError, ValueError):
        # int() raises ValueError for a non-numeric string and TypeError
        # for None, so both "no id found" shapes fall back gracefully.
        my_job_id = None
        logging.warning('Could not determine own job id.')
    else:
        logging.debug('Determined own job id: %d', my_job_id)

    suite = ProvisionSuite(
            tag=spec.name,
            builds=spec.builds,
            board=spec.board,
            devserver=spec.devserver,
            count=1,
            afe=afe,
            tko=tko,
            pool=spec.pool,
            results_dir=spec.job.resultdir,
            max_runtime_mins=spec.max_runtime_mins,
            timeout_mins=spec.timeout_mins,
            file_bugs=spec.file_bugs,
            suite_job_id=my_job_id,
            extra_deps=spec.suite_dependencies,
            priority=spec.priority,
            wait_for_results=spec.wait_for_results,
            job_retry=spec.job_retry,
            max_retries=spec.max_retries,
            offload_failures_only=spec.offload_failures_only,
            test_source_build=spec.test_source_build,
            run_prod_code=spec.run_prod_code,
            job_keyvals=spec.job_keyvals,
            test_args=spec.test_args)

    _run_suite_with_spec(suite, spec)

    logging.debug('Returning from dynamic_suite.run_provision_suite')
def set_afe_job_id_and_tag(self, pb_job, tag):
    """Populate the tag and afe_job_id fields of a pb job.

    @param pb_job: The pb job whose fields are set in place.
    @param tag: Stored as pb_job.tag; pb_job.afe_job_id is derived from it
            via utils.get_afe_job_id().
    """
    pb_job.tag = tag
    derived_id = utils.get_afe_job_id(tag)
    pb_job.afe_job_id = derived_id
def set_afe_job_id_and_tag(self, pb_job, tag):
    """Record a tag, and the afe job id derived from it, on a pb job.

    @param pb_job: The pb job to modify; its tag and afe_job_id fields
            are overwritten.
    @param tag: The value assigned to pb_job.tag and passed to
            utils.get_afe_job_id() to obtain pb_job.afe_job_id.
    """
    pb_job.tag = tag
    job_id_from_tag = utils.get_afe_job_id(tag)
    pb_job.afe_job_id = job_id_from_tag
def insert_job(self, tag, job, parent_job_id=None, commit=None):
    """Insert or update a tko job together with its keyvals and tests.

    Besides writing to the database, this sets machine_idx and afe_job_id
    on the job object, afe_parent_job_id when a parent job id is given,
    and index when a new row is inserted.

    @param tag: The job tag.
    @param job: The job object.
    @param parent_job_id: The parent job id.
    @param commit: Forwarded to every database call made here.

    @return The dict of data written to the tko_jobs table, with the
            job's index added under 'job_idx'.
    """
    job.machine_idx = self.lookup_machine(job.machine)
    if not job.machine_idx:
        job.machine_idx = self.insert_machine(job, commit=commit)
    elif job.machine:
        # Only try to update tko_machines record if machine is set. This
        # prevents unnecessary db writes for suite jobs.
        self.update_machine_information(job, commit=commit)

    job_id = utils.get_afe_job_id(tag)

    row = {
        'tag': tag,
        'label': job.label,
        'username': job.user,
        'machine_idx': job.machine_idx,
        'queued_time': job.queued_time,
        'started_time': job.started_time,
        'finished_time': job.finished_time,
        'afe_job_id': job_id,
        'afe_parent_job_id': parent_job_id,
        'build': job.build,
        'build_version': job.build_version,
        'board': job.board,
        'suite': job.suite,
    }

    job.afe_job_id = job_id
    if parent_job_id:
        job.afe_parent_job_id = str(parent_job_id)

    # TODO(ntang): check job.index directly.
    if not hasattr(job, 'index'):
        self.insert('tko_jobs', row, commit=commit)
        job.index = self.get_last_autonumber_value()
    else:
        self.update('tko_jobs', row, {'job_idx': job.index}, commit=commit)

    self.update_job_keyvals(job, commit=commit)
    for job_test in job.tests:
        self.insert_test(job, job_test, commit=commit)

    row['job_idx'] = job.index
    return row
def insert_job(self, tag, job, parent_job_id=None, commit=None):
    """Insert or update a tko job together with its keyvals and tests.

    When the job has a label that site_utils.parse_job_name() can parse,
    the build, build_version, board and suite fields taken from it are
    stored alongside the basic job fields.

    @param tag: The job tag; the afe_job_id is derived from it.
    @param job: The job object. Its machine_idx attribute (and, on insert,
            its index attribute) is set as a side effect.
    @param parent_job_id: Stored in the afe_parent_job_id column.
    @param commit: Forwarded to every database call made here.
    """
    job.machine_idx = self.lookup_machine(job.machine)
    if not job.machine_idx:
        job.machine_idx = self.insert_machine(job, commit=commit)
    elif job.machine:
        # Only try to update tko_machines record if machine is set. This
        # prevents unnecessary db writes for suite jobs.
        self.update_machine_information(job, commit=commit)

    job_id = utils.get_afe_job_id(tag)

    row = {
        'tag': tag,
        'label': job.label,
        'username': job.user,
        'machine_idx': job.machine_idx,
        'queued_time': job.queued_time,
        'started_time': job.started_time,
        'finished_time': job.finished_time,
        'afe_job_id': job_id,
        'afe_parent_job_id': parent_job_id,
    }

    if job.label:
        parsed_label = site_utils.parse_job_name(job.label)
        if parsed_label:
            # Copy over the label-derived columns; absent keys become None.
            for column in ('build', 'build_version', 'board', 'suite'):
                row[column] = parsed_label.get(column, None)

    if not hasattr(job, 'index'):
        self.insert('tko_jobs', row, commit=commit)
        job.index = self.get_last_autonumber_value()
    else:
        self.update('tko_jobs', row, {'job_idx': job.index}, commit=commit)

    self.update_job_keyvals(job, commit=commit)
    for job_test in job.tests:
        self.insert_test(job, job_test, commit=commit)
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided build, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    The id of the currently running suite job is derived from the tag of
    dargs['job']. If it cannot be determined, a warning is logged and the
    suite runs with suite_job_id=None.

    @param dargs: Dictionary containing the arguments listed below.

    Currently required args:
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param builds: the builds to install e.g.
                   {'cros-version:': 'x86-alex-release/R18-1655.0.0',
                    'fw-version:': 'x86-alex-firmware/R36-5771.50.0'}
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: the maximum number of devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @param file_bugs: automatically file bugs on test failures.
                      Default: False
    @param suite_dependencies: A string with a comma separated list of suite
                               level dependencies, which act just like test
                               dependencies and are appended to each test's
                               set of dependencies at job creation time.
    @param devserver_url: url to the selected devserver.
    @param predicate: Optional argument. If present, should be a function
                      mapping ControlData objects to True if they should be
                      included in suite. If argument is absent, suite
                      behavior will default to creating a suite based on
                      the SUITE field of control files.
    @param job_retry: A bool value indicating whether jobs should be retried
                      on failure. If True, the field 'JOB_RETRIES' in control
                      files will be respected. If False, do not retry.
    @param max_retries: Maximum retry limit at suite level.
                        Regardless how many times each individual test
                        has been retried, the total number of retries
                        happening in the suite can't exceed _max_retries.
                        Default to None, no max.
    @param offload_failures_only: Only enable gs_offloading for failed jobs.
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises MalformedDependenciesException: if the dependency_info file for
                                            the required build fails to parse.
    """
    suite_spec = SuiteSpec(**dargs)

    # To support provision both CrOS and firmware, option builds is added to
    # SuiteSpec, e.g.,
    # builds = {'cros-version:': 'x86-alex-release/R18-1655.0.0',
    #           'fw-version:': 'x86-alex-firmware/R36-5771.50.0'}
    # Option build, version_prefix and firmware_reimage will all be obsoleted.
    # For backwards compatibility, these option will be default to
    # firmware_reimage = False
    # version_prefix = provision.CROS_VERSION_PREFIX
    # build will be used as CrOS build
    suite_spec.firmware_reimage = False
    # </backwards_compatibility_hacks>

    # version_prefix+build should make it into each test as a DEPENDENCY. The
    # easiest way to do this is to tack it onto the suite_dependencies.
    suite_spec.suite_dependencies.extend(
            provision.join(version_prefix, build)
            for version_prefix, build in suite_spec.builds.items())

    afe = frontend_wrappers.RetryingAFE(timeout_min=30, delay_sec=10,
                                        user=suite_spec.job.user, debug=False)
    tko = frontend_wrappers.RetryingTKO(timeout_min=30, delay_sec=10,
                                        user=suite_spec.job.user, debug=False)

    try:
        my_job_id = int(tko_utils.get_afe_job_id(dargs['job'].tag))
    except (TypeError, ValueError):
        # int() raises ValueError for a non-numeric string and TypeError
        # for None, so both "no id found" shapes fall back gracefully.
        my_job_id = None
        logging.warning('Could not determine own job id.')
    else:
        logging.debug('Determined own job id: %d', my_job_id)

    if suite_spec.predicate is None:
        predicate = Suite.name_in_tag_predicate(suite_spec.name)
    else:
        predicate = suite_spec.predicate

    _perform_reimage_and_run(suite_spec, afe, tko,
                             predicate, suite_job_id=my_job_id)

    logging.debug('Returning from dynamic_suite.reimage_and_run.')
def parse_one(db, pid_file_manager, jobname, path, parse_options):
    """Parse a single job. Optionally send email on failure.

    The function returns early, without parsing, when the job is already
    in the database and reparse is not requested, or when no status log is
    found under path. Otherwise it parses the status log, fills in job
    metadata, records result sizes in the job keyvals and counts failed
    tests on pid_file_manager.

    Unless dry_run is set, it then mails the failure report (if requested),
    uploads perf values and Sponge results, writes the job to the database
    and invalidates the tests of the original job when this is a retry.

    Finally it optionally serializes the job to a file, generates and
    exports a suite timing report, and, for a successful job configured to
    offload failures only, marks the results as not to be offloaded.

    @param db: database object.
    @param pid_file_manager: pidfile.PidFileManager object.
    @param jobname: the tag used to search for existing job in db,
                    e.g. '1234-chromeos-test/host1'
    @param path: The path to the results to be parsed.
    @param parse_options: _ParseOptions instance.
    """
    reparse = parse_options.reparse
    mail_on_failure = parse_options.mail_on_failure
    dry_run = parse_options.dry_run
    suite_report = parse_options.suite_report
    datastore_creds = parse_options.datastore_creds
    export_to_gcloud_path = parse_options.export_to_gcloud_path

    tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
    old_job_idx = db.find_job(jobname)
    if old_job_idx is not None and not reparse:
        tko_utils.dprint("! Job is already parsed, done")
        return

    # look up the status version
    job_keyval = models.job.read_keyval(path)
    status_version = job_keyval.get("status_version", 0)

    parser = parser_lib.parser(status_version)
    job = parser.make_job(path)
    tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
    status_log_path = _find_status_log_path(path)
    if not status_log_path:
        tko_utils.dprint("! Unable to parse job, no status file")
        return
    _parse_status_log(parser, job, status_log_path)

    if old_job_idx is not None:
        job.job_idx = old_job_idx
        unmatched_tests = _match_existing_tests(db, job)
        if not dry_run:
            _delete_tests_from_db(db, unmatched_tests)

    job.afe_job_id = tko_utils.get_afe_job_id(jobname)
    job.skylab_task_id = tko_utils.get_skylab_task_id(jobname)
    job.afe_parent_job_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.skylab_parent_task_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.build = None
    job.board = None
    job.build_version = None
    job.suite = None
    if job.label:
        label_info = site_utils.parse_job_name(job.label)
        if label_info:
            job.build = label_info.get('build', None)
            job.build_version = label_info.get('build_version', None)
            job.board = label_info.get('board', None)
            job.suite = label_info.get('suite', None)

    result_utils_lib.LOG = tko_utils.dprint
    _throttle_result_size(path)

    # Record test result size to job_keyvals
    start_time = time.time()
    result_size_info = site_utils.collect_result_sizes(
            path, log=tko_utils.dprint)
    tko_utils.dprint('Finished collecting result sizes after %s seconds' %
                     (time.time()-start_time))
    job.keyval_dict.update(result_size_info.__dict__)

    # TODO(dshi): Update sizes with sponge_invocation.xml and throttle it.

    # check for failures
    message_lines = [""]
    job_successful = True
    for test in job.tests:
        if not test.subdir:
            continue
        tko_utils.dprint("* testname, subdir, status, reason: %s %s %s %s"
                         % (test.testname, test.subdir, test.status,
                            test.reason))
        if test.status not in ('GOOD', 'WARN'):
            job_successful = False
            pid_file_manager.num_tests_failed += 1
            message_lines.append(format_failure_message(
                jobname, test.kernel.base, test.subdir,
                test.status, test.reason))

    message = "\n".join(message_lines)

    if not dry_run:
        # send out a email report of failure
        if len(message) > 2 and mail_on_failure:
            tko_utils.dprint("Sending email report of failure on %s to %s"
                             % (jobname, job.user))
            mailfailure(jobname, job, message)

        # Upload perf values to the perf dashboard, if applicable.
        for test in job.tests:
            perf_uploader.upload_test(job, test, jobname)

        # Upload job details to Sponge.
        sponge_url = sponge_utils.upload_results(job, log=tko_utils.dprint)
        if sponge_url:
            job.keyval_dict['sponge_url'] = sponge_url

        _write_job_to_db(db, jobname, job)

        # Verify the job data is written to the database.
        if job.tests:
            tests_in_db = db.find_tests(job.job_idx)
            tests_in_db_count = len(tests_in_db) if tests_in_db else 0
            if tests_in_db_count != len(job.tests):
                tko_utils.dprint(
                        'Failed to find enough tests for job_idx: %d. The '
                        'job should have %d tests, only found %d tests.' %
                        (job.job_idx, len(job.tests), tests_in_db_count))
                metrics.Counter(
                        'chromeos/autotest/result/db_save_failure',
                        description='The number of times parse failed to '
                        'save job to TKO database.').increment()

        # Although the cursor has autocommit, we still need to force it to
        # commit existing changes before we can use django models, otherwise
        # it will go into deadlock when django models try to start a new
        # transaction while the current one has not finished yet.
        db.commit()

        # Handle retry job.
        orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID,
                                         None)
        if orig_afe_job_id:
            orig_job_idx = tko_models.Job.objects.get(
                    afe_job_id=orig_afe_job_id).job_idx
            _invalidate_original_tests(orig_job_idx, job.job_idx)

    # Serializing job into a binary file
    export_tko_to_file = global_config.global_config.get_config_value(
            'AUTOSERV', 'export_tko_job_to_file', type=bool, default=False)

    binary_file_name = os.path.join(path, "job.serialize")
    if export_tko_to_file:
        export_tko_job_to_file(job, jobname, binary_file_name)

    if not dry_run:
        db.commit()

    # Generate a suite report.
    # Check whether this is a suite job, a suite job will be a hostless job, its
    # jobname will be <JOB_ID>-<USERNAME>/hostless, the suite field will not be
    # NULL. Only generate timeline report when datastore_parent_key is given.
    datastore_parent_key = job_keyval.get('datastore_parent_key', None)
    provision_job_id = job_keyval.get('provision_job_id', None)
    if (suite_report and jobname.endswith('/hostless')
            and job.suite and datastore_parent_key):
        tko_utils.dprint('Start dumping suite timing report...')
        timing_log = os.path.join(path, 'suite_timing.log')
        # Build the command as an argument list, like the upload command
        # below, so that the results path and job id are passed verbatim
        # instead of being re-parsed by a shell.
        dump_cmd = [
            '%s/site_utils/dump_suite_report.py' % common.autotest_dir,
            '%s' % job.afe_job_id,
            '--output=%s' % timing_log,
            '--debug',
        ]

        if provision_job_id is not None:
            dump_cmd.append('--provision_job_id=%d' % int(provision_job_id))

        subprocess.check_output(dump_cmd)
        tko_utils.dprint('Successfully finish dumping suite timing report')

        if (datastore_creds and export_to_gcloud_path
                and os.path.exists(export_to_gcloud_path)):
            upload_cmd = [export_to_gcloud_path, datastore_creds,
                          timing_log, '--parent_key',
                          datastore_parent_key]
            tko_utils.dprint('Start exporting timeline report to gcloud')
            subprocess.check_output(upload_cmd)
            tko_utils.dprint('Successfully export timeline report to '
                             'gcloud')
        else:
            tko_utils.dprint('DEBUG: skip exporting suite timeline to '
                             'gcloud, because either gcloud creds or '
                             'export_to_gcloud script is not found.')

    # Mark GS_OFFLOADER_NO_OFFLOAD in gs_offloader_instructions at the end of
    # the function, so any failure, e.g., db connection error, will stop
    # gs_offloader_instructions being updated, and logs can be uploaded for
    # troubleshooting.
    if job_successful:
        # Check if we should not offload this test's results.
        if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
            # Update the gs_offloader_instructions json file.
            gs_instructions_file = os.path.join(
                    path, constants.GS_OFFLOADER_INSTRUCTIONS)
            gs_offloader_instructions = {}
            if os.path.exists(gs_instructions_file):
                with open(gs_instructions_file, 'r') as f:
                    gs_offloader_instructions = json.load(f)

            gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
            with open(gs_instructions_file, 'w') as f:
                json.dump(gs_offloader_instructions, f)