def test_legacy_swarming_path(self):
    """Swarming result directories map to the id following 'swarming-'.

    Checked for both an absolute results path and a bare directory name.
    """
    swarming_paths = (
        "/autotest/results/swarming-3e4391423c3a4311",
        "swarming-3e4391423c3a4311",
    )
    for results_path in swarming_paths:
        self.assertEqual(
            "3e4391423c3a4311",
            job_directories.get_job_id_or_task_id(results_path))
def record_autoserv(options, start_time):
    """Report the end-to-end autoserv run time to the metadata db.

    Nothing is reported when more than one machine was given. When no
    machine was given, the time is attributed to the name 'hostless'.

    @param options: parser options.
    @param start_time: datetime at which autoserv started.
    """
    # Machine names may be separated by commas and/or whitespace.
    if options.machines:
        machines = options.machines.replace(',', ' ').strip().split()
    else:
        machines = []

    if len(machines) > 1:
        # Skip the case where atomic group is used.
        return
    if not machines:
        machines.append('hostless')

    # Work out which status the elapsed time is booked under.
    status = get_job_status(options)
    non_special_states = [job_overhead.STATUS.RUNNING,
                          job_overhead.STATUS.GATHERING]
    is_special_task = status not in non_special_states

    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    elapsed = datetime.datetime.now() - start_time
    duration_secs = elapsed.total_seconds()
    job_overhead.record_state_duration(
        job_or_task_id, machines[0], status, duration_secs,
        is_special_task=is_special_task)
def _get_es_metadata(dir_entry):
    """Build the ES metadata dictionary for a test result directory.

    Starts from the metrics fields of the directory and adds the local
    hostname. For a non-empty dir_entry, the entry itself and its job/task
    id are added as well.

    @param dir_entry: Directory entry to offload.

    @return A dictionary for the metadata to be uploaded.
    """
    metadata = _get_metrics_fields(dir_entry)
    metadata['hostname'] = socket.gethostname()
    if not dir_entry:
        return metadata

    # Include more data about the test job in metadata.
    metadata['dir_entry'] = dir_entry
    metadata['job_id'] = job_directories.get_job_id_or_task_id(dir_entry)
    return metadata
def schedule(self, job, timeout_mins, machine):
    """
    Queue the child jobs of this sequence on the running AFE.

    One AFE job is created per iteration, all on the machine's host and all
    with the current job as parent.
    Support a subset of tests:
    - server job
    - no hostless.
    - no cleanup around tests.

    @param job: server_job object that will serve as parent.
    @param timeout_mins: timeout to set up: if the test last more than
                         timeout_mins, the test will fail.
    @param machine: machine to run the test on.

    @returns a maximal time in minutes that the sequence can take.
    """
    afe = frontend_wrappers.RetryingAFE(
        timeout_min=30, delay_sec=10, user=job.user, debug=False)

    # job_directories.get_job_id_or_task_id() will return a non-int opaque
    # id for Chrome OS Skylab tasks. But sequences will break in that case
    # anyway, because they try to create AFE jobs internally.
    parent_id = int(job_directories.get_job_id_or_task_id(job.resultdir))
    logging.debug('Current job id: %s', parent_id)

    per_child_mins = self.child_job_timeout()
    host = utils.get_hostname_from_machine(machine)

    # create_job arguments that are the same literal for every child job.
    constant_job_args = {
        'meta_hosts': (),
        'one_time_hosts': (),
        'synch_count': None,
        'is_template': False,
        'run_verify': False,
        'email_list': '',
        'dependencies': (),
        'reboot_before': None,
        'reboot_after': None,
        'parse_failed_repair': None,
        'hostless': False,
        'keyvals': None,
        'drone_set': None,
        'image': None,
        'test_retry': 0,
        'run_reset': False,
    }

    for index in xrange(self._iteration):
        job_name = self.child_job_name(host, index)
        logging.debug('Creating job: %s', job_name)
        # Each child gets one extra runtime slot of timeout on top of the
        # previous child, so later children are allowed to finish later.
        afe.create_job(
            self.child_control_file(),
            name=job_name,
            priority=priorities.Priority.DEFAULT,
            control_type=control_data.CONTROL_TYPE.SERVER,
            hosts=[host],
            timeout_mins=timeout_mins + (index + 1) * per_child_mins,
            max_runtime_mins=per_child_mins,
            parent_job_id=parent_id,
            require_ssp=utils.is_in_container(),
            **constant_job_args)

    total_mins = per_child_mins * self._iteration
    return total_mins
def record_autoserv(options, duration_secs):
    """Record autoserv end-to-end time in metadata db.

    Nothing is recorded when more than one machine is given. When no machine
    is given, the duration is attributed to the name 'hostless'.

    @param options: parser options.
    @param duration_secs: How long autoserv has taken, in secs.
    """
    # Get machine hostname. Names may be separated by commas and/or
    # whitespace.
    machines = (options.machines.replace(',', ' ').strip().split()
                if options.machines else [])
    num_machines = len(machines)
    if num_machines > 1:
        # Skip the case where atomic group is used.
        return
    elif num_machines == 0:
        machines.append('hostless')

    # Determine the status that will be reported.
    s = job_overhead.STATUS
    task_mapping = {
        'reset': s.RESETTING,
        'verify': s.VERIFYING,
        'provision': s.PROVISIONING,
        'repair': s.REPAIRING,
        'cleanup': s.CLEANING,
        'collect_crashinfo': s.GATHERING,
    }
    # Build a real list instead of calling filter(): on Python 3 filter()
    # returns an iterator, which is always truthy and cannot be indexed, so
    # the lookup below would fail. The '== True' comparison is kept so the
    # same option values match as before.
    match = [task for task in task_mapping
             if getattr(options, task, False) == True]
    status = task_mapping[match[0]] if match else s.RUNNING
    is_special_task = status not in [s.RUNNING, s.GATHERING]
    job_or_task_id = job_directories.get_job_id_or_task_id(options.results)
    job_overhead.record_state_duration(job_or_task_id, machines[0], status,
                                       duration_secs,
                                       is_special_task=is_special_task)
def main():
    """Entry point of autoserv.

    Parses the command line, prepares the results directory, logging and
    pidfile, then either mocks results (testing mode) or calls run_autoserv.
    Outside the testing-mode early return, the process ends through
    sys.exit() with the resulting exit code.
    """
    # NOTE(review): start_time is not read again within this function.
    start_time = datetime.datetime.now()
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    # No arguments at all: show usage and bail out.
    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            # Default to a timestamped directory name.
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as holding results already.
        resultdir_exists = False
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    if parser.options.require_ssp:
        # This is currently only used for skylab (i.e., when --control-name is
        # used).
        use_ssp = _require_ssp_from_control(parser.options.control_name)
    else:
        use_ssp = False

    # With SSP, this process logs into an 'ssp_logs' subdirectory of the
    # results directory instead of the results directory itself.
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))
    logging.debug('autoserv parsed options: %s', parser.options)

    # Staging happens only after logging is configured.
    if use_ssp:
        ssp_url = _stage_ssp(parser, results)
    else:
        ssp_url = None

    if results:
        logging.info("Results placed in %s" % results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    # A pidfile is only written when requested and a results dir exists.
    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(parser.options.install_in_tmpdir)

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value('AUTOSERV',
                                                  'testing_exceptions',
                                                  type=list,
                                                  default=[])
    test_mode = _CONFIG.get_config_value('AUTOSERV',
                                         'testing_mode',
                                         type=bool,
                                         default=False)
    # Testing mode needs results_mocker to be truthy, the config flag set,
    # and no testing exception contained in the job label.
    test_mode = (
            results_mocker and test_mode and
            not any([ex in parser.options.label for ex in testing_exceptions]))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    # The option is compared as a string, not as a boolean.
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    # The last path component of the results dir is used as
                    # the machine name.
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker('unknown-test',
                                                 parser.options.results,
                                                 machine).mock_results()
                return
            else:
                with trace.Span(get_job_status(parser.options)):
                    run_autoserv(pid_file_manager, results, parser, ssp_url,
                                 use_ssp)
        except SystemExit as e:
            # run_autoserv ends with sys.exit(); pick up its code here.
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s',
                                  exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        # Runs on the testing-mode return path as well.
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
    sys.exit(exit_code)
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Installs signal handlers, builds the server_job from the parsed options,
    runs the requested special task or the job itself, and finishes by
    calling sys.exit() with 0 on success or 1 if anything raised.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_id = lxc.ContainerId(job_or_task_id, time.time(), os.getpid())

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up SSP state and kill the process group.

        @param signum: Signal number (unused).
        @param frame: Current stack frame (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_id)
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get_container(container_id)
                if container:
                    container.destroy()
                else:
                    logging.debug('Container %s is not found.', container_id)
            except:
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_id)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # SIGKILL goes to the whole process group, this process included.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not avaliable to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Pull the parsed options into locals.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    execution_tag = parser.options.execution_tag
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error(
                "Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    # Decide where the control file comes from: --control-name, the first
    # positional argument, or none at all (special tasks only).
    use_client_trampoline = False
    if parser.options.control_name:
        if use_ssp:
            # When use_ssp is True, autoserv will be re-executed inside a
            # container preserving the --control-name argument. Control file
            # will be staged inside the rexecuted autoserv.
            control = None
        else:
            try:
                control = _stage_control_file(parser.options.control_name,
                                              results)
            except error.AutoservError as e:
                # Staging failed: pass the control name through unchanged and
                # flag the job to use the client trampoline.
                logging.info("Using client trampoline because of: %s", e)
                control = parser.options.control_name
                use_client_trampoline = True

    elif parser.args:
        control = parser.args[0]
    else:
        if not is_special_task:
            parser.parser.error("Missing argument: control file")
        control = None

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    job_kwargs = {
            'control': control,
            'args': parser.args[1:],
            'resultdir': results,
            'label': label,
            'user': user,
            'machines': machines,
            'machine_dict_list': server_job.get_machine_dicts(
                    machine_names=machines,
                    store_dir=os.path.join(results,
                                           parser.options.host_info_subdir),
                    in_lab=in_lab,
                    use_shadow_store=not parser.options.local_only_host_info,
                    host_attributes=parser.options.host_attributes,
            ),
            'client': client,
            'ssh_user': ssh_user,
            'ssh_port': ssh_port,
            'ssh_pass': ssh_pass,
            'ssh_verbosity_flag': ssh_verbosity_flag,
            'ssh_options': ssh_options,
            'group_name': group_name,
            'tag': execution_tag,
            'disable_sysinfo': parser.options.disable_sysinfo,
            'in_lab': in_lab,
            'use_client_trampoline': use_client_trampoline,
    }
    # Optional arguments are only passed when they were actually supplied.
    if parser.options.parent_job_id:
        job_kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        job_kwargs['control_filename'] = control_filename
    job = server_job.server_job(**job_kwargs)

    job.logging.start_logging()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    try:
        try:
            # At most one special task runs; the first set flag in this
            # chain wins. Otherwise the job itself is run.
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_id, job_or_task_id,
                                      results, parser, ssp_url, machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            job.close()
            # Special task doesn't run parse, so result summary needs to be
            # built here.
            if results and (repair or verify or reset or cleanup or provision):
                # Throttle the result on the server side.
                try:
                    result_utils.execute(
                            results, control_data.DEFAULT_MAX_RESULT_SIZE_KB)
                except:
                    logging.exception(
                            'Non-critical failure: Failed to throttle results '
                            'in directory %s.', results)
                # Build result view and report metrics for result sizes.
                site_utils.collect_result_sizes(results)
    except:
        # Any failure above, of whatever kind, becomes exit code 1.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    sys.exit(exit_code)
def run_autoserv(pid_file_manager, results, parser, ssp_url, use_ssp):
    """Run server job with given options.

    Installs signal handlers, builds the server_job from the parsed options,
    runs the requested special task or the job itself, closes the pidfile
    and finishes by calling sys.exit() with 0 on success or 1 if anything
    raised.

    @param pid_file_manager: PidFileManager used to monitor the autoserv process
    @param results: Folder to store results.
    @param parser: Parser for the command line arguments.
    @param ssp_url: Url to server-side package.
    @param use_ssp: Set to True to run with server-side packaging.
    """
    if parser.options.warn_no_ssp:
        # Post a warning in the log.
        logging.warn('Autoserv is required to run with server-side packaging. '
                     'However, no drone is found to support server-side '
                     'packaging. The test will be executed in a drone without '
                     'server-side packaging supported.')

    # send stdin to /dev/null
    dev_null = os.open(os.devnull, os.O_RDONLY)
    os.dup2(dev_null, sys.stdin.fileno())
    os.close(dev_null)

    # Create separate process group if the process is not a process group
    # leader. This allows autoserv process to keep running after the caller
    # process (drone manager call) exits.
    if os.getpid() != os.getpgid(0):
        os.setsid()

    # Container name is predefined so the container can be destroyed in
    # handle_sigterm.
    job_or_task_id = job_directories.get_job_id_or_task_id(
            parser.options.results)
    container_name = (lxc.TEST_CONTAINER_NAME_FMT %
                      (job_or_task_id, time.time(), os.getpid()))
    job_folder = job_directories.get_job_folder_name(parser.options.results)

    # Implement SIGTERM handler
    def handle_sigterm(signum, frame):
        """Record the abort, clean up SSP state and kill the process group.

        @param signum: Signal number (unused).
        @param frame: Current stack frame (unused).
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug('Destroy container %s before aborting the autoserv '
                          'process.', container_name)
            # Outcome of the container teardown; posted below.
            metadata = {'drone': socket.gethostname(),
                        'job_id': job_or_task_id,
                        'container_name': container_name,
                        'action': 'abort',
                        'success': True}
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                metadata['success'] = False
                metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # SIGKILL goes to the whole process group, this process included.
        os.killpg(os.getpgrp(), signal.SIGKILL)

    # Set signal handler
    signal.signal(signal.SIGTERM, handle_sigterm)

    # faulthandler is only needed to debug in the Lab and is not avaliable to
    # be imported in the chroot as part of VMTest, so Try-Except it.
    try:
        import faulthandler
        faulthandler.register(signal.SIGTERM, all_threads=True, chain=True)
        logging.debug('faulthandler registered on SIGTERM.')
    except ImportError:
        sys.exc_clear()

    # Ignore SIGTTOU's generated by output from forked children.
    signal.signal(signal.SIGTTOU, signal.SIG_IGN)

    # If we received a SIGALARM, let's be loud about it.
    signal.signal(signal.SIGALRM, log_alarm)

    # Server side tests that call shell scripts often depend on $USER being set
    # but depending on how you launch your autotest scheduler it may not be set.
    os.environ['USER'] = getpass.getuser()

    # Pull the parsed options into locals.
    label = parser.options.label
    group_name = parser.options.group_name
    user = parser.options.user
    client = parser.options.client
    server = parser.options.server
    install_before = parser.options.install_before
    install_after = parser.options.install_after
    verify = parser.options.verify
    repair = parser.options.repair
    cleanup = parser.options.cleanup
    provision = parser.options.provision
    reset = parser.options.reset
    job_labels = parser.options.job_labels
    no_tee = parser.options.no_tee
    parse_job = parser.options.parse_job
    execution_tag = parser.options.execution_tag
    # Fall back to the parse_job value when no execution tag was given.
    if not execution_tag:
        execution_tag = parse_job
    ssh_user = parser.options.ssh_user
    ssh_port = parser.options.ssh_port
    ssh_pass = parser.options.ssh_pass
    collect_crashinfo = parser.options.collect_crashinfo
    control_filename = parser.options.control_filename
    test_retry = parser.options.test_retry
    verify_job_repo_url = parser.options.verify_job_repo_url
    skip_crash_collection = parser.options.skip_crash_collection
    ssh_verbosity = int(parser.options.ssh_verbosity)
    ssh_options = parser.options.ssh_options
    no_use_packaging = parser.options.no_use_packaging
    host_attributes = parser.options.host_attributes
    in_lab = bool(parser.options.lab)

    # can't be both a client and a server side test
    if client and server:
        parser.parser.error(
                "Can not specify a test as both server and client!")

    if provision and client:
        parser.parser.error("Cannot specify provisioning and client!")

    is_special_task = (verify or repair or cleanup or collect_crashinfo or
                       provision or reset)
    if len(parser.args) < 1 and not is_special_task:
        parser.parser.error("Missing argument: control file")

    if ssh_verbosity > 0:
        # ssh_verbosity is an integer between 0 and 3, inclusive
        ssh_verbosity_flag = '-' + 'v' * ssh_verbosity
    else:
        ssh_verbosity_flag = ''

    # We have a control file unless it's just a verify/repair/cleanup job
    if len(parser.args) > 0:
        control = parser.args[0]
    else:
        control = None

    machines = _get_machines(parser)
    if group_name and len(machines) < 2:
        parser.parser.error('-G %r may only be supplied with more than one '
                            'machine.' % group_name)

    kwargs = {'group_name': group_name, 'tag': execution_tag,
              'disable_sysinfo': parser.options.disable_sysinfo}
    # Optional arguments are only passed when they were actually supplied.
    if parser.options.parent_job_id:
        kwargs['parent_job_id'] = int(parser.options.parent_job_id)
    if control_filename:
        kwargs['control_filename'] = control_filename
    if host_attributes:
        kwargs['host_attributes'] = host_attributes
    kwargs['in_lab'] = in_lab
    job = server_job.server_job(control, parser.args[1:], results, label,
                                user, machines, client, parse_job,
                                ssh_user, ssh_port, ssh_pass,
                                ssh_verbosity_flag, ssh_options,
                                test_retry, **kwargs)

    job.logging.start_logging()
    job.init_parser()

    # perform checks
    job.precheck()

    # run the job
    exit_code = 0
    auto_start_servod = _CONFIG.get_config_value(
            'AUTOSERV', 'auto_start_servod', type=bool, default=False)

    site_utils.SetupTsMonGlobalState('autoserv', indirect=False,
                                     short_lived=True)
    try:
        try:
            # At most one special task runs; the first set flag in this
            # chain wins. Otherwise the job itself is run.
            if repair:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                job.repair(job_labels)
            elif verify:
                job.verify(job_labels)
            elif provision:
                job.provision(job_labels)
            elif reset:
                job.reset(job_labels)
            elif cleanup:
                job.cleanup(job_labels)
            else:
                if auto_start_servod and len(machines) == 1:
                    _start_servod(machines[0])
                if use_ssp:
                    try:
                        _run_with_ssp(job, container_name, job_or_task_id,
                                      results, parser, ssp_url, job_folder,
                                      machines)
                    finally:
                        # Update the ownership of files in result folder.
                        correct_results_folder_permission(results)
                else:
                    if collect_crashinfo:
                        # Update the ownership of files in result folder. If the
                        # job to collect crashinfo was running inside container
                        # (SSP) and crashed before correcting folder permission,
                        # the result folder might have wrong permission setting.
                        try:
                            correct_results_folder_permission(results)
                        except:
                            # Ignore any error as the user may not have root
                            # permission to run sudo command.
                            pass
                    metric_name = ('chromeos/autotest/experimental/'
                                   'autoserv_job_run_duration')
                    f = {'in_container': utils.is_in_container(),
                         'success': False}
                    with metrics.SecondsTimer(metric_name, fields=f) as c:
                        job.run(install_before, install_after,
                                verify_job_repo_url=verify_job_repo_url,
                                only_collect_crashinfo=collect_crashinfo,
                                skip_crash_collection=skip_crash_collection,
                                job_labels=job_labels,
                                use_packaging=(not no_use_packaging))
                        # Only reached when job.run() did not raise.
                        c['success'] = True

        finally:
            # Close every host the job still holds, emptying job.hosts.
            while job.hosts:
                host = job.hosts.pop()
                host.close()
    except:
        # Any failure above, of whatever kind, becomes exit code 1.
        exit_code = 1
        traceback.print_exc()
    finally:
        metrics.Flush()

    if pid_file_manager:
        pid_file_manager.num_tests_failed = job.num_tests_failed
        pid_file_manager.close_file(exit_code)
    job.cleanup_parser()

    sys.exit(exit_code)
def id(self):
    """Return the job or task id derived from the wrapped job's directory."""
    job_dir = self._job.dir
    return job_directories.get_job_id_or_task_id(job_dir)
def main():
    """Entry point of autoserv.

    Parses the command line, prepares the results directory, optionally
    stages the server-side package, sets up logging and the pidfile, then
    either mocks results (testing mode) or calls run_autoserv. The run
    duration is recorded before leaving. Outside the testing-mode early
    return, the process ends through sys.exit() with the resulting exit
    code.
    """
    start_time = datetime.datetime.now()
    # grab the parser
    parser = autoserv_parser.autoserv_parser
    parser.parse_args()

    # No arguments at all: show usage and bail out.
    if len(sys.argv) == 1:
        parser.parser.print_help()
        sys.exit(1)

    if parser.options.no_logging:
        results = None
    else:
        results = parser.options.results
        if not results:
            # Default to a timestamped directory name.
            results = 'results.' + time.strftime('%Y-%m-%d-%H.%M.%S')
        results = os.path.abspath(results)
        # Any of these files marks the directory as holding results already.
        resultdir_exists = False
        for filename in ('control.srv', 'status.log', '.autoserv_execute'):
            if os.path.exists(os.path.join(results, filename)):
                resultdir_exists = True
        if not parser.options.use_existing_results and resultdir_exists:
            error = "Error: results directory already exists: %s\n" % results
            sys.stderr.write(error)
            sys.exit(1)

        # Now that we certified that there's no leftover results dir from
        # previous jobs, lets create the result dir since the logging system
        # needs to create the log file in there.
        if not os.path.isdir(results):
            os.makedirs(results)

    # If the job requires to run with server-side package, try to stage server-
    # side package first. If that fails with error that autotest server package
    # does not exist, fall back to run the job without using server-side
    # packaging. If option warn_no_ssp is specified, that means autoserv is
    # running in a drone does not support SSP, thus no need to stage server-side
    # package.
    ssp_url = None
    ssp_url_warning = False
    if (not parser.options.warn_no_ssp and parser.options.require_ssp):
        ssp_url, ssp_error_msg = _stage_ssp(parser, results)
        # The build does not have autotest server package. Fall back to not
        # to use server-side package. Logging is postponed until logging being
        # set up.
        ssp_url_warning = not ssp_url

    # Server-side packaging will only be used if it's required and the package
    # is available. If warn_no_ssp is specified, it means that autoserv is
    # running in a drone does not have SSP supported and a warning will be logs.
    # Therefore, it should not run with SSP.
    use_ssp = (not parser.options.warn_no_ssp and parser.options.require_ssp
               and ssp_url)
    # With SSP, this process logs into an 'ssp_logs' subdirectory of the
    # results directory instead of the results directory itself.
    if use_ssp:
        log_dir = os.path.join(results, 'ssp_logs') if results else None
        if log_dir and not os.path.exists(log_dir):
            os.makedirs(log_dir)
    else:
        log_dir = results

    logging_manager.configure_logging(
            server_logging_config.ServerLoggingConfig(),
            results_dir=log_dir,
            use_console=not parser.options.no_tee,
            verbose=parser.options.verbose,
            no_console_prefix=parser.options.no_console_prefix)

    # ssp_error_msg is only bound when staging was attempted, which is also
    # the only way ssp_url_warning can be true.
    # NOTE(review): the message below contains the typo "erver-side".
    if ssp_url_warning:
        logging.warn(
                'Autoserv is required to run with server-side packaging. '
                'However, no server-side package can be staged based on '
                '`--image`, host attribute job_repo_url or host OS version '
                'label. It could be that the build to test is older than the '
                'minimum version that supports server-side packaging, or no '
                'devserver can be found to stage server-side package. The test '
                'will be executed without using erver-side packaging. '
                'Following is the detailed error:\n%s', ssp_error_msg)

    if results:
        logging.info("Results placed in %s" % results)

        # wait until now to perform this check, so it get properly logged
        if (parser.options.use_existing_results and not resultdir_exists and
            not utils.is_in_container()):
            logging.error("No existing results directory found: %s", results)
            sys.exit(1)

    logging.debug('autoserv is running in drone %s.', socket.gethostname())
    logging.debug('autoserv command was: %s', ' '.join(sys.argv))

    # A pidfile is only written when requested and a results dir exists.
    if parser.options.write_pidfile and results:
        pid_file_manager = pidfile.PidFileManager(parser.options.pidfile_label,
                                                  results)
        pid_file_manager.open_file()
    else:
        pid_file_manager = None

    autotest.Autotest.set_install_in_tmpdir(parser.options.install_in_tmpdir)

    exit_code = 0
    # TODO(beeps): Extend this to cover different failure modes.
    # Testing exceptions are matched against labels sent to autoserv. Eg,
    # to allow only the hostless job to run, specify
    # testing_exceptions: test_suite in the shadow_config. To allow both
    # the hostless job and dummy_Pass to run, specify
    # testing_exceptions: test_suite,dummy_Pass. You can figure out
    # what label autoserv is invoked with by looking through the logs of a test
    # for the autoserv command's -l option.
    testing_exceptions = _CONFIG.get_config_value('AUTOSERV',
                                                  'testing_exceptions',
                                                  type=list,
                                                  default=[])
    test_mode = _CONFIG.get_config_value('AUTOSERV',
                                         'testing_mode',
                                         type=bool,
                                         default=False)
    # Testing mode needs results_mocker to be truthy, the config flag set,
    # and no testing exception contained in the job label.
    test_mode = (
            results_mocker and test_mode and
            not any([ex in parser.options.label for ex in testing_exceptions]))
    is_task = (parser.options.verify or parser.options.repair or
               parser.options.provision or parser.options.reset or
               parser.options.cleanup or parser.options.collect_crashinfo)

    trace_labels = {
            'job_id': job_directories.get_job_id_or_task_id(
                    parser.options.results)
    }
    trace = cloud_trace.SpanStack(
            labels=trace_labels,
            global_context=parser.options.cloud_trace_context)
    # The option is compared as a string, not as a boolean.
    trace.enabled = parser.options.cloud_trace_context_enabled == 'True'
    try:
        try:
            if test_mode:
                # The parser doesn't run on tasks anyway, so we can just return
                # happy signals without faking results.
                if not is_task:
                    # The last path component of the results dir is used as
                    # the machine name.
                    machine = parser.options.results.split('/')[-1]

                    # TODO(beeps): The proper way to do this would be to
                    # refactor job creation so we can invoke job.record
                    # directly. To do that one needs to pipe the test_name
                    # through run_autoserv and bail just before invoking
                    # the server job. See the comment in
                    # puppylab/results_mocker for more context.
                    results_mocker.ResultsMocker('unknown-test',
                                                 parser.options.results,
                                                 machine).mock_results()
                return
            else:
                with trace.Span(get_job_status(parser.options)):
                    run_autoserv(pid_file_manager, results, parser, ssp_url,
                                 use_ssp)
        except SystemExit as e:
            # run_autoserv ends with sys.exit(); pick up its code here.
            exit_code = e.code
            if exit_code:
                logging.exception('Uncaught SystemExit with code %s',
                                  exit_code)
        except Exception:
            # If we don't know what happened, we'll classify it as
            # an 'abort' and return 1.
            logging.exception('Uncaught Exception, exit_code = 1.')
            exit_code = 1
    finally:
        # Runs on the testing-mode return path as well.
        if pid_file_manager:
            pid_file_manager.close_file(exit_code)
        # Record the autoserv duration time. Must be called
        # just before the system exits to ensure accuracy.
        record_autoserv(parser.options, start_time)
    sys.exit(exit_code)