Beispiel #1
0
def record_state_duration(
        job_or_task_id, hostname, status, duration_secs,
        type_str=DEFAULT_KEY, is_special_task=False):
    """Post the time a job or special task spent in a state to metadata db.

    Nothing is posted when the id, hostname or status is missing (falsy); an
    error is logged instead.

    @param job_or_task_id: Integer, representing a job id or a special task id.
    @param hostname: String, representing a hostname.
    @param status: One of the enum values of job_overhead.STATUS.
    @param duration_secs: Duration of the job/task in secs.
    @param type_str: The elastic search type string to be used when sending
                     data to metadata db.
    @param is_special_task: True/False, whether this is a special task.
    """
    if not (job_or_task_id and hostname and status):
        logging.error(
                'record_state_duration failed: job_or_task_id=%s, '
                'hostname=%s, status=%s', job_or_task_id, hostname, status)
        return

    # Jobs and special tasks are stored under different id keys.
    if is_special_task:
        id_key = 'task_id'
    else:
        id_key = 'job_id'

    metadata = {id_key: int(job_or_task_id)}
    metadata['hostname'] = hostname
    metadata['status'] = status
    metadata['duration'] = duration_secs
    autotest_es.post(type_str=type_str, metadata=metadata)
def correct_results_folder_permission(results):
    """Give the results folder to the user and group running this process.

    For tests running with server-side packaging, the results folder is owned
    by root. Ownership must be changed to the user running the autoserv
    process, so the parsing job can access the results folder.
    TODO(dshi): crbug.com/459344 Remove this function when test container can be
    unprivileged container.

    A failure of either command is reported to metadata db and then re-raised.

    @param results: Path to the results folder. Nothing is done when falsy.

    @raise error.CmdError: If chown or chgrp failed.
    """
    if not results:
        return

    try:
        chown_cmd = 'sudo -n chown -R %s "%s"' % (os.getuid(), results)
        utils.run(chown_cmd)
        chgrp_cmd = 'sudo -n chgrp -R %s "%s"' % (os.getgid(), results)
        utils.run(chgrp_cmd)
    except error.CmdError as e:
        failure = {'error': str(e)}
        failure['result_folder'] = results
        failure['drone'] = socket.gethostname()
        autotest_es.post(use_http=True,
                         type_str='correct_results_folder_failure',
                         metadata=failure)
        raise
    def create_from_base(self, name, disable_snapshot_clone=False,
                         force_cleanup=False):
        """Create a container from the base container.

        If a snapshot clone fails, the clone is retried once without snapshot
        and the successful retry is reported to metadata db.

        @param name: Name of the container.
        @param disable_snapshot_clone: Set to True to force to clone without
                using snapshot clone even if the host supports that.
        @param force_cleanup: Force to cleanup existing container.

        @return: A Container object for the created container.

        @raise ContainerError: If the container already exist.
        @raise error.CmdError: If lxc-clone call failed for any reason.
        """
        if self.exist(name) and not force_cleanup:
            raise error.ContainerError('Container %s already exists.' % name)

        # Cleanup existing container with the given name.
        container_folder = os.path.join(self.container_path, name)
        if lxc_utils.path_exists(container_folder) and force_cleanup:
            container = Container(self.container_path, {'name': name})
            try:
                container.destroy()
            except error.CmdError as e:
                # The container could be created in a incompleted state. Delete
                # the container folder instead.
                # logging.warn is a deprecated alias of logging.warning.
                logging.warning('Failed to destroy container %s, error: %s',
                                name, e)
                utils.run('sudo rm -rf "%s"' % container_folder)

        use_snapshot = SUPPORT_SNAPSHOT_CLONE and not disable_snapshot_clone
        snapshot = '-s' if use_snapshot else ''
        # overlayfs is the default clone backend storage. However it is not
        # supported in Ganeti yet. Use aufs as the alternative.
        aufs = '-B aufs' if utils.is_vm() and use_snapshot else ''
        cmd = ('sudo lxc-clone -p %s -P %s %s' %
               (self.container_path, self.container_path,
                ' '.join([BASE, name, snapshot, aufs])))
        try:
            utils.run(cmd)
            return self.get(name)
        except error.CmdError:
            if not use_snapshot:
                raise
            else:
                # Snapshot clone failed, retry clone without snapshot. The retry
                # won't hit the code here and cause an infinite loop as
                # disable_snapshot_clone is set to True.
                container = self.create_from_base(
                        name, disable_snapshot_clone=True, force_cleanup=True)
                # Report metadata about retry success.
                autotest_es.post(use_http=True,
                                 type_str=CONTAINER_CREATE_RETRY_METADB_TYPE,
                                 metadata={'drone': socket.gethostname(),
                                           'name': name,
                                           'success': True})
                return container
Beispiel #4
0
def delete(board):
    """Delete a board's stable version record and post the change.

    @param board: Name of the board.
    """
    models.StableVersion.objects.get(board=board).delete()
    # The posted version is whatever get() returns after the record is gone.
    metadata = {'board': board, 'version': get()}
    autotest_es.post(type_str=_STABLE_VERSION_TYPE, metadata=metadata)
    def handle_sigterm(signum, frame):
        """Handle SIGTERM: clean up, then kill the whole process group.

        Steps, in order: close the pid file if a pid file manager is set, fix
        the results folder permission when use_ssp is set, destroy the test
        container and post the outcome to metadata db, fix the results folder
        permission once more, and finally send SIGKILL to the current process
        group. pid_file_manager, use_ssp, results, container_name and
        job_or_task_id come from the enclosing scope.

        @param signum: Signal number; not used by the handler.
        @param frame: Current stack frame; not used by the handler.
        """
        logging.debug('Received SIGTERM')
        if pid_file_manager:
            pid_file_manager.close_file(1, signal.SIGTERM)
        logging.debug('Finished writing to pid_file. Killing process.')

        # Update results folder's file permission. This needs to be done ASAP
        # before the parsing process tries to access the log.
        if use_ssp and results:
            correct_results_folder_permission(results)

        # TODO (sbasi) - remove the time.sleep when crbug.com/302815 is solved.
        # This sleep allows the pending output to be logged before the kill
        # signal is sent.
        time.sleep(.1)
        if use_ssp:
            logging.debug(
                'Destroy container %s before aborting the autoserv '
                'process.', container_name)
            # Assume success; the branches below flip it and attach an error.
            metadata = {
                'drone': socket.gethostname(),
                'job_id': job_or_task_id,
                'container_name': container_name,
                'action': 'abort',
                'success': True
            }
            try:
                bucket = lxc.ContainerBucket()
                container = bucket.get(container_name)
                if container:
                    container.destroy()
                else:
                    metadata['success'] = False
                    metadata['error'] = 'container not found'
                    logging.debug('Container %s is not found.', container_name)
            except:
                # The bare except is deliberate: nothing raised here may stop
                # the handler from reaching the final kill.
                metadata['success'] = False
                metadata['error'] = 'Exception: %s' % str(sys.exc_info())
                # Handle any exception so the autoserv process can be aborted.
                logging.exception('Failed to destroy container %s.',
                                  container_name)
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
            # Try to correct the result file permission again after the
            # container is destroyed, as the container might have created some
            # new files in the result folder.
            if results:
                correct_results_folder_permission(results)

        # Kill every process in the current process group, this one included.
        os.killpg(os.getpgrp(), signal.SIGKILL)
Beispiel #6
0
        def func_cleanup_if_fail(*args, **kwargs):
            """Call func and clean up the container if the call raises.

            `func` is the wrapped function taken from the enclosing scope (the
            enclosing definition is not part of this block).

            The first argument must be a ContainerBucket object, which can be
            used to retrieve the container object by name.

            When func raises, the container named by func's `name` argument is
            destroyed unless func's `skip_cleanup` argument is true, a failure
            record is posted to metadata db, and the original exception is
            re-raised with its original traceback.

            @param args: arguments for function to be called.
            @param kwargs: keyword arguments for function to be called.

            @return: Whatever func returns when it succeeds.
            """
            bucket = args[0]
            name = utils.get_function_arg_value(func, 'name', args, kwargs)
            # skip_cleanup is optional in func's signature; default to False.
            try:
                skip_cleanup = utils.get_function_arg_value(
                    func, 'skip_cleanup', args, kwargs)
            except (KeyError, ValueError):
                skip_cleanup = False
            try:
                return func(*args, **kwargs)
            except:
                # Cache the exception before the nested try/except blocks
                # below run, so it can be re-raised unchanged at the end.
                exc_info = sys.exc_info()
                try:
                    container = bucket.get(name)
                    if container and not skip_cleanup:
                        container.destroy()
                except error.CmdError as e:
                    # A failed cleanup is only logged; the original failure is
                    # the one reported to the caller.
                    logging.error(e)

                # job_id is optional in func's signature as well.
                try:
                    job_id = utils.get_function_arg_value(
                        func, 'job_id', args, kwargs)
                except (KeyError, ValueError):
                    job_id = ''
                metadata = {
                    'drone': socket.gethostname(),
                    'job_id': job_id,
                    'success': False
                }
                # Record all args if job_id is not available.
                if not job_id:
                    metadata['args'] = str(args)
                    if kwargs:
                        metadata.update(kwargs)
                autotest_es.post(use_http=True,
                                 type_str=CONTAINER_CREATE_METADB_TYPE,
                                 metadata=metadata)

                # Raise the cached exception with original backtrace.
                raise exc_info[0], exc_info[1], exc_info[2]
Beispiel #7
0
def set(version, board=DEFAULT):
    """Store the stable version of a board and post it to metadata db.

    An existing record for the board is updated; otherwise a new record is
    created.

    @param version: The new value of stable version for given board.
    @param board: Name of the board, default to value `DEFAULT`.
    """
    manager = models.StableVersion.objects
    try:
        record = manager.get(board=board)
        record.version = version
        record.save()
    except django.core.exceptions.ObjectDoesNotExist:
        manager.create(board=board, version=version)

    payload = {'board': board, 'version': version}
    autotest_es.post(type_str=_STABLE_VERSION_TYPE, metadata=payload)
    def schedule_synchronized_reboot(self, dut_list, afe, force_reboot=False):
        """Schedule a job to reboot the servo host.

        Scheduling a job creates a ServoHost object, which goes through this
        same flow of checking whether a reboot is needed and trying to
        schedule it. Nothing is scheduled when a job is already scheduled on
        one of the duts. A failure to create the job is logged and reported to
        metadata db, but never raised.

        @param dut_list:      List of duts that need to be locked.
        @param afe:           Instance of afe.
        @param force_reboot:  Boolean to indicate if a forced reboot should be
                              scheduled or not.
        """
        # A job already scheduled on a dut means there is nothing left to do.
        if self._sync_job_scheduled_for_duts(dut_list, afe):
            return

        # No job yet: pick the test, the dut and the control file to use.
        if force_reboot:
            test = _SERVO_HOST_FORCE_REBOOT_TEST_NAME
        else:
            test = _SERVO_HOST_REBOOT_TEST_NAME
        dut = self._choose_dut_for_synchronized_reboot(dut_list, afe)
        control_file = control_file_getter.FileSystemGetter(
                [AUTOTEST_BASE]).get_control_file_contents_by_name(test)
        job_kwargs = {
            'control_file': control_file,
            'name': test,
            'control_type': control_data.CONTROL_TYPE_NAMES.SERVER,
            'hosts': [dut],
        }
        try:
            afe.create_job(**job_kwargs)
        except Exception as e:
            # Sometimes creating the job will raise an exception. We'll log it
            # but we don't want to fail because of it.
            logging.exception('Scheduling reboot job failed: %s', e)
            failure = {'dut': dut}
            failure['servo_host'] = self.hostname
            failure['error'] = str(e)
            failure['details'] = traceback.format_exc()
            # We want to track how often we fail here so we can justify
            # investing some effort into hardening up afe.create_job().
            autotest_es.post(use_http=True,
                             type_str='servohost_Reboot_schedule_fail',
                             metadata=failure)
    def record_state(self, type_str, state, value):
        """Post a state change of this job to elasticsearch.

        The record holds the current time, the given state/value pair, the
        job id and, when a host is set, its hostname.

        @param type_str: sets the _type field in elasticsearch db.
        @param state: string representing what state we are recording,
                      e.g. 'status'
        @param value: value of the state, e.g. 'verifying'
        """
        # Keys are filled in this order so later ones win on a name clash.
        metadata = {}
        metadata['time_changed'] = time.time()
        metadata[state] = value
        metadata['job_id'] = self.job_id
        if self.host:
            metadata['hostname'] = self.host.hostname
        autotest_es.post(type_str=type_str, metadata=metadata)
Beispiel #10
0
def collect_info():
    """Collect the labels of all valid hosts and report them to metaDB.

    @raise Exception: If the bulk upload of host label info failed.
    """
    # time_index is to index all host labels collected together. It's
    # converted to int to make search faster.
    time_index = int(time.time())
    hosts = models.Host.objects.filter(invalid=False)
    data_list = [
        {'_type': _HOST_LABEL_TYPE,
         'hostname': host.hostname,
         'labels': [label.name for label in host.labels.all()],
         'time_index': time_index}
        for host in hosts
    ]
    uploaded = autotest_es.bulk_post(data_list, log_time_recorded=False)
    if not uploaded:
        raise Exception('Failed to upload host label info.')

    # After all host label information is logged, save the time stamp.
    autotest_es.post(use_http=True, type_str=_HOST_LABEL_TIME_INDEX_TYPE,
                     metadata={'time_index': time_index},
                     log_time_recorded=False)
    logging.info('Finished collecting host labels for %d hosts.', len(hosts))
Beispiel #11
0
def record_suite_runtime(suite_job_id, suite_name, board, build, num_child_jobs,
                         runtime_in_secs):
    """Post the runtime of a suite to metadata db.

    @param suite_job_id: The job id of the suite for which we are going to
                         collect stats.
    @param suite_name: The suite name, e.g. 'bvt', 'dummy'.
    @param board: The target board for which the suite is run,
                  e.g., 'lumpy', 'link'.
    @param build: The build for which the suite is run,
                  e.g. 'lumpy-release/R35-5712.0.0'.
    @param num_child_jobs: Total number of child jobs of the suite.
    @param runtime_in_secs: Duration of the suite from the start to the end.
    """
    suite_stats = dict(
            suite_job_id=suite_job_id,
            suite_name=suite_name,
            board=board,
            build=build,
            num_child_jobs=num_child_jobs,
            duration=runtime_in_secs)
    autotest_es.post(type_str=SUITE_RUNTIME_KEY, metadata=suite_stats)
def _run_with_ssp(job, container_name, job_id, results, parser, ssp_url,
                  job_folder, machines):
    """Run the server job with server-side packaging.

    A test container is set up, the current autoserv command line is rewritten
    to use the paths inside the container and run there, and the container is
    destroyed afterwards whether or not the run succeeded.

    @param job: The server job object.
    @param container_name: Name of the container to run the test.
    @param job_id: ID of the test job.
    @param results: Folder to store results. This could be different from
                    parser.options.results:
                    parser.options.results  can be set to None for results to be
                    stored in a temp folder.
                    results can be None for autoserv run requires no logging.
    @param parser: Command line parser that contains the options.
    @param ssp_url: url of the staged server-side package.
    @param job_folder: Name of the job result folder.
    @param machines: A list of machines to run the test.

    @raise Exception: Whatever container setup or the run in the container
                      raised; a FAIL entry is recorded on the job first.
    """
    bucket = lxc.ContainerBucket()
    control = (parser.args[0]
               if len(parser.args) > 0 and parser.args[0] != '' else None)
    try:
        dut_name = machines[0] if len(machines) >= 1 else None
        test_container = bucket.setup_test(container_name,
                                           job_id,
                                           ssp_url,
                                           results,
                                           control=control,
                                           job_folder=job_folder,
                                           dut_name=dut_name)
    except Exception as e:
        job.record(
            'FAIL', None, None,
            'Failed to setup container for test: %s. Check logs in '
            'ssp_logs folder for more details.' % e)
        raise

    args = sys.argv[:]
    args.remove('--require-ssp')
    # --parent_job_id is only useful in autoserv running in host, not in
    # container. Include this argument will cause test to fail for builds before
    # CL 286265 was merged.
    if '--parent_job_id' in args:
        index = args.index('--parent_job_id')
        args.remove('--parent_job_id')
        # Remove the actual parent job id in command line arg.
        del args[index]

    # A dictionary of paths to replace in the command line. Key is the path to
    # be replaced with the one in value.
    paths_to_replace = {}
    # Replace the control file path with the one in container.
    if control:
        container_control_filename = os.path.join(lxc.CONTROL_TEMP_PATH,
                                                  os.path.basename(control))
        paths_to_replace[control] = container_control_filename
    # Update result directory with the one in container.
    container_result_dir = os.path.join(lxc.RESULT_DIR_FMT % job_folder)
    if parser.options.results:
        paths_to_replace[parser.options.results] = container_result_dir
    # Update parse_job directory with the one in container. The assumption is
    # that the result folder to be parsed is always the same as the results_dir.
    if parser.options.parse_job:
        paths_to_replace[parser.options.parse_job] = container_result_dir

    args = [paths_to_replace.get(arg, arg) for arg in args]

    # Apply --use-existing-results, results directory is already created and
    # mounted in container. Apply this arg to avoid exception being raised.
    if '--use-existing-results' not in args:
        args.append('--use-existing-results')

    # Make sure autoserv running in container using a different pid file.
    if '--pidfile-label' not in args:
        args.extend(['--pidfile-label', 'container_autoserv'])

    cmd_line = ' '.join(["'%s'" % arg if ' ' in arg else arg for arg in args])
    logging.info('Run command in container: %s', cmd_line)
    success = False
    try:
        test_container.attach_run(cmd_line)
        success = True
    except Exception as e:
        # If the test run inside container fails without generating any log,
        # write a message to status.log to help troubleshooting.
        # results may be None and the debug folder may not exist; both count
        # as "no log generated" instead of raising a second error that would
        # hide the real failure.
        debug_files = []
        if results:
            debug_dir = os.path.join(results, 'debug')
            if os.path.isdir(debug_dir):
                debug_files = os.listdir(debug_dir)
        if not debug_files:
            job.record(
                'FAIL', None, None,
                'Failed to run test inside the container: %s. Check '
                'logs in ssp_logs folder for more details.' % e)
        raise
    finally:
        try:
            metrics.Counter('chromeos/autotest/experimental/execute_job_in_ssp'
                            ).increment(fields={'success': success})
            # metadata is uploaded separately so it can use http to upload.
            metadata = {
                'drone': socket.gethostname(),
                'job_id': job_id,
                'success': success
            }
            autotest_es.post(use_http=True,
                             type_str=lxc.CONTAINER_RUN_TEST_METADB_TYPE,
                             metadata=metadata)
        finally:
            # Destroy the container even when reporting above fails.
            test_container.destroy()
def main():
    """Main entrance.

    Parses the results directory given as the first command line argument
    (either that directory itself or each of its entries, depending on
    options.singledir), taking a lock on each job directory while it is being
    parsed. Any failure is reported to metadata db and re-raised.
    """
    start_time = datetime.datetime.now()
    # Record the processed jobs so that
    # we can send the duration of parsing to metadata db.
    processed_jobs = set()

    options, args = parse_args()
    parse_options = _ParseOptions(options.reparse, options.mailit,
                                  options.dry_run, options.suite_report,
                                  options.datastore_creds,
                                  options.export_to_gcloud_path)
    results_dir = os.path.abspath(args[0])
    assert os.path.exists(results_dir)

    pid_file_manager = pidfile.PidFileManager("parser", results_dir)

    if options.write_pidfile:
        pid_file_manager.open_file()

    try:
        # build up the list of job dirs to parse
        if options.singledir:
            jobs_list = [results_dir]
        else:
            jobs_list = [
                os.path.join(results_dir, subdir)
                for subdir in os.listdir(results_dir)
            ]

        # build up the database
        db = tko_db.db(autocommit=False,
                       host=options.db_host,
                       user=options.db_user,
                       password=options.db_pass,
                       database=options.db_name)

        # parse all the jobs
        for path in jobs_list:
            lockfile = open(os.path.join(path, ".parse.lock"), "w")
            flags = fcntl.LOCK_EX
            if options.noblock:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(lockfile, flags)
            except IOError as e:
                # The lock was not taken, so the handle is of no further use;
                # close it on every failure path instead of leaking it.
                lockfile.close()
                # lock is not available and nonblock has been requested
                if e.errno == errno.EWOULDBLOCK:
                    continue
                raise  # something unexpected happened
            try:
                new_jobs = parse_path(db, path, options.level, parse_options)
                processed_jobs.update(new_jobs)

            finally:
                fcntl.flock(lockfile, fcntl.LOCK_UN)
                lockfile.close()

    except Exception as e:
        pid_file_manager.close_file(1)

        metadata = {
            'results_dir': results_dir,
            'error': str(e),
            'details': traceback.format_exc()
        }
        autotest_es.post(use_http=True,
                         type_str='parse_failure_final',
                         metadata=metadata)

        raise
    else:
        pid_file_manager.close_file(0)
    duration_secs = (datetime.datetime.now() - start_time).total_seconds()
    if options.record_duration:
        record_parsing(processed_jobs, duration_secs)
def parse_one(db, jobname, path, parse_options):
    """Parse a single job. Optionally send email on failure.

    The status log in `path` is parsed into a job object. Unless this is a dry
    run, the job is uploaded to Sponge, written to the tko database and its
    perf values are uploaded. The job is then serialized to a binary file, a
    suite timing report is generated for suite jobs when requested, and the
    gs_offloader instructions file is updated for successful jobs.

    @param db: database object.
    @param jobname: the tag used to search for existing job in db,
                    e.g. '1234-chromeos-test/host1'
    @param path: The path to the results to be parsed.
    @param parse_options: _ParseOptions instance.

    @raise Exception: Whatever went wrong while writing the job into the tko
                      database; the failure is posted to metadata db first.
    """
    reparse = parse_options.reparse
    mail_on_failure = parse_options.mail_on_failure
    dry_run = parse_options.dry_run
    suite_report = parse_options.suite_report
    datastore_creds = parse_options.datastore_creds
    export_to_gcloud_path = parse_options.export_to_gcloud_path

    tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
    old_job_idx = db.find_job(jobname)
    # old tests is a dict from tuple (test_name, subdir) to test_idx
    old_tests = {}
    if old_job_idx is not None:
        if not reparse:
            tko_utils.dprint("! Job is already parsed, done")
            return

        raw_old_tests = db.select("test_idx,subdir,test", "tko_tests",
                                  {"job_idx": old_job_idx})
        if raw_old_tests:
            old_tests = dict(((test, subdir), test_idx)
                             for test_idx, subdir, test in raw_old_tests)

    # look up the status version
    job_keyval = models.job.read_keyval(path)
    status_version = job_keyval.get("status_version", 0)

    # parse out the job
    parser = parser_lib.parser(status_version)
    job = parser.make_job(path)
    status_log = os.path.join(path, "status.log")
    if not os.path.exists(status_log):
        status_log = os.path.join(path, "status")
    if not os.path.exists(status_log):
        tko_utils.dprint("! Unable to parse job, no status file")
        return

    # parse the status logs
    tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
    # Read through a context manager so the file handle is always closed.
    with open(status_log) as status_file:
        status_lines = status_file.readlines()
    parser.start(job)
    tests = parser.end(status_lines)

    # parser.end can return the same object multiple times, so filter out dups
    job.tests = []
    already_added = set()
    for test in tests:
        if test not in already_added:
            already_added.add(test)
            job.tests.append(test)

    # try and port test_idx over from the old tests, but if old tests stop
    # matching up with new ones just give up
    if reparse and old_job_idx is not None:
        job.index = old_job_idx
        for test in job.tests:
            test_idx = old_tests.pop((test.testname, test.subdir), None)
            if test_idx is not None:
                test.test_idx = test_idx
            else:
                tko_utils.dprint("! Reparse returned new test "
                                 "testname=%r subdir=%r" %
                                 (test.testname, test.subdir))
        if not dry_run:
            # Whatever is left in old_tests no longer exists in the new
            # results; remove its rows from every table that refers to it.
            for test_idx in old_tests.values():
                where = {'test_idx': test_idx}
                db.delete('tko_iteration_result', where)
                db.delete('tko_iteration_perf_value', where)
                db.delete('tko_iteration_attributes', where)
                db.delete('tko_test_attributes', where)
                db.delete('tko_test_labels_tests', {'test_id': test_idx})
                db.delete('tko_tests', where)

    job.build = None
    job.board = None
    job.build_version = None
    job.suite = None
    if job.label:
        label_info = site_utils.parse_job_name(job.label)
        if label_info:
            job.build = label_info.get('build', None)
            job.build_version = label_info.get('build_version', None)
            job.board = label_info.get('board', None)
            job.suite = label_info.get('suite', None)

    # Upload job details to Sponge.
    if not dry_run:
        sponge_url = sponge_utils.upload_results(job, log=tko_utils.dprint)
        if sponge_url:
            job.keyval_dict['sponge_url'] = sponge_url

    # check for failures
    message_lines = [""]
    job_successful = True
    for test in job.tests:
        if not test.subdir:
            continue
        tko_utils.dprint("* testname, status, reason: %s %s %s" %
                         (test.subdir, test.status, test.reason))
        if test.status != 'GOOD':
            job_successful = False
            message_lines.append(
                format_failure_message(jobname, test.kernel.base, test.subdir,
                                       test.status, test.reason))

    # job_data is only produced when the job is written to the database; keep
    # it defined on dry runs so the suite report step below can check for it.
    job_data = None
    try:
        message = "\n".join(message_lines)

        if not dry_run:
            # send out a email report of failure
            if len(message) > 2 and mail_on_failure:
                tko_utils.dprint(
                    "Sending email report of failure on %s to %s" %
                    (jobname, job.user))
                mailfailure(jobname, job, message)

            # write the job into the database.
            job_data = db.insert_job(jobname,
                                     job,
                                     parent_job_id=job_keyval.get(
                                         constants.PARENT_JOB_ID, None))

            # Upload perf values to the perf dashboard, if applicable.
            for test in job.tests:
                perf_uploader.upload_test(job, test, jobname)

            # Although the cursor has autocommit, we still need to force it to
            # commit existing changes before we can use django models, otherwise
            # it will go into deadlock when django models try to start a new
            # trasaction while the current one has not finished yet.
            db.commit()

            # Handle retry job.
            orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID,
                                             None)
            if orig_afe_job_id:
                orig_job_idx = tko_models.Job.objects.get(
                    afe_job_id=orig_afe_job_id).job_idx
                _invalidate_original_tests(orig_job_idx, job.index)
    except Exception as e:
        metadata = {
            'path': path,
            'error': str(e),
            'details': traceback.format_exc()
        }
        tko_utils.dprint("Hit exception while uploading to tko db:\n%s" %
                         traceback.format_exc())
        autotest_es.post(use_http=True,
                         type_str='parse_failure',
                         metadata=metadata)
        # A bare raise keeps the original traceback.
        raise

    # Serializing job into a binary file
    try:
        from autotest_lib.tko import tko_pb2
        from autotest_lib.tko import job_serializer

        serializer = job_serializer.JobSerializer()
        binary_file_name = os.path.join(path, "job.serialize")
        serializer.serialize_to_binary(job, jobname, binary_file_name)

        if reparse:
            site_export_file = "autotest_lib.tko.site_export"
            site_export = utils.import_site_function(__file__,
                                                     site_export_file,
                                                     "site_export",
                                                     _site_export_dummy)
            site_export(binary_file_name)

    except ImportError:
        tko_utils.dprint("DEBUG: tko_pb2.py doesn't exist. Create by "
                         "compiling tko/tko.proto.")

    if not dry_run:
        db.commit()

    # Generate a suite report.
    # Check whether this is a suite job, a suite job will be a hostless job, its
    # jobname will be <JOB_ID>-<USERNAME>/hostless, the suite field will not be
    # NULL. Only generate timeline report when datastore_parent_key is given.
    # The report is best effort: any failure here is only logged.
    try:
        datastore_parent_key = job_keyval.get('datastore_parent_key', None)
        if (suite_report and jobname.endswith('/hostless')
                and job_data is not None and job_data['suite']
                and datastore_parent_key):
            tko_utils.dprint('Start dumping suite timing report...')
            timing_log = os.path.join(path, 'suite_timing.log')
            dump_cmd = (
                "%s/site_utils/dump_suite_report.py %s "
                "--output='%s' --debug" %
                (common.autotest_dir, job_data['afe_job_id'], timing_log))
            subprocess.check_output(dump_cmd, shell=True)
            tko_utils.dprint('Successfully finish dumping suite timing report')

            if (datastore_creds and export_to_gcloud_path
                    and os.path.exists(export_to_gcloud_path)):
                upload_cmd = [
                    export_to_gcloud_path, datastore_creds, timing_log,
                    '--parent_key',
                    repr(tuple(datastore_parent_key))
                ]
                tko_utils.dprint('Start exporting timeline report to gcloud')
                subprocess.check_output(upload_cmd)
                tko_utils.dprint('Successfully export timeline report to '
                                 'gcloud')
            else:
                tko_utils.dprint('DEBUG: skip exporting suite timeline to '
                                 'gcloud, because either gcloud creds or '
                                 'export_to_gcloud script is not found.')
    except Exception as e:
        tko_utils.dprint("WARNING: fail to dump/export suite report. "
                         "Error:\n%s" % e)

    # Mark GS_OFFLOADER_NO_OFFLOAD in gs_offloader_instructions at the end of
    # the function, so any failure, e.g., db connection error, will stop
    # gs_offloader_instructions being updated, and logs can be uploaded for
    # troubleshooting.
    if job_successful:
        # Check if we should not offload this test's results.
        if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
            # Update the gs_offloader_instructions json file.
            gs_instructions_file = os.path.join(
                path, constants.GS_OFFLOADER_INSTRUCTIONS)
            gs_offloader_instructions = {}
            if os.path.exists(gs_instructions_file):
                with open(gs_instructions_file, 'r') as f:
                    gs_offloader_instructions = json.load(f)

            gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
            with open(gs_instructions_file, 'w') as f:
                json.dump(gs_offloader_instructions, f)
Beispiel #15
0
    def setup_test(self,
                   name,
                   job_id,
                   server_package_url,
                   result_path,
                   control=None,
                   skip_cleanup=False,
                   job_folder=None,
                   dut_name=None):
        """Setup test container for the test job to run.

        The setup includes:
        1. Create the test container from the base container.
        2. Install autotest_server package from given url.
        3. Apply the deploy configs (pre-start and post-start steps of
           lxc_config.DeployConfigManager).
        4. Mount local site-packages, puppylab, the test result directory and
           any extra mounts listed by the deploy config manager.
        5. Start the container and verify the autotest setup in it.

        TODO(dshi): Setup also needs to include test control file for autoserv
                    to run in container.

        @param name: Name of the container.
        @param job_id: Job id for the test job to run in the test container.
                       Here it is only reported in the metadata posted at the
                       end of the setup.
        @param server_package_url: Url to download autotest_server package.
        @param result_path: Directory to be mounted to container to store test
                            results. Must already exist.
        @param control: Path to the control file to run the test job. Default is
                        set to None.
        @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                             container failures. NOTE(review): not read in
                             this method body; presumably consumed by a
                             wrapper/decorator - confirm.
        @param job_folder: Folder name of the job, e.g., 123-debug_user. Used
                           to build the result mount point inside the
                           container and to verify the autotest setup.
                           NOTE(review): no fallback is applied when this is
                           left as None - confirm callers always pass it.
        @param dut_name: Name of the dut to run test, used as the hostname of
                         the container. Default is None.
        @return: A Container object for the test container.

        @raise ContainerError: If `result_path` does not exist. The helpers
                               called here may also raise it when the container
                               does not exist or is not running.
        """
        start_time = time.time()

        if not os.path.exists(result_path):
            # Interpolate the path into the message. Passing it as a second
            # constructor argument would leave the '%s' placeholder as is,
            # because exception constructors do not apply %-formatting.
            raise error.ContainerError(
                'Result directory does not exist: %s' % result_path)
        result_path = os.path.abspath(result_path)

        # Save control file to result_path temporarily. The reason is that the
        # control file in drone_tmp folder can be deleted during scheduler
        # restart. For test not using SSP, the window between test starts and
        # control file being picked up by the test is very small (< 2 seconds).
        # However, for tests using SSP, it takes around 1 minute before the
        # container is setup. If scheduler is restarted during that period, the
        # control file will be deleted, and the test will fail.
        if control:
            control_file_name = os.path.basename(control)
            safe_control = os.path.join(result_path, control_file_name)
            utils.run('cp %s %s' % (control, safe_control))

        # Create test container from the base container.
        container = self.create_from_base(name)

        # Update the hostname of the test container to be `dut_name`.
        # Some TradeFed tests use hostname in test results, which is used to
        # group test results in dashboard. The default container name is set to
        # be the name of the folder, which is unique (as it is composed of job
        # id and timestamp). For better result view, the container's hostname
        # is set to be a string containing the dut hostname.
        if dut_name:
            config_file = os.path.join(container.container_path, name,
                                       'config')
            # Dots in the dut hostname are replaced with underscores before it
            # is put into the utsname setting.
            lxc_utsname_setting = (
                'lxc.utsname = ' +
                CONTAINER_UTSNAME_FORMAT % dut_name.replace('.', '_'))
            utils.run(APPEND_CMD_FMT % {
                'content': lxc_utsname_setting,
                'file': config_file
            })

        # Deploy server side package
        usr_local_path = os.path.join(container.rootfs, 'usr', 'local')
        autotest_pkg_path = os.path.join(usr_local_path,
                                         'autotest_server_package.tar.bz2')
        autotest_path = os.path.join(usr_local_path, 'autotest')
        # sudo is required so os.makedirs may not work.
        utils.run('sudo mkdir -p %s' % usr_local_path)

        download_extract(server_package_url, autotest_pkg_path, usr_local_path)
        deploy_config_manager = lxc_config.DeployConfigManager(container)
        deploy_config_manager.deploy_pre_start()

        # Copy over control file to run the test job.
        if control:
            container_drone_temp = os.path.join(autotest_path, 'drone_tmp')
            utils.run('sudo mkdir -p %s' % container_drone_temp)
            container_control_file = os.path.join(container_drone_temp,
                                                  control_file_name)
            # Move the control file stored in the result folder to container.
            utils.run('sudo mv %s %s' % (safe_control, container_control_file))

        if IS_MOBLAB:
            site_packages_path = MOBLAB_SITE_PACKAGES
            # Drop the first character of the container-side path.
            site_packages_container_path = MOBLAB_SITE_PACKAGES_CONTAINER[1:]
        else:
            site_packages_path = os.path.join(common.autotest_dir,
                                              'site-packages')
            site_packages_container_path = os.path.join(
                lxc_config.CONTAINER_AUTOTEST_DIR, 'site-packages')
        # Each entry is (source, destination, readonly).
        mount_entries = [
            (site_packages_path, site_packages_container_path, True),
            (os.path.join(common.autotest_dir, 'puppylab'),
             os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR,
                          'puppylab'), True),
            (result_path, RESULT_DIR_FMT % job_folder, False),
        ]
        for mount_config in deploy_config_manager.mount_configs:
            mount_entries.append((mount_config.source, mount_config.target,
                                  mount_config.readonly))
        # Update container config to mount directories.
        for source, destination, readonly in mount_entries:
            container.mount_dir(source, destination, readonly)

        # Update file permissions.
        # TODO(dshi): crbug.com/459344 Skip following action when test container
        # can be unprivileged container.
        utils.run('sudo chown -R root "%s"' % autotest_path)
        utils.run('sudo chgrp -R root "%s"' % autotest_path)

        container.start(name)
        deploy_config_manager.deploy_post_start()

        container.modify_import_order()

        container.verify_autotest_setup(job_folder)

        # Report how long the container setup took.
        autotest_es.post(use_http=True,
                         type_str=CONTAINER_CREATE_METADB_TYPE,
                         metadata={
                             'drone': socket.gethostname(),
                             'job_id': job_id,
                             'time_used': time.time() - start_time,
                             'success': True
                         })

        logging.debug('Test container %s is set up.', name)
        return container
Beispiel #16
0
    def setup_test(self, name, job_id, server_package_url, result_path,
                   control=None, skip_cleanup=False):
        """Setup test container for the test job to run.

        The setup includes:
        1. Create the test container from the base container.
        2. Install autotest_server package from given url.
        3. Apply the deploy configs (pre-start and post-start steps of
           lxc_config.DeployConfigManager).
        4. Mount local site-packages, puppylab and the test result directory.
        5. Start the container and verify the autotest setup in it.

        TODO(dshi): Setup also needs to include test control file for autoserv
                    to run in container.

        @param name: Name of the container.
        @param job_id: Job id for the test job to run in the test container.
                       Also used to build the result mount point inside the
                       container and to verify the autotest setup.
        @param server_package_url: Url to download autotest_server package.
        @param result_path: Directory to be mounted to container to store test
                            results. Must already exist.
        @param control: Path to the control file to run the test job. Default is
                        set to None.
        @param skip_cleanup: Set to True to skip cleanup, used to troubleshoot
                             container failures. NOTE(review): not read in
                             this method body; presumably consumed by a
                             wrapper/decorator - confirm.

        @return: A Container object for the test container.

        @raise ContainerError: If `result_path` does not exist. The helpers
                               called here may also raise it when the container
                               does not exist or is not running.
        """
        start_time = time.time()

        if not os.path.exists(result_path):
            # Interpolate the path into the message. Passing it as a second
            # constructor argument would leave the '%s' placeholder as is,
            # because exception constructors do not apply %-formatting.
            raise error.ContainerError(
                    'Result directory does not exist: %s' % result_path)
        result_path = os.path.abspath(result_path)

        # Create test container from the base container.
        container = self.create_from_base(name)

        # Deploy server side package
        usr_local_path = os.path.join(container.rootfs, 'usr', 'local')
        autotest_pkg_path = os.path.join(usr_local_path,
                                         'autotest_server_package.tar.bz2')
        autotest_path = os.path.join(usr_local_path, 'autotest')
        # sudo is required so os.makedirs may not work.
        utils.run('sudo mkdir -p %s' % usr_local_path)

        download_extract(server_package_url, autotest_pkg_path, usr_local_path)
        deploy_config_manager = lxc_config.DeployConfigManager(container)
        deploy_config_manager.deploy_pre_start()

        # Copy over control file to run the test job.
        if control:
            container_drone_temp = os.path.join(autotest_path, 'drone_tmp')
            utils.run('sudo mkdir -p %s' % container_drone_temp)
            container_control_file = os.path.join(
                    container_drone_temp, os.path.basename(control))
            utils.run('sudo cp %s %s' % (control, container_control_file))

        if IS_MOBLAB:
            site_packages_path = MOBLAB_SITE_PACKAGES
            # Drop the first character of the container-side path.
            site_packages_container_path = MOBLAB_SITE_PACKAGES_CONTAINER[1:]
        else:
            site_packages_path = os.path.join(common.autotest_dir,
                                              'site-packages')
            site_packages_container_path = os.path.join(
                    lxc_config.CONTAINER_AUTOTEST_DIR, 'site-packages')
        # Each entry is (source, destination, readonly).
        mount_entries = [(site_packages_path, site_packages_container_path,
                          True),
                         (os.path.join(common.autotest_dir, 'puppylab'),
                          os.path.join(lxc_config.CONTAINER_AUTOTEST_DIR,
                                       'puppylab'),
                          True),
                         (result_path,
                          RESULT_DIR_FMT % job_id,
                          False),
                        ]
        # Update container config to mount directories.
        for source, destination, readonly in mount_entries:
            container.mount_dir(source, destination, readonly)

        # Update file permissions.
        # TODO(dshi): crbug.com/459344 Skip following action when test container
        # can be unprivileged container.
        utils.run('sudo chown -R root "%s"' % autotest_path)
        utils.run('sudo chgrp -R root "%s"' % autotest_path)

        container.start(name)
        deploy_config_manager.deploy_post_start()

        container.modify_import_order()

        container.verify_autotest_setup(job_id)

        # Report how long the container setup took.
        autotest_es.post(use_http=True,
                         type_str=CONTAINER_CREATE_METADB_TYPE,
                         metadata={'drone': socket.gethostname(),
                                   'job_id': job_id,
                                   'time_used': time.time() - start_time,
                                   'success': True})

        logging.debug('Test container %s is set up.', name)
        return container