Beispiel #1
0
    def insert_job(self, tag, job, commit=None):
        """Write a job, its keyvals and its tests to the database.

        The machine for the job is looked up first: when the lookup yields a
        falsy index a machine row is inserted, otherwise the existing machine
        information is refreshed.  The tko_jobs row is updated in place when
        the job object already has an ``index`` attribute; otherwise a new row
        is inserted and the last autonumber value is stored on ``job.index``.

        @param tag: The job tag; also used to derive the afe_job_id.
        @param job: The job object to store.
        @param commit: Passed through unchanged to every database helper.
        """
        job.machine_idx = self.lookup_machine(job.machine)
        if job.machine_idx:
            self.update_machine_information(job, commit=commit)
        else:
            job.machine_idx = self.insert_machine(job, commit=commit)

        afe_job_id = utils.get_afe_job_id(tag)

        row = dict(
            tag=tag,
            label=job.label,
            username=job.user,
            machine_idx=job.machine_idx,
            queued_time=job.queued_time,
            started_time=job.started_time,
            finished_time=job.finished_time,
            afe_job_id=afe_job_id,
        )

        # A job without an ``index`` attribute has not been stored yet.
        if not hasattr(job, "index"):
            self.insert("tko_jobs", row, commit=commit)
            job.index = self.get_last_autonumber_value()
        else:
            self.update("tko_jobs", row, {"job_idx": job.index}, commit=commit)

        self.update_job_keyvals(job, commit=commit)
        for job_test in job.tests:
            self.insert_test(job, job_test, commit=commit)
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided builds, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    The keyword arguments are turned into a _SuiteSpec, retrying AFE and TKO
    clients are created for the suite job's user, and the id of the running
    suite job is derived from the tag of dargs['job'] (None when the derived
    value is not a valid integer string) before everything is handed to
    _perform_reimage_and_run().

    @param dargs: Dictionary containing the arguments passed to _SuiteSpec().
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises MalformedDependenciesException: if the dependency_info file for
                                            the required build fails to parse.
    """
    suite_spec = _SuiteSpec(**dargs)

    # Both RPC clients are configured identically.
    rpc_options = dict(timeout_min=30, delay_sec=10,
                       user=suite_spec.job.user, debug=False)
    afe = frontend_wrappers.RetryingAFE(**rpc_options)
    tko = frontend_wrappers.RetryingTKO(**rpc_options)

    try:
        my_job_id = int(tko_utils.get_afe_job_id(dargs['job'].tag))
    except ValueError:
        my_job_id = None
        logging.warning('Could not determine own job id.')
    else:
        logging.debug('Determined own job id: %d', my_job_id)

    _perform_reimage_and_run(suite_spec, afe, tko, suite_job_id=my_job_id)

    logging.debug('Returning from dynamic_suite.reimage_and_run.')
Beispiel #3
0
    def insert_job(self, tag, job, commit=None):
        """Store a job in tko_jobs, then store its keyvals and tests.

        A machine row is inserted when lookup_machine() yields a falsy index;
        otherwise the existing machine information is updated.  A job that
        already has an ``index`` attribute is updated in place; any other job
        is inserted and receives the last autonumber value as its ``index``.

        @param tag: The job tag; the afe_job_id is derived from it.
        @param job: The job object to store.
        @param commit: Passed through unchanged to every database helper.
        """
        job.machine_idx = self.lookup_machine(job.machine)
        if job.machine_idx:
            self.update_machine_information(job, commit=commit)
        else:
            job.machine_idx = self.insert_machine(job, commit=commit)

        afe_job_id = utils.get_afe_job_id(tag)

        job_row = {}
        job_row['tag'] = tag
        job_row['label'] = job.label
        job_row['username'] = job.user
        job_row['machine_idx'] = job.machine_idx
        job_row['queued_time'] = job.queued_time
        job_row['started_time'] = job.started_time
        job_row['finished_time'] = job.finished_time
        job_row['afe_job_id'] = afe_job_id

        already_stored = hasattr(job, 'index')
        if not already_stored:
            self.insert('tko_jobs', job_row, commit=commit)
            job.index = self.get_last_autonumber_value()
        else:
            where = {'job_idx': job.index}
            self.update('tko_jobs', job_row, where, commit=commit)

        self.update_job_keyvals(job, commit=commit)
        for job_test in job.tests:
            self.insert_test(job, job_test, commit=commit)
Beispiel #4
0
def run_provision_suite(**dargs):
    """
    Run a provision suite.

    Will re-image a number of devices (of the specified board) with the
    provided builds by scheduling dummy_Pass.

    A _SuiteSpec is built from the keyword arguments, retrying AFE and TKO
    clients are created for the suite job's user, and a ProvisionSuite
    configured from the spec (with count fixed to 1) is run through
    _run_suite_with_spec().

    @param dargs: Keyword arguments forwarded to _SuiteSpec(). The resulting
                  spec supplies the job, name, builds, board and the other
                  attributes read below.

    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises MalformedDependenciesException: if the dependency_info file for
                                            the required build fails to parse.
    """
    spec = _SuiteSpec(**dargs)

    # Both RPC clients are configured identically.
    rpc_options = {
        'timeout_min': 30,
        'delay_sec': 10,
        'user': spec.job.user,
        'debug': False,
    }
    afe = frontend_wrappers.RetryingAFE(**rpc_options)
    tko = frontend_wrappers.RetryingTKO(**rpc_options)

    try:
        my_job_id = int(tko_utils.get_afe_job_id(spec.job.tag))
    except ValueError:
        my_job_id = None
        logging.warning('Could not determine own job id.')
    else:
        logging.debug('Determined own job id: %d', my_job_id)

    suite_options = {
        'tag': spec.name,
        'builds': spec.builds,
        'board': spec.board,
        'devserver': spec.devserver,
        'count': 1,
        'afe': afe,
        'tko': tko,
        'pool': spec.pool,
        'results_dir': spec.job.resultdir,
        'max_runtime_mins': spec.max_runtime_mins,
        'timeout_mins': spec.timeout_mins,
        'file_bugs': spec.file_bugs,
        'suite_job_id': my_job_id,
        'extra_deps': spec.suite_dependencies,
        'priority': spec.priority,
        'wait_for_results': spec.wait_for_results,
        'job_retry': spec.job_retry,
        'max_retries': spec.max_retries,
        'offload_failures_only': spec.offload_failures_only,
        'test_source_build': spec.test_source_build,
        'run_prod_code': spec.run_prod_code,
        'job_keyvals': spec.job_keyvals,
        'test_args': spec.test_args,
    }
    suite = ProvisionSuite(**suite_options)

    _run_suite_with_spec(suite, spec)

    logging.debug('Returning from dynamic_suite.run_provision_suite')
Beispiel #5
0
    def set_afe_job_id_and_tag(self, pb_job, tag):
        """Populate the tag and afe_job_id fields of a pb job.

        @param pb_job: The pb job whose fields are set.
        @param tag: Stored as pb_job.tag and passed to
                    utils.get_afe_job_id() to obtain pb_job.afe_job_id.
        """
        pb_job.tag = tag
        afe_job_id = utils.get_afe_job_id(tag)
        pb_job.afe_job_id = afe_job_id
Beispiel #6
0
    def set_afe_job_id_and_tag(self, pb_job, tag):
        """Fill in the tag and afe_job_id fields of the given pb job.

        @param pb_job: The pb job that will have its fields set.
        @param tag: Assigned to pb_job.tag; the value returned by
                    utils.get_afe_job_id(tag) is assigned to
                    pb_job.afe_job_id.
        """
        pb_job.tag = tag
        derived_job_id = utils.get_afe_job_id(tag)
        pb_job.afe_job_id = derived_job_id
Beispiel #7
0
    def insert_job(self, tag, job, parent_job_id=None, commit=None):
        """Store a tko job together with its keyvals and tests.

        Besides writing to the database, this sets job.machine_idx and
        job.afe_job_id, sets job.afe_parent_job_id when a truthy parent id is
        given, and sets job.index for a newly inserted job.

        @param tag: The job tag.
        @param job: The job object.
        @param parent_job_id: The parent job id.
        @param commit: Passed through unchanged to every database helper.

        @return The dict of data written to the tko_jobs table, with the
                job's index added under 'job_idx'.
        """
        job.machine_idx = self.lookup_machine(job.machine)
        if job.machine_idx:
            # Only try to update tko_machines record if machine is set. This
            # prevents unnecessary db writes for suite jobs.
            if job.machine:
                self.update_machine_information(job, commit=commit)
        else:
            job.machine_idx = self.insert_machine(job, commit=commit)

        afe_job_id = utils.get_afe_job_id(tag)

        row = dict(
            tag=tag,
            label=job.label,
            username=job.user,
            machine_idx=job.machine_idx,
            queued_time=job.queued_time,
            started_time=job.started_time,
            finished_time=job.finished_time,
            afe_job_id=afe_job_id,
            afe_parent_job_id=parent_job_id,
            build=job.build,
            build_version=job.build_version,
            board=job.board,
            suite=job.suite,
        )
        job.afe_job_id = afe_job_id
        if parent_job_id:
            job.afe_parent_job_id = str(parent_job_id)

        # TODO(ntang): check job.index directly.
        if not hasattr(job, 'index'):
            self.insert('tko_jobs', row, commit=commit)
            job.index = self.get_last_autonumber_value()
        else:
            self.update('tko_jobs', row, {'job_idx': job.index},
                        commit=commit)

        self.update_job_keyvals(job, commit=commit)
        for job_test in job.tests:
            self.insert_test(job, job_test, commit=commit)

        row['job_idx'] = job.index
        return row
Beispiel #8
0
    def insert_job(self, tag, job, parent_job_id=None, commit=None):
        """Store a tko job together with its keyvals and tests.

        When the job has a label that site_utils.parse_job_name() can parse,
        the build, build_version, board and suite values from the parsed
        result are written to the tko_jobs row as well.

        @param tag: The job tag; the afe_job_id is derived from it.
        @param job: The job object.
        @param parent_job_id: Stored in the 'afe_parent_job_id' column.
        @param commit: Passed through unchanged to every database helper.
        """
        job.machine_idx = self.lookup_machine(job.machine)
        if job.machine_idx:
            # Only try to update tko_machines record if machine is set. This
            # prevents unnecessary db writes for suite jobs.
            if job.machine:
                self.update_machine_information(job, commit=commit)
        else:
            job.machine_idx = self.insert_machine(job, commit=commit)

        afe_job_id = utils.get_afe_job_id(tag)

        row = dict(
            tag=tag,
            label=job.label,
            username=job.user,
            machine_idx=job.machine_idx,
            queued_time=job.queued_time,
            started_time=job.started_time,
            finished_time=job.finished_time,
            afe_job_id=afe_job_id,
            afe_parent_job_id=parent_job_id,
        )

        if job.label:
            label_info = site_utils.parse_job_name(job.label)
            if label_info:
                for field in ('build', 'build_version', 'board', 'suite'):
                    row[field] = label_info.get(field, None)

        if not hasattr(job, 'index'):
            self.insert('tko_jobs', row, commit=commit)
            job.index = self.get_last_autonumber_value()
        else:
            self.update('tko_jobs', row, {'job_idx': job.index},
                        commit=commit)

        self.update_job_keyvals(job, commit=commit)
        for job_test in job.tests:
            self.insert_test(job, job_test, commit=commit)
Beispiel #9
0
def reimage_and_run(**dargs):
    """
    Backward-compatible API for dynamic_suite.

    Will re-image a number of devices (of the specified board) with the
    provided build, and then run the indicated test suite on them.
    Guaranteed to be compatible with any build from stable to dev.

    @param dargs: Dictionary containing the arguments listed below.

    Currently required args:
    @param board: which kind of devices to reimage.
    @param name: a value of the SUITE control file variable to search for.
    @param job: an instance of client.common_lib.base_job representing the
                currently running suite job.

    Currently supported optional args:
    @param build: the build to install e.g.
                  x86-alex-release/R18-1655.0.0-a1-b1584.
    @param builds: the builds to install e.g.
                   {'cros-version:': 'x86-alex-release/R18-1655.0.0',
                    'fw-version:':  'x86-alex-firmware/R36-5771.50.0'}
    @param pool: specify the pool of machines to use for scheduling purposes.
                 Default: None
    @param num: the maximum number of devices to reimage.
                Default in global_config
    @param check_hosts: require appropriate hosts to be available now.
    @param add_experimental: schedule experimental tests as well, or not.
                             Default: True
    @param file_bugs: automatically file bugs on test failures.
                      Default: False
    @param suite_dependencies: A string with a comma separated list of suite
                               level dependencies, which act just like test
                               dependencies and are appended to each test's
                               set of dependencies at job creation time.
    @param devserver_url: url to the selected devserver.
    @param predicate: Optional argument. If present, should be a function
                      mapping ControlData objects to True if they should be
                      included in suite. If argument is absent, suite
                      behavior will default to creating a suite based on
                      the SUITE field of control files.
    @param job_retry: A bool value indicating whether jobs should be retried
                      on failure. If True, the field 'JOB_RETRIES' in control
                      files will be respected. If False, do not retry.
    @param max_retries: Maximum retry limit at suite level.
                        Regardless how many times each individual test
                        has been retried, the total number of retries
                        happening in the suite can't exceed max_retries.
                        Default to None, no max.
    @param offload_failures_only: Only enable gs_offloading for failed jobs.
    @raises AsynchronousBuildFailure: if there was an issue finishing staging
                                      from the devserver.
    @raises MalformedDependenciesException: if the dependency_info file for
                                            the required build fails to parse.
    """
    suite_spec = SuiteSpec(**dargs)

    # Backwards-compatibility handling: SuiteSpec has a `builds` option so
    # that both CrOS and firmware can be provisioned, e.g.,
    # builds = {'cros-version:': 'x86-alex-release/R18-1655.0.0',
    #           'fw-version:':  'x86-alex-firmware/R36-5771.50.0'}
    # The options build, version_prefix and firmware_reimage will all be
    # obsoleted. Until then they default to
    # firmware_reimage = False
    # version_prefix = provision.CROS_VERSION_PREFIX
    # and build is used as the CrOS build.
    suite_spec.firmware_reimage = False

    # version_prefix+build should make it into each test as a DEPENDENCY.  The
    # easiest way to do this is to tack it onto the suite_dependencies.
    build_dependencies = (
        provision.join(prefix, build_name)
        for prefix, build_name in suite_spec.builds.items())
    suite_spec.suite_dependencies.extend(build_dependencies)

    # Both RPC clients are configured identically.
    rpc_options = dict(timeout_min=30,
                       delay_sec=10,
                       user=suite_spec.job.user,
                       debug=False)
    afe = frontend_wrappers.RetryingAFE(**rpc_options)
    tko = frontend_wrappers.RetryingTKO(**rpc_options)

    try:
        my_job_id = int(tko_utils.get_afe_job_id(dargs['job'].tag))
    except ValueError:
        my_job_id = None
        logging.warning('Could not determine own job id.')
    else:
        logging.debug('Determined own job id: %d', my_job_id)

    # Fall back to matching on the suite name when no predicate is supplied.
    predicate = suite_spec.predicate
    if predicate is None:
        predicate = Suite.name_in_tag_predicate(suite_spec.name)

    _perform_reimage_and_run(suite_spec, afe, tko, predicate,
                             suite_job_id=my_job_id)

    logging.debug('Returning from dynamic_suite.reimage_and_run.')
def parse_one(db, pid_file_manager, jobname, path, parse_options):
    """Parse a single job. Optionally send email on failure.

    Reads the job keyvals and status log found under `path`, attaches job ids
    and build information to the parsed job, records result sizes in the job
    keyvals and counts failed tests on `pid_file_manager`. Unless this is a
    dry run it also mails a failure report (if requested), uploads perf
    values and Sponge results, writes the job to the database and handles
    retry jobs. Afterwards it optionally serializes the job to a file, dumps
    and exports a suite timing report, and updates the gs_offloader
    instructions file for successful jobs.

    Returns early without parsing when the job is already in the database
    and `reparse` is off, or when no status log can be found.

    @param db: database object.
    @param pid_file_manager: pidfile.PidFileManager object; its
                             num_tests_failed counter is incremented for
                             every failed test.
    @param jobname: the tag used to search for existing job in db,
                    e.g. '1234-chromeos-test/host1'
    @param path: The path to the results to be parsed.
    @param parse_options: _ParseOptions instance. The fields read here are
                          reparse, mail_on_failure, dry_run, suite_report,
                          datastore_creds and export_to_gcloud_path.

    @raises subprocess.CalledProcessError: if the suite report dump or the
                                           gcloud export command exits with
                                           a non-zero status.
    """
    reparse = parse_options.reparse
    mail_on_failure = parse_options.mail_on_failure
    dry_run = parse_options.dry_run
    suite_report = parse_options.suite_report
    datastore_creds = parse_options.datastore_creds
    export_to_gcloud_path = parse_options.export_to_gcloud_path

    tko_utils.dprint("\nScanning %s (%s)" % (jobname, path))
    # A job that is already in the database is only parsed again on request.
    old_job_idx = db.find_job(jobname)
    if old_job_idx is not None and not reparse:
        tko_utils.dprint("! Job is already parsed, done")
        return

    # look up the status version
    job_keyval = models.job.read_keyval(path)
    status_version = job_keyval.get("status_version", 0)

    parser = parser_lib.parser(status_version)
    job = parser.make_job(path)
    tko_utils.dprint("+ Parsing dir=%s, jobname=%s" % (path, jobname))
    status_log_path = _find_status_log_path(path)
    if not status_log_path:
        tko_utils.dprint("! Unable to parse job, no status file")
        return
    _parse_status_log(parser, job, status_log_path)

    # Reparse of an existing job: reuse its index and, unless this is a dry
    # run, delete the tests that _match_existing_tests() reports as unmatched.
    if old_job_idx is not None:
        job.job_idx = old_job_idx
        unmatched_tests = _match_existing_tests(db, job)
        if not dry_run:
            _delete_tests_from_db(db, unmatched_tests)

    job.afe_job_id = tko_utils.get_afe_job_id(jobname)
    job.skylab_task_id = tko_utils.get_skylab_task_id(jobname)
    # NOTE(review): both parent ids are read from the same PARENT_JOB_ID
    # keyval -- confirm this is intended.
    job.afe_parent_job_id = job_keyval.get(constants.PARENT_JOB_ID)
    job.skylab_parent_task_id = job_keyval.get(constants.PARENT_JOB_ID)
    # Build information defaults to None and is only filled in when the job
    # label can be parsed.
    job.build = None
    job.board = None
    job.build_version = None
    job.suite = None
    if job.label:
        label_info = site_utils.parse_job_name(job.label)
        if label_info:
            job.build = label_info.get('build', None)
            job.build_version = label_info.get('build_version', None)
            job.board = label_info.get('board', None)
            job.suite = label_info.get('suite', None)

    # Point result_utils_lib.LOG at dprint before throttling the results.
    result_utils_lib.LOG =  tko_utils.dprint
    _throttle_result_size(path)

    # Record test result size to job_keyvals
    start_time = time.time()
    result_size_info = site_utils.collect_result_sizes(
            path, log=tko_utils.dprint)
    tko_utils.dprint('Finished collecting result sizes after %s seconds' %
                     (time.time()-start_time))
    job.keyval_dict.update(result_size_info.__dict__)

    # TODO(dshi): Update sizes with sponge_invocation.xml and throttle it.

    # check for failures
    # message_lines starts with one empty string, so the joined message is
    # empty when no test failed and the length check before mailing is false.
    message_lines = [""]
    job_successful = True
    for test in job.tests:
        # Tests without a subdir are skipped.
        if not test.subdir:
            continue
        tko_utils.dprint("* testname, subdir, status, reason: %s %s %s %s"
                         % (test.testname, test.subdir, test.status,
                            test.reason))
        # Any status other than GOOD or WARN counts as a failure.
        if test.status not in ('GOOD', 'WARN'):
            job_successful = False
            pid_file_manager.num_tests_failed += 1
            message_lines.append(format_failure_message(
                jobname, test.kernel.base, test.subdir,
                test.status, test.reason))

    message = "\n".join(message_lines)

    if not dry_run:
        # send out a email report of failure
        if len(message) > 2 and mail_on_failure:
            tko_utils.dprint("Sending email report of failure on %s to %s"
                                % (jobname, job.user))
            mailfailure(jobname, job, message)

        # Upload perf values to the perf dashboard, if applicable.
        for test in job.tests:
            perf_uploader.upload_test(job, test, jobname)

        # Upload job details to Sponge.
        sponge_url = sponge_utils.upload_results(job, log=tko_utils.dprint)
        if sponge_url:
            job.keyval_dict['sponge_url'] = sponge_url

        _write_job_to_db(db, jobname, job)

        # Verify the job data is written to the database.
        # A mismatch is only logged and counted in a metric; parsing continues.
        if job.tests:
            tests_in_db = db.find_tests(job.job_idx)
            tests_in_db_count = len(tests_in_db) if tests_in_db else 0
            if tests_in_db_count != len(job.tests):
                tko_utils.dprint(
                        'Failed to find enough tests for job_idx: %d. The '
                        'job should have %d tests, only found %d tests.' %
                        (job.job_idx, len(job.tests), tests_in_db_count))
                metrics.Counter(
                        'chromeos/autotest/result/db_save_failure',
                        description='The number of times parse failed to '
                        'save job to TKO database.').increment()

        # Although the cursor has autocommit, we still need to force it to
        # commit existing changes before we can use django models, otherwise
        # it will go into deadlock when django models try to start a new
        # transaction while the current one has not finished yet.
        db.commit()

        # Handle retry job.
        orig_afe_job_id = job_keyval.get(constants.RETRY_ORIGINAL_JOB_ID,
                                            None)
        if orig_afe_job_id:
            orig_job_idx = tko_models.Job.objects.get(
                    afe_job_id=orig_afe_job_id).job_idx
            _invalidate_original_tests(orig_job_idx, job.job_idx)

    # Serializing job into a binary file
    export_tko_to_file = global_config.global_config.get_config_value(
            'AUTOSERV', 'export_tko_job_to_file', type=bool, default=False)

    binary_file_name = os.path.join(path, "job.serialize")
    if export_tko_to_file:
        export_tko_job_to_file(job, jobname, binary_file_name)

    # Commit once more after the retry handling and optional export above.
    if not dry_run:
        db.commit()

    # Generate a suite report.
    # Check whether this is a suite job, a suite job will be a hostless job, its
    # jobname will be <JOB_ID>-<USERNAME>/hostless, the suite field will not be
    # NULL. Only generate timeline report when datastore_parent_key is given.
    datastore_parent_key = job_keyval.get('datastore_parent_key', None)
    provision_job_id = job_keyval.get('provision_job_id', None)
    if (suite_report and jobname.endswith('/hostless')
        and job.suite and datastore_parent_key):
        tko_utils.dprint('Start dumping suite timing report...')
        timing_log = os.path.join(path, 'suite_timing.log')
        dump_cmd = ("%s/site_utils/dump_suite_report.py %s "
                    "--output='%s' --debug" %
                    (common.autotest_dir, job.afe_job_id,
                        timing_log))

        if provision_job_id is not None:
            dump_cmd += " --provision_job_id=%d" % int(provision_job_id)

        # NOTE(review): dump_cmd is a formatted string run through the shell,
        # so a quote or space in autotest_dir or path would break the command.
        # Consider passing an argument list without shell=True, as the gcloud
        # export below does.
        subprocess.check_output(dump_cmd, shell=True)
        tko_utils.dprint('Successfully finish dumping suite timing report')

        # The export only runs when both the credentials and an existing
        # export script path were supplied.
        if (datastore_creds and export_to_gcloud_path
            and os.path.exists(export_to_gcloud_path)):
            upload_cmd = [export_to_gcloud_path, datastore_creds,
                            timing_log, '--parent_key',
                            datastore_parent_key]
            tko_utils.dprint('Start exporting timeline report to gcloud')
            subprocess.check_output(upload_cmd)
            tko_utils.dprint('Successfully export timeline report to '
                                'gcloud')
        else:
            tko_utils.dprint('DEBUG: skip exporting suite timeline to '
                                'gcloud, because either gcloud creds or '
                                'export_to_gcloud script is not found.')

    # Mark GS_OFFLOADER_NO_OFFLOAD in gs_offloader_instructions at the end of
    # the function, so any failure, e.g., db connection error, will stop
    # gs_offloader_instructions being updated, and logs can be uploaded for
    # troubleshooting.
    if job_successful:
        # Check if we should not offload this test's results.
        if job_keyval.get(constants.JOB_OFFLOAD_FAILURES_KEY, False):
            # Update the gs_offloader_instructions json file.
            gs_instructions_file = os.path.join(
                    path, constants.GS_OFFLOADER_INSTRUCTIONS)
            gs_offloader_instructions = {}
            # Keep any instructions already present in the file.
            if os.path.exists(gs_instructions_file):
                with open(gs_instructions_file, 'r') as f:
                    gs_offloader_instructions = json.load(f)

            gs_offloader_instructions[constants.GS_OFFLOADER_NO_OFFLOAD] = True
            with open(gs_instructions_file, 'w') as f:
                json.dump(gs_offloader_instructions, f)