Example #1
def post_log_artifacts(job_log):
    """Post a list of artifacts to a job."""
    logger.debug("Downloading/parsing log for log %s", job_log.id)

    try:
        artifact_list = extract_text_log_artifacts(job_log)
    except LogSizeException as e:
        job_log.update_status(JobLog.SKIPPED_SIZE)
        logger.warning('Skipping parsing log for %s: %s', job_log.id, e)
        return
    except Exception as e:
        job_log.update_status(JobLog.FAILED)

        # Unrecoverable http error (doesn't exist or permission denied).
        # Apparently this can happen somewhat often with taskcluster if
        # the job fails (bug 1154248), so just warn rather than raising,
        # to prevent the noise/load from retrying.
        if isinstance(e, HTTPError) and e.response.status_code in (403, 404):
            logger.warning("Unable to retrieve log for %s: %s", job_log.id, e)
            return

        logger.error("Failed to download/parse log for %s: %s", job_log.id, e)
        raise

    try:
        serialized_artifacts = serialize_artifact_json_blobs(artifact_list)
        store_job_artifacts(serialized_artifacts)
        job_log.update_status(JobLog.PARSED)
        logger.debug("Stored artifact for %s %s", job_log.job.repository.name,
                     job_log.job.id)
    except Exception as e:
        logger.error("Failed to store parsed artifact for %s: %s", job_log.id,
                     e)
        raise
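
The 403/404 branch above relies on the exception object carrying the HTTP response. Below is a minimal, self-contained sketch of that classification, assuming the requests library is the HTTP client (as the `HTTPError.response.status_code` access suggests); the helper name `classify_log_error` is hypothetical, not treeherder API.

import requests

def classify_log_error(exc):
    """Return 'skip' for unrecoverable 403/404 responses, 'reraise' otherwise."""
    if isinstance(exc, requests.exceptions.HTTPError) and \
            exc.response is not None and exc.response.status_code in (403, 404):
        return 'skip'
    return 'reraise'

resp = requests.Response()
resp.status_code = 404
assert classify_log_error(requests.exceptions.HTTPError(response=resp)) == 'skip'
assert classify_log_error(ValueError('parse error')) == 'reraise'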
Example #2
def test_load_long_job_details(test_job):
    def max_length(field):
        """Get the field's max_length for the JobDetail model"""
        return JobDetail._meta.get_field(field).max_length

    (long_title, long_value, long_url) = ('t' * (2 * max_length("title")),
                                          'v' * (2 * max_length("value")),
                                          'https://' + ('u' * (2 * max_length("url"))))
    ji_artifact = {
        'type': 'json',
        'name': 'Job Info',
        'blob': json.dumps({
            'job_details': [{
                'title': long_title,
                'value': long_value,
                'url': long_url
            }]
        }),
        'job_guid': test_job.guid
    }
    store_job_artifacts([ji_artifact])

    assert JobDetail.objects.count() == 1

    jd = JobDetail.objects.all()[0]
    assert jd.title == long_title[:max_length("title")]
    assert jd.value == long_value[:max_length("value")]
    assert jd.url == long_url[:max_length("url")]
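
The assertions above imply that store_job_artifacts clips each JobDetail field to its model max_length. A rough sketch of that kind of truncation, assuming a Django model; `truncate_to_field` is a hypothetical helper, not the treeherder implementation.

def truncate_to_field(model_cls, field_name, value):
    """Clip a string to the model field's max_length (no-op if unlimited)."""
    max_len = model_cls._meta.get_field(field_name).max_length
    return value if max_len is None else value[:max_len]

# e.g. truncate_to_field(JobDetail, 'title', long_title) would equal
# long_title[:max_length("title")], matching the test's expectation.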
Example #3
def test_load_long_job_details(test_job):
    def max_length(field):
        """Get the field's max_length for the JobDetail model"""
        return JobDetail._meta.get_field(field).max_length

    (long_title, long_value, long_url) = (
        't' * (2 * max_length("title")),
        'v' * (2 * max_length("value")),
        'https://' + ('u' * (2 * max_length("url"))),
    )
    ji_artifact = {
        'type': 'json',
        'name': 'Job Info',
        'blob': json.dumps({
            'job_details': [{
                'title': long_title,
                'value': long_value,
                'url': long_url
            }]
        }),
        'job_guid': test_job.guid,
    }
    store_job_artifacts([ji_artifact])

    assert JobDetail.objects.count() == 1

    jd = JobDetail.objects.first()
    assert jd.title == long_title[:max_length("title")]
    assert jd.value == long_value[:max_length("value")]
    assert jd.url == long_url[:max_length("url")]
Example #4
def test_load_textlog_summary_twice(test_repository, test_job):
    text_log_summary_artifact = {
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps({
            'step_data': {
                "steps": [
                    {
                        'name': 'foo',
                        'started': '2016-05-10 12:44:23.103904',
                        'started_linenumber': 8,
                        'finished_linenumber': 10,
                        'finished': '2016-05-10 12:44:23.104394',
                        'result': 'success',
                        'errors': [
                            {
                                "line": '07:51:28  WARNING - foobar',
                                "linenumber": 1587
                            }
                        ]
                    }
                ]
            }
        }),
        'job_guid': test_job.guid
    }

    store_job_artifacts([text_log_summary_artifact])
    assert TextLogError.objects.count() == 1
    assert TextLogStep.objects.count() == 1
    # load again (simulating the job being parsed twice,
    # which sometimes happens)
    store_job_artifacts([text_log_summary_artifact])
    assert TextLogError.objects.count() == 1
    assert TextLogStep.objects.count() == 1
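
The double-load assertions above imply the storage path is idempotent. One common way to get that in Django is get_or_create keyed on the row's natural key; the sketch below only illustrates that pattern. The field names step, line and line_number are inferred from the test payload and other examples here, and this is not the actual store_job_artifacts implementation.

def store_error_once(error_model, step, line, line_number):
    """Insert an error row at most once; repeated identical loads become no-ops."""
    obj, created = error_model.objects.get_or_create(
        step=step,
        line_number=line_number,
        defaults={'line': line})
    return obj, created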
Example #5
def post_log_artifacts(job_log):
    """Post a list of artifacts to a job."""
    logger.debug("Downloading/parsing log for log %s", job_log.id)

    try:
        artifact_list = extract_text_log_artifacts(job_log)
    except Exception as e:
        job_log.update_status(JobLog.FAILED)

        # unrecoverable http error (doesn't exist or permission denied)
        # (apparently this can happen somewhat often with taskcluster if
        # the job fails, so just warn about it -- see
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1154248)
        if isinstance(e, urllib2.HTTPError) and e.code in (403, 404):
            logger.warning("Unable to retrieve log for %s: %s", job_log.id, e)
            return

        if isinstance(e, urllib2.URLError):
            # possibly recoverable http error (e.g. problems on our end)
            logger.error("Failed to download log for %s: %s", job_log.id, e)
        else:
            # parse error or other unrecoverable error
            logger.error("Failed to download/parse log for %s: %s", job_log.id, e)
        raise

    try:
        serialized_artifacts = serialize_artifact_json_blobs(artifact_list)
        store_job_artifacts(serialized_artifacts)
        job_log.update_status(JobLog.PARSED)
        logger.debug("Stored artifact for %s %s", job_log.job.repository.name,
                     job_log.job.id)
    except Exception as e:
        logger.error("Failed to store parsed artifact for %s: %s", job_log.id, e)
        raise
Example #6
def test_load_textlog_summary_twice(test_repository, test_job):
    text_log_summary_artifact = {
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps({
            'errors': [
                {
                    "line": 'WARNING - foobar',
                    "linenumber": 1587
                },
                {
                    "line": 'WARNING - foobar',
                    "linenumber": 1590
                },
            ],
        }),
        'job_guid': test_job.guid,
    }

    store_job_artifacts([text_log_summary_artifact])
    assert TextLogError.objects.count() == 2
    # load again (simulating the job being parsed twice,
    # which sometimes happens)
    store_job_artifacts([text_log_summary_artifact])
    assert TextLogError.objects.count() == 2
Example #7
def test_load_non_ascii_textlog_errors(test_job):
    text_log_summary_artifact = {
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps({
            'errors': [
                {
                    # non-ascii character
                    "line": '07:51:28  WARNING - \U000000c3',
                    "linenumber": 1587,
                },
                {
                    # astral character (i.e. higher than ucs2)
                    "line": '07:51:29  WARNING - \U0001d400',
                    "linenumber": 1588,
                },
            ],
        }),
        'job_guid': test_job.guid,
    }

    store_job_artifacts([text_log_summary_artifact])

    assert TextLogError.objects.count() == 2
    assert TextLogError.objects.get(
        line_number=1587).line == '07:51:28  WARNING - \U000000c3'
    assert TextLogError.objects.get(
        line_number=1588).line == '07:51:29  WARNING - <U+01D400>'
Example #8
def test_load_non_ascii_textlog_errors(test_job):
    text_log_summary_artifact = {
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps({
            'step_data': {
                "steps": [{
                    'name': 'foo',
                    'started': '2016-05-10 12:44:23.103904',
                    'started_linenumber': 8,
                    'finished_linenumber': 10,
                    'finished': '2016-05-10 12:44:23.104394',
                    'result': 'success',
                    'errors': [
                        {
                            # non-ascii character
                            "line": '07:51:28  WARNING - \U000000c3',
                            "linenumber": 1587,
                        },
                        {
                            # astral character (i.e. higher than ucs2)
                            "line": '07:51:29  WARNING - \U0001d400',
                            "linenumber": 1588,
                        },
                    ],
                }]
            }
        }),
        'job_guid': test_job.guid,
    }
    store_job_artifacts([text_log_summary_artifact])

    assert TextLogError.objects.count() == 2
    assert TextLogError.objects.get(
        line_number=1587).line == '07:51:28  WARNING - \U000000c3'
    assert TextLogError.objects.get(
        line_number=1588).line == '07:51:29  WARNING - <U+01D400>'
Example #9
def test_load_non_ascii_textlog_errors(test_job):
    text_log_summary_artifact = {
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps({
            'step_data': {
                "steps": [
                    {
                        'name': 'foo',
                        'started': '2016-05-10 12:44:23.103904',
                        'started_linenumber': 8,
                        'finished_linenumber': 10,
                        'finished': '2016-05-10 12:44:23.104394',
                        'result': 'success',
                        'errors': [
                            {
                                # non-ascii character
                                "line": '07:51:28  WARNING - \U000000c3'.encode('utf-8'),
                                "linenumber": 1587
                            },
                            {
                                # astral character (i.e. higher than ucs2)
                                "line": '07:51:29  WARNING - \U0001d400'.encode('utf-8'),
                                "linenumber": 1588
                            }
                        ]
                    }
                ]
            }
        }),
        'job_guid': test_job.guid
    }
    store_job_artifacts([text_log_summary_artifact])

    assert TextLogError.objects.count() == 2
    assert TextLogError.objects.get(line_number=1587).line == '07:51:28  WARNING - \U000000c3'
    assert TextLogError.objects.get(line_number=1588).line == '07:51:29  WARNING - <U+01D400>'
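
All three non-ascii variants above expect astral (non-BMP) characters to come back as a <U+XXXXXX> placeholder while ordinary non-ascii characters survive intact. A small sketch of a filter with that behaviour; it reproduces the assertions but is not necessarily the exact treeherder implementation.

def astral_filter(text):
    """Replace characters above the Basic Multilingual Plane with <U+XXXXXX>."""
    return ''.join(
        '<U+%06X>' % ord(ch) if ord(ch) > 0xFFFF else ch
        for ch in text)

assert astral_filter('07:51:28  WARNING - \U000000c3') == '07:51:28  WARNING - \U000000c3'
assert astral_filter('07:51:29  WARNING - \U0001d400') == '07:51:29  WARNING - <U+01D400>'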
Example #10
def post_log_artifacts(job_log):
    """Post a list of artifacts to a job."""
    logger.debug("Downloading/parsing log for log %s", job_log.id)

    try:
        artifact_list = extract_text_log_artifacts(job_log)
    except Exception as e:
        job_log.update_status(JobLog.FAILED)

        # unrecoverable http error (doesn't exist or permission denied)
        # (apparently this can happen somewhat often with taskcluster if
        # the job fails, so just warn about it -- see
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1154248)
        if isinstance(e, urllib2.HTTPError) and e.code in (403, 404):
            logger.warning("Unable to retrieve log for %s: %s", job_log.id, e)
            return

        if isinstance(e, urllib2.URLError):
            # possibly recoverable http error (e.g. problems on our end)
            logger.error("Failed to download log for %s: %s", job_log.id, e)
        else:
            # parse error or other unrecoverable error
            logger.error("Failed to download/parse log for %s: %s", job_log.id,
                         e)
        raise

    try:
        serialized_artifacts = serialize_artifact_json_blobs(artifact_list)
        store_job_artifacts(serialized_artifacts)
        job_log.update_status(JobLog.PARSED)
        logger.debug("Stored artifact for %s %s", job_log.job.repository.name,
                     job_log.job.id)
    except Exception as e:
        logger.error("Failed to store parsed artifact for %s: %s", job_log.id,
                     e)
        raise
Example #11
File: jobs.py  Project: catlee/treeherder
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(
        option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    # if a job with this symbol and name exists, always
    # use its default group (even if that group is different
    # from that specified)
    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')
    if job_type.job_group:
        job_group = job_type.job_group
    else:
        job_group, _ = JobGroup.objects.get_or_create(
            name=job_datum.get('group_name') or 'unknown',
            symbol=job_datum.get('group_symbol') or 'unknown')
        job_type.job_group = job_group
        job_type.save(update_fields=['job_group'])

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture,
             job_group.name, job_group.symbol, job_type.name,
             job_type.symbol, option_collection_hash,
             reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name, defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    if created:
        # A new ReferenceDataSignature has been added, so we need
        # to reload lower tier exclusions
        lower_tier_signatures = _get_lower_tier_signatures(repository)

    tier = job_datum.get('tier') or 1
    # job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that have
    # an active exclusion profile.

    result = job_datum.get('result', 'unknown')

    # As stated elsewhere, a job will end up in the lowest tier where its
    # signature belongs.  So if a signature is in Tier-2 and Tier-3, it
    # will end up in 3.
    for tier_info in lower_tier_signatures:
        if signature_hash in tier_info["signatures"]:
            tier = tier_info["tier"]

    try:
        duration = JobDuration.objects.values_list(
            'average_duration', flat=True).get(
                repository=repository, signature=signature_hash)
    except JobDuration.DoesNotExist:
        duration = 0

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # this could theoretically throw an exception if we were processing
        # several updates simultaneously, but that should never happen --
        # and if it does it's better just to error out
        Job.objects.create(
            guid=job_guid,
            repository=repository,
            signature=signature,
            build_platform=build_platform,
            machine_platform=machine_platform,
            machine=machine,
            option_collection_hash=option_collection_hash,
            job_type=job_type,
            product=product,
            failure_classification=default_failure_classification,
            who=who,
            reason=reason,
            result=result,
            state=state,
            tier=tier,
            submit_time=submit_time,
            start_time=start_time,
            end_time=end_time,
            last_modified=datetime.now(),
            running_eta=duration,
            push_id=push_id)

    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([k in job_datum for k in ['taskcluster_task_id', 'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        running_eta=duration,
        push_id=push_id)

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result)

    return (job_guid, signature_hash)
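
One detail worth calling out from the log-reference loop above: JobLog.STATUSES is a Django-style choices sequence of (value, label) pairs, and the dict built from (v, k) inverts it so a submitted parse_status label maps back to the stored value. A standalone illustration with placeholder values (the real status constants live on the JobLog model):

STATUSES = ((0, 'pending'), (1, 'parsed'), (2, 'failed'))  # placeholder values only

parse_status_map = {label: value for (value, label) in STATUSES}

assert parse_status_map.get('parsed') == 1
assert parse_status_map.get('bogus') is None   # falls through to JobLog.PENDING above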
Example #12
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform',
                               {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform',
                                   {}).get('architecture', 'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform',
                              {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform',
                               {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform',
                                   {}).get('architecture', 'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash, option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')

    job_group, _ = JobGroup.objects.get_or_create(
        name=job_datum.get('group_name') or 'unknown',
        symbol=job_datum.get('group_symbol') or 'unknown')

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x), [
            build_system_type, repository.name, build_platform.os_name,
            build_platform.platform, build_platform.architecture,
            machine_platform.os_name, machine_platform.platform,
            machine_platform.architecture, job_group.name, job_group.symbol,
            job_type.name, job_type.symbol, option_collection_hash,
            reference_data_name
        ])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name,
        defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    tier = job_datum.get('tier') or 1

    result = job_datum.get('result', 'unknown')

    # Job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that should
    # have an overridden tier.
    if lower_tier_signatures and signature_hash in lower_tier_signatures:
        tier = lower_tier_signatures[signature_hash]

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # This could theoretically already have been created by another process
        # that is running updates simultaneously.  So just attempt to create
        # it, but allow it to skip if it's the same guid.  The odds are
        # extremely high that this is a pending and running job that came in
        # quick succession and are being processed by two different workers.
        Job.objects.get_or_create(
            guid=job_guid,
            defaults={
                "repository": repository,
                "signature": signature,
                "build_platform": build_platform,
                "machine_platform": machine_platform,
                "machine": machine,
                "option_collection_hash": option_collection_hash,
                "job_type": job_type,
                "job_group": job_group,
                "product": product,
                "failure_classification": default_failure_classification,
                "who": who,
                "reason": reason,
                "result": result,
                "state": state,
                "tier": tier,
                "submit_time": submit_time,
                "start_time": start_time,
                "end_time": end_time,
                "last_modified": datetime.now(),
                "push_id": push_id
            })
    # Can't just use the ``job`` we would get from the ``get_or_create``
    # because we need to try the job_guid_root instance first for update,
    # rather than a possible retry job instance.
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([
            k in job_datum
            for k in ['taskcluster_task_id', 'taskcluster_retry_id']
    ]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        job_group=job_group,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        push_id=push_id)

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in JobLog.STATUSES])
                mapped_status = parse_status_map.get(log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={'status': parse_status})

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result)

    return (job_guid, signature_hash)
Example #13
File: jobs.py  Project: SJasoria/treeherder
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(
        option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')

    job_group, _ = JobGroup.objects.get_or_create(
        name=job_datum.get('group_name') or 'unknown',
        symbol=job_datum.get('group_symbol') or 'unknown')

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture,
             job_group.name, job_group.symbol, job_type.name,
             job_type.symbol, option_collection_hash,
             reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name, defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    tier = job_datum.get('tier') or 1

    result = job_datum.get('result', 'unknown')

    # Job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that should
    # have an overridden tier.
    if lower_tier_signatures and signature_hash in lower_tier_signatures:
        tier = lower_tier_signatures[signature_hash]

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # This could theoretically already have been created by another process
        # that is running updates simultaneously.  So just attempt to create
        # it, but allow it to skip if it's the same guid.  The odds are
        # extremely high that this is a pending and running job that came in
        # quick succession and are being processed by two different workers.
        Job.objects.get_or_create(
            guid=job_guid,
            defaults={
                "repository": repository,
                "signature": signature,
                "build_platform": build_platform,
                "machine_platform": machine_platform,
                "machine": machine,
                "option_collection_hash": option_collection_hash,
                "job_type": job_type,
                "job_group": job_group,
                "product": product,
                "failure_classification": default_failure_classification,
                "who": who,
                "reason": reason,
                "result": result,
                "state": state,
                "tier": tier,
                "submit_time": submit_time,
                "start_time": start_time,
                "end_time": end_time,
                "last_modified": datetime.now(),
                "push_id": push_id
            }
        )
    # Can't just use the ``job`` we would get from the ``get_or_create``
    # because we need to try the job_guid_root instance first for update,
    # rather than a possible retry job instance.
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([k in job_datum for k in ['taskcluster_task_id', 'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        job_group=job_group,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        push_id=push_id)

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result)

    return (job_guid, signature_hash)
Example #14
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(
        option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    # if a job with this symbol and name exists, always
    # use its default group (even if that group is different
    # from that specified)
    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')
    if job_type.job_group:
        job_group = job_type.job_group
    else:
        job_group, _ = JobGroup.objects.get_or_create(
            name=job_datum.get('group_name') or 'unknown',
            symbol=job_datum.get('group_symbol') or 'unknown')
        job_type.job_group = job_group
        job_type.save(update_fields=['job_group'])

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture,
             job_group.name, job_group.symbol, job_type.name,
             job_type.symbol, option_collection_hash,
             reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name, defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    if created:
        # A new ReferenceDataSignature has been added, so we need
        # to reload lower tier exclusions
        lower_tier_signatures = _get_lower_tier_signatures(repository)

    tier = job_datum.get('tier') or 1
    # job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that have
    # an active exclusion profile.

    result = job_datum.get('result', 'unknown')

    # As stated elsewhere, a job will end up in the lowest tier where its
    # signature belongs.  So if a signature is in Tier-2 and Tier-3, it
    # will end up in 3.
    for tier_info in lower_tier_signatures:
        if signature_hash in tier_info["signatures"]:
            tier = tier_info["tier"]

    try:
        duration = JobDuration.objects.values_list(
            'average_duration', flat=True).get(
                repository=repository, signature=signature_hash)
    except JobDuration.DoesNotExist:
        duration = 0

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # this could theoretically throw an exception if we were processing
        # several updates simultaneously, but that should never happen --
        # and if it does it's better just to error out
        Job.objects.create(
            guid=job_guid,
            repository=repository,
            signature=signature,
            build_platform=build_platform,
            machine_platform=machine_platform,
            machine=machine,
            option_collection_hash=option_collection_hash,
            job_type=job_type,
            product=product,
            failure_classification=default_failure_classification,
            who=who,
            reason=reason,
            result=result,
            state=state,
            tier=tier,
            submit_time=submit_time,
            start_time=start_time,
            end_time=end_time,
            last_modified=datetime.now(),
            running_eta=duration,
            push_id=push_id)

    # if the job was pending, there's nothing more to do here
    # (pending jobs have no artifacts, and we would have just created
    # it)
    if state == 'pending':
        return (job_guid, signature_hash)

    # update job (in the case of a buildbot retrigger, we will
    # get the root object and update that to a retry)
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        running_eta=duration,
        push_id=push_id)

    if all([k in job_datum for k in ['taskcluster_task_id', 'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass
    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    if log_refs:

        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            _schedule_log_parsing(jl, result)

    return (job_guid, signature_hash)
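
The timestamp handling in these _load_job variants funnels the raw submit/start/end values through _get_number before datetime.fromtimestamp. That helper is not shown in any example here; a plausible sketch, offered only as an assumption about its shape, is a defensive numeric coercion like this:

from datetime import datetime

def _get_number(value):
    """Coerce a possibly-missing or string timestamp to a number (0 if unusable)."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0

assert _get_number('1462884263.1') == 1462884263.1
assert datetime.fromtimestamp(_get_number(None)) == datetime.fromtimestamp(0)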
Example #15
    def create(self, request, project):
        serialized_artifacts = serialize_artifact_json_blobs(
            request.data)
        store_job_artifacts(serialized_artifacts)

        return Response({'message': 'Artifacts stored successfully'})
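
Example #15 is the REST view side: it deserializes request.data and hands the artifacts to store_job_artifacts. A hedged client-side sketch of posting to such an endpoint with requests follows; the URL pattern, project name, and guid are assumptions for illustration, not the documented treeherder API, and real submissions would normally go through an authenticated client.

import json
import requests

artifact = {
    'type': 'json',
    'name': 'Job Info',
    'blob': json.dumps({'job_details': [{'title': 'example', 'value': '1'}]}),
    'job_guid': 'abc123',  # placeholder guid
}

resp = requests.post(
    'https://treeherder.example.org/api/project/myproject/artifact/',
    json=[artifact])
print(resp.json())  # {'message': 'Artifacts stored successfully'} on success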