def get_history(failure_classification_id, push_date, num_days, option_map,
                repository_ids):
    """Return cached-or-computed failure counts plus the cache key used.

    Counts distinct prior FailureLines grouped by test, then platform, then
    config, over pushes between ``push_date - num_days`` and
    ``push_date - 2 days`` for the given classification and repositories.

    Returns a tuple of ``(previous_failures, cache_key)``.  On a cache hit
    the counts come back as plain nested dicts (from JSON); on a miss they
    are nested defaultdicts.
    """
    start_date = push_date - datetime.timedelta(days=num_days)
    end_date = push_date - datetime.timedelta(days=2)
    cache_key = 'failure_history:{}:{}'.format(failure_classification_id,
                                               push_date)

    cached = cache.get(cache_key)
    if cached:
        # Cache hit: skip the (expensive) query entirely.
        return json.loads(cached), cache_key

    base_query = FailureLine.objects.filter(
        job_log__job__result='testfailed',
        job_log__job__tier=1,
        job_log__job__failure_classification_id=failure_classification_id,
        job_log__job__push__repository_id__in=repository_ids,
        job_log__job__push__time__gt=start_date,
        job_log__job__push__time__lt=end_date,
    )
    failure_rows = base_query.exclude(test=None).select_related(
        'job_log__job__machine_platform', 'job_log__job__push'
    ).values(
        'test',
        'job_log__job__machine_platform__platform',
        'job_log__job__option_collection_hash',
    ).distinct()

    # Nested counters: test -> platform -> config -> count.
    previous_failures = defaultdict(
        lambda: defaultdict(lambda: defaultdict(int)))
    for row in failure_rows:
        test_key = clean_test(row['test'])
        platform_key = clean_platform(
            row['job_log__job__machine_platform__platform'])
        config_key = clean_config(
            option_map[row['job_log__job__option_collection_hash']])
        previous_failures[test_key][platform_key][config_key] += 1

    cache.set(cache_key, json.dumps(previous_failures), ONE_WEEK_IN_SECONDS)
    return previous_failures, cache_key
# Esempio n. 2 (score: 0) — scraped-snippet separator, commented out so the file parses
def get_push_failures(push, option_map):
    """Return a sorted list of distinct failing tests for *push*.

    Each entry aggregates every FailureLine seen for one
    (test, config, platform, job name) combination, carrying its mozlog
    lines and a de-duplicated list of failed jobs.
    """
    # .distinct(<fields>) would drop duplicate FailureLines for the same job
    # (differing only by sub-test), but that form is postgres-only and a
    # bare .distinct() has no effect here.
    failure_lines = FailureLine.objects.filter(
        action='test_result',
        job_log__job__push=push,
        job_log__job__result='testfailed',
        job_log__job__tier=1,
    ).exclude(test=None).select_related(
        'job_log__job__job_type', 'job_log__job__machine_platform')

    # Keyed dict de-duplicates the multiple failure_lines per job.
    tests = {}
    for failure_line in failure_lines:
        test_name = clean_test(failure_line.test)
        if not test_name:
            # A name that cleans to nothing is unusable; skip the line.
            continue
        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        jobName = job.job_type.name
        jobSymbol = job.job_type.symbol
        test_key = '{}{}{}{}'.format(test_name, config, platform, jobName)

        # Create the entry on first sight of this key, reuse it afterwards.
        entry = tests.setdefault(test_key, {
            'testName': test_name,
            'jobName': jobName,
            'jobSymbol': jobSymbol,
            'platform': platform,
            'config': config,
            'key': test_key,
            'failJobs': [],
            'passJobs': [],
            'logLines': [],
            'suggestedClassification': 'New Failure',
            'confidence': 0,
        })
        entry['logLines'].append(failure_line.to_mozlog_format())
        # Record each failed job at most once per test entry.
        if not any(fail_job['id'] == job.id for fail_job in entry['failJobs']):
            entry['failJobs'].append(job_to_dict(job))

    # Each returned element represents one test file per platform/config,
    # with at least one failing job and possibly several pass/fail jobs.
    return sorted(tests.values(), key=lambda k: k['testName'])
def get_push_failures(push, option_map):
    """Return a sorted list of distinct failing tests for *push*.

    Each entry aggregates all FailureLines for one
    (test, config, platform, job name) combination, carrying its mozlog
    log lines and a de-duplicated list of the jobs it failed in.
    """
    # Using .distinct(<fields>) here would help by removing duplicate FailureLines
    # for the same job (with different sub-tests), but it's only supported by
    # postgres.  Just using .distinct() has no effect.
    new_failure_lines = FailureLine.objects.filter(
        action='test_result',
        job_log__job__push=push,
        job_log__job__result='testfailed',
        job_log__job__tier=1).exclude(test=None).select_related(
            'job_log__job__job_type', 'job_log__job__machine_platform')

    # using a dict here to avoid duplicates due to multiple failure_lines for
    # each job.
    tests = {}
    for failure_line in new_failure_lines:
        test_name = clean_test(failure_line.test)
        if not test_name:
            # A test name that cleans to nothing is unusable; skip the line.
            continue
        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        jobName = job.job_type.name
        jobSymbol = job.job_type.symbol
        # Key uniquely identifies one test on one config/platform/job-type.
        test_key = '{}{}{}{}'.format(test_name, config, platform, jobName)

        if test_key not in tests:
            line = {
                'testName': test_name,
                'jobName': jobName,
                'jobSymbol': jobSymbol,
                'platform': platform,
                'config': config,
                'key': test_key,
                'jobs': [],
                'logLines': [],
                'suggestedClassification': 'New Failure',
                'confidence': 0,
            }
            tests[test_key] = line
        # ``test`` was either just created above, or already existed from an
        # earlier failure_line with the same key.
        test = tests[test_key]
        test['logLines'].append(failure_line.to_mozlog_format())
        # Only record each failed job once per test entry.
        if not next(
            (find_job
             for find_job in test['jobs'] if find_job['id'] == job.id), False):
            test['jobs'].append(model_to_dict(job))

    return sorted(tests.values(), key=lambda k: k['testName'])
# Esempio n. 4 (score: 0) — scraped-snippet separator, commented out so the file parses
def get_history(failure_classification_id,
                push_date,
                num_days,
                option_map,
                repository_ids,
                force_update=False):
    """Return cached-or-computed counts of prior failure lines.

    Counts distinct FailureLines grouped by test, then platform, then
    config, over pushes between ``push_date - num_days`` and
    ``push_date - 2 days`` for the given classification and repositories.
    Tier <= 2 jobs are included.  ``force_update=True`` bypasses the cache
    and recomputes.  On a cache hit the result is plain nested dicts (from
    JSON); on a miss it is nested defaultdicts.
    """
    start_date = push_date - datetime.timedelta(days=num_days)
    end_date = push_date - datetime.timedelta(days=2)
    cache_key = f'{CACHE_KEY_ROOT}:{failure_classification_id}:{push_date}'
    previous_failures_json = cache.get(cache_key)

    if not previous_failures_json or force_update:
        failure_lines = (FailureLine.objects.filter(
            job_log__job__result='testfailed',
            job_log__job__tier__lte=2,
            job_log__job__failure_classification_id=failure_classification_id,
            job_log__job__push__repository_id__in=repository_ids,
            job_log__job__push__time__gt=start_date,
            job_log__job__push__time__lt=end_date,
        ).exclude(test=None).select_related(
            'job_log__job__machine_platform', 'job_log__job__push').values(
                'action',
                'test',
                'signature',
                'message',
                'job_log__job__machine_platform__platform',
                'job_log__job__option_collection_hash',
            ).distinct())
        # Nested counters: test -> platform -> config -> count.
        previous_failures = defaultdict(
            lambda: defaultdict(lambda: defaultdict(int)))
        for line in failure_lines:
            # Index order: cleaned test name, then platform, then config.
            previous_failures[clean_test(
                line['action'], line['test'], line['signature'],
                line['message'])][clean_platform(
                    line['job_log__job__machine_platform__platform']
                )][clean_config(option_map[
                    line['job_log__job__option_collection_hash']])] += 1

        cache.set(cache_key, json.dumps(previous_failures),
                  ONE_WEEK_IN_SECONDS)
    else:
        previous_failures = json.loads(previous_failures_json)

    return previous_failures
# Esempio n. 5 (score: 0) — scraped-snippet separator, commented out so the file parses
def get_history(failure_classification_id, push_date, num_days, option_map, repository_ids, force_update=False):
    """Return cached-or-computed failure counts plus the cache key used.

    Counts distinct prior FailureLines grouped by test -> platform ->
    config over pushes between ``push_date - num_days`` and
    ``push_date - 2 days``.  ``force_update=True`` bypasses the cache.
    Returns ``(previous_failures, cache_key)``; on a cache hit the counts
    are plain nested dicts (from JSON), otherwise nested defaultdicts.
    """
    start_date = push_date - datetime.timedelta(days=num_days)
    end_date = push_date - datetime.timedelta(days=2)
    cache_key = 'failure_history:{}:{}'.format(failure_classification_id, push_date)

    cached = cache.get(cache_key)
    if cached and not force_update:
        # Cache hit and no forced refresh: skip the expensive query.
        return json.loads(cached), cache_key

    failure_rows = FailureLine.objects.filter(
        job_log__job__result='testfailed',
        job_log__job__tier=1,
        job_log__job__failure_classification_id=failure_classification_id,
        job_log__job__push__repository_id__in=repository_ids,
        job_log__job__push__time__gt=start_date,
        job_log__job__push__time__lt=end_date,
    ).exclude(
        test=None
    ).select_related(
        'job_log__job__machine_platform', 'job_log__job__push'
    ).values(
        'test',
        'job_log__job__machine_platform__platform',
        'job_log__job__option_collection_hash'
    ).distinct()

    # Nested counters: test -> platform -> config -> count.
    previous_failures = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for row in failure_rows:
        test_key = clean_test(row['test'])
        platform_key = clean_platform(row['job_log__job__machine_platform__platform'])
        config_key = clean_config(option_map[row['job_log__job__option_collection_hash']])
        previous_failures[test_key][platform_key][config_key] += 1

    cache.set(cache_key, json.dumps(previous_failures), ONE_WEEK_IN_SECONDS)
    return previous_failures, cache_key
# Esempio n. 6 (score: 0) — scraped-snippet separator, commented out so the file parses
def test_clean_test(action, test, signature, message, expected):
    """Parametrized check that clean_test() maps its inputs to *expected*."""
    actual = clean_test(action, test, signature, message)
    assert actual == expected
# Esempio n. 7 (score: 0) — scraped-snippet separator, commented out so the file parses
def get_push_failures(push, option_map):
    """Return a sorted list of distinct failing tests for *push*.

    Each entry aggregates all FailureLines for one
    (test, config, platform, job name) combination.  After collection, each
    test is also checked against every failed job with a matching job key:
    jobs that failed but NOT for this test are recorded in
    ``passInFailedJobs``.
    """
    # Using .distinct(<fields>) here would help by removing duplicate FailureLines
    # for the same job (with different sub-tests), but it's only supported by
    # postgres.  Just using .distinct() has no effect.
    new_failure_lines = FailureLine.objects.filter(
        action='test_result',
        job_log__job__push=push,
        job_log__job__result='testfailed',
        job_log__job__tier=1
    ).exclude(
        test=None
    ).select_related(
        'job_log__job__job_type', 'job_log__job__machine_platform'
    )

    # using a dict here to avoid duplicates due to multiple failure_lines for
    # each job.
    tests = {}
    # Every failed job seen, keyed by id, for the pass-in-failed-job scan below.
    all_failed_jobs = {}
    for failure_line in new_failure_lines:
        test_name = clean_test(failure_line.test)
        if not test_name:
            # A test name that cleans to nothing is unusable; skip the line.
            continue
        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        job_name = job.job_type.name
        job_symbol = job.job_type.symbol
        # job_key identifies the config/platform/job-type WITHOUT the test
        # name, so tests can be matched to other jobs of the same kind.
        job.job_key = '{}{}{}'.format(config, platform, job_name)
        all_failed_jobs[job.id] = job
        test_key = '{}{}{}{}'.format(test_name, config, platform, job_name)

        if test_key not in tests:
            line = {
                'testName': test_name,
                'jobName': job_name,
                'jobSymbol': job_symbol,
                'platform': platform,
                'config': config,
                'key': test_key,
                'jobKey': job.job_key,
                'failJobs': [],
                'passJobs': [],
                'passInFailedJobs': [],  # This test passed in a job that failed for another test
                'logLines': [],
                'suggestedClassification': 'New Failure',
                'confidence': 0,
            }
            tests[test_key] = line

        # This ``test`` was either just added above, or already existed in the ``tests``
        # list in a previous iteration through ``failure_lines``
        test = tests[test_key]
        test['logLines'].append(failure_line.to_mozlog_format())
        if not has_job(job, test['failJobs']):
            test['failJobs'].append(job_to_dict(job))

    # Check each test to find jobs where it passed, even if the job itself failed due to another test
    for test in tests.values():
        for failed_job in all_failed_jobs.values():
            if not has_job(failed_job, test['failJobs']) and test['jobKey'] == failed_job.job_key:
                test['passInFailedJobs'].append(job_to_dict(failed_job))

    # Each line of the sorted list that is returned here represents one test file per platform/
    # config.  Each line will have at least one failing job, but may have several
    # passing/failing jobs associated with it.
    return sorted(tests.values(), key=lambda k: k['testName'])
# Esempio n. 8 (score: 0) — scraped-snippet separator, commented out so the file parses
def get_current_test_failures(push, option_map):
    """Return ``(tests, unsupported_jobs)`` for *push*.

    ``tests`` is a sorted list of distinct failing-test entries built from
    FailureLines (tier <= 2, test/log/crash actions); ``unsupported_jobs``
    holds jobs that ended 'testfailed' but produced no usable FailureLine.
    """
    # All failed jobs for this push, lint excluded, used below to detect
    # failures with no FailureLine coverage.
    all_testfailed = Job.objects.filter(
        push=push,
        tier__lte=2,
        result='testfailed',
    ).exclude(machine_platform__platform='lint')
    # Using .distinct(<fields>) here would help by removing duplicate FailureLines
    # for the same job (with different sub-tests), but it's only supported by
    # postgres.  Just using .distinct() has no effect.
    new_failure_lines = FailureLine.objects.filter(
        action__in=['test_result', 'log', 'crash'],
        job_log__job__push=push,
        job_log__job__result='testfailed',
        job_log__job__tier__lte=2).select_related(
            'job_log__job__job_type', 'job_log__job__job_group',
            'job_log__job__machine_platform')

    # using a dict here to avoid duplicates due to multiple failure_lines for
    # each job.
    tests = {}
    # Every failed job seen via FailureLines, keyed by id; doubles as the
    # set of "supported" jobs for the unsupported_jobs computation.
    all_failed_jobs = {}
    for failure_line in new_failure_lines:
        test_name = clean_test(failure_line.action, failure_line.test,
                               failure_line.signature, failure_line.message)
        if not test_name:
            # A name that cleans to nothing is unusable; skip the line.
            continue
        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        job_name = job.job_type.name
        job_symbol = job.job_type.symbol
        job_group = job.job_group.name
        job_group_symbol = job.job_group.symbol
        # job_key identifies the config/platform/job-type/group WITHOUT the
        # test name, so tests can be matched to other jobs of the same kind.
        job.job_key = '{}{}{}{}'.format(config, platform, job_name, job_group)
        all_failed_jobs[job.id] = job
        # Strip non-word characters so the key is safe for client-side use.
        test_key = re.sub(
            r'\W+', '', '{}{}{}{}{}'.format(test_name, config, platform,
                                            job_name, job_group))

        if test_key not in tests:
            line = {
                'testName': test_name,
                # e.g. 'test_result' -> 'test', 'log' -> 'log'.
                'action': failure_line.action.split('_')[0],
                'jobName': job_name,
                'jobSymbol': job_symbol,
                'jobGroup': job_group,
                'jobGroupSymbol': job_group_symbol,
                'platform': platform,
                'config': config,
                'key': test_key,
                'jobKey': job.job_key,
                'inProgressJobs': [],
                'failJobs': [],
                'passJobs': [],
                'passInFailedJobs':
                [],  # This test passed in a job that failed for another test
                'logLines': [],
                'suggestedClassification': 'New Failure',
                'confidence': 0,
                'tier': job.tier,
            }
            tests[test_key] = line

        # This ``test`` was either just added above, or already existed in the ``tests``
        # list in a previous iteration through ``failure_lines``
        test = tests[test_key]
        if not has_line(failure_line, test['logLines']):
            test['logLines'].append(failure_line.to_mozlog_format())

        if not has_job(job, test['failJobs']):
            test['failJobs'].append(job_to_dict(job))

    # Check each test to find jobs where it passed, even if the job itself failed due to another test
    for test in tests.values():
        for failed_job in all_failed_jobs.values():
            if not has_job(
                    failed_job,
                    test['failJobs']) and test['jobKey'] == failed_job.job_key:
                test['passInFailedJobs'].append(job_to_dict(failed_job))

    # filter out testfailed jobs that are supported by failureline to get unsupported jobs
    supported_job_ids = all_failed_jobs.keys()
    unsupported_jobs = [
        job_to_dict(job) for job in all_testfailed
        if job.id not in supported_job_ids
    ]

    # Each line of the sorted list that is returned here represents one test file per platform/
    # config.  Each line will have at least one failing job, but may have several
    # passing/failing jobs associated with it.
    return (sorted(tests.values(),
                   key=lambda k: k['testName']), unsupported_jobs)
# Esempio n. 9 (score: 0) — scraped-snippet separator, commented out so the file parses
def get_current_test_failures(push, option_map, jobs):
    """Return a sorted list of distinct failing-test entries for *push*.

    One entry is produced per (test, config, platform, job name, job group)
    combination found in the push's FailureLines (tier <= 2).  ``jobs`` maps
    a job name to a list of job dicts (each with at least ``result`` and
    ``failure_classification_id``), used to compute the pass/fail ratio and
    intermittent-classification flag per entry.
    """
    # Using .distinct(<fields>) here would help by removing duplicate FailureLines
    # for the same job (with different sub-tests), but it's only supported by
    # postgres.  Just using .distinct() has no effect.
    new_failure_lines = FailureLine.objects.filter(
        action__in=['test_result', 'log', 'crash'],
        job_log__job__push=push,
        job_log__job__result='testfailed',
        job_log__job__tier__lte=2,
    ).select_related(
        'job_log__job__job_type',
        'job_log__job__job_group',
        'job_log__job__machine_platform',
        'job_log__job__taskcluster_metadata',
    )
    # using a dict here to avoid duplicates due to multiple failure_lines for
    # each job.
    tests = {}
    all_failed_jobs = {}
    for failure_line in new_failure_lines:
        test_name = clean_test(
            failure_line.action, failure_line.test, failure_line.signature, failure_line.message
        )
        if not test_name:
            # A name that cleans to nothing is unusable; skip the line.
            continue
        job = failure_line.job_log.job
        config = clean_config(option_map[job.option_collection_hash])
        platform = clean_platform(job.machine_platform.platform)
        job_name = job.job_type.name
        job_symbol = job.job_type.symbol
        job_group = job.job_group.name
        job_group_symbol = job.job_group.symbol
        # job_key identifies the config/platform/job-type/group WITHOUT the
        # test name, so tests can be matched against jobs of the same kind.
        job.job_key = '{}{}{}{}'.format(config, platform, job_name, job_group)
        all_failed_jobs[job.id] = job
        # The 't' ensures the key starts with a character, as required for a query selector
        test_key = re.sub(
            r'\W+', '', 't{}{}{}{}{}'.format(test_name, config, platform, job_name, job_group)
        )
        countPassed = len(list(filter(lambda x: x['result'] == 'success', jobs[job_name])))
        countFailed = len(list(filter(lambda x: x['result'] == 'testfailed', jobs[job_name])))
        # Fraction of this job name's runs that passed.  BUGFIX: the previous
        # expression lacked parentheses around the denominator and evaluated
        # as (countPassed / countPassed) + countFailed == 1 + countFailed.
        passFailRatio = countPassed / (countPassed + countFailed) if countPassed else 0
        # failure_classification_id 4 == "intermittent".
        isClassifiedIntermittent = any(
            job['failure_classification_id'] == 4 for job in jobs[job_name]
        )

        if test_key not in tests:
            line = {
                'testName': test_name,
                'action': failure_line.action.split('_')[0],
                'jobName': job_name,
                'jobSymbol': job_symbol,
                'jobGroup': job_group,
                'jobGroupSymbol': job_group_symbol,
                'platform': platform,
                'config': config,
                'key': test_key,
                'jobKey': job.job_key,
                'suggestedClassification': 'New Failure',
                'confidence': 0,
                'tier': job.tier,
                'failedInParent': False,
                'passFailRatio': passFailRatio,
                'isClassifiedIntermittent': isClassifiedIntermittent,
            }
            tests[test_key] = line

    # Each line of the sorted list that is returned here represents one test file per platform/
    # config.  Each line will have at least one failing job, but may have several
    # passing/failing jobs associated with it.
    return sorted(tests.values(), key=lambda k: k['testName'])
# Esempio n. 10 (score: 0) — scraped-snippet separator, commented out so the file parses
def get_test_failures(push, failed_jobs, likely_regression_labels,
                      result_status):
    """Classify *push*'s failed jobs into regressions vs known issues.

    ``failed_jobs`` maps a job label to its list of jobs; labels present in
    ``likely_regression_labels`` are bucketed as regressions, the rest as
    known issues.  Labels with no FailureLine at all end up in the bucket's
    ``unstructuredFailures`` list.

    Returns ``(result, buckets)`` where ``result`` is 'none' (no failed
    jobs), 'fail' (any regression tests), 'unknown' (some job still
    unknown), or 'pass', and ``buckets`` has 'needInvestigation' and
    'knownIssues' keys.
    """
    # option_map is used to map platforms for the job.option_collection_hash
    option_map = OptionCollection.objects.get_option_collection_map()
    failed_job_labels = list(failed_jobs.keys())
    # using a dict here to avoid duplicates due to multiple failure_lines for
    # each job.  (These buckets were previously re-initialized a second time
    # after the query; the redundant duplicate initialization was removed.)
    regressions = {
        'tests': {},
        'unstructuredFailures': [],
    }
    known_issues = {
        'tests': {},
        'unstructuredFailures': [],
    }

    # Nothing failed: short-circuit with empty buckets.
    if not len(failed_job_labels):
        return ('none', {
            'needInvestigation': regressions,
            'knownIssues': known_issues
        })

    failure_lines = (FailureLine.objects.filter(
        action__in=['test_result', 'log', 'crash'],
        job_log__job__push=push,
        job_log__job__job_type__name__in=failed_job_labels,
        job_log__job__result='testfailed',
    ).select_related(
        'job_log',
        'job_log__job',
        'job_log__job__job_type',
    ).values('job_log__job__job_type__name', 'test', 'signature', 'message',
             'action'))

    investigatedTests = InvestigatedTests.objects.filter(push=push)
    # Keep track of these so that we can add them to the 'otherJobs'
    labels_without_failure_lines = failed_job_labels.copy()

    for failure_line in failure_lines:
        test_name = clean_test(failure_line['test'], failure_line['signature'],
                               failure_line['message'])
        if not test_name:
            # A name that cleans to nothing is unusable; skip the line.
            continue
        action = failure_line['action'].split('_')[0]
        job_name = failure_line['job_log__job__job_type__name']

        # Default bucket is known issues; promote to regressions when the
        # label is flagged as a likely regression.
        classification = known_issues

        if job_name in likely_regression_labels:
            classification = regressions

        if job_name in labels_without_failure_lines:
            labels_without_failure_lines.remove(job_name)

        line = get_line(test_name, action, failed_jobs[job_name][0],
                        option_map, investigatedTests)
        if line['key'] not in classification['tests']:
            classification['tests'][line['key']] = line

    # Any labels that were not in a FailureLine should go into the appropriate bucket 'otherJobs' list.
    for label in labels_without_failure_lines:
        bucket = regressions if label in likely_regression_labels else known_issues
        bucket['unstructuredFailures'].append(
            get_line(None, None, failed_jobs[label][0], option_map,
                     investigatedTests))

    # Expose the de-duplicated test entries as value views.
    regressions['tests'] = regressions['tests'].values()
    known_issues['tests'] = known_issues['tests'].values()

    result = 'pass'
    if len(regressions['tests']):
        result = 'fail'
    elif 'unknown' in result_status:
        result = 'unknown'

    return (result, {
        'needInvestigation': regressions,
        'knownIssues': known_issues
    })