def create(self, request, project):
    """
    POST method implementation
    """
    try:
        repository = Repository.objects.get(name=project)
    except Repository.DoesNotExist:
        return Response({
            "detail": "No project with name {}".format(project)
        }, status=HTTP_404_NOT_FOUND)

    # check if any revisions are shorter than the expected 40 characters
    # The volume of resultsets is fairly low, so this loop won't be
    # onerous.
    for resultset in request.data:
        for revision in resultset['revisions']:
            try:
                if len(revision['revision']) < 40:
                    raise ValueError("Revision < 40 characters")
            except ValueError:
                # The id of the submitter will be automatically included
                # in the params via the ``hawk_lookup`` call
                params = {
                    "revision": revision["revision"]
                }
                newrelic.agent.record_exception(params=params)

    store_result_set_data(repository, request.data)

    return Response({"message": "well-formed JSON stored"})

def test_create_error_summary(failure_classifications, jobs_with_local_log,
                              sample_resultset, test_repository):
    """
    check that a bug suggestions artifact gets inserted when running
    a parse_log task for a failed job, and that the number of
    bug search terms/suggestions matches the number of error lines.
    """
    store_result_set_data(test_repository, sample_resultset)

    jobs = jobs_with_local_log
    for job in jobs:
        job['job']['result'] = "testfailed"
        job['revision'] = sample_resultset[0]['revision']

    store_job_data(test_repository, jobs)

    bug_suggestions = get_error_summary(Job.objects.get(id=1))

    # we must have one bugs item per error in bug_suggestions.
    # errors with no bug suggestions will just have an empty
    # bugs list
    assert TextLogError.objects.count() == len(bug_suggestions)

    # We really need to add some tests that check the values of each entry
    # in bug_suggestions, but for now this is better than nothing.
    expected_keys = set(["search", "search_terms", "bugs"])
    for failure_line in bug_suggestions:
        assert set(failure_line.keys()) == expected_keys

def test_resultset_list_single_long_revision_stored_long(webapp, sample_resultset,
                                                         test_repository):
    """
    test retrieving a resultset list with store long revision, filtered by a
    single long revision
    """
    long_revision = "21fb3eed1b5f3456789012345678901234567890"

    # store a resultset with long revision
    resultset = copy.deepcopy(sample_resultset[0])
    resultset["revisions"][0]["revision"] = long_revision
    store_result_set_data(test_repository, [resultset])

    resp = webapp.get(
        reverse("resultset-list", kwargs={"project": test_repository.name}),
        {"revision": long_revision}
    )

    assert resp.status_int == 200
    results = resp.json['results']
    meta = resp.json['meta']
    assert len(results) == 1
    assert set([rs["revision"] for rs in results]) == {sample_resultset[0]['revision']}
    assert meta == {
        'count': 1,
        'revision': long_revision,
        'filter_params': {
            'revisions_long_revision': long_revision
        },
        'repository': test_repository.name
    }

def test_resultset_create(test_repository, sample_resultset, mock_post_json):
    """
    test posting data to the resultset endpoint via webtest.
    expected results are:
    - return code 200
    - return message successful
    - resultsets stored in the jobs schema
    """
    assert Push.objects.count() == 0

    # store the first two, so we submit all, but should properly not re-
    # add the others.
    store_result_set_data(test_repository, sample_resultset[:2])
    assert Push.objects.count() == 2

    trsc = TreeherderResultSetCollection()
    exp_revision_hashes = set()
    for rs in sample_resultset:
        rs.update({'author': 'John Doe'})
        result_set = trsc.get_resultset(rs)
        trsc.add(result_set)
        exp_revision_hashes.add(rs["revision"])

    test_utils.post_collection(test_repository.name, trsc)

    assert Push.objects.count() == len(sample_resultset)
    assert set(Push.objects.values_list('revision', flat=True)) == set(
        [rs['revision'] for rs in sample_resultset])

def process(self, message_body, exchange):
    try:
        transformer = self.get_transformer_class(exchange)(message_body)
        try:
            repo = Repository.objects.get(url=transformer.repo_url,
                                          branch=transformer.branch,
                                          active_status="active")
        except ObjectDoesNotExist:
            repo_info = message_body.get("details", message_body["payload"])
            newrelic.agent.record_custom_event("skip_unknown_repository",
                                               repo_info)
            logger.warn("Skipping unsupported repo: {} {}".format(
                transformer.repo_url, transformer.branch))
            return

        transformed_data = transformer.transform(repo.name)

        logger.info("Storing resultset for {} {} {}".format(
            repo.name, transformer.repo_url, transformer.branch))
        store_result_set_data(repo, [transformed_data])
    except Exception as ex:
        newrelic.agent.record_exception(exc=ex)
        logger.exception("Error transforming resultset", exc_info=ex)

def push_with_three_jobs(sample_data, sample_resultset, test_repository):
    """
    Stores a number of jobs in the same resultset.
    """
    num_jobs = 3
    resultset = sample_resultset[0]
    jobs = copy.deepcopy(sample_data.job_data[0:num_jobs])

    # Only store data for the first resultset....
    store_result_set_data(test_repository, [resultset])

    blobs = []
    for index, blob in enumerate(jobs):
        # Modify job structure to sync with the resultset sample data
        if 'sources' in blob:
            del blob['sources']

        # Skip log references since they do not work correctly in pending state.
        if 'log_references' in blob['job']:
            del blob['job']['log_references']

        blob['revision'] = resultset['revision']
        blob['job']['state'] = 'pending'
        blobs.append(blob)

    # Store and process the jobs so they are present in the tables.
    store_job_data(test_repository, blobs)
    return Push.objects.get(repository=test_repository,
                            revision=resultset['revision'])

def test_ingest_running_to_retry_to_success_sample_job(test_repository,
                                                       failure_classifications,
                                                       sample_data,
                                                       sample_resultset,
                                                       mock_log_parser,
                                                       ingestion_cycles):
    # verifies that retries to success work, no matter how jobs are batched
    store_result_set_data(test_repository, sample_resultset)

    job_datum = copy.deepcopy(sample_data.job_data[0])
    job_datum['revision'] = sample_resultset[0]['revision']

    job = job_datum['job']
    job_guid_root = job['job_guid']

    job_data = []
    for (state, result, job_guid) in [
            ('running', 'unknown', job_guid_root),
            ('completed', 'retry',
             job_guid_root + "_" + str(job['end_timestamp'])[-5:]),
            ('completed', 'success', job_guid_root)]:
        new_job_datum = copy.deepcopy(job_datum)
        new_job_datum['job']['state'] = state
        new_job_datum['job']['result'] = result
        new_job_datum['job']['job_guid'] = job_guid
        job_data.append(new_job_datum)

    for (i, j) in ingestion_cycles:
        store_job_data(test_repository, job_data[i:j])

    assert Job.objects.count() == 2
    assert Job.objects.get(id=1).result == 'retry'
    assert Job.objects.get(id=2).result == 'success'
    assert JobLog.objects.count() == 2

def eleven_job_blobs(sample_data, sample_resultset, test_repository, mock_log_parser):
    store_result_set_data(test_repository, sample_resultset)

    num_jobs = 11
    jobs = sample_data.job_data[0:num_jobs]

    max_index = len(sample_resultset) - 1
    resultset_index = 0

    blobs = []
    for index, blob in enumerate(jobs):

        if resultset_index > max_index:
            resultset_index = 0

        # Modify job structure to sync with the resultset sample data
        if 'sources' in blob:
            del blob['sources']

        blob['revision'] = sample_resultset[resultset_index]['revision']

        blobs.append(blob)

        resultset_index += 1
    return blobs

def process(self, message_body, exchange):
    transformer = self.get_transformer_class(exchange)(message_body)
    try:
        newrelic.agent.add_custom_parameter("url", transformer.repo_url)
        newrelic.agent.add_custom_parameter("branch", transformer.branch)
        repo = Repository.objects.get(url=transformer.repo_url,
                                      branch=transformer.branch,
                                      active_status="active")
        newrelic.agent.add_custom_parameter("repository", repo.name)
    except ObjectDoesNotExist:
        repo_info = transformer.get_info()
        repo_info.update({
            "url": transformer.repo_url,
            "branch": transformer.branch,
        })
        newrelic.agent.record_custom_event("skip_unknown_repository",
                                           repo_info)
        logger.warn("Skipping unsupported repo: {} {}".format(
            transformer.repo_url, transformer.branch))
        return

    transformed_data = transformer.transform(repo.name)

    logger.info("Storing resultset for {} {} {}".format(
        repo.name, transformer.repo_url, transformer.branch))
    store_result_set_data(repo, [transformed_data])

def test_bad_date_value_ingestion(test_repository, failure_classifications,
                                  sample_resultset, mock_log_parser):
    """
    Test ingesting a job blob with bad date value
    """
    blob = job_data(start_timestamp="foo",
                    revision=sample_resultset[0]['revision'])

    store_result_set_data(test_repository, sample_resultset[:1])

    store_job_data(test_repository, [blob])

def test_resultset_list_empty_rs_still_show(webapp, sample_resultset,
                                            test_repository):
    """
    test retrieving a resultset list, when the resultset has no jobs.
    should show.
    """
    store_result_set_data(test_repository, sample_resultset)

    resp = webapp.get(
        reverse("resultset-list", kwargs={"project": test_repository.name}),
    )
    assert resp.status_int == 200
    assert len(resp.json['results']) == 10

def test_parse_log(test_repository, failure_classifications,
                   jobs_with_local_log, sample_resultset):
    """
    check that 2 job_artifacts get inserted when running a parse_log
    task for a successful job and that JobDetail objects get created
    """
    store_result_set_data(test_repository, sample_resultset)

    jobs = jobs_with_local_log
    for job in jobs:
        # make this a successful job, to check it's still parsed for errors
        job['job']['result'] = "success"
        job['revision'] = sample_resultset[0]['revision']

    store_job_data(test_repository, jobs)

    # this log generates 4 job detail objects at present
    assert JobDetail.objects.count() == 4

def test_ingest_running_to_retry_sample_job(test_repository,
                                            failure_classifications,
                                            sample_data, sample_resultset,
                                            mock_log_parser,
                                            same_ingestion_cycle):
    """Process a single job structure in the job_data.txt file"""
    store_result_set_data(test_repository, sample_resultset)

    job_data = copy.deepcopy(sample_data.job_data[:1])
    job = job_data[0]['job']
    job_data[0]['revision'] = sample_resultset[0]['revision']
    job['state'] = 'running'
    job['result'] = 'unknown'

    def _simulate_retry_job(job):
        job['state'] = 'completed'
        job['result'] = 'retry'
        # convert the job_guid to what it would be on a retry
        job['job_guid'] = job['job_guid'] + "_" + str(
            job['end_timestamp'])[-5:]
        return job

    if same_ingestion_cycle:
        # now we simulate the complete version of the job coming in (on the
        # same push)
        new_job_datum = copy.deepcopy(job_data[0])
        new_job_datum['job'] = _simulate_retry_job(new_job_datum['job'])
        job_data.append(new_job_datum)
        store_job_data(test_repository, job_data)
    else:
        # store the job in the initial state
        store_job_data(test_repository, job_data)

        # now we simulate the complete version of the job coming in and
        # ingest a second time
        job = _simulate_retry_job(job)
        store_job_data(test_repository, job_data)

    assert Job.objects.count() == 1
    job = Job.objects.get(id=1)
    assert job.result == 'retry'
    # guid should be the retry one
    assert job.guid == job_data[-1]['job']['job_guid']

def test_ingest_job_revision_hash_blank_revision(test_repository,
                                                 failure_classifications,
                                                 sample_data, mock_log_parser,
                                                 sample_resultset):
    # Given a resultset with a revision_hash value that is NOT the
    # top revision SHA, ingest a job with a different revision_hash, but a
    # matching revision SHA. Ensure the job still goes to the right resultset.
    rs_revision_hash = "12345abc"
    resultset = sample_resultset[0].copy()
    resultset["revision_hash"] = rs_revision_hash
    store_result_set_data(test_repository, [resultset])

    first_job = sample_data.job_data[0]
    first_job["revision_hash"] = rs_revision_hash
    first_job["revision"] = ""
    store_job_data(test_repository, [first_job])

    assert Job.objects.count() == 1
    assert Job.objects.get(id=1).push_id == Push.objects.values_list(
        'id', flat=True).get(revision_hash=rs_revision_hash)

def test_resultset_list_filter_by_date(webapp, test_repository,
                                       sample_resultset):
    """
    test retrieving a resultset list, filtered by a date range
    """
    for (i, datestr) in zip([3, 4, 5, 6, 7],
                            ["2013-08-09", "2013-08-10", "2013-08-11",
                             "2013-08-12", "2013-08-13"]):
        sample_resultset[i]['push_timestamp'] = utils.to_timestamp(
            utils.to_datetime(datestr))

    store_result_set_data(test_repository, sample_resultset)

    resp = webapp.get(
        reverse("resultset-list", kwargs={"project": test_repository.name}),
        {"startdate": "2013-08-10", "enddate": "2013-08-13"}
    )
    assert resp.status_int == 200
    results = resp.json['results']
    meta = resp.json['meta']

    assert len(results) == 4
    assert set([rs["revision"] for rs in results]) == {
        u'ce17cad5d554cfffddee13d1d8421ae9ec5aad82',
        u'7f417c3505e3d2599ac9540f02e3dbee307a3963',
        u'a69390334818373e2d7e6e9c8d626a328ed37d47',
        u'f361dcb60bbedaa01257fbca211452972f7a74b2'
    }
    assert meta == {
        u'count': 4,
        u'enddate': u'2013-08-13',
        u'filter_params': {
            u'push_timestamp__gte': 1376092800.0,
            u'push_timestamp__lt': 1376438400.0
        },
        u'repository': test_repository.name,
        u'startdate': u'2013-08-10'
    }

def test_resultset_list_without_jobs(webapp, test_repository,
                                     sample_resultset):
    """
    test retrieving a resultset list without jobs
    """
    store_result_set_data(test_repository, sample_resultset)

    resp = webapp.get(
        reverse("resultset-list", kwargs={"project": test_repository.name}))
    assert resp.status_int == 200

    results = resp.json['results']
    assert len(results) == 10
    assert all([('platforms' not in result) for result in results])

    meta = resp.json['meta']

    assert meta == {
        u'count': len(results),
        u'filter_params': {},
        u'repository': test_repository.name
    }

def test_ingest_running_to_retry_to_success_sample_job_multiple_retries(
        test_repository, failure_classifications, sample_data,
        sample_resultset, mock_log_parser, ingestion_cycles):
    # this verifies that if we ingest multiple retries:
    # (1) nothing errors out
    # (2) we end up with three jobs (the original + 2 retry jobs)
    store_result_set_data(test_repository, sample_resultset)

    job_datum = copy.deepcopy(sample_data.job_data[0])
    job_datum['revision'] = sample_resultset[0]['revision']

    job = job_datum['job']
    job_guid_root = job['job_guid']

    job_data = []
    for (state, result, job_guid) in [
            ('running', 'unknown', job_guid_root),
            ('completed', 'retry',
             job_guid_root + "_" + str(job['end_timestamp'])[-5:]),
            ('completed', 'retry', job_guid_root + "_12345"),
            ('completed', 'success', job_guid_root)]:
        new_job_datum = copy.deepcopy(job_datum)
        new_job_datum['job']['state'] = state
        new_job_datum['job']['result'] = result
        new_job_datum['job']['job_guid'] = job_guid
        job_data.append(new_job_datum)

    for (i, j) in ingestion_cycles:
        ins = job_data[i:j]
        store_job_data(test_repository, ins)

    assert Job.objects.count() == 3
    assert Job.objects.get(id=1).result == 'retry'
    assert Job.objects.get(id=2).result == 'retry'
    assert Job.objects.get(id=3).result == 'success'
    assert JobLog.objects.count() == 3

def test_ingest_job_with_revision_hash(test_repository,
                                       failure_classifications, sample_data,
                                       mock_log_parser, sample_resultset):
    """
    Test ingesting a job with only a revision hash, no revision.  And the
    revision_hash must NOT be the same SHA value as the top revision.

    This can happen if a user submits a new resultset in the API with their
    own revision_hash value.  If we just use the latest revision value, then
    their subsequent job submissions with the revision_hash they generated
    will fail and the jobs will be skipped.
    """
    revision_hash = "12345abc"
    resultset = sample_resultset[0].copy()
    resultset["revision_hash"] = revision_hash
    store_result_set_data(test_repository, [resultset])

    first_job = sample_data.job_data[0]
    first_job["revision_hash"] = revision_hash
    del first_job["revision"]
    store_job_data(test_repository, [first_job])

    assert Job.objects.count() == 1

def test_ingest_retry_sample_job_no_running(test_repository,
                                            failure_classifications,
                                            sample_data, sample_resultset,
                                            mock_log_parser):
    """Process a single job structure in the job_data.txt file"""
    job_data = copy.deepcopy(sample_data.job_data[:1])
    job = job_data[0]['job']
    job_data[0]['revision'] = sample_resultset[0]['revision']

    store_result_set_data(test_repository, sample_resultset)

    # complete version of the job coming in
    job['state'] = 'completed'
    job['result'] = 'retry'
    # convert the job_guid to what it would be on a retry
    retry_guid = job['job_guid'] + "_" + str(job['end_timestamp'])[-5:]
    job['job_guid'] = retry_guid

    store_job_data(test_repository, job_data)

    assert Job.objects.count() == 1
    job = Job.objects.get(id=1)
    assert job.result == 'retry'
    assert job.guid == retry_guid

def do_job_ingestion(test_repository, job_data, sample_resultset,
                     verify_data=True):
    """
    Ingest ``job_data`` which will be JSON job blobs.

    ``verify_data`` - whether or not to run the ingested jobs
      through the verifier.
    """
    store_result_set_data(test_repository, sample_resultset)

    max_index = len(sample_resultset) - 1
    resultset_index = 0

    # Structures to test if we stored everything
    build_platforms_ref = set()
    machine_platforms_ref = set()
    machines_ref = set()
    options_ref = set()
    job_types_ref = set()
    products_ref = set()
    result_sets_ref = set()
    log_urls_ref = set()
    coalesced_job_guids = {}
    artifacts_ref = {}

    blobs = []
    for index, blob in enumerate(job_data):

        if resultset_index > max_index:
            resultset_index = 0

        # Modify job structure to sync with the resultset sample data
        if 'sources' in blob:
            del blob['sources']

        blob['revision'] = sample_resultset[resultset_index]['revision']

        blobs.append(blob)

        resultset_index += 1

        # Build data structures to confirm everything is stored
        # as expected
        if verify_data:
            job_guid = blob['job']['job_guid']

            job = blob['job']

            build_platforms_ref.add(
                "-".join([
                    job.get('build_platform', {}).get('os_name', 'unknown'),
                    job.get('build_platform', {}).get('platform', 'unknown'),
                    job.get('build_platform', {}).get('architecture', 'unknown')
                ]))

            machine_platforms_ref.add(
                "-".join([
                    job.get('machine_platform', {}).get('os_name', 'unknown'),
                    job.get('machine_platform', {}).get('platform', 'unknown'),
                    job.get('machine_platform', {}).get('architecture', 'unknown')
                ]))

            machines_ref.add(job.get('machine', 'unknown'))

            options_ref = options_ref.union(
                job.get('option_collection', []).keys())

            job_types_ref.add(job.get('name', 'unknown'))
            products_ref.add(job.get('product_name', 'unknown'))
            result_sets_ref.add(blob['revision'])

            log_url_list = job.get('log_references', [])
            for log_data in log_url_list:
                log_urls_ref.add(log_data['url'])

            artifact_name = job.get('artifact', {}).get('name')
            if artifact_name:
                artifacts_ref[artifact_name] = job.get('artifact')

            coalesced = blob.get('coalesced', [])
            if coalesced:
                coalesced_job_guids[job_guid] = coalesced

    # Store the modified json blobs
    store_job_data(test_repository, blobs)

    if verify_data:
        # Confirms stored data matches what's in the reference data structs
        verify_build_platforms(build_platforms_ref)
        verify_machine_platforms(machine_platforms_ref)
        verify_machines(machines_ref)
        verify_options(options_ref)
        verify_job_types(job_types_ref)
        verify_products(products_ref)
        verify_result_sets(test_repository, result_sets_ref)
        verify_log_urls(test_repository, log_urls_ref)
        verify_coalesced(coalesced_job_guids)

def result_set_stored(test_repository, sample_resultset):
    store_result_set_data(test_repository, sample_resultset)
    return sample_resultset

def run(self, source_url, repository_name, changeset=None, last_push_id=None):
    print(repository_name)
    if not last_push_id:
        # get the last object seen from cache. this will
        # reduce the number of pushes processed every time
        last_push_id = cache.get("{0}:last_push_id".format(repository_name))

    if not changeset and last_push_id:
        startid_url = "{}&startID={}".format(source_url, last_push_id)
        logger.info("Extracted last push for '%s', '%s', from cache, "
                    "attempting to get changes only from that point at: %s" %
                    (repository_name, last_push_id, startid_url))
        # Use the cached ``last_push_id`` value (saved from the last time
        # this API was called) for this repo.  Use that value as the
        # ``startID`` to get all new pushes from that point forward.
        extracted_content = self.extract(startid_url)

        if extracted_content['lastpushid'] < last_push_id:
            # Push IDs from Mercurial are incremental.  If we cached a value
            # from one call to this API, and a subsequent call told us that
            # the ``lastpushid`` is LOWER than the one we have cached, then
            # the Mercurial IDs were reset.
            # In this circumstance, we can't rely on the cached id, so must
            # throw it out and get the latest 10 pushes.
            logger.warning(("Got a ``lastpushid`` value of {} lower than "
                            "the cached value of {} due to Mercurial repo reset.  "
                            "Getting latest changes for '{}' instead").format(
                                extracted_content['lastpushid'],
                                last_push_id,
                                repository_name))
            cache.delete("{0}:last_push_id".format(repository_name))
            extracted_content = self.extract(source_url)
    else:
        if changeset:
            logger.info("Getting all pushes for '%s' corresponding to "
                        "changeset '%s'" % (repository_name, changeset))
            extracted_content = self.extract(source_url + "&changeset=" +
                                             changeset)
        else:
            logger.warning("Unable to get last push from cache for '%s', "
                           "getting all pushes" % repository_name)
            extracted_content = self.extract(source_url)

    # ``pushes`` could be empty if there are no new ones since we last
    # fetched
    pushes = extracted_content['pushes']
    if not pushes:
        return None

    last_push_id = max(map(lambda x: int(x), pushes.keys()))
    last_push = pushes[str(last_push_id)]
    top_revision = last_push["changesets"][-1]["node"]
    # TODO: further remove the use of client types here
    transformed = self.transform(pushes, repository_name)

    errors = []
    repository = Repository.objects.get(name=repository_name)
    for collection in transformed[repository_name].get_chunks(chunk_size=1):
        try:
            collection.validate()
            store_result_set_data(repository,
                                  collection.get_collection_data())
        except Exception:
            newrelic.agent.record_exception()
            errors.append({
                "project": repository,
                "collection": "result_set",
                "message": traceback.format_exc()
            })

    if errors:
        raise CollectionNotStoredException(errors)

    if not changeset:
        # only cache the last push if we're not fetching a specific
        # changeset
        cache.set("{0}:last_push_id".format(repository_name), last_push_id)

    return top_revision

def retry_mock(exc=None, countdown=None):
    assert isinstance(exc, MissingPushException)
    thread_data.retries += 1
    store_result_set_data(test_repository, [rs])
    return orig_retry(exc=exc, countdown=countdown)