def project_info(request, project):
    """Return a JSON response holding the highest job id of ``project``.

    :param request: the incoming HTTP request (not inspected here).
    :param project: name of the project whose jobs database is queried.
    :returns: an ``HttpResponse`` with a JSON body containing ``max_job_id``,
        or an ``HttpResponseNotFound`` when ``DatasetNotFoundError`` is raised.
    """
    # ``jm`` stays None when the constructor itself fails, so the cleanup
    # below never touches an unassigned name.
    jm = None
    try:
        jm = JobsModel(project)
        return HttpResponse(json.dumps({'max_job_id': jm.get_max_job_id()}),
                            mimetype='application/json')
    except DatasetNotFoundError:
        return HttpResponseNotFound('Project does not exist')
    finally:
        # Release the model on every exit path; previously it was never
        # disconnected by this view.
        if jm is not None:
            jm.disconnect()
def cycle_data(max_iterations=50, debug=False):
    """Run the data-cycling routine against every repository's jobs model.

    For each project, ``JobsModel.cycle_data`` is called repeatedly, feeding
    the ``sql_targets`` value it returns into the next call, until either
    ``max_iterations`` calls have been made or a call reports a
    ``total_count`` of 0.

    :param max_iterations: upper bound on cycle calls per project.
    :param debug: when true, print progress information to stdout.
    """
    projects = Repository.objects.all().values_list('name', flat=True)

    for project in projects:

        jm = JobsModel(project)

        # Disconnect even when a cycle call raises, so a failure on one
        # project does not leave its connection open.
        try:
            sql_targets = {}

            if debug:
                print("Cycling Database: {0}".format(project))

            cycle_iterations = max_iterations

            while cycle_iterations > 0:

                sql_targets = jm.cycle_data(sql_targets)

                if debug:
                    print("Iterations: {0}".format(str(cycle_iterations)))
                    print("sql_targets")
                    print(sql_targets)

                cycle_iterations -= 1

                # No more items to delete
                if sql_targets['total_count'] == 0:
                    cycle_iterations = 0
        finally:
            jm.disconnect()
Example #3
0
def cycle_data(max_iterations=50, debug=False):
    """Cycle data for each repository, at most ``max_iterations`` times.

    The ``sql_targets`` value returned by ``JobsModel.cycle_data`` is passed
    back into the following call.  Cycling for a project stops early as soon
    as the returned ``total_count`` is 0.

    :param max_iterations: maximum number of cycle calls per project.
    :param debug: print progress details to stdout when true.
    """
    projects = Repository.objects.all().values_list('name', flat=True)

    for project in projects:
        jm = JobsModel(project)

        # The connection must be released whether or not cycling succeeds.
        try:
            if debug:
                print("Cycling Database: {0}".format(project))

            sql_targets = {}
            cycle_iterations = max_iterations

            while cycle_iterations > 0:
                sql_targets = jm.cycle_data(sql_targets)

                if debug:
                    print("Iterations: {0}".format(str(cycle_iterations)))
                    print("sql_targets")
                    print(sql_targets)

                # No more items to delete
                if sql_targets['total_count'] == 0:
                    break

                cycle_iterations -= 1
        finally:
            jm.disconnect()
Example #4
0
    def use_jobs_model(*args, **kwargs):
        """Invoke ``model_func`` with a ``JobsModel`` passed as ``jm``.

        The model is built from ``kwargs["project"]`` and is disconnected
        once the wrapped call has finished, whether it returned or raised.
        """
        jobs_model = JobsModel(kwargs["project"])
        try:
            outcome = model_func(*args, jm=jobs_model, **kwargs)
        finally:
            jobs_model.disconnect()
        return outcome
Example #5
0
    def use_jobs_model(*args, **kwargs):
        """Run ``model_func`` with a project-specific ``JobsModel``.

        The ``project`` keyword argument selects the model, which is handed
        to ``model_func`` as ``jm`` and disconnected afterwards in all cases.
        """
        project_name = kwargs["project"]
        model = JobsModel(project_name)
        try:
            result = model_func(*args, jm=model, **kwargs)
        finally:
            # Always release the model, including when model_func raises.
            model.disconnect()
        return result
def populate_performance_series(project, series_type, series_data):
    """Store every series in ``series_data`` for each configured time range.

    :param project: name of the project whose jobs model receives the data.
    :param series_type: passed through to ``store_performance_series``.
    :param series_data: mapping of signature to the data stored for it.
    """
    jm = JobsModel(project)
    # Guarantee the disconnect even if one of the store calls raises.
    try:
        for t_range in settings.TREEHERDER_PERF_SERIES_TIME_RANGES:
            for signature in series_data:
                jm.store_performance_series(
                    t_range['seconds'], series_type, signature,
                    series_data[signature]
                )
    finally:
        jm.disconnect()
Example #7
0
def publish_to_pulse(project, ids, data_type):
    """Look up the records for ``ids`` that correspond to ``data_type``.

    Only the ``'result_set'`` data type triggers a lookup.

    :param project: name of the project whose jobs model is queried.
    :param ids: ids handed to ``get_result_set_list_by_ids``.
    :param data_type: kind of data requested.
    """
    jm = JobsModel(project)

    # Release the model even when the lookup raises.
    try:
        # Get appropriate data for data_type
        # using the ids provided
        # NOTE(review): ``data`` is not consumed anywhere in this function as
        # visible here -- confirm whether the publishing step is missing.
        data = []
        if data_type == 'result_set':
            data = jm.get_result_set_list_by_ids(ids)
    finally:
        jm.disconnect()
Example #8
0
def project_info(request, project):
    """Return a JSON response with the maximum ids known for ``project``.

    :param request: the incoming HTTP request (not inspected here).
    :param project: name of the project whose jobs database is queried.
    :returns: an ``HttpResponse`` whose JSON body holds ``max_job_id`` and
        ``max_performance_artifact_id``, or an ``HttpResponseNotFound`` when
        ``DatasetNotFoundError`` is raised.
    """
    # Kept as None until construction succeeds so that the ``finally``
    # clause is safe when the constructor raises.
    jm = None
    try:
        jm = JobsModel(project)
        return HttpResponse(content=json.dumps({'max_job_id': jm.get_max_job_id(),
                                                'max_performance_artifact_id':
                                                jm.get_max_performance_artifact_id()}
                                               ),
                            content_type='application/json'
                            )
    except DatasetNotFoundError:
        return HttpResponseNotFound('Project does not exist')
    finally:
        # The model was previously never disconnected by this view.
        if jm is not None:
            jm.disconnect()
def calculate_eta(sample_window_seconds=21600, debug=False):
    """Call ``JobsModel.calculate_eta`` for every repository.

    :param sample_window_seconds: forwarded to ``JobsModel.calculate_eta``.
    :param debug: forwarded to ``JobsModel.calculate_eta``.
    """
    projects = Repository.objects.all().values_list('name', flat=True)

    for project in projects:

        jm = JobsModel(project)

        # Disconnect even if the calculation raises for this project.
        try:
            jm.calculate_eta(sample_window_seconds, debug)
        finally:
            jm.disconnect()
Example #10
0
def process_objects(limit=None):
    """
    Load up to ``limit`` objects per datasource from the objectstore
    into the jobs store.

    A falsy ``limit`` is replaced by the default of 100.
    """
    if not limit:
        limit = 100

    for datasource in Datasource.objects.all():
        jobs_model = JobsModel(datasource.project)
        try:
            jobs_model.process_objects(limit)
        finally:
            # Release the model whether or not processing succeeded.
            jobs_model.disconnect()
Example #11
0
def test_load_single_artifact(test_project, eleven_jobs_processed,
                              mock_post_collection, mock_error_summary,
                              sample_data):
    """
    Loading one ``Bug suggestions`` artifact stores exactly that artifact.
    """
    with JobsModel(test_project) as jm:
        job = jm.get_job_list(0, 1)[0]

    bs_blob = ["flim", "flam"]
    job_guid = job['job_guid']
    bs_artifact = {
        'type': 'json',
        'name': 'Bug suggestions',
        'blob': json.dumps(bs_blob),
        'job_guid': job_guid,
    }

    with ArtifactsModel(test_project) as am:
        am.load_job_artifacts([bs_artifact], {job_guid: job})
        job_filter = {'job_id': {('=', job["id"])}}
        artifacts = am.get_job_artifact_list(0, 10, conditions=job_filter)

    assert len(artifacts) == 1

    stored_names = {artifact['name'] for artifact in artifacts}
    suggestion_blobs = [
        artifact['blob'] for artifact in artifacts
        if artifact['name'] == 'Bug suggestions'
    ]
    stored_blob = suggestion_blobs[0]

    assert set(stored_names) == {'Bug suggestions'}
    assert bs_blob == stored_blob
Example #12
0
def test_load_long_job_details(test_project, eleven_jobs_stored):
    """Over-long job detail fields still load; they are stored truncated."""
    with JobsModel(test_project) as jm:
        job = jm.get_job_list(0, 1)[0]

    limit = JobDetail.MAX_FIELD_LENGTH
    oversize = 2 * limit

    # Every field is built at twice the permitted length.
    long_title = 't' * oversize
    long_value = 'v' * oversize
    long_url = 'https://' + ('u' * oversize)

    detail = {
        'title': long_title,
        'value': long_value,
        'url': long_url
    }
    ji_artifact = {
        'type': 'json',
        'name': 'Job Info',
        'blob': json.dumps({'job_details': [detail]}),
        'job_guid': job['job_guid']
    }
    with ArtifactsModel(test_project) as artifacts_model:
        artifacts_model.load_job_artifacts([ji_artifact])

    assert JobDetail.objects.count() == 1

    stored = JobDetail.objects.all()[0]
    assert stored.title == long_title[:limit]
    assert stored.value == long_value[:limit]
    assert stored.url == long_url[:limit]
Example #13
0
def test_artifact_create_text_log_summary(webapp, test_project, eleven_jobs_stored,
                                          mock_post_json, mock_error_summary,
                                          sample_data):
    """
    Posting a text_log_summary artifact on its own should also produce a
    Bug suggestions artifact for the same job.
    """
    with JobsModel(test_project) as jm:
        job = jm.get_job_list(0, 1)[0]
    summary = sample_data.text_log_summary

    collection = client.TreeherderArtifactCollection()
    summary_artifact = client.TreeherderArtifact({
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps(summary['blob']),
        'job_guid': job['job_guid']
    })
    collection.add(summary_artifact)

    th_client = client.TreeherderClient(protocol='http', host='localhost')
    th_client.post_collection(test_project, collection)

    with ArtifactsModel(test_project) as am:
        job_filter = {'job_id': {('=', job["id"])}}
        artifacts = am.get_job_artifact_list(0, 10, conditions=job_filter)

    stored_names = {artifact['name'] for artifact in artifacts}
    suggestion_blobs = [artifact['blob'] for artifact in artifacts
                        if artifact['name'] == 'Bug suggestions']
    stored_suggestions = suggestion_blobs[0]

    assert set(stored_names) == {'Bug suggestions', 'text_log_summary'}
    assert mock_error_summary == stored_suggestions
Example #14
0
    def handle(self, *args, **options):
        """Cycle data for every project that has a ``jobs`` datasource.

        The ``os_cycle_interval`` and ``cycle_interval`` options are taken as
        a number of days; when an option is falsy the matching settings value
        is used instead.
        """
        self.is_debug = options['debug']

        os_cycle_interval = (
            datetime.timedelta(days=options['os_cycle_interval'])
            if options['os_cycle_interval']
            else settings.OBJECTSTORE_CYCLE_INTERVAL
        )
        cycle_interval = (
            datetime.timedelta(days=options['cycle_interval'])
            if options['cycle_interval']
            else settings.DATA_CYCLE_INTERVAL
        )

        interval_report = "cycle interval... objectstore: {}, jobs: {}"
        self.debug(interval_report.format(os_cycle_interval, cycle_interval))

        jobs_datasources = Datasource.objects.filter(contenttype='jobs')
        for project in jobs_datasources.values_list('project', flat=True):
            self.debug("Cycling Database: {0}".format(project))
            with JobsModel(project) as jm:
                os_deleted, rs_deleted = jm.cycle_data(
                    os_cycle_interval,
                    cycle_interval,
                    options['os_chunk_size'],
                    options['chunk_size'],
                    options['sleep_time'],
                )
                deleted_report = "Deleted {} objectstore rows and {} resultsets from {}"
                self.debug(deleted_report.format(os_deleted, rs_deleted, project))
Example #15
0
def test_post_job_with_default_tier(test_project, result_set_stored,
                                    mock_post_json):
    """A job submitted without a tier ends up stored with tier 1."""
    job_guid = 'd22c74d4aa6d2a1dcba96d95dccbd5fdca70cf33'

    collection = client.TreeherderJobCollection()
    submitted_job = client.TreeherderJob({
        'project': test_project,
        'revision_hash': result_set_stored[0]['revision_hash'],
        'job': {'job_guid': job_guid, 'state': 'completed'},
    })
    collection.add(submitted_job)

    do_post_collection(test_project, collection)

    with JobsModel(test_project) as jm:
        matching_jobs = [
            candidate for candidate in jm.get_job_list(0, 20)
            if candidate['job_guid'] == job_guid
        ]
        stored_job = matching_jobs[0]
        assert stored_job['tier'] == 1
    def handle(self, *args, **options):
        """Download a log and store its lines as ``FailureLine`` rows.

        Expects three positional arguments: the log URL, the job guid and the
        repository name.  Nothing is stored when the downloaded log is empty.
        """
        if len(args) != 3:
            raise CommandError('3 arguments required, %s given' % len(args))

        response = requests.get(args[0], timeout=30)
        response.raise_for_status()

        if not response.text:
            return

        log_content = StringIO(response.text)

        try:
            repository = Repository.objects.get(name=args[2], active_status='active')
        except Repository.DoesNotExist:
            raise CommandError('Unknown repository %s' % args[2])

        cutoff = settings.FAILURE_LINES_CUTOFF
        # Read one line beyond the cutoff so that truncation can be detected.
        failure_lines = list(islice(reader.read(log_content), cutoff+1))

        if len(failure_lines) > cutoff:
            # Alter the N+1th log line to indicate the list was truncated.
            failure_lines[-1].update(action='truncated')

        with JobsModel(args[2]) as jm:
            job_id = jm.get_job_ids_by_guid([args[1]])

            if not job_id:
                raise CommandError('No job found with guid %s in the %s repository' % (args[1], args[2]))

        new_rows = [
            FailureLine(repository=repository, job_guid=args[1], **failure_line)
            for failure_line in failure_lines
        ]
        FailureLine.objects.bulk_create(new_rows)
Example #17
0
def check_artifacts(test_project,
                    job_guid,
                    parse_status,
                    num_artifacts,
                    exp_artifact_names=None,
                    exp_error_summary=None):
    """Assert on the log parse status and stored artifacts of one job.

    The job is located by ``job_guid`` among the first 20 jobs.  It must have
    exactly one log url entry with the given ``parse_status`` and exactly
    ``num_artifacts`` artifacts.  Artifact names and the ``Bug suggestions``
    blob are only compared when the matching expected value is truthy.
    """
    with JobsModel(test_project) as jm:
        matching_ids = [
            candidate['id'] for candidate in jm.get_job_list(0, 20)
            if candidate['job_guid'] == job_guid
        ]
        job_id = matching_ids[0]
        log_entries = jm.get_job_log_url_list([job_id])

        assert len(log_entries) == 1
        assert log_entries[0]['parse_status'] == parse_status

    with ArtifactsModel(test_project) as am:
        job_filter = {'job_id': {('=', job_id)}}
        artifacts = am.get_job_artifact_list(0, 10, conditions=job_filter)

        assert len(artifacts) == num_artifacts

        if exp_artifact_names:
            stored_names = {artifact['name'] for artifact in artifacts}
            assert set(stored_names) == exp_artifact_names

        if exp_error_summary:
            suggestion_blobs = [
                artifact['blob'] for artifact in artifacts
                if artifact['name'] == 'Bug suggestions'
            ]
            assert exp_error_summary == suggestion_blobs[0]
Example #18
0
    def create(self, request, project):
        """Store the posted artifacts and schedule bug suggestions if needed.

        Returns a ``Response`` with a success message.
        """
        artifacts = ArtifactsModel.serialize_artifact_json_blobs(request.DATA)
        job_guids = [artifact['job_guid'] for artifact in artifacts]

        with JobsModel(project) as jm, ArtifactsModel(project) as am:
            job_id_lookup = jm.get_job_ids_by_guid(job_guids)
            am.load_job_artifacts(artifacts, job_id_lookup)

            # When a ``text_log_summary`` arrives together with a
            # ``Bug suggestions`` artifact for the same ``job_guid``, both
            # are simply loaded.  Our own log parser works this way, so the
            # suggestions are visible as soon as the log has been parsed.
            #
            # A ``text_log_summary`` that arrives WITHOUT a matching
            # ``Bug suggestions`` artifact gets its suggestions created
            # asynchronously, so that this api call does not take too long.
            #
            # The list below holds exactly those unaccompanied
            # ``text_log_summary`` artifacts; an empty list means there is
            # nothing to schedule.
            pending_summaries = get_artifacts_that_need_bug_suggestions(artifacts)
            if pending_summaries:
                populate_error_summary.apply_async(
                    args=[project, pending_summaries, job_id_lookup],
                    routing_key='error_summary')

            return Response({'message': 'Artifacts stored successfully'})
def unclassified_failure_count(projects=None):
    """Publish the unclassified failure counts of each project.

    :param projects: iterable of project names; when falsy, the names of all
        ``Repository`` rows are used.
    """
    if not projects:
        projects = Repository.objects.all().values_list('name', flat=True)
    unclassified_failure_publisher = UnclassifiedFailureCountPublisher(settings.BROKER_URL)

    # Both the publisher and each per-project model are released in
    # ``finally`` clauses so that an error on one project cannot leak them.
    try:
        for project in projects:

            jm = JobsModel(project)
            try:
                count = jm.get_unclassified_failure_count()
                count_excluded = jm.get_unclassified_failure_count_excluded()

                unclassified_failure_publisher.publish(project, count, count_excluded)
            finally:
                jm.disconnect()
    finally:
        unclassified_failure_publisher.disconnect()
Example #20
0
def test_load_long_job_details(test_project, eleven_jobs_stored):
    """Over-long job detail fields still load; they are stored truncated."""
    with JobsModel(test_project) as jm:
        job = jm.get_job_list(0, 1)[0]

    # max_length of each JobDetail model field under test
    limits = {
        field: JobDetail._meta.get_field(field).max_length
        for field in ("title", "value", "url")
    }

    # Every field is built at twice its permitted length.
    long_title = 't' * (2 * limits["title"])
    long_value = 'v' * (2 * limits["value"])
    long_url = 'https://' + ('u' * (2 * limits["url"]))

    detail = {
        'title': long_title,
        'value': long_value,
        'url': long_url
    }
    ji_artifact = {
        'type': 'json',
        'name': 'Job Info',
        'blob': json.dumps({'job_details': [detail]}),
        'job_guid': job['job_guid']
    }
    with ArtifactsModel(test_project) as artifacts_model:
        artifacts_model.load_job_artifacts([ji_artifact])

    assert JobDetail.objects.count() == 1

    stored = JobDetail.objects.all()[0]
    assert stored.title == long_title[:limits["title"]]
    assert stored.value == long_value[:limits["value"]]
    assert stored.url == long_url[:limits["url"]]
Example #21
0
    def unstructured_bugs(self):
        """
        Return the bugs from this job's Bug Suggestions artifact that match
        this line.

        A suggestion entry matches when every serialized component occurs in
        its ``search`` value.  Each bug id is returned at most once.  An empty
        list is returned when there are no components.
        """
        components = self._serialized_components()
        if not components:
            return []

        # Importing this at the top level causes circular import misery
        from treeherder.model.derived import JobsModel, ArtifactsModel
        with JobsModel(self.repository.name) as jobs_model, \
                ArtifactsModel(self.repository.name) as artifacts_model:
            guid_lookup = jobs_model.get_job_ids_by_guid([self.job_guid])
            job_id = guid_lookup[self.job_guid]["id"]
            raw_suggestions = artifacts_model.bug_suggestions(job_id)
            bug_suggestions = artifacts_model.filter_bug_suggestions(
                raw_suggestions)

        seen_ids = set()
        matches = []
        for item in bug_suggestions:
            if not all(component in item["search"] for component in components):
                continue
            candidates = itertools.chain(item["bugs"]["open_recent"],
                                         item["bugs"]["all_others"])
            for suggestion in candidates:
                bug_id = suggestion["id"]
                if bug_id in seen_ids:
                    continue
                seen_ids.add(bug_id)
                matches.append(suggestion)

        return matches
Example #22
0
def publish_to_pulse(project, ids, data_type):
    """Publish a pulse message for each result set identified by ``ids``.

    Nothing is published when no publisher is configured or when
    ``data_type`` is not ``'result_set'``.
    """
    # A missing publisher (because of missing configs) means no pulse
    # messages can be sent. That is acceptable: local installs and the like
    # have no need to publish on pulse, and demanding a pulse user would add
    # overhead to an already large development setup process.
    if not publisher:
        return

    jm = JobsModel(project)

    try:
        # Only new result-sets are published.
        if data_type != 'result_set':
            return

        # Fetch the entries that belong to the given ids.
        for entry in jm.get_result_set_list_by_ids(ids):
            repository = jm.refdata_model.get_repository_info(entry['repository_id'])
            entry['repository_url'] = repository['url']

            # These properties are not exposed; they appear to be internal
            # (undocumented, so that is an assumption).
            del entry['revisions']      # Not really internal, but too big
            del entry['repository_id']

            # Required properties. revision_hash should already be present
            # and is presumably the result-set identifier.
            entry['version'] = 1
            entry['project'] = project

            # Send the entry to pulse.
            publisher.new_result_set(
                message=entry,
                revision_hash=entry['revision_hash'],
                project=project
            )

        # HACK: confirm-publish could not be made to work with kombu (with or
        # without pyamqp), and the context this runs in is unclear. The
        # connection is therefore closed here so that messages still get
        # published if the process context is destroyed -- assuming nothing
        # goes wrong, since confirm channels are not used for publishing.
        publisher.connection.release()
    finally:
        jm.disconnect()
Example #23
0
    def handle(self, *args, **options):
        """Run ``match_errors`` for one job.

        Expects exactly two positional arguments: the repository name and the
        job guid.
        """
        if len(args) != 2:
            raise CommandError('2 arguments required, %s given' % len(args))

        repository, job_guid = args
        with JobsModel(repository) as jobs_model:
            match_errors(repository, jobs_model, job_guid)
Example #24
0
    def retrieve(self, request, *args, **kwargs):
        """Return the serialized repository plus its ``max_job_id``.

        The repository is selected from ``self.queryset`` by ``kwargs['pk']``.
        """
        repository = self.queryset.get(pk=kwargs['pk'])
        serializer = th_serializers.RepositorySerializer(repository)

        # Work on a copy so the extra key is not written into serializer data.
        payload = serializer.data.copy()
        with JobsModel(serializer.data['name']) as jm:
            payload.update({'max_job_id': jm.get_max_job_id()})

        return Response(payload)
Example #25
0
def project_info(request, project):
    """Return a JSON response holding the highest job id of ``project``.

    An ``HttpResponseNotFound`` is returned instead when
    ``DatasetNotFoundError`` is raised.
    """
    try:
        with JobsModel(project) as jm:
            body = json.dumps({'max_job_id': jm.get_max_job_id()})
            return HttpResponse(content=body,
                                content_type='application/json')
    except DatasetNotFoundError:
        return HttpResponseNotFound('Project does not exist')
Example #26
0
    def generate_request_body(self):
        """
        Build ``self.body``, the data structure that will be sent to
        Elasticsearch.
        """
        with JobsModel(self.project) as jm, \
                ArtifactsModel(self.project) as am:
            job_data = jm.get_job(self.job_id)[0]
            option_collection = jm.refdata_model.get_all_option_collections()
            revision_list = jm.get_resultset_revisions_list(
                job_data["result_set_id"])
            artifact_filter = {
                'job_id': {("=", self.job_id)},
                'name': {("=", "buildapi")},
            }
            buildapi_artifact = am.get_job_artifact_list(0, 1, artifact_filter)
            if buildapi_artifact:
                buildname = buildapi_artifact[0]["blob"]["buildername"]
            else:
                # OrangeFactor needs a buildname to be set or it skips the failure
                # classification, so we make one up for non-buildbot jobs.
                buildname = 'non-buildbot %s test %s' % (
                    job_data["platform"], job_data["job_type_name"])

        # The request time is used for the date, as start time is not
        # available for pending jobs.
        submit_time = datetime.fromtimestamp(int(job_data["submit_timestamp"]))
        buildtype = option_collection[job_data["option_collection_hash"]]["opt"]

        self.body = {
            "buildname": buildname,
            "machinename": job_data["machine_name"],
            "os": job_data["platform"],
            "date": submit_time.strftime("%Y-%m-%d"),
            "type": job_data["job_type_name"],
            "buildtype": buildtype,
            # Intentionally using strings for starttime, bug, timestamp for
            # compatibility with TBPL's legacy output format.
            "starttime": str(job_data["start_timestamp"]),
            "tree": self.project,
            "rev": revision_list[0]["revision"],
            "bug": str(self.bug_id),
            "who": self.who,
            "timestamp": str(self.classification_timestamp),
            "treeherder_job_id": self.job_id,
        }
Example #27
0
def publish_to_pulse(project, ids, data_type):
    """Publish a pulse message for each result set identified by ``ids``.

    Nothing is published when no publisher is configured or when
    ``data_type`` is not ``'result_set'``.
    """
    # Without a publisher (because of missing configs) no pulse messages can
    # be sent. That is fine: local installs and similar do not need to
    # publish on pulse, and requiring a pulse user would add overhead to an
    # already large development setup process.
    if not publisher:
        return

    jm = JobsModel(project)

    try:
        # Only new result-sets are published.
        if data_type != 'result_set':
            return

        # Fetch the entries that belong to the given ids.
        result_sets = jm.get_result_set_list_by_ids(ids)
        for entry in result_sets:
            # These properties are not exposed; they appear to be internal
            # (undocumented, so that is an assumption).
            del entry['revisions']      # Not really internal, but too big
            del entry['repository_id']

            # Required properties. revision_hash should already be present
            # and is presumably the result-set identifier.
            entry['version'] = 1
            entry['project'] = project

            # Send the entry to pulse.
            publisher.new_result_set(
                message=entry,
                revision_hash=entry['revision_hash'],
                project=project
            )

        # HACK: confirm-publish could not be made to work with kombu (with or
        # without pyamqp), and the context this runs in is unclear. The
        # connection is therefore closed here so that messages still get
        # published if the process context is destroyed -- assuming nothing
        # goes wrong, since confirm channels are not used for publishing.
        publisher.connection.release()
    finally:
        jm.disconnect()
Example #28
0
def test_load_non_ascii_textlog_errors(test_project, eleven_jobs_stored):
    """Text log errors containing non-ascii characters load successfully."""
    with JobsModel(test_project) as jm:
        job = jm.get_job_list(0, 1)[0]

    error_lines = [
        {
            # non-ascii character
            "line": '07:51:28  WARNING - \U000000c3'.encode('utf-8'),
            "linenumber": 1587
        },
        {
            # astral character (i.e. higher than ucs2)
            "line": '07:51:29  WARNING - \U0001d400'.encode('utf-8'),
            "linenumber": 1588
        }
    ]
    step = {
        'name': 'foo',
        'started': '2016-05-10 12:44:23.103904',
        'started_linenumber': 8,
        'finished_linenumber': 10,
        'finished': '2016-05-10 12:44:23.104394',
        'result': 'success',
        'errors': error_lines
    }
    text_log_summary_artifact = {
        'type': 'json',
        'name': 'text_log_summary',
        'blob': json.dumps({'step_data': {"steps": [step]}}),
        'job_guid': job['job_guid']
    }
    with ArtifactsModel(test_project) as artifacts_model:
        artifacts_model.load_job_artifacts([text_log_summary_artifact])

    assert TextLogError.objects.count() == 2

    first_error = TextLogError.objects.get(line_number=1587)
    assert first_error.line == '07:51:28  WARNING - \U000000c3'

    second_error = TextLogError.objects.get(line_number=1588)
    assert second_error.line == '07:51:29  WARNING - <U+01D400>'
Example #29
0
    def generate_request_body(self):
        """
        Build ``self.body``, the data structure required by tbpl's
        submitBuildStar.php script.
        It's used by both the bug_job_map endpoint and the job note endpoint.
        """
        jobs_model = JobsModel(self.project)
        try:
            artifact_filter = {
                'job_id': set([("=", self.job_id)]),
                'name': set([("=", "buildapi_complete")])
            }
            buildapi_artifact = jobs_model.get_job_artifact_list(
                0, 1, artifact_filter)[0]
            job_data = jobs_model.get_job(self.job_id)[0]
        finally:
            jobs_model.disconnect()

        separator = " - "
        note_text = ""
        if self.bug_id:
            note_text = "Bug {0}".format(self.bug_id)
        if self.classification_id:
            classification = FailureClassification.objects.get(
                id=self.classification_id)
            if note_text:
                note_text += separator
            note_text += classification.name
            # The free-form note is only appended when a classification is set.
            if self.note:
                if note_text:
                    note_text += separator
                note_text += self.note

        self.body = {
            "id": buildapi_artifact["blob"]["id"],
            "machinename": job_data["machine_name"],
            "starttime": int(job_data["start_timestamp"]),
            "note": note_text,
            "who": self.who
        }
Example #30
0
    def use_jobs_model(*args, **kwargs):
        """Call ``model_func`` with a ``JobsModel`` for the requested project.

        The project name is read from ``kwargs["project"]`` and the model is
        passed to ``model_func`` as the ``jm`` keyword argument.  Exceptions
        are turned into responses: ``DatasetNotFoundError`` and
        ``ObjectNotFoundException`` give a 404, any other exception a 500
        (including a traceback when ``settings.DEBUG`` is set).  The model is
        disconnected afterwards if it was created.
        """
        project = kwargs["project"]
        # ``jm`` must exist before the ``try``: if the JobsModel constructor
        # raises, the ``finally`` clause would otherwise hit an unassigned
        # name and its UnboundLocalError would replace the error response
        # built by the handlers below.
        jm = None
        try:
            jm = JobsModel(project)
            return model_func(*args, jm=jm, **kwargs)

        except DatasetNotFoundError:
            return Response(
                {"message": "No project with name {0}".format(project)},
                status=404,
            )
        except ObjectNotFoundException as e:
            return Response({"message": unicode(e)}, status=404)
        except Exception as e:  # pragma nocover
            msg = {"message": unicode(e)}
            if settings.DEBUG:
                import traceback
                msg["traceback"] = traceback.format_exc()

            return Response(msg, status=500)
        finally:
            # Only disconnect a model that was actually constructed.
            if jm is not None:
                jm.disconnect()
Example #31
0
def populate_performance_series(project, series_type, series_data):
    """Store each series of ``series_data`` once per configured time range.

    :param project: name of the project whose jobs model receives the data.
    :param series_type: passed through to ``store_performance_series``.
    :param series_data: mapping of signature to the data stored for it.
    """
    jm = JobsModel(project)
    # A failing store call must not leave the model connected.
    try:
        for t_range in settings.TREEHERDER_PERF_SERIES_TIME_RANGES:
            for signature in series_data:
                jm.store_performance_series(t_range['seconds'], series_type,
                                            signature, series_data[signature])
    finally:
        jm.disconnect()
Example #32
0
def test_artifact_create_text_log_summary_and_bug_suggestions(
        webapp, test_project, eleven_jobs_stored, mock_post_json,
        mock_error_summary, sample_data):
    """
    Submit a text_log_summary artifact together with a Bug suggestions one.

    No Bug suggestions artifact should be generated in that case; the
    submitted one is simply stored.
    """
    with JobsModel(test_project) as jobs_model:
        first_job = jobs_model.get_job_list(0, 1)[0]
    summary = sample_data.text_log_summary
    submitted_suggestions = ["flim", "flam"]

    # Both artifacts share everything except their name and blob.
    collection = client.TreeherderArtifactCollection()
    for artifact_name, artifact_blob in (
            ('text_log_summary', json.dumps(summary['blob'])),
            ('Bug suggestions', submitted_suggestions)):
        collection.add(client.TreeherderArtifact({
            'type': 'json',
            'name': artifact_name,
            'blob': artifact_blob,
            'job_guid': first_job['job_guid'],
        }))

    credentials = OAuthCredentials.get_credentials(test_project)
    auth = TreeherderAuth(
        credentials['consumer_key'],
        credentials['consumer_secret'],
        test_project,
    )
    th_client = client.TreeherderClient(
        protocol='http', host='localhost', auth=auth)
    th_client.post_collection(test_project, collection)

    job_filter = {'job_id': {('=', first_job["id"])}}
    with ArtifactsModel(test_project) as artifacts_model:
        stored = artifacts_model.get_job_artifact_list(
            0, 10, conditions=job_filter)

    assert len(stored) == 2
    stored_names = {entry['name'] for entry in stored}
    stored_suggestions = [
        entry['blob'] for entry in stored
        if entry['name'] == 'Bug suggestions'
    ][0]

    assert stored_names == {'Bug suggestions', 'text_log_summary'}
    assert submitted_suggestions == stored_suggestions
    def generate_request_body(self):
        """
        Build the document sent to Elasticsearch and keep it in ``self.body``.
        """
        with JobsModel(self.project) as jobs_model:
            job = jobs_model.get_job(self.job_id)[0]
            option_names = OptionCollection.objects.values_list(
                'option__name', flat=True
            ).filter(option_collection_hash=job["option_collection_hash"])
            build_type = " ".join(sorted(option_names))
            push_revision = Push.objects.values_list(
                'revision', flat=True
            ).get(id=job['push_id'])
            build_name = job["ref_data_name"]

        # "date" uses the submit time because pending jobs have no start time.
        # "starttime", "bug" and "timestamp" are deliberately strings, for
        # compatibility with TBPL's legacy output format.
        self.body = {
            "buildname": build_name,
            "machinename": job["machine_name"],
            "os": job["platform"],
            "date": datetime.utcfromtimestamp(
                job["submit_timestamp"]).strftime("%Y-%m-%d"),
            "type": job["job_type_name"],
            "buildtype": build_type,
            "starttime": str(job["start_timestamp"]),
            "tree": self.project,
            "rev": push_revision,
            "bug": str(self.bug_id),
            "who": self.who,
            "timestamp": str(self.classification_timestamp),
            "treeherder_job_id": self.job_id,
        }
Example #34
0
    def handle(self, *args, **options):
        """
        Run ``JobsModel.cycle_data`` for every project known to Datasource.

        Reads the ``debug``, ``days``, ``chunk_size`` and ``sleep_time``
        entries of ``options``.
        """
        self.is_debug = options['debug']

        max_age = datetime.timedelta(days=options['days'])
        self.debug("cycle interval... {}".format(max_age))

        for project in Datasource.objects.values_list('project', flat=True):
            self.debug("Cycling Database: {0}".format(project))
            with JobsModel(project) as jobs_model:
                deleted = jobs_model.cycle_data(
                    max_age,
                    options['chunk_size'],
                    options['sleep_time'],
                )
                self.debug(
                    "Deleted {} jobs from {}".format(deleted, project))
Example #35
0
    def generate_request_body(self):
        """
        Build the payload expected by tbpl's starcomment.php script.

        The result is stored in ``self.body``.
        """
        jobs_model = JobsModel(self.project)

        try:
            artifact_filter = {
                'job_id': {("=", self.job_id)},
                'name': {("=", "buildapi")},
            }
            buildapi_artifact = jobs_model.get_job_artifact_list(
                0, 1, artifact_filter)[0]
            job = jobs_model.get_job(self.job_id)[0]
            option_collections = (
                jobs_model.refdata_model.get_all_option_collections())
            revisions = jobs_model.get_resultset_revisions_list(
                job["result_set_id"])
        finally:
            jobs_model.disconnect()

        # "date" is taken from the submit time, since a start time is not
        # available for pending jobs.
        self.body = {
            "buildname": buildapi_artifact["blob"]["buildername"],
            "machinename": job["machine_name"],
            "os": job["platform"],
            "date": datetime.fromtimestamp(
                int(job["submit_timestamp"])).strftime("%Y-%m-%d"),
            "type": job["job_type_name"],
            "buildtype":
                option_collections[job["option_collection_hash"]]["opt"],
            "starttime": int(job["start_timestamp"]),
            "tree": self.project,
            "rev": revisions[0]["revision"],
            "comment": "Bug {0}".format(self.bug_id),
            "who": self.who,
            "timestamp": self.submit_timestamp,
            "logfile": "00000000",
        }
    def handle(self, *args, **options):
        """
        Run ``JobsModel.cycle_data`` for every 'jobs' datasource project.

        The interval is ``options['cycle_interval']`` days when that option
        is truthy, otherwise ``settings.DATA_CYCLE_INTERVAL``.
        """
        self.is_debug = options['debug']

        interval_days = options['cycle_interval']
        cycle_interval = (
            datetime.timedelta(days=interval_days)
            if interval_days
            else settings.DATA_CYCLE_INTERVAL
        )

        self.debug("cycle interval: {0}".format(cycle_interval))

        jobs_projects = Datasource.objects.filter(
            contenttype='jobs'
        ).values_list('project', flat=True)

        for project in jobs_projects:
            self.debug("Cycling Database: {0}".format(project))
            jobs_model = JobsModel(project)
            try:
                deleted = jobs_model.cycle_data(
                    cycle_interval,
                    options['chunk_size'],
                    options['sleep_time'],
                )
                self.debug(
                    "Deleted {0} resultsets from {1}".format(deleted, project))
            finally:
                jobs_model.disconnect()
Example #37
0
    def handle(self, *args, **options):
        """
        Cycle data for each project that has a 'jobs' datasource.

        Uses ``options['cycle_interval']`` (in days) when it is truthy and
        falls back to ``settings.DATA_CYCLE_INTERVAL`` otherwise.
        """
        self.is_debug = options['debug']

        requested_days = options['cycle_interval']
        if not requested_days:
            cycle_interval = settings.DATA_CYCLE_INTERVAL
        else:
            cycle_interval = datetime.timedelta(days=requested_days)

        self.debug("cycle interval: {0}".format(cycle_interval))

        project_names = Datasource.objects.filter(
            contenttype='jobs').values_list('project', flat=True)

        for project in project_names:
            self.debug("Cycling Database: {0}".format(project))
            model = JobsModel(project)
            try:
                removed = model.cycle_data(
                    cycle_interval,
                    options['chunk_size'],
                    options['sleep_time'])
                report = "Deleted {0} resultsets from {1}".format(
                    removed, project)
                self.debug(report)
            finally:
                model.disconnect()
Example #38
0
def test_load_artifact_second_time_fails(
        test_project, eleven_jobs_stored,
        mock_post_json, mock_error_summary,
        sample_data):
    """
    Loading two artifacts with the same name keeps only the first one.
    """

    with JobsModel(test_project) as jobs_model:
        job = jobs_model.get_job_list(0, 1)[0]
    first_blob = ["flim", "flam"]

    def bug_suggestions_artifact(blob):
        # Both submissions differ only in their blob.
        return {
            'type': 'json',
            'name': 'Bug suggestions',
            'blob': json.dumps(blob),
            'job_guid': job['job_guid'],
        }

    submissions = (
        bug_suggestions_artifact(first_blob),
        bug_suggestions_artifact(["me", "you"]),
    )

    with ArtifactsModel(test_project) as artifacts_model:
        for artifact in submissions:
            artifacts_model.load_job_artifacts(
                [artifact],
                {artifact['job_guid']: job},
            )

        job_filter = {'job_id': {('=', job["id"])}}
        stored = artifacts_model.get_job_artifact_list(
            0, 10, conditions=job_filter)

    assert len(stored) == 1
    stored_names = {entry['name'] for entry in stored}
    stored_blob = [
        entry['blob'] for entry in stored
        if entry['name'] == 'Bug suggestions'
    ][0]

    assert stored_names == {'Bug suggestions'}
    assert first_blob == stored_blob
Example #39
0
def match_errors(job):
    """
    Find matches for the job's unmatched failure lines and record them.

    Nothing happens unless the job's stored result is one of "testfailed",
    "busted" or "exception": error lines can also show up in jobs that are
    marked as passing, and those are not autoclassified.
    """
    with JobsModel(job.repository.name) as jm:
        stored_job = jm.get_job(job.project_specific_id)[0]
        has_failed = stored_job["result"] in [
            "testfailed", "busted", "exception"]

    if not has_failed:
        return

    pending_lines = set(FailureLine.objects.unmatched_for_job(job))

    if pending_lines:
        matches, all_matched = find_matches(pending_lines)
        update_db(job, matches, all_matched)
Example #40
0
def lookup_revisions(revision_dict):
    """
    Build a per-project revision->resultset lookup.

    ``revision_dict`` maps a project name to an iterable of revisions.
    Projects whose lookup comes back empty are left out of the result.
    """
    from treeherder.model.derived import JobsModel

    found_by_project = {}
    for project, revisions in revision_dict.items():
        # Duplicates are dropped before querying.
        unique_revisions = list(set(revisions))

        with JobsModel(project) as jm:
            found = jm.get_resultset_all_revision_lookup(unique_revisions)

        if not found:
            continue
        found_by_project[project] = found
    return found_by_project
Example #41
0
    def generate_request_body(self):
        """
        Assemble the request body for tbpl's starcomment.php script and
        store it in ``self.body``.
        """
        model = JobsModel(self.project)

        try:
            buildapi_conditions = {
                'job_id': {("=", self.job_id)},
                'name': {("=", "buildapi")},
            }
            buildapi = model.get_job_artifact_list(
                0, 1, buildapi_conditions)[0]
            job_row = model.get_job(self.job_id)[0]
            collections = model.refdata_model.get_all_option_collections()
            revisions = model.get_resultset_revisions_list(
                job_row["result_set_id"])
        finally:
            model.disconnect()

        self.body = {
            "buildname":
                buildapi["blob"]["buildername"],
            "machinename":
                job_row["machine_name"],
            "os":
                job_row["platform"],
            # The submit time stands in for the date because pending jobs
            # have no start time yet.
            "date":
                datetime.fromtimestamp(
                    int(job_row["submit_timestamp"])).strftime("%Y-%m-%d"),
            "type":
                job_row["job_type_name"],
            "buildtype":
                collections[job_row["option_collection_hash"]]["opt"],
            "starttime":
                int(job_row["start_timestamp"]),
            "tree":
                self.project,
            "rev":
                revisions[0]["revision"],
            "comment":
                "Bug {0}".format(self.bug_id),
            "who":
                self.who,
            "timestamp":
                self.submit_timestamp,
            "logfile":
                "00000000",
        }
Example #42
0
def parse_log(project, job_id, result_set_id, check_errors=False):
    """
    Call ArtifactBuilderCollection on the given job.

    Every log referenced by the job is parsed and the resulting artifacts
    are stored through the jobs model. When ``check_errors`` is true, each
    error line additionally gets open and closed bug suggestions, stored as
    'Open bugs' and 'Closed bugs' artifacts. Afterwards a 'processed' status
    is published and, when ``check_errors`` is true, a failure message too.
    """
    # Matches a leading "NN:NN:NN " timestamp so it can be stripped from
    # error lines before searching for bugs.
    pattern_obj = re.compile(r'\d+:\d+:\d+\s+')

    open_bugs_cache = {}
    closed_bugs_cache = {}

    # Bound up front so the finally clause can release whatever was opened,
    # even if one of the later constructors raises.
    jm = rdm = status_publisher = failure_publisher = None

    try:
        jm = JobsModel(project=project)
        rdm = RefDataManager()

        status_publisher = JobStatusPublisher(settings.BROKER_URL)
        failure_publisher = JobFailurePublisher(settings.BROKER_URL)

        # return the resultset with the job id to identify if the UI wants
        # to fetch the whole thing.
        resultset = jm.get_result_set_by_id(result_set_id=result_set_id)[0]
        del resultset["active_status"]
        del resultset["revision_hash"]

        log_references = jm.get_log_references(job_id)

        # we may have many log references per job
        for log in log_references:

            # parse a log given its url
            artifact_bc = ArtifactBuilderCollection(
                log['url'],
                check_errors=check_errors,
            )
            artifact_bc.parse()

            artifact_list = [
                (job_id, name, 'json', json.dumps(artifact))
                for name, artifact in artifact_bc.artifacts.items()
            ]

            if check_errors:
                # I'll try to begin with a full_text search on the entire row

                all_errors = artifact_bc.artifacts[
                    'Structured Log']['step_data']['all_errors']

                open_bugs_suggestions = {}
                closed_bugs_suggestions = {}

                for err in all_errors:

                    # remove timestamp
                    clean_line = pattern_obj.sub('', err['line'])

                    # The caches are keyed on the cleaned line and shared
                    # across all logs of this job, so each distinct line is
                    # searched at most once per bug state.
                    if clean_line not in open_bugs_cache:
                        open_bugs_cache[clean_line] = rdm.get_suggested_bugs(
                            clean_line)

                    if clean_line not in closed_bugs_cache:
                        closed_bugs_cache[clean_line] = rdm.get_suggested_bugs(
                            clean_line, open_bugs=False)

                    # Suggestions are reported under the original line,
                    # timestamp included.
                    open_bugs_suggestions[err['line']] = \
                        open_bugs_cache[clean_line]
                    closed_bugs_suggestions[err['line']] = \
                        closed_bugs_cache[clean_line]

                artifact_list.append(
                    (job_id, 'Open bugs', 'json',
                     json.dumps(open_bugs_suggestions)))
                artifact_list.append(
                    (job_id, 'Closed bugs', 'json',
                     json.dumps(closed_bugs_suggestions)))

            # store the artifacts generated
            jm.store_job_artifact(artifact_list)
        status_publisher.publish(job_id, resultset, project, 'processed')
        if check_errors:
            failure_publisher.publish(job_id, project)

    finally:
        # Same release order as before; resources that were never created
        # are skipped.
        for resource in (rdm, jm, status_publisher, failure_publisher):
            if resource is not None:
                resource.disconnect()
Example #43
0
 def _process_all_objects_for_project(self, project):
     """
     Queue ``process_objects`` tasks for ``project`` for as long as the
     jobs model reports unprocessed objects.
     """
     jm = JobsModel(project)
     try:
         while jm.get_num_unprocessed_objects() > 0:
             process_objects.delay(project=project)
     finally:
         # The model was previously never released.
         jm.disconnect()
Example #44
0
    def generate_request_body(self):
        """
        Build the payload for tbpl's submitBugzillaComment.php script.

        Triggered by a new bug-job association. The result, a bug id plus a
        comment made of job details followed by the job's error lines, is
        stored in ``self.body``.
        """
        jm = JobsModel(self.project)
        try:
            job = jm.get_job(self.job_id)[0]
            suggestion_artifacts = jm.get_job_artifact_list(0, 1, {
                'job_id': {('=', job['id'])},
                'name': {('=', 'Bug suggestions')},
            })
            # A bug suggestion artifact blob looks like this:
            # [{ "search": "my-error-line", "bugs": ....}]
            error_lines = [
                suggestion["search"]
                for artifact in suggestion_artifacts
                for suggestion in artifact["blob"]
            ]
            bug_job_map = jm.get_bug_job_map_detail(self.job_id, self.bug_id)

            revision_list = jm.get_resultset_revisions_list(
                job["result_set_id"])

            buildapi_info = jm.get_job_artifact_list(0, 1, {
                'job_id': {("=", self.job_id)},
                'name': {("=", "buildapi")},
            })
        finally:
            jm.disconnect()

        # Obfuscate the submitter's address.
        who = bug_job_map["who"].replace("@", "[at]").replace(".", "[dot]")

        submitted_at = datetime.fromtimestamp(bug_job_map["submit_timestamp"])
        submit_date = submitted_at.replace(microsecond=0).isoformat()

        log_url = "{0}{1}/logviewer.html#?repo={2}&job_id={3}".format(
            settings.SITE_URL,
            settings.UI_PREFIX,
            self.project,
            self.job_id,
        )

        job_description = {
            'repository': self.project,
            'who': who,
            'submit_timestamp': submit_date,
            'log': log_url,
            'machine': job["machine_name"],
            'revision': revision_list[0]["revision"],
        }

        # The build name is only known when a buildapi artifact exists.
        if buildapi_info:
            job_description['buildname'] = \
                buildapi_info[0]["blob"]["buildername"]

        description_lines = [
            "{0}: {1}".format(key, value)
            for key, value in job_description.items()
        ]
        body_comment = (
            '\n'.join(description_lines) + '\n\n' + '\n'.join(error_lines))

        self.body = {
            "id": self.bug_id,
            "comment": body_comment,
        }