def test_claim_objects(jm, sample_data):
    """``claim_objects`` claims & returns unclaimed rows up to a limit."""

    blobs = []
    blob_lookup = set()
    for job in sample_data.job_data[:3]:
        blobs.append(job)
        blob_lookup.add(json.dumps(job))

    jm.store_job_data(blobs)

    rows1 = jm.claim_objects(2)

    # a separate worker with a separate connection
    from treeherder.model.derived.jobs import JobsModel
    jm2 = JobsModel(jm.project)

    rows2 = jm2.claim_objects(2)

    loading_rows = jm.get_dhub(jm.CT_OBJECTSTORE).execute(
        proc="objectstore_test.counts.loading")[0]["loading_count"]

    jm.disconnect()
    jm2.disconnect()

    assert len(rows1) == 2
    # second worker asked for two rows but only got one that was left
    assert len(rows2) == 1

    # all three blobs were fetched by one of the workers
    for r in rows1 + rows2:
        assert r['json_blob'] in blob_lookup

    # the blobs are all marked as "loading" in the database
    assert loading_rows == 3
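The test above implies that each claimed row exposes its stored payload under a 'json_blob' key. A minimal consumer sketch under that assumption (the helper name is hypothetical):

import json

def consume_claimed_rows(jm, limit=100):
    # Hypothetical helper: claim up to ``limit`` unclaimed rows and decode
    # their payloads, relying on the 'json_blob' shape asserted above.
    for row in jm.claim_objects(limit):
        yield json.loads(row['json_blob'])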
Example #3
    def unstructured_bugs(self):
        """
        Get bugs that match this line in the Bug Suggestions artifact for this job.
        """
        components = self._serialized_components()
        if not components:
            return []

        # Importing this at the top level causes circular import misery
        from treeherder.model.derived import JobsModel, ArtifactsModel
        with JobsModel(self.repository.name) as jm, \
                ArtifactsModel(self.repository.name) as am:
            job_id = jm.get_job_ids_by_guid([self.job_guid
                                             ])[self.job_guid]["id"]
            bug_suggestions = am.filter_bug_suggestions(
                am.bug_suggestions(job_id))

        rv = []
        ids_seen = set()
        for item in bug_suggestions:
            if all(component in item["search"] for component in components):
                for suggestion in itertools.chain(item["bugs"]["open_recent"],
                                                  item["bugs"]["all_others"]):
                    if suggestion["id"] not in ids_seen:
                        ids_seen.add(suggestion["id"])
                        rv.append(suggestion)

        return rv
Example #4
def publish_job_action(project, action, job_id, requester):
    """
    Generic task to issue pulse notifications when jobs actions occur
    (retrigger/cancel)

    :param project str: The name of the project this action was requested for.
    :param action str: The type of action performed (retrigger/cancel/etc..)
    :param job_id str: The job id the action was requested for.
    :param requester str: The email address associated with the request.
    """
    publisher = pulse_connection.get_publisher()
    if not publisher:
        return

    from treeherder.model.derived.jobs import JobsModel

    with JobsModel(project) as jm:
        job = jm.get_job(job_id)[0]
        refdata = jm.get_job_reference_data(job['signature'])

        publisher.job_action(
            version=1,
            build_system_type=refdata['build_system_type'],
            project=project,
            action=action,
            job_guid=job['job_guid'],
            # Job id is included for convenience as you need it in some cases
            # instead of job_guid...
            job_id=job['id'],
            requester=requester)
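A minimal invocation sketch; the project, job id, and requester values below are placeholders, and whether the function is called directly or dispatched as an async task depends on how it is registered elsewhere in the code base:

publish_job_action(
    project='mozilla-inbound',        # placeholder repository name
    action='retrigger',
    job_id='12345',                   # placeholder job id
    requester='sheriff@example.com')  # placeholder requester address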
Example #5
    def save(self, *args, **kwargs):
        super(BugJobMap, self).save(*args, **kwargs)

        # FIXME: using the JobsModel here is pretty horrible -- remove
        # when we move jobs table to central db
        from treeherder.model.derived.jobs import JobsModel
        from treeherder.etl.tasks import submit_elasticsearch_doc

        with JobsModel(self.job.repository.name) as jm:
            if settings.ORANGEFACTOR_HAWK_KEY:
                ds_job = jm.get_job(self.job.project_specific_id)[0]
                if ds_job["state"] == "completed":
                    # Submit bug associations to Elasticsearch using an async
                    # task.
                    submit_elasticsearch_doc.apply_async(
                        args=[
                            self.job.repository.name,
                            self.job.project_specific_id, self.bug_id,
                            int(time.mktime(self.created.timetuple())),
                            self.who
                        ],
                        routing_key='classification_mirroring')

            # if we have a user, then update the autoclassification relations
            if self.user:
                jm.update_autoclassification_bug(self.job.project_specific_id,
                                                 self.bug_id)
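Because the mirroring and autoclassification hooks run from save(), creating a row through the ORM is enough to trigger them. A minimal sketch, assuming BugJobMap is an ordinary Django model with the fields the snippet touches (the helper and import path below are assumptions):

from treeherder.model.models import BugJobMap  # assumed import path

def classify_job_against_bug(job, bug_id, user):
    # Hypothetical helper: ``job`` is an existing Job instance and ``user``
    # the classifier; objects.create() calls save(), which runs the
    # Elasticsearch mirroring and autoclassification logic above.
    return BugJobMap.objects.create(job=job, bug_id=bug_id, user=user)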
Example #6
def jm(request, test_repository, jobs_ds):
    """ Give a test access to a JobsModel instance. """
    model = JobsModel(jobs_ds.project)

    # patch in additional test-only procs on the datasources
    add_test_procs_file(
        model.get_dhub(),
        model.get_datasource().key,
        "jobs_test.json",
    )

    def fin():
        model.disconnect()
    request.addfinalizer(fin)

    return model
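A minimal sketch of a test consuming the fixture above; it only exercises calls the fixture itself already makes:

def test_jm_fixture_wiring(jm):
    # The fixture returns a connected JobsModel, so the dhub and datasource
    # accessors used during setup should still work here.
    assert jm.get_dhub() is not None
    assert jm.get_datasource().key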
Example #8
    def process_job_list(self, all_jobs_list):
        if not isinstance(all_jobs_list, list):
            all_jobs_list = [all_jobs_list]

        validated_jobs = self._get_validated_jobs_by_project(all_jobs_list)

        for project, job_list in validated_jobs.items():
            newrelic.agent.add_custom_parameter("project", project)
            with JobsModel(project) as jobs_model:
                storeable_job_list = []
                for pulse_job in job_list:
                    if pulse_job["state"] != "unscheduled":
                        try:
                            self.clean_revision(pulse_job, jobs_model)
                            storeable_job_list.append(
                                self.transform(pulse_job))
                        except AttributeError:
                            logger.warn("Skipping job due to bad attribute",
                                        exc_info=1)

                try:
                    jobs_model.store_job_data(storeable_job_list)
                except DatasetNotFoundError:
                    logger.warn(
                        "Job with unsupported project: {}".format(project))
Example #9
def publish_job_action(project, action, job_id, requester):
    """
    Generic task to issue pulse notifications when jobs actions occur
    (retrigger/cancel)

    :param project str: The name of the project this action was requested for.
    :param action str: The type of action performed (retrigger/cancel/etc..)
    :param job_id str: The job id the action was requested for.
    :param requester str: The email address associated with the request.
    """
    newrelic.agent.add_custom_parameter("project", project)
    newrelic.agent.add_custom_parameter("action", action)
    newrelic.agent.add_custom_parameter("job_id", job_id)
    newrelic.agent.add_custom_parameter("requester", requester)
    publisher = pulse_connection.get_publisher()
    if not publisher:
        return

    from treeherder.model.derived.jobs import JobsModel

    with JobsModel(project) as jm:
        job = jm.get_job(job_id)[0]

        publisher.job_action(
            version=1,
            build_system_type=ReferenceDataSignatures.objects.values_list(
                'build_system_type',
                flat=True).get(signature=job['signature']),
            project=project,
            action=action,
            job_guid=job['job_guid'],
            # Job id is included for convenience as you need it in some cases
            # instead of job_guid...
            job_id=job['id'],
            requester=requester)
Example #10
def populate_performance_series(project, series_type, series_data):
    from treeherder.model.derived.jobs import JobsModel

    with JobsModel(project) as jm:
        for t_range in settings.TREEHERDER_PERF_SERIES_TIME_RANGES:
            for signature in series_data:
                jm.store_performance_series(t_range['seconds'], series_type,
                                            signature, series_data[signature])
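A minimal invocation sketch with entirely hypothetical data; the only structural requirement visible above is that series_data maps signature hashes to whatever store_performance_series accepts as a series:

populate_performance_series(
    project='mozilla-central',                 # placeholder project
    series_type='talos_data',
    series_data={
        'abc123deadbeefabc123deadbeef': [      # placeholder signature hash
            {'result_set_id': 1, 'job_id': 10, 'value': 42.0},  # assumed datapoint shape
        ],
    })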
Example #11
    def _lock_unlock():
        with JobsModel(test_project) as jm2:
            jm2.get_dhub().execute(
                proc='generic.locks.get_lock',
                placeholders=[FakePerfData.get_fake_lock_string()])
            time.sleep(1)
            jm2.get_dhub().execute(
                proc='generic.locks.release_lock',
                placeholders=[FakePerfData.get_fake_lock_string()])
Example #12
    def _calculate_etag(view_instance, view_method,
                        request, args, kwargs):
        project, interval = (kwargs.get('project'),
                             request.QUERY_PARAMS.get('interval'))
        if project and interval:
            return cache.get(JobsModel.get_performance_series_cache_key(
                project, interval, hash=True))

        return None
Example #13
def calculate_durations(sample_window_seconds=21600, debug=False):
    from treeherder.model.derived.jobs import JobsModel

    projects = Repository.objects.filter(active_status='active').values_list('name', flat=True)

    for project in projects:

        with JobsModel(project) as jm:
            jm.calculate_durations(sample_window_seconds, debug)
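A short invocation sketch; the defaults above already cover the common case, so arguments are only needed to narrow the window or turn on SQL debugging:

calculate_durations()                                          # all active projects, 6-hour window
calculate_durations(sample_window_seconds=3600, debug=True)    # 1-hour window with debug output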
Example #14
    def _calculate_etag(view_instance, view_method, request, args, kwargs):
        project, interval = (kwargs.get('project'),
                             request.QUERY_PARAMS.get('interval'))
        if project and interval:
            return cache.get(
                JobsModel.get_performance_series_cache_key(project,
                                                           interval,
                                                           hash=True))

        return None
Example #15
    def _update_failure_classification(self):
        # update the job classification
        from treeherder.model.derived.jobs import JobsModel
        with JobsModel(self.job.repository.name) as jm:
            jm.update_last_job_classification(self.job.project_specific_id)

        # if a manually filed job, update the autoclassification information
        if self.user:
            if self.failure_classification.name in [
                    "intermittent", "intermittent needs filing"]:
                failure_line = jm.get_manual_classification_line(
                    self.job.project_specific_id)
                if failure_line:
                    failure_line.update_autoclassification()
Example #16
def _add_series(server_params, project, time_intervals, signature_hash,
                signature_props, mysql_debug, verbose):
    with JobsModel(project) as jm:
        jm.DEBUG = mysql_debug
        if verbose:
            print(signature_hash)

        jm.set_series_signature(signature_hash, signature_props)
        for time_interval in time_intervals:
            pc = PerfherderClient(protocol=server_params.scheme,
                                  host=server_params.netloc)
            series = pc.get_performance_series(project,
                                               signature_hash,
                                               time_interval=time_interval)
            jm.store_performance_series(time_interval, 'talos_data',
                                        str(signature_hash), series)
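A hypothetical call sketch: server_params only needs .scheme and .netloc (for example a urlparse() result), and every other argument below is a placeholder:

from urlparse import urlparse  # urllib.parse on Python 3

_add_series(
    server_params=urlparse('https://treeherder.mozilla.org'),
    project='mozilla-central',
    time_intervals=[86400, 604800],
    signature_hash='abc123deadbeef',                        # placeholder hash
    signature_props={'suite': 'tp5o', 'test': 'main_rss'},  # placeholder properties
    mysql_debug=False,
    verbose=True)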
Example #17
def store_jobs(job_collections, chunk_size):
    errors = []
    for repository, jobs in job_collections.iteritems():
        with JobsModel(repository) as jm:
            for collection in jobs.get_chunks(chunk_size=chunk_size):
                try:
                    collection.validate()
                    jm.store_job_data(collection.get_collection_data())
                except Exception:
                    newrelic.agent.record_exception()
                    errors.append({
                        "project": repository,
                        "collection": "job",
                        "message": traceback.format_exc()
                    })

    if errors:
        raise common.CollectionNotStoredException(errors)
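store_jobs only needs the values of job_collections to expose get_chunks(), validate() and get_collection_data(), as used above (in practice a thclient.TreeherderJobCollection). A self-contained sketch with a stand-in collection:

class FakeJobCollection(object):
    """Stand-in for a job collection; real callers would build a
    thclient.TreeherderJobCollection instead (an assumption about callers)."""

    def __init__(self, data):
        self._data = data

    def get_chunks(self, chunk_size):
        # Yield smaller collections, mirroring the interface used above.
        for i in range(0, len(self._data), chunk_size):
            yield FakeJobCollection(self._data[i:i + chunk_size])

    def validate(self):
        pass

    def get_collection_data(self):
        return self._data

store_jobs({'mozilla-central': FakeJobCollection([{'job': {}}])}, chunk_size=100)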
Example #18
def publish_resultset(project, ids):
    # If we don't have a publisher (because of missing configs), then we can't
    # publish any pulse messages. This is okay, local installs etc. doesn't
    # need to publish on pulse, and requiring a pulse user is adding more
    # overhead to an already large development setup process.
    publisher = pulse_connection.get_publisher()
    if not publisher:
        return

    from treeherder.model.derived.jobs import JobsModel

    with JobsModel(project) as jm:
        # Publish messages with new result-sets
        for entry in jm.get_result_set_list_by_ids(ids):
            repository = jm.refdata_model.get_repository_info(
                entry['repository_id'])

            if repository is None:
                return

            entry['repository_url'] = repository['url']

            # Don't expose these properties, they are internal, at least that's
            # what I think without documentation I have no clue... what any of
            # this is
            del entry['revisions']  # Not really internal, but too big
            del entry['repository_id']

            # Set required properties
            entry['version'] = 1
            entry['project'] = project
            # Property revision_hash should already be there, I suspect it is the
            # result-set identifier...

            # publish the data to pulse
            publisher.new_result_set(**entry)

            # Basically, I have no idea what context this runs and was inherently
            # unable to make kombu with or without pyamqp, etc. confirm-publish,
            # so we're stuck with this super ugly hack where we just close the
            # connection so that if the process context is destroyed then at least
            # messages will still get published... Well, assuming nothing goes
            # wrong, because we're not using confirm channels for publishing...
            publisher.connection.release()
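A minimal invocation sketch; the result-set ids are placeholders and, as the guard above shows, the call is a no-op when no pulse publisher is configured:

publish_resultset('mozilla-central', [101, 102, 103])  # placeholder project and ids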
Example #19
def jm(request):
    """ Give a test access to a JobsModel instance. """
    from django.conf import settings
    from treeherder.model.derived.jobs import JobsModel
    model = JobsModel.create(settings.DATABASES["default"]["TEST_NAME"])

    # patch in additional test-only procs on the datasources
    add_test_procs_file(
        model.get_dhub(),
        model.get_datasource().key,
        "jobs_test.json",
    )

    def fin():
        model.disconnect()

    request.addfinalizer(fin)

    return model
Example #21
def process_objects(limit=None, project=None):
    """
    Process a number of objects from the objectstore
    and load them to the jobs store
    """
    from treeherder.model.derived.jobs import JobsModel

    # default limit to 100
    limit = limit or 100

    if project:
        projects_to_process = [project]
    else:
        projects_to_process = Datasource.objects.values_list(
            'project', flat=True).distinct()

    for project in projects_to_process:
        with JobsModel(project) as jm:
            jm.process_objects(limit)
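A short invocation sketch; both the per-project loop and the default limit of 100 come from the function above, and the project name is a placeholder:

process_objects(limit=50, project='mozilla-central')  # one project, smaller batch
process_objects()                                     # every datasource, default limit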
Example #22
    def process_job_list(self, all_jobs_list):
        if not isinstance(all_jobs_list, list):
            all_jobs_list = [all_jobs_list]

        validated_jobs = self._get_validated_jobs_by_project(all_jobs_list)

        for project, job_list in validated_jobs.items():
            with JobsModel(project) as jobs_model:
                storeable_job_list = []
                for pulse_job in job_list:
                    if pulse_job["state"] != "unscheduled":
                        try:
                            storeable_job_list.append(
                                self.transform(pulse_job))
                        except AttributeError:
                            logger.warn("Skipping job due to bad attribute",
                                        exc_info=1)

                jobs_model.store_job_data(storeable_job_list)
Example #23
    def process(self, message_body, exchange):
        try:
            transformer = self.get_transformer_class(exchange)(message_body)
            repo = Repository.objects.get(url=transformer.repo_url,
                                          branch=transformer.branch,
                                          active_status="active")
            transformed_data = transformer.transform(repo.name)

            with JobsModel(repo.name) as jobs_model:
                logger.info("Storing resultset for {} {} {}".format(
                    repo.name, transformer.repo_url, transformer.branch))
                jobs_model.store_result_set_data([transformed_data])

        except ObjectDoesNotExist:
            newrelic.agent.record_custom_event("skip_unknown_repository",
                                               message_body["details"])
            logger.warn("Skipping unsupported repo: {} {}".format(
                transformer.repo_url, transformer.branch))
        except Exception as ex:
            newrelic.agent.record_exception(exc=ex)
            logger.exception("Error transforming resultset", exc_info=ex)
Example #24
    def _rewrite_data(self, project, mysql_debug):

        signature_mapping = {}
        extra_subtest_signatures = {}

        with JobsModel(project) as jm:
            jm.DEBUG = mysql_debug
            summary = jm.get_performance_series_summary(
                max(PerformanceTimeInterval.all_valid_time_intervals()))
            # first pass: rewrite non-summary tests
            for (signature_hash, signature_properties) in summary.iteritems():
                if self._signature_needs_rewriting(signature_properties,
                                                   signature_hash) and \
                        'subtest_signatures' not in signature_properties:
                    new_hash = self._rewrite_series(jm, signature_hash,
                                                    signature_properties, None,
                                                    None)
                    signature_mapping[signature_hash] = new_hash
                elif (not signature_properties.get('subtest_signatures')
                      and signature_properties.get('test')
                      not in Command.COUNTER_TESTS):
                    # in case this script got interrupted, keep track of
                    # subtest signatures which have already been converted
                    suitekey = self._get_suitekey(signature_properties)

                    if extra_subtest_signatures.get(suitekey):
                        extra_subtest_signatures[suitekey].append(
                            signature_hash)
                    else:
                        extra_subtest_signatures[suitekey] = [signature_hash]

            # second pass: rewrite summary tests
            for (signature_hash, signature_properties) in summary.iteritems():
                if (self._signature_needs_rewriting(signature_properties,
                                                    signature_hash)
                        and signature_properties.get('subtest_signatures')):
                    self._rewrite_series(jm, signature_hash,
                                         signature_properties,
                                         signature_mapping,
                                         extra_subtest_signatures)
Example #25
    def process_job_list(self, all_jobs_list):
        if not isinstance(all_jobs_list, list):
            all_jobs_list = [all_jobs_list]

        validated_jobs = self._get_validated_jobs_by_project(all_jobs_list)

        for project, job_list in validated_jobs.items():
            with JobsModel(project) as jobs_model:
                # todo: Continue using short revisions until Bug 1199364
                rs_lookup = jobs_model.get_revision_resultset_lookup(
                    [x["origin"]["revision"][:12] for x in job_list])
                storeable_job_list = []
                for pulse_job in job_list:
                    if pulse_job["state"] != "unscheduled":
                        try:
                            storeable_job_list.append(
                                self.transform(pulse_job, rs_lookup))
                        except AttributeError:
                            logger.warn("Skipping job due to bad attribute",
                                        exc_info=1)

                jobs_model.store_job_data(storeable_job_list)
Example #26
    def run(self, source_url, repository, changeset=None):

        # get the last object seen from cache. this will
        # reduce the number of pushes processed every time
        last_push_id = cache.get("{0}:last_push_id".format(repository))
        if not changeset and last_push_id:
            startid_url = "{}&startID={}".format(source_url, last_push_id)
            logger.info(
                "Extracted last push for '%s', '%s', from cache, "
                "attempting to get changes only from that point at: %s" %
                (repository, last_push_id, startid_url))
            # Use the cached ``last_push_id`` value (saved from the last time
            # this API was called) for this repo.  Use that value as the
            # ``startID`` to get all new pushes from that point forward.
            extracted_content = self.extract(startid_url)

            if extracted_content['lastpushid'] < last_push_id:
                # Push IDs from Mercurial are incremental.  If we cached a value
                # from one call to this API, and a subsequent call told us that
                # the ``lastpushid`` is LOWER than the one we have cached, then
                # the Mercurial IDs were reset.
                # In this circumstance, we can't rely on the cached id, so must
                # throw it out and get the latest 10 pushes.
                logger.warning(
                    ("Got a ``lastpushid`` value of {} lower than "
                     "the cached value of {} due to Mercurial repo reset.  "
                     "Getting latest changes for '{}' instead").format(
                         extracted_content['lastpushid'], last_push_id,
                         repository))
                cache.delete("{0}:last_push_id".format(repository))
                extracted_content = self.extract(source_url)
        else:
            if changeset:
                logger.info("Getting all pushes for '%s' corresponding to "
                            "changeset '%s'" % (repository, changeset))
                extracted_content = self.extract(source_url + "&changeset=" +
                                                 changeset)
            else:
                logger.warning("Unable to get last push from cache for '%s', "
                               "getting all pushes" % repository)
                extracted_content = self.extract(source_url)

        # ``pushes`` could be empty if there are no new ones since we last
        # fetched
        pushes = extracted_content['pushes']

        if not pushes:
            return None

        last_push_id = max(map(lambda x: int(x), pushes.keys()))
        last_push = pushes[str(last_push_id)]
        top_revision = last_push["changesets"][-1]["node"]
        # TODO: further remove the use of client types here
        transformed = self.transform(pushes, repository)

        errors = []
        with JobsModel(repository) as jm:
            for collection in transformed[repository].get_chunks(chunk_size=1):
                try:
                    collection.validate()
                    jm.store_result_set_data(collection.get_collection_data())
                except Exception:
                    newrelic.agent.record_exception()
                    errors.append({
                        "project": repository,
                        "collection": "result_set",
                        "message": traceback.format_exc()
                    })

        if errors:
            raise CollectionNotStoredException(errors)

        if not changeset:
            # only cache the last push if we're not fetching a specific
            # changeset
            cache.set("{0}:last_push_id".format(repository), last_push_id)

        return top_revision
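A hypothetical invocation sketch: run above is an instance method, so the class name and import path below are assumptions (Treeherder's Mercurial pushlog ETL), and the json-pushes URL is a placeholder:

from treeherder.etl.pushlog import HgPushlogProcess  # assumed import path

top_revision = HgPushlogProcess().run(
    'https://hg.mozilla.org/mozilla-central/json-pushes/?full=1&version=2',
    'mozilla-central')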