def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    # if a job with this symbol and name exists, always
    # use its default group (even if that group is different
    # from that specified)
    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')
    if job_type.job_group:
        job_group = job_type.job_group
    else:
        job_group, _ = JobGroup.objects.get_or_create(
            name=job_datum.get('group_name') or 'unknown',
            symbol=job_datum.get('group_symbol') or 'unknown')
        job_type.job_group = job_group
        job_type.save(update_fields=['job_group'])

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture, job_group.name,
             job_group.symbol, job_type.name, job_type.symbol,
             option_collection_hash, reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name,
        defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    if created:
        # A new ReferenceDataSignature has been added, so we need
        # to reload lower tier exclusions
        lower_tier_signatures = _get_lower_tier_signatures(repository)

    tier = job_datum.get('tier') or 1

    # job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that have
    # an active exclusion profile.
    result = job_datum.get('result', 'unknown')

    # As stated elsewhere, a job will end up in the lowest tier where its
    # signature belongs.  So if a signature is in Tier-2 and Tier-3, it
    # will end up in 3.
    for tier_info in lower_tier_signatures:
        if signature_hash in tier_info["signatures"]:
            tier = tier_info["tier"]

    try:
        duration = JobDuration.objects.values_list(
            'average_duration', flat=True).get(
                repository=repository, signature=signature_hash)
    except JobDuration.DoesNotExist:
        duration = 0

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # this could theoretically throw an exception if we were processing
        # several updates simultaneously, but that should never happen --
        # and if it does it's better just to error out
        Job.objects.create(
            guid=job_guid,
            repository=repository,
            signature=signature,
            build_platform=build_platform,
            machine_platform=machine_platform,
            machine=machine,
            option_collection_hash=option_collection_hash,
            job_type=job_type,
            product=product,
            failure_classification=default_failure_classification,
            who=who,
            reason=reason,
            result=result,
            state=state,
            tier=tier,
            submit_time=submit_time,
            start_time=start_time,
            end_time=end_time,
            last_modified=datetime.now(),
            running_eta=duration,
            push_id=push_id)

    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([k in job_datum for k in ['taskcluster_task_id',
                                     'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        running_eta=duration,
        push_id=push_id)

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result)

    return (job_guid, signature_hash)
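
# Illustrative sketch (an assumption, not part of this module): the retry
# handling above relies on ``get_guid_root`` mapping a suffixed ``retry``
# job_guid back to the guid of the matching ``pending``/``running`` job, so
# that ``Job.objects.filter(guid__in=[job_guid, job_guid_root])`` matches
# either form. Assuming the usual underscore-suffix convention, a
# hypothetical equivalent would behave like this:

def _example_guid_root(guid):
    # 'abc123_1' (retry guid) -> 'abc123' (root guid);
    # a plain guid passes through unchanged.
    if "_" in str(guid):
        return str(guid).split("_", 1)[0]
    return str(guid)

assert _example_guid_root("abc123_1") == "abc123"
assert _example_guid_root("abc123") == "abc123"
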
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')

    job_group, _ = JobGroup.objects.get_or_create(
        name=job_datum.get('group_name') or 'unknown',
        symbol=job_datum.get('group_symbol') or 'unknown')

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture, job_group.name,
             job_group.symbol, job_type.name, job_type.symbol,
             option_collection_hash, reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name,
        defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    tier = job_datum.get('tier') or 1

    result = job_datum.get('result', 'unknown')

    # Job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that should
    # have an overridden tier.
    if lower_tier_signatures and signature_hash in lower_tier_signatures:
        tier = lower_tier_signatures[signature_hash]

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # This could theoretically already have been created by another process
        # that is running updates simultaneously.  So just attempt to create
        # it, but allow it to skip if it's the same guid.  The odds are
        # extremely high that this is a pending and running job that came in
        # quick succession and are being processed by two different workers.
        Job.objects.get_or_create(
            guid=job_guid,
            defaults={
                "repository": repository,
                "signature": signature,
                "build_platform": build_platform,
                "machine_platform": machine_platform,
                "machine": machine,
                "option_collection_hash": option_collection_hash,
                "job_type": job_type,
                "job_group": job_group,
                "product": product,
                "failure_classification": default_failure_classification,
                "who": who,
                "reason": reason,
                "result": result,
                "state": state,
                "tier": tier,
                "submit_time": submit_time,
                "start_time": start_time,
                "end_time": end_time,
                "last_modified": datetime.now(),
                "push_id": push_id
            })

    # Can't just use the ``job`` we would get from the ``get_or_create``
    # because we need to try the job_guid_root instance first for update,
    # rather than a possible retry job instance.
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([k in job_datum for k in ['taskcluster_task_id',
                                     'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        job_group=job_group,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        push_id=push_id)

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url,
                defaults={'status': parse_status})

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result)

    return (job_guid, signature_hash)
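
# Illustrative sketch of the tier override in the variant above: here
# ``lower_tier_signatures`` is a plain mapping of signature hash -> tier
# (inferred from the ``lower_tier_signatures[signature_hash]`` lookup), so a
# matching hash simply replaces the tier taken from the submitted job
# structure. The names and values below are hypothetical:

_example_lower_tier_signatures = {"deadbeef" * 5: 3}  # 40-char sha1 hex -> tier

def _example_resolve_tier(submitted_tier, signature_hash,
                          lower_tier_signatures=_example_lower_tier_signatures):
    # mirrors: tier = job_datum.get('tier') or 1, then the dict override
    tier = submitted_tier or 1
    if lower_tier_signatures and signature_hash in lower_tier_signatures:
        tier = lower_tier_signatures[signature_hash]
    return tier

assert _example_resolve_tier(1, "deadbeef" * 5) == 3   # overridden
assert _example_resolve_tier(None, "0" * 40) == 1      # default tier
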
def _load_job(repository, job_datum, push_id):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'),
    )

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'),
    )

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')

    job_group, _ = JobGroup.objects.get_or_create(
        name=job_datum.get('group_name') or 'unknown',
        symbol=job_datum.get('group_symbol') or 'unknown',
    )

    product_name = job_datum.get('product_name', 'unknown')
    if not product_name.strip():
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(
            str,
            [
                build_system_type,
                repository.name,
                build_platform.os_name,
                build_platform.platform,
                build_platform.architecture,
                machine_platform.os_name,
                machine_platform.platform,
                machine_platform.architecture,
                job_group.name,
                job_group.symbol,
                job_type.name,
                job_type.symbol,
                option_collection_hash,
                reference_data_name,
            ],
        )).encode('utf-8'))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, _ = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name,
        defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash,
        },
    )

    tier = job_datum.get('tier') or 1

    result = job_datum.get('result', 'unknown')

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # This could theoretically already have been created by another process
        # that is running updates simultaneously.  So just attempt to create
        # it, but allow it to skip if it's the same guid.  The odds are
        # extremely high that this is a pending and running job that came in
        # quick succession and are being processed by two different workers.
        Job.objects.get_or_create(
            guid=job_guid,
            defaults={
                "repository": repository,
                "signature": signature,
                "build_platform": build_platform,
                "machine_platform": machine_platform,
                "machine": machine,
                "option_collection_hash": option_collection_hash,
                "job_type": job_type,
                "job_group": job_group,
                "product": product,
                "failure_classification": default_failure_classification,
                "who": who,
                "reason": reason,
                "result": result,
                "state": state,
                "tier": tier,
                "submit_time": submit_time,
                "start_time": start_time,
                "end_time": end_time,
                "last_modified": datetime.now(),
                "push_id": push_id,
            },
        )

    # Can't just use the ``job`` we would get from the ``get_or_create``
    # because we need to try the job_guid_root instance first for update,
    # rather than a possible retry job instance.
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    # add taskcluster metadata if applicable
    if all([k in job_datum
            for k in ['taskcluster_task_id', 'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'],
            )
        except IntegrityError:
            pass

    # Update job with any data that would have changed
    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        job_group=job_group,
        product=product,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        push_id=push_id,
    )

    log_refs = job_datum.get('log_references', [])
    job_logs = []
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            parse_status_map = dict([(k, v) for (v, k) in JobLog.STATUSES])
            mapped_status = parse_status_map.get(log.get('parse_status'))
            if mapped_status:
                parse_status = mapped_status
            else:
                parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url,
                defaults={'status': parse_status})

            job_logs.append(jl)

        _schedule_log_parsing(job, job_logs, result, repository)

    return job_guid
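
# Illustrative note on ``parse_status_map`` in the log loop above:
# ``JobLog.STATUSES`` is a Django choices sequence of (value, label) pairs,
# so ``dict([(k, v) for (v, k) in JobLog.STATUSES])`` inverts it into a
# label -> value lookup for the submitted 'parse_status' string. A minimal
# sketch with hypothetical status values:

_EXAMPLE_STATUSES = ((0, 'pending'), (1, 'parsed'), (2, 'failed'))
_example_parse_status_map = dict([(k, v) for (v, k) in _EXAMPLE_STATUSES])

assert _example_parse_status_map == {'pending': 0, 'parsed': 1, 'failed': 2}
# An unrecognized 'parse_status' string maps to None, so the code above
# falls back to JobLog.PENDING.
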
def _load_job(repository, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    # if a job with this symbol and name exists, always
    # use its default group (even if that group is different
    # from that specified)
    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')
    if job_type.job_group:
        job_group = job_type.job_group
    else:
        job_group, _ = JobGroup.objects.get_or_create(
            name=job_datum.get('group_name') or 'unknown',
            symbol=job_datum.get('group_symbol') or 'unknown')
        job_type.job_group = job_group
        job_type.save(update_fields=['job_group'])

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    default_failure_classification = FailureClassification.objects.get(
        name='not classified')

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, repository.name, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture, job_group.name,
             job_group.symbol, job_type.name, job_type.symbol,
             option_collection_hash, reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=repository.name,
        defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    if created:
        # A new ReferenceDataSignature has been added, so we need
        # to reload lower tier exclusions
        lower_tier_signatures = _get_lower_tier_signatures(repository)

    tier = job_datum.get('tier') or 1

    # job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that have
    # an active exclusion profile.
    result = job_datum.get('result', 'unknown')

    # As stated elsewhere, a job will end up in the lowest tier where its
    # signature belongs.  So if a signature is in Tier-2 and Tier-3, it
    # will end up in 3.
    for tier_info in lower_tier_signatures:
        if signature_hash in tier_info["signatures"]:
            tier = tier_info["tier"]

    try:
        duration = JobDuration.objects.values_list(
            'average_duration', flat=True).get(
                repository=repository, signature=signature_hash)
    except JobDuration.DoesNotExist:
        duration = 0

    submit_time = datetime.fromtimestamp(
        _get_number(job_datum.get('submit_timestamp')))
    start_time = datetime.fromtimestamp(
        _get_number(job_datum.get('start_timestamp')))
    end_time = datetime.fromtimestamp(
        _get_number(job_datum.get('end_timestamp')))

    # first, try to create the job with the given guid (if it doesn't
    # exist yet)
    job_guid_root = get_guid_root(job_guid)
    if not Job.objects.filter(guid__in=[job_guid, job_guid_root]).exists():
        # this could theoretically throw an exception if we were processing
        # several updates simultaneously, but that should never happen --
        # and if it does it's better just to error out
        Job.objects.create(
            guid=job_guid,
            repository=repository,
            signature=signature,
            build_platform=build_platform,
            machine_platform=machine_platform,
            machine=machine,
            option_collection_hash=option_collection_hash,
            job_type=job_type,
            product=product,
            failure_classification=default_failure_classification,
            who=who,
            reason=reason,
            result=result,
            state=state,
            tier=tier,
            submit_time=submit_time,
            start_time=start_time,
            end_time=end_time,
            last_modified=datetime.now(),
            running_eta=duration,
            push_id=push_id)

    # if the job was pending, there's nothing more to do here
    # (pending jobs have no artifacts, and we would have just created
    # it)
    if state == 'pending':
        return (job_guid, signature_hash)

    # update job (in the case of a buildbot retrigger, we will
    # get the root object and update that to a retry)
    try:
        job = Job.objects.get(guid=job_guid_root)
    except ObjectDoesNotExist:
        job = Job.objects.get(guid=job_guid)

    Job.objects.filter(id=job.id).update(
        guid=job_guid,
        signature=signature,
        build_platform=build_platform,
        machine_platform=machine_platform,
        machine=machine,
        option_collection_hash=option_collection_hash,
        job_type=job_type,
        product=product,
        failure_classification=default_failure_classification,
        who=who,
        reason=reason,
        result=result,
        state=state,
        tier=tier,
        submit_time=submit_time,
        start_time=start_time,
        end_time=end_time,
        last_modified=datetime.now(),
        running_eta=duration,
        push_id=push_id)

    if all([k in job_datum for k in ['taskcluster_task_id',
                                     'taskcluster_retry_id']]):
        try:
            TaskclusterMetadata.objects.create(
                job=job,
                task_id=job_datum['taskcluster_task_id'],
                retry_id=job_datum['taskcluster_retry_id'])
        except IntegrityError:
            pass

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        store_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            _schedule_log_parsing(jl, result)

    return (job_guid, signature_hash)
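
# Illustrative sketch of the reference-data signature used in every variant
# above: the hash is a sha1 over the concatenated string forms of the
# build/machine/job reference fields, so identical configurations always map
# to the same 40-character hex signature. The helper name and field values
# here are hypothetical; the ``.encode('utf-8')`` mirrors the Python 3
# variant of the code above:

from hashlib import sha1

def _example_signature_hash(fields):
    sh = sha1()
    sh.update(''.join(map(str, fields)).encode('utf-8'))
    return sh.hexdigest()

_h = _example_signature_hash(
    ['buildbot', 'mozilla-central', 'linux', 'linux64', 'x86_64'])
assert len(_h) == 40  # sha1 hexdigest is always 40 characters
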
def _load_job(self, job_datum, push_id, lower_tier_signatures):
    """
    Load a job into the treeherder database

    If the job is a ``retry`` the ``job_guid`` will have a special
    suffix on it.  But the matching ``pending``/``running`` job will not.
    So we append the suffixed ``job_guid`` to ``retry_job_guids``
    so that we can update the job_id_lookup later with the non-suffixed
    ``job_guid`` (root ``job_guid``). Then we can find the right
    ``pending``/``running`` job and update it with this ``retry`` job.
    """
    build_platform, _ = BuildPlatform.objects.get_or_create(
        os_name=job_datum.get('build_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('build_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('build_platform', {}).get('architecture',
                                                             'unknown'))

    machine_platform, _ = MachinePlatform.objects.get_or_create(
        os_name=job_datum.get('machine_platform', {}).get('os_name', 'unknown'),
        platform=job_datum.get('machine_platform', {}).get('platform', 'unknown'),
        architecture=job_datum.get('machine_platform', {}).get('architecture',
                                                               'unknown'))

    option_names = job_datum.get('option_collection', [])
    option_collection_hash = OptionCollection.calculate_hash(option_names)
    if not OptionCollection.objects.filter(
            option_collection_hash=option_collection_hash).exists():
        # in the unlikely event that we haven't seen this set of options
        # before, add the appropriate database rows
        options = []
        for option_name in option_names:
            option, _ = Option.objects.get_or_create(name=option_name)
            options.append(option)
        for option in options:
            OptionCollection.objects.create(
                option_collection_hash=option_collection_hash,
                option=option)

    machine, _ = Machine.objects.get_or_create(
        name=job_datum.get('machine', 'unknown'))

    # if a job with this symbol and name exists, always
    # use its default group (even if that group is different
    # from that specified)
    job_type, _ = JobType.objects.get_or_create(
        symbol=job_datum.get('job_symbol') or 'unknown',
        name=job_datum.get('name') or 'unknown')
    if job_type.job_group:
        job_group = job_type.job_group
    else:
        job_group, _ = JobGroup.objects.get_or_create(
            name=job_datum.get('group_name') or 'unknown',
            symbol=job_datum.get('group_symbol') or 'unknown')
        job_type.job_group = job_group
        job_type.save(update_fields=['job_group'])

    product_name = job_datum.get('product_name', 'unknown')
    if len(product_name.strip()) == 0:
        product_name = 'unknown'
    product, _ = Product.objects.get_or_create(name=product_name)

    job_guid = job_datum['job_guid']
    job_guid = job_guid[0:50]

    who = job_datum.get('who') or 'unknown'
    who = who[0:50]

    reason = job_datum.get('reason') or 'unknown'
    reason = reason[0:125]

    state = job_datum.get('state') or 'unknown'
    state = state[0:25]

    build_system_type = job_datum.get('build_system_type', 'buildbot')

    reference_data_name = job_datum.get('reference_data_name', None)

    sh = sha1()
    sh.update(''.join(
        map(lambda x: str(x),
            [build_system_type, self.project, build_platform.os_name,
             build_platform.platform, build_platform.architecture,
             machine_platform.os_name, machine_platform.platform,
             machine_platform.architecture, job_group.name,
             job_group.symbol, job_type.name, job_type.symbol,
             option_collection_hash, reference_data_name])))
    signature_hash = sh.hexdigest()

    # Should be the buildername in the case of buildbot (if not provided
    # default to using the signature hash)
    if not reference_data_name:
        reference_data_name = signature_hash

    signature, created = ReferenceDataSignatures.objects.get_or_create(
        name=reference_data_name,
        signature=signature_hash,
        build_system_type=build_system_type,
        repository=self.project,
        defaults={
            'first_submission_timestamp': time.time(),
            'build_os_name': build_platform.os_name,
            'build_platform': build_platform.platform,
            'build_architecture': build_platform.architecture,
            'machine_os_name': machine_platform.os_name,
            'machine_platform': machine_platform.platform,
            'machine_architecture': machine_platform.architecture,
            'job_group_name': job_group.name,
            'job_group_symbol': job_group.symbol,
            'job_type_name': job_type.name,
            'job_type_symbol': job_type.symbol,
            'option_collection_hash': option_collection_hash
        })

    if created:
        # A new ReferenceDataSignature has been added, so we need
        # to reload lower tier exclusions
        lower_tier_signatures = self._get_lower_tier_signatures()

    tier = job_datum.get('tier') or 1

    # job tier signatures override the setting from the job structure
    # Check the signatures list for any supported lower tiers that have
    # an active exclusion profile.
    result = job_datum.get('result', 'unknown')

    # As stated elsewhere, a job will end up in the lowest tier where its
    # signature belongs.  So if a signature is in Tier-2 and Tier-3, it
    # will end up in 3.
    for tier_info in lower_tier_signatures:
        if signature_hash in tier_info["signatures"]:
            tier = tier_info["tier"]

    try:
        duration = JobDuration.objects.values_list(
            'average_duration', flat=True).get(
                repository__name=self.project, signature=signature_hash)
    except JobDuration.DoesNotExist:
        duration = 0

    # try to insert the job unconditionally (if it already exists, this
    # will be a no-op)
    self.execute(
        proc='jobs.inserts.create_job_data',
        debug_show=self.DEBUG,
        placeholders=[
            [
                job_guid,
                signature_hash,
                None,                   # idx:2, job_coalesced_to_guid,
                None,
                push_id,
                build_platform.id,
                machine_platform.id,
                machine.id,
                option_collection_hash,
                job_type.id,
                product.id,
                who,
                reason,
                result,
                state,
                self.get_number(job_datum.get('submit_timestamp')),
                self.get_number(job_datum.get('start_timestamp')),
                self.get_number(job_datum.get('end_timestamp')),
                duration,
                tier,
                job_guid,
                get_guid_root(job_guid)  # will be the same except for ``retry`` jobs
            ]
        ],
        executemany=True)

    # by default we should try to update the "root" object
    guid_root = get_guid_root(job_guid)
    ds_job_ids = self.get_job_ids_by_guid([guid_root])
    if ds_job_ids:
        ds_job_id = ds_job_ids[guid_root]['id']
    else:
        ds_job_id = self.get_job_ids_by_guid([job_guid])[job_guid]['id']

    # we might both insert *and* update a job if it comes in with a status
    # that isn't pending, but we're ok with this I think (since this code
    # will be going away soon)
    if state != 'pending':
        self.execute(
            proc="jobs.updates.update_job_data",
            debug_show=self.DEBUG,
            placeholders=[
                [
                    job_guid,
                    None,
                    None,
                    push_id,
                    machine.id,
                    option_collection_hash,
                    job_type.id,
                    product.id,
                    who,
                    reason,
                    result,
                    state,
                    self.get_number(job_datum.get('start_timestamp')),
                    self.get_number(job_datum.get('end_timestamp')),
                    state,
                    ds_job_id
                ]
            ],
            executemany=True)

    # create an intermediate representation of the job useful for doing
    # lookups (this will eventually become the main/only/primary jobs table
    # when we finish migrating away from Datasource, see bug 1178641)
    job, _ = Job.objects.update_or_create(
        repository=Repository.objects.get(name=self.project),
        project_specific_id=ds_job_id,
        defaults={
            'guid': job_guid,
            'push_id': push_id
        })

    artifacts = job_datum.get('artifacts', [])

    has_text_log_summary = any(x for x in artifacts
                               if x['name'] == 'text_log_summary')
    if artifacts:
        artifacts = ArtifactsModel.serialize_artifact_json_blobs(artifacts)

        # need to add job guid to artifacts, since they likely weren't
        # present in the beginning
        for artifact in artifacts:
            if not all(k in artifact for k in ("name", "type", "blob")):
                raise ValueError(
                    "Artifact missing properties: {}".format(artifact))
            # Ensure every artifact has a ``job_guid`` value.
            # It is legal to submit an artifact that doesn't have a
            # ``job_guid`` value.  But, if missing, it should inherit that
            # value from the job itself.
            if "job_guid" not in artifact:
                artifact["job_guid"] = job_guid

        with ArtifactsModel(self.project) as artifacts_model:
            artifacts_model.load_job_artifacts(artifacts)

    log_refs = job_datum.get('log_references', [])
    if log_refs:
        for log in log_refs:
            name = log.get('name') or 'unknown'
            name = name[0:50]

            url = log.get('url') or 'unknown'
            url = url[0:255]

            # this indicates that a summary artifact was submitted with
            # this job that corresponds to the buildbot_text log url.
            # Therefore, the log does not need parsing.  So we should
            # ensure that it's marked as already parsed.
            if has_text_log_summary and name == 'buildbot_text':
                parse_status = JobLog.PARSED
            else:
                parse_status_map = dict([(k, v) for (v, k) in
                                         JobLog.STATUSES])
                mapped_status = parse_status_map.get(
                    log.get('parse_status'))
                if mapped_status:
                    parse_status = mapped_status
                else:
                    parse_status = JobLog.PENDING

            jl, _ = JobLog.objects.get_or_create(
                job=job, name=name, url=url, defaults={
                    'status': parse_status
                })

            self._schedule_log_parsing(jl, result)

    return (job_guid, signature_hash)
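
# Illustrative shape of the ``job_datum`` consumed by every variant above.
# The keys are inferred directly from the ``job_datum.get(...)`` calls in the
# code; all values here are hypothetical:

_example_job_datum = {
    'job_guid': 'abc123_1',            # retry guid; the root guid is 'abc123'
    'build_platform': {'os_name': 'linux', 'platform': 'linux64',
                       'architecture': 'x86_64'},
    'machine_platform': {'os_name': 'linux', 'platform': 'linux64',
                         'architecture': 'x86_64'},
    'option_collection': ['opt'],
    'machine': 'tst-linux64-ec2-001',
    'job_symbol': 'M1',
    'name': 'mochitest-1',
    'group_name': 'Mochitest',
    'group_symbol': 'M',
    'product_name': 'firefox',
    'who': 'someone@example.com',
    'reason': 'scheduled',
    'state': 'completed',
    'result': 'success',
    'tier': 1,
    'build_system_type': 'taskcluster',
    'submit_timestamp': 1470000000,
    'start_timestamp': 1470000060,
    'end_timestamp': 1470000600,
    'log_references': [
        {'name': 'builds-4h', 'url': 'https://example.com/log.txt',
         'parse_status': 'pending'},
    ],
}
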