class SSHKey(CreatedByModel, EditedAtModel, URLActionModel):
    """
    A Django data model to store public SSH keys for logged-in users
    to be used in the :mod:`on-demand clusters <atmo.clusters>`.
    """

    #: The list of valid SSH key data prefixes, will be validated
    #: on save.
    VALID_PREFIXES = [
        "ssh-rsa",
        "ssh-dss",
        "ecdsa-sha2-nistp256",
        "ecdsa-sha2-nistp384",
        "ecdsa-sha2-nistp521",
    ]

    title = models.CharField(
        max_length=100,
        help_text="Name to give to this public key",
    )
    key = models.TextField(
        help_text="Should start with one of the following prefixes: %s"
        % ", ".join(VALID_PREFIXES),
    )
    fingerprint = models.CharField(max_length=48, blank=True)

    class Meta:
        permissions = [("view_sshkey", "Can view SSH key")]
        unique_together = ("created_by", "fingerprint")

    __str__ = autostr("{self.title}")

    __repr__ = autorepr(["title", "fingerprint"])

    url_prefix = "keys"
    url_actions = ["detail", "delete", "raw"]

    def get_absolute_url(self):
        return self.urls.detail

    @property
    def prefix(self):
        """
        The prefix of the key data, one of the
        :data:`~atmo.keys.models.SSHKey.VALID_PREFIXES`.
        """
        return self.key.strip().split()[0]

    def save(self, *args, **kwargs):
        self.fingerprint = calculate_fingerprint(self.key)
        super().save(*args, **kwargs)
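
# Illustrative usage sketch (not part of the original module). The user and
# key material below are made-up placeholders; it only exercises behavior
# defined above: `prefix` parsing and the `save()` override filling in
# `fingerprint` via `calculate_fingerprint`.
def _example_create_ssh_key(user):
    ssh_key = SSHKey(
        created_by=user,
        title="work laptop",
        key="ssh-rsa AAAAB3NzaC1yc2EAAA... user@laptop",
    )
    assert ssh_key.prefix in SSHKey.VALID_PREFIXES
    ssh_key.save()  # computes and stores `fingerprint` before writing the row
    return ssh_key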
class SparkJobRunAlert(EditedAtModel):
    """
    A data model to store job run alerts for later processing by an
    async job that sends out emails.
    """

    run = models.ForeignKey(
        SparkJobRun,
        on_delete=models.CASCADE,
        related_name='alerts',
    )
    reason_code = models.CharField(
        max_length=50,
        blank=True,
        null=True,
        help_text="The reason code for the creation of the alert.",
    )
    reason_message = models.TextField(
        default='',
        help_text="The reason message for the creation of the alert.",
    )
    mail_sent_date = models.DateTimeField(
        blank=True,
        null=True,
        help_text="The datetime the alert email was sent.",
    )

    class Meta:
        unique_together = [
            ['run', 'reason_code', 'reason_message'],
        ]
        index_together = [
            ['reason_code', 'mail_sent_date'],
        ]

    __str__ = autostr('{self.id}')

    def short_reason_message(self):
        return self.reason_message[:50]

    __repr__ = autorepr(
        ['id', 'reason_code', 'short_reason_message'],
        short_reason_message=short_reason_message,
    )
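
# Illustrative sketch (not part of the original module): how the async email
# job mentioned in the docstring might pick up unsent alerts. The helper name
# is hypothetical; the field lookups come from the model above, and
# `created_at` is assumed to be provided by EditedAtModel (it is used as
# `get_latest_by` elsewhere in this file).
def _example_unsent_alerts():
    # Alerts whose notification email hasn't gone out yet, oldest first.
    return SparkJobRunAlert.objects.filter(
        mail_sent_date__isnull=True,
    ).order_by("created_at")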
class EMRRelease(EditedAtModel):
    version = models.CharField(max_length=50, primary_key=True)
    changelog_url = models.TextField(
        help_text="The URL of the changelog with details about the release.",
        default="",
    )
    help_text = models.TextField(
        help_text="Optional help text to show for users when creating a cluster.",
        default="",
    )
    is_active = models.BooleanField(
        help_text="Whether this version should be shown to the user at all.",
        default=True,
    )
    is_experimental = models.BooleanField(
        help_text="Whether this version should be shown to users as experimental.",
        default=False,
    )
    is_deprecated = models.BooleanField(
        help_text="Whether this version should be shown to users as deprecated.",
        default=False,
    )

    objects = EMRReleaseQuerySet.as_manager()

    class Meta:
        ordering = ["-version"]
        get_latest_by = "created_at"
        verbose_name = "EMR release"
        verbose_name_plural = "EMR releases"

    __str__ = autostr("{self.version}")

    __repr__ = autorepr(["version", "is_active", "is_experimental", "is_deprecated"])
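
# Illustrative sketch (not part of the original module): selecting the
# releases a user may pick from, using only the flags defined above. Plain
# ORM lookups are used since EMRReleaseQuerySet's own helpers aren't shown
# here.
def _example_selectable_emr_releases():
    # Only active releases are shown at all; `is_experimental` and
    # `is_deprecated` merely change how a release is labeled for users.
    return EMRRelease.objects.filter(is_active=True)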
class SparkJobRun(EditedAtModel):
    """
    A data model to store information about every individual run of a
    scheduled Spark job.

    This denormalizes some values from its related data model
    :class:`SparkJob`.
    """

    spark_job = models.ForeignKey(
        SparkJob,
        on_delete=models.CASCADE,
        related_name="runs",
        related_query_name="runs",
    )
    jobflow_id = models.CharField(max_length=50, blank=True, null=True)
    emr_release_version = models.CharField(max_length=50, blank=True, null=True)
    size = models.IntegerField(
        help_text="Number of computers used to run the job.",
        blank=True,
        null=True,
    )
    status = models.CharField(
        max_length=50,
        blank=True,
        default=DEFAULT_STATUS,
        db_index=True,
    )
    scheduled_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job was scheduled.",
    )
    started_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time when the cluster was started on AWS EMR.",
    )
    ready_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time when the cluster was ready to run steps on AWS EMR.",
    )
    finished_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job was terminated or failed.",
    )

    objects = SparkJobRunQuerySet.as_manager()

    class Meta:
        get_latest_by = "created_at"
        ordering = ["-created_at"]

    __str__ = autostr("{self.jobflow_id}")

    def spark_job_identifier(self):
        return self.spark_job.identifier

    __repr__ = autorepr(
        ["jobflow_id", "spark_job_identifier", "emr_release_version", "size"],
        spark_job_identifier=spark_job_identifier,
    )

    @property
    def info(self):
        return self.spark_job.cluster_provisioner.info(self.jobflow_id)

    def sync(self, info=None):
        """
        Updates latest status and life cycle datetimes.
        """
        if info is None:
            info = self.info
        # A mapping between what the provisioner returns and what the
        # data model uses.
        model_field_map = (
            ("state", "status"),
            ("creation_datetime", "started_at"),
            ("ready_datetime", "ready_at"),
            ("end_datetime", "finished_at"),
        )
        save_needed = False
        date_fields_updated = False

        # Set the various model fields to the values the API returned.
        for api_field, model_field in model_field_map:
            field_value = info.get(api_field)
            if field_value is None or field_value == getattr(self, model_field):
                continue
            setattr(self, model_field, field_value)
            save_needed = True
            if model_field in ("started_at", "ready_at", "finished_at"):
                date_fields_updated = True

        with transaction.atomic():
            # If the job cluster terminated with error, raise the alarm.
            if self.status == Cluster.STATUS_TERMINATED_WITH_ERRORS:
                transaction.on_commit(lambda: self.alert(info))
            # If any data changed, save it.
            if save_needed:
                self.save()

        with transaction.atomic():
            if date_fields_updated:
                if self.ready_at and not self.finished_at:
                    # When the job cluster is ready, record the time in
                    # seconds it took the cluster to be ready.
                    time_to_ready = (self.ready_at - self.started_at).seconds
                    Metric.record(
                        "sparkjob-time-to-ready",
                        time_to_ready,
                        data={
                            "identifier": self.spark_job.identifier,
                            "size": self.size,
                            "jobflow_id": self.jobflow_id,
                        },
                    )
                if self.finished_at:
                    # When the job is finished, record normalized instance hours.
                    hours = math.ceil(
                        (self.finished_at - self.started_at).seconds / 60 / 60
                    )
                    normalized_hours = hours * self.size
                    Metric.record(
                        "sparkjob-normalized-instance-hours",
                        normalized_hours,
                        data={
                            "identifier": self.spark_job.identifier,
                            "size": self.size,
                            "jobflow_id": self.jobflow_id,
                        },
                    )
                if self.finished_at and self.ready_at:
                    # When the job is finished, record the time in seconds it
                    # took the scheduled job to run. Sometimes `ready_at` won't
                    # be available if the cluster terminated with errors.
                    run_time = (self.finished_at - self.ready_at).seconds
                    Metric.record(
                        "sparkjob-run-time",
                        run_time,
                        data={
                            "identifier": self.spark_job.identifier,
                            "size": self.size,
                            "jobflow_id": self.jobflow_id,
                        },
                    )
        return self.status

    def alert(self, info):
        self.alerts.get_or_create(
            reason_code=info["state_change_reason_code"],
            reason_message=info["state_change_reason_message"],
        )
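
# Illustrative sketch (not part of the original module): the shape of the
# provisioner `info` dict that `sync()` and `alert()` consume, inferred from
# `model_field_map` and the `alert()` lookups above. The concrete values are
# made up.
_EXAMPLE_RUN_INFO = {
    "state": "TERMINATED_WITH_ERRORS",
    "creation_datetime": None,  # datetime when the cluster started, if known
    "ready_datetime": None,     # datetime when the cluster became ready
    "end_datetime": None,       # datetime when the cluster terminated
    "state_change_reason_code": "STEP_FAILURE",
    "state_change_reason_message": "Step failed with exit code 1",
}
# e.g. run.sync(info=_EXAMPLE_RUN_INFO) stores the new status and, after the
# surrounding transaction commits, creates a SparkJobRunAlert via alert().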
class SparkJob(EMRReleaseModel, CreatedByModel, EditedAtModel, URLActionModel):
    """
    A data model to store details about a scheduled Spark job, to be
    run on AWS EMR.
    """

    INTERVAL_DAILY = 24
    INTERVAL_WEEKLY = INTERVAL_DAILY * 7
    INTERVAL_MONTHLY = INTERVAL_DAILY * 30
    INTERVAL_CHOICES = [
        (INTERVAL_DAILY, "Daily"),
        (INTERVAL_WEEKLY, "Weekly"),
        (INTERVAL_MONTHLY, "Monthly"),
    ]
    RESULT_PRIVATE = "private"
    RESULT_PUBLIC = "public"
    RESULT_VISIBILITY_CHOICES = [
        (RESULT_PRIVATE, "Private"),
        (RESULT_PUBLIC, "Public"),
    ]

    identifier = models.CharField(
        max_length=100,
        help_text="Job name, used to uniquely identify individual jobs.",
        unique=True,
        db_index=True,
    )
    description = models.TextField(help_text="Job description.", default="")
    notebook_s3_key = models.CharField(
        max_length=800,
        help_text="S3 key of the notebook after uploading it to the Spark code bucket.",
    )
    result_visibility = models.CharField(  # can currently be "public" or "private"
        max_length=50,
        help_text="Whether notebook results are uploaded to a public or private bucket.",
        choices=RESULT_VISIBILITY_CHOICES,
        default=RESULT_PRIVATE,
    )
    size = models.IntegerField(
        help_text="Number of computers to use to run the job.",
    )
    interval_in_hours = models.IntegerField(
        help_text="Interval at which the job should run, in hours.",
        choices=INTERVAL_CHOICES,
        default=INTERVAL_DAILY,
    )
    job_timeout = models.IntegerField(
        help_text="Number of hours before the job times out.",
    )
    start_date = models.DateTimeField(
        help_text="Date/time that the job should start being scheduled to run.",
    )
    end_date = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job should stop being scheduled to run, null if no end date.",
    )
    expired_date = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the job was expired.",
        db_index=True,
    )
    is_enabled = models.BooleanField(
        default=True,
        help_text="Whether the job should run or not.",
    )

    objects = SparkJobQuerySet.as_manager()

    class Meta:
        permissions = [("view_sparkjob", "Can view Spark job")]

    __str__ = autostr("{self.identifier}")

    __repr__ = autorepr(["identifier", "size", "is_enabled"])

    url_prefix = "jobs"
    url_actions = ["delete", "detail", "download", "edit", "run", "zeppelin"]

    def get_absolute_url(self):
        return self.urls.detail

    @property
    def provisioner(self):
        return SparkJobProvisioner()

    # TEMPORARY till we have a 1:1 relationship to the cluster object
    # and we can then ask for spark_job.cluster.provisioner
    @property
    def cluster_provisioner(self):
        return ClusterProvisioner()

    @property
    def schedule(self):
        from .schedules import SparkJobSchedule
        return SparkJobSchedule(self)

    def has_future_end_date(self, now):
        # No end date means it'll always be due.
        if self.end_date is None:
            return True
        return self.end_date >= now

    @property
    def has_never_run(self):
        """
        Whether the job has never run before. Looks at both the cluster
        status and our own record of when we asked it to run.
        """
        return (
            self.latest_run is None
            or self.latest_run.status == DEFAULT_STATUS
            or self.latest_run.scheduled_at is None
        )

    @property
    def has_finished(self):
        """Whether the job's cluster is terminated or failed."""
        return self.latest_run and self.latest_run.status in Cluster.FINAL_STATUS_LIST

    @property
    def has_timed_out(self):
        """
        Whether the current job run has been running longer than the
        job's timeout allows.
        """
        if self.has_never_run:
            # Job isn't even running at the moment and never ran before.
            return False
        timeout_delta = timedelta(hours=self.job_timeout)
        max_run_time = self.latest_run.scheduled_at + timeout_delta
        timed_out = timezone.now() >= max_run_time
        return not self.is_runnable and timed_out

    @property
    def is_due(self):
        """
        Whether the start date is in the past and the end date is in the
        future.
        """
        now = timezone.now()
        has_past_start_date = self.start_date <= now
        return has_past_start_date and self.has_future_end_date(now)

    @property
    def is_runnable(self):
        """
        Either the job has never run before or it has finished running.

        This is checked right before the actual provisioning.
        """
        return self.has_never_run or self.has_finished

    @property
    def should_run(self):
        """Whether the scheduled Spark job should run."""
        return self.is_runnable and self.is_enabled and self.is_due

    @property
    def is_public(self):
        return self.result_visibility == self.RESULT_PUBLIC

    @property
    def is_active(self):
        return self.latest_run and self.latest_run.status in Cluster.ACTIVE_STATUS_LIST

    @property
    def notebook_name(self):
        return self.notebook_s3_key.rsplit("/", 1)[-1]

    @cached_property
    def notebook_s3_object(self):
        return self.provisioner.get(self.notebook_s3_key)

    @cached_property
    def results(self):
        return self.provisioner.results(self.identifier, self.is_public)

    def get_latest_run(self):
        try:
            return self.runs.latest()
        except SparkJobRun.DoesNotExist:
            return None

    latest_run = cached_property(get_latest_run, name="latest_run")

    def run(self):
        """Actually run the scheduled Spark job."""
        # If the job ran before and is still running, don't start it again.
        if not self.is_runnable:
            return
        jobflow_id = self.provisioner.run(
            user_username=self.created_by.username,
            user_email=self.created_by.email,
            identifier=self.identifier,
            emr_release=self.emr_release.version,
            size=self.size,
            notebook_key=self.notebook_s3_key,
            is_public=self.is_public,
            job_timeout=self.job_timeout,
        )
        # Create a new job history record.
        run = self.runs.create(
            spark_job=self,
            jobflow_id=jobflow_id,
            scheduled_at=timezone.now(),
            emr_release_version=self.emr_release.version,
            size=self.size,
        )
        # Remove the cached latest run so this object will requery it.
        try:
            delattr(self, "latest_run")
        except AttributeError:  # pragma: no cover
            pass  # It didn't have a `latest_run` and that's ok.
        with transaction.atomic():
            Metric.record(
                "sparkjob-emr-version",
                data={"version": self.emr_release.version},
            )
            # Sync with the EMR API.
            transaction.on_commit(run.sync)

    def expire(self):
        # TODO: disable the job as well once it's easy to re-enable the job.
        deleted = self.schedule.delete()
        self.expired_date = timezone.now()
        self.save()
        return deleted

    def terminate(self):
        """Stop the currently running scheduled Spark job."""
        if self.latest_run:
            self.cluster_provisioner.stop(self.latest_run.jobflow_id)

    def first_run(self):
        if self.latest_run:
            return None
        from .tasks import run_job
        return run_job.apply_async(
            args=(self.pk,),
            kwargs={"first_run": True},
            # Make sure we run this task only when we expect it;
            # may be in the future, may be in the past,
            # but definitely at a specific time.
            eta=self.start_date,
        )

    def save(self, *args, **kwargs):
        # Whether the job is being created for the first time.
        first_save = self.pk is None
        # Reset expired_date in case a user resets the end_date.
        if self.expired_date and self.end_date and self.end_date > timezone.now():
            self.expired_date = None
        super().save(*args, **kwargs)
        # Remove the cached latest run so this object will requery it.
        try:
            delattr(self, "latest_run")
        except AttributeError:  # pragma: no cover
            pass  # It didn't have a `latest_run` and that's ok.
        # First remove the schedule entry if it exists...
        self.schedule.delete()
        # ...and then add it, but only if the end date is in the future.
        if self.has_future_end_date(timezone.now()):
            self.schedule.add()
        if first_save:
            transaction.on_commit(self.first_run)

    def delete(self, *args, **kwargs):
        # Make sure to shut down the cluster if it's currently running.
        self.terminate()
        # Make sure to clean up the job notebook from storage.
        self.provisioner.remove(self.notebook_s3_key)
        self.schedule.delete()
        super().delete(*args, **kwargs)
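
# Illustrative sketch (not part of the original module): the timeout math
# behind `has_timed_out`, extracted with made-up values. A run scheduled
# 5 hours ago on a job with a 4-hour `job_timeout` has exceeded its allowed
# run time.
def _example_timed_out(scheduled_at, job_timeout_hours):
    max_run_time = scheduled_at + timedelta(hours=job_timeout_hours)
    return timezone.now() >= max_run_time

# _example_timed_out(timezone.now() - timedelta(hours=5), 4) -> True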
def test_with_function_as_input():
    f = autounicode(autostr("{self.name} {foo}", foo=lambda x: 42))
    assert_equal(f(ascii), "Alex 42")
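
# Illustrative sketch (not part of the original file): what the autostr and
# autorepr helpers used throughout these models produce, assuming the
# autorepr library's documented behavior.
class _Person:
    name = "Alex"
    __str__ = autostr("{self.name}")
    __repr__ = autorepr(["name"])

# str(_Person()) == "Alex"
# repr(_Person()) looks something like "<somemodule._Person name='Alex'>"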
class Cluster(EMRReleaseModel, CreatedByModel, EditedAtModel):
    STATUS_STARTING = 'STARTING'
    STATUS_BOOTSTRAPPING = 'BOOTSTRAPPING'
    STATUS_RUNNING = 'RUNNING'
    STATUS_WAITING = 'WAITING'
    STATUS_TERMINATING = 'TERMINATING'
    STATUS_TERMINATED = 'TERMINATED'
    STATUS_TERMINATED_WITH_ERRORS = 'TERMINATED_WITH_ERRORS'

    ACTIVE_STATUS_LIST = (
        STATUS_STARTING,
        STATUS_BOOTSTRAPPING,
        STATUS_RUNNING,
        STATUS_WAITING,
        STATUS_TERMINATING,
    )
    READY_STATUS_LIST = [
        STATUS_RUNNING,
        STATUS_WAITING,
    ]
    TERMINATED_STATUS_LIST = (STATUS_TERMINATED,)
    FAILED_STATUS_LIST = (STATUS_TERMINATED_WITH_ERRORS,)
    FINAL_STATUS_LIST = TERMINATED_STATUS_LIST + FAILED_STATUS_LIST

    STATE_CHANGE_REASON_INTERNAL_ERROR = 'INTERNAL_ERROR'
    STATE_CHANGE_REASON_VALIDATION_ERROR = 'VALIDATION_ERROR'
    STATE_CHANGE_REASON_INSTANCE_FAILURE = 'INSTANCE_FAILURE'
    STATE_CHANGE_REASON_BOOTSTRAP_FAILURE = 'BOOTSTRAP_FAILURE'
    STATE_CHANGE_REASON_USER_REQUEST = 'USER_REQUEST'
    STATE_CHANGE_REASON_STEP_FAILURE = 'STEP_FAILURE'
    STATE_CHANGE_REASON_ALL_STEPS_COMPLETED = 'ALL_STEPS_COMPLETED'

    FAILED_STATE_CHANGE_REASON_LIST = [
        STATE_CHANGE_REASON_INTERNAL_ERROR,
        STATE_CHANGE_REASON_VALIDATION_ERROR,
        STATE_CHANGE_REASON_INSTANCE_FAILURE,
        STATE_CHANGE_REASON_BOOTSTRAP_FAILURE,
        STATE_CHANGE_REASON_STEP_FAILURE,
    ]
    REQUESTED_STATE_CHANGE_REASON_LIST = [
        STATE_CHANGE_REASON_USER_REQUEST,
    ]
    COMPLETED_STATE_CHANGE_REASON_LIST = [
        STATE_CHANGE_REASON_ALL_STEPS_COMPLETED,
    ]

    DEFAULT_SIZE = 1
    DEFAULT_LIFETIME = 8

    identifier = models.CharField(
        max_length=100,
        help_text="Cluster name, used to non-uniquely identify individual clusters.",
    )
    size = models.IntegerField(
        help_text="Number of computers used in the cluster.",
    )
    lifetime = models.PositiveSmallIntegerField(
        help_text="Lifetime of the cluster after which it's automatically terminated, in hours.",
        default=DEFAULT_LIFETIME,
    )
    lifetime_extension_count = models.PositiveSmallIntegerField(
        help_text="Number of lifetime extensions.",
        default=0,
    )
    ssh_key = models.ForeignKey(
        'keys.SSHKey',
        on_delete=models.SET_NULL,
        blank=True,
        null=True,
        related_name='launched_clusters',  # e.g. ssh_key.launched_clusters.all()
        help_text="SSH key to use when launching the cluster.",
    )
    expires_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time that the cluster will expire and automatically be deleted.",
    )
    started_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time when the cluster was started on AWS EMR.",
    )
    ready_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time when the cluster was ready to run steps on AWS EMR.",
    )
    finished_at = models.DateTimeField(
        blank=True,
        null=True,
        help_text="Date/time when the cluster was terminated or failed on AWS EMR.",
    )
    jobflow_id = models.CharField(
        max_length=50,
        blank=True,
        null=True,
        help_text="AWS cluster/jobflow ID for the cluster, used for cluster management.",
    )
    most_recent_status = models.CharField(
        max_length=50,
        default='',
        blank=True,
        help_text="Most recently retrieved AWS status for the cluster.",
        db_index=True,
    )
    master_address = models.CharField(
        max_length=255,
        default='',
        blank=True,
        help_text=("Public address of the master node. "
                   "This is only available once the cluster has bootstrapped."),
    )
    expiration_mail_sent = models.BooleanField(
        default=False,
        help_text="Whether the expiration mail was sent.",
    )

    objects = ClusterQuerySet.as_manager()

    class Meta:
        permissions = [
            ('view_cluster', 'Can view cluster'),
        ]

    #: A cluster URL helper.
    class urls(urlman.Urls):

        def detail(self):
            return reverse('clusters-detail', kwargs={'id': self.id})

        def extend(self):
            return reverse('clusters-extend', kwargs={'id': self.id})

        def terminate(self):
            return reverse('clusters-terminate', kwargs={'id': self.id})

    __str__ = autostr('{self.identifier}')

    __repr__ = autorepr([
        'identifier',
        'most_recent_status',
        'size',
        'lifetime',
        'expires_at',
        'lifetime_extension_count',
    ])

    def get_absolute_url(self):
        return self.urls.detail

    @property
    def is_active(self):
        """Returns whether the cluster is active or not."""
        return self.most_recent_status in self.ACTIVE_STATUS_LIST

    @property
    def is_terminated(self):
        """Returns whether the cluster is terminated or not."""
        return self.most_recent_status in self.TERMINATED_STATUS_LIST

    @property
    def is_failed(self):
        """Returns whether the cluster has failed or not."""
        return self.most_recent_status in self.FAILED_STATUS_LIST

    @property
    def is_terminating(self):
        """Returns whether the cluster is terminating or not."""
        return self.most_recent_status == self.STATUS_TERMINATING

    @property
    def is_ready(self):
        """Returns whether the cluster is ready or not."""
        return self.most_recent_status == self.STATUS_WAITING

    @property
    def is_expiring_soon(self):
        """Returns whether the cluster is expiring in the next hour."""
        return self.expires_at <= timezone.now() + timedelta(hours=1)

    @property
    def provisioner(self):
        return ClusterProvisioner()

    @property
    def info(self):
        """Returns the provisioning information for the cluster."""
        return self.provisioner.info(self.jobflow_id)

    def sync(self, info=None):
        """
        Should be called to update the latest cluster status in
        `self.most_recent_status`.
        """
        if info is None:
            info = self.info

        # Map AWS API fields to Cluster model fields.
        model_field_map = (
            ('state', 'most_recent_status'),
            ('public_dns', 'master_address'),
            ('creation_datetime', 'started_at'),
            ('ready_datetime', 'ready_at'),
            ('end_datetime', 'finished_at'),
        )
        save_needed = False
        date_fields_updated = False

        # Set the various model fields to the values the API returned.
        for api_field, model_field in model_field_map:
            field_value = info.get(api_field)
            # Skip the field if the API value is not set or hasn't changed.
            if field_value is None or field_value == getattr(self, model_field):
                continue
            setattr(self, model_field, field_value)
            save_needed = True
            if model_field in ('started_at', 'ready_at', 'finished_at'):
                date_fields_updated = True

        if save_needed:
            self.save()

        if date_fields_updated:
            if self.finished_at:
                # When the cluster is finished, record normalized instance hours.
                hours = math.ceil(
                    (self.finished_at - self.started_at).seconds / 60 / 60
                )
                normalized_hours = hours * self.size
                Metric.record(
                    'cluster-normalized-instance-hours',
                    normalized_hours,
                    data={
                        'identifier': self.identifier,
                        'size': self.size,
                        'jobflow_id': self.jobflow_id,
                    },
                )
            # When the cluster is ready, record a count and the time to ready.
            if self.ready_at and not self.finished_at:
                # A simple count to track the number of clusters spun up
                # successfully.
                Metric.record(
                    'cluster-ready',
                    data={
                        'identifier': self.identifier,
                        'size': self.size,
                        'jobflow_id': self.jobflow_id,
                    },
                )
                # Time in seconds it took the cluster to be ready.
                time_to_ready = (self.ready_at - self.started_at).seconds
                Metric.record(
                    'cluster-time-to-ready',
                    time_to_ready,
                    data={
                        'identifier': self.identifier,
                        'size': self.size,
                        'jobflow_id': self.jobflow_id,
                    },
                )

    def save(self, *args, **kwargs):
        """
        Insert the cluster into the database or update it if already
        present, spawning the cluster if it's not already spawned.
        """
        # Actually start the cluster.
        if self.jobflow_id is None:
            self.jobflow_id = self.provisioner.start(
                user_username=self.created_by.username,
                user_email=self.created_by.email,
                identifier=self.identifier,
                emr_release=self.emr_release.version,
                size=self.size,
                public_key=self.ssh_key.key,
            )
            # Once we've stored the jobflow ID we can fetch the status
            # for the first time.
            transaction.on_commit(self.sync)

            Metric.record('cluster-emr-version',
                          data={'version': self.emr_release.version})

        # Set the dates.
        if not self.expires_at:
            # Clusters should expire after the lifetime they're set to.
            self.expires_at = timezone.now() + timedelta(hours=self.lifetime)

        super().save(*args, **kwargs)

    def extend(self, hours):
        """Extend the cluster lifetime by the given number of hours."""
        self.expires_at = models.F('expires_at') + timedelta(hours=hours)
        self.lifetime_extension_count = models.F('lifetime_extension_count') + 1
        self.save()

        Metric.record('cluster-extension', data={
            'identifier': self.identifier,
            'size': self.size,
            'jobflow_id': self.jobflow_id,
        })

    def deactivate(self):
        """Shut down the cluster and update its status accordingly."""
        self.provisioner.stop(self.jobflow_id)
        self.sync()