def stop(self):
    """Shut down the cluster.

    The cluster object can be restarted later by calling `start`.

    Raises
    ------
    RuntimeWarning
        If the cluster is not currently running.
    """
    running = self.is_running(_silent=True)
    if not running:
        raise RuntimeWarning('Can not stop cluster, this cluster is not running.')
    # Tear down the EC2-side controller, then clear and persist our state.
    _ec2_execution._stop_cluster(self.cluster_controller)
    self.cluster_controller = None
    self._save()
def stop(self):
    """Shut down the cluster.

    This cluster object can be restarted afterwards via `start`.

    Raises
    ------
    RuntimeWarning
        If the cluster is not currently running.
    """
    if self.is_running(_silent=True):
        # Stop the controller host, drop our handle, and persist.
        _ec2_execution._stop_cluster(self.cluster_controller)
        self.cluster_controller = None
        self._save()
    else:
        raise RuntimeWarning(
            'Can not stop cluster, this cluster is not running.')
def is_running(self, _silent=False):
    """Determine whether or not the cluster is currently running.

    Parameters
    ----------
    _silent : bool, optional
        When True, suppress log output while checking.

    Returns
    -------
    out : bool
        Whether the cluster is currently running.

    See Also
    --------
    start, stop
    """
    # Fast path: our local cluster_controller handle still responds.
    if self.cluster_controller is not None:
        if _ec2_execution._is_host_pingable(self.cluster_controller):
            return True

    # Check S3 for possibly updated information.
    try:
        s3_copy = load(self.s3_state_path)
    except Exception as e:
        if not _silent:
            _log.warning('Exception trying to load updated configuration '
                         'from S3 path %s. Exception: %s'
                         % (self.s3_state_path, e))
        return False

    # Reconcile S3 info with our local info.
    if s3_copy.cluster_controller is None:
        self.cluster_controller = None
        return False
    elif self.cluster_controller == s3_copy.cluster_controller:
        # S3 agrees with the handle we already failed to ping (or both
        # are unset) -- no new information, so the cluster is down.
        return False

    # Use the updated info from S3. Now check that.
    if not _silent:
        _log.info('Using updated state from S3.')
    # BUG FIX: the original did ``self = s3_copy``, which only rebinds the
    # local name and never updates this instance -- the updated controller
    # was silently discarded. Adopt the updated handle on the instance.
    self.cluster_controller = s3_copy.cluster_controller
    if _ec2_execution._is_host_pingable(self.cluster_controller):
        return True
    else:
        return False
def _get_job_state(self, app_id, silent=False):
    """Return the YARN application state(s) for *app_id*.

    Raises
    ------
    RuntimeError
        If no status could be retrieved from the cluster.
    """
    states = _ec2_execution.get_job_state(self, app_id)
    if states:
        return states
    raise RuntimeError('Cannot get application status from cluster.')
def packages(self):
    """List every installed package on this cluster.

    Each entry is a friendly-name string (package name + version). The
    list includes the base packages installed as part of the Dato
    deployment, Anaconda packages, and any user-specified
    `additional_packages`.

    Raises
    ------
    RuntimeWarning
        If the cluster is not running.
    """
    cluster_up = self.is_running(_silent=True)
    if not cluster_up:
        raise RuntimeWarning('The cluster must be running in order to get the list of packages.')
    return _ec2_execution._get_package_list(self.cluster_controller)
def _get_job_state(self, app_id, silent=False):
    """Return the YARN application state(s) for *app_id*.

    Parameters
    ----------
    app_id : str
        Application id to query on the cluster.
    silent : bool, optional
        Accepted for interface parity; appears unused in this body.

    Raises
    ------
    RuntimeError
        If no status could be retrieved from the cluster.
    """
    # PEP 8 / consistency fix: keyword defaults take no spaces around '='
    # (was ``silent = False``), matching the sibling definition.
    yarn_app_states = _ec2_execution.get_job_state(self, app_id)
    if not yarn_app_states:
        raise RuntimeError('Cannot get application status from cluster.')
    return yarn_app_states
def packages(self):
    """List every installed package on this cluster.

    Entries are friendly-name strings (package name + version), covering
    the base packages installed as part of the Turi deployment, Anaconda
    packages, and any user-specified `additional_packages`.

    Raises
    ------
    RuntimeWarning
        If the cluster is not running.
    """
    if not self.is_running(_silent=True):
        raise RuntimeWarning(
            'The cluster must be running in order to get the list of packages.'
        )
    package_list = _ec2_execution._get_package_list(self.cluster_controller)
    return package_list
def start(self):
    """Start the cluster; this may take a few minutes.

    The cluster remains running until shutdown is called.

    Raises
    ------
    RuntimeWarning
        If this environment has already been started.
    """
    already_up = self.is_running(_silent=True)
    if already_up:
        raise RuntimeWarning('This environment has already been started')
    controller = _ec2_execution._start_commander_host(
        self.name,
        self.ec2_config,
        self.s3_state_path,
        self.num_hosts,
        self.additional_packages,
        self.idle_shutdown_timeout)
    self.cluster_controller = controller
    self._save()
def start(self):
    """Start the cluster (may take a few minutes).

    The cluster will keep running until shutdown is called.

    Raises
    ------
    RuntimeWarning
        If this environment has already been started.
    """
    if self.is_running(_silent=True):
        raise RuntimeWarning('This environment has already been started')
    # Spin up the commander host and persist the new controller handle.
    self.cluster_controller = _ec2_execution._start_commander_host(
        self.name, self.ec2_config, self.s3_state_path, self.num_hosts,
        self.additional_packages, self.idle_shutdown_timeout)
    self._save()
def _cancel_job(self, app_id, silent=False):
    """Cancel the job identified by *app_id*.

    Delegates to the EC2 execution backend; *silent* is forwarded to it.
    """
    result = _ec2_execution.cancel_job(self, app_id, silent=silent)
    return result
def _prepare_job_files(self, job):
    """Stage the files for *job* via the EC2 execution backend.

    Returns nothing; performed purely for its side effects.
    """
    _ec2_execution.prepare_job_files(self, job)
def _create_job_home_dir(self, job_name):
    """Create the home directory for *job_name* and return the backend's result."""
    home_dir = _ec2_execution.create_job_home_dir(self, job_name)
    return home_dir
def _submit_job(self, job_working_dir, num_workers, silent=False):
    """Submit a job to the cluster. This is used only by DML.

    *num_workers* bounds the number of concurrently running tasks.
    """
    return _ec2_execution.submit_job(
        self,
        job_working_dir,
        max_concurrent_tasks=num_workers,
        silent=silent)
def _cancel_job(self, app_id, silent=False):
    """Cancel a job with a given app_id.

    Parameters
    ----------
    app_id : str
        Application id of the job to cancel.
    silent : bool, optional
        Forwarded to the backend to suppress output.
    """
    # PEP 8 / consistency fix: no spaces around '=' in keyword defaults
    # or keyword arguments (was ``silent = False`` / ``silent = silent``),
    # matching the sibling definition of this method.
    return _ec2_execution.cancel_job(self, app_id, silent=silent)
def _submit_job(self, job_working_dir, num_workers, silent=False):
    """Submit a job to the cluster. This is used only by DML.

    Parameters
    ----------
    job_working_dir : str
        Working directory of the job to submit.
    num_workers : int
        Passed to the backend as ``max_concurrent_tasks``.
    silent : bool, optional
        Forwarded to the backend to suppress output.
    """
    # PEP 8 / consistency fix: no spaces around '=' in keyword defaults
    # or keyword arguments (was ``silent = False`` etc.), matching the
    # sibling definition of this method.
    return _ec2_execution.submit_job(
        self, job_working_dir,
        max_concurrent_tasks=num_workers,
        silent=silent)