def _poll_step(self, step_id):
    """ method polls the state for given step_id and awaits its completion """
    def _current_state():
        step = self.client_b3.describe_step(ClusterId=self.jobflow_id, StepId=step_id)
        return step['Step']['Status']['State']

    state = _current_state()
    while state in [STEP_STATE_PENDING, STEP_STATE_RUNNING]:
        # Job flow step is being spawned. Idle and recheck the status.
        time.sleep(20.0)
        state = _current_state()

    if state in [STEP_STATE_CANCELLED, STEP_STATE_INTERRUPTED,
                 STEP_STATE_CANCEL_PENDING, STEP_STATE_FAILED]:
        raise ClusterError('EMR Step {0} failed'.format(step_id))
    elif state == STEP_STATE_COMPLETED:
        self.logger.info('EMR Step {0} has completed'.format(step_id))
    else:
        self.logger.warning(
            'Unknown state {0} during EMR Step {1} execution'.format(state, step_id))
    return state
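
# The STEP_STATE_* constants referenced above are not shown in this snippet.
# A minimal sketch of what they are assumed to hold (normally defined at module
# level), mirroring the step state strings returned by the EMR DescribeStep API
# in Step.Status.State:
STEP_STATE_PENDING = 'PENDING'
STEP_STATE_CANCEL_PENDING = 'CANCEL_PENDING'
STEP_STATE_RUNNING = 'RUNNING'
STEP_STATE_COMPLETED = 'COMPLETED'
STEP_STATE_CANCELLED = 'CANCELLED'
STEP_STATE_FAILED = 'FAILED'
STEP_STATE_INTERRUPTED = 'INTERRUPTED'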
def _poll_step(self, job_id):
    """ method polls the state for given job_id and awaits its completion """
    details = 'NA'
    state = JOB_STATE_PENDING
    while state in [OPERATION_STATE_SETUP_DONE, JOB_STATE_PENDING,
                    JOB_STATE_RUNNING, JOB_STATE_QUEUED]:
        result = self.dataproc.projects().regions().jobs().get(
            projectId=self.project_id,
            region=self.cluster_region,
            jobId=job_id).execute()
        state = result['status']['state']
        if 'details' in result['status']:
            details = result['status']['details']

    if state == JOB_STATE_ERROR:
        raise ClusterError('Gcp Job {0} failed: {1}'.format(job_id, details))
    elif state == JOB_STATE_DONE:
        self.logger.info('Gcp Job {0} has completed.'.format(job_id))
    else:
        self.logger.warning(
            'Unknown state {0} during Gcp Job {1} execution'.format(state, job_id))
    return state
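
# The JOB_STATE_* / OPERATION_STATE_* constants referenced above are not shown
# in this snippet. A minimal sketch of the assumed values (normally defined at
# module level), loosely following the Dataproc JobStatus.State strings that
# appear in result['status']['state']:
OPERATION_STATE_SETUP_DONE = 'SETUP_DONE'
JOB_STATE_PENDING = 'PENDING'
JOB_STATE_QUEUED = 'QUEUED'     # assumed value; naming follows the code above
JOB_STATE_RUNNING = 'RUNNING'
JOB_STATE_DONE = 'DONE'
JOB_STATE_ERROR = 'ERROR'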
def _wait_for_cluster(self, cluster_id):
    """ method polls the state for the cluster
        and awaits until it is ready to start processing """
    def _current_state():
        cluster = self.client_b3.describe_cluster(ClusterId=cluster_id)
        return cluster['Cluster']['Status']['State']

    state = _current_state()
    while state in [CLUSTER_STATE_STARTING, CLUSTER_STATE_BOOTSTRAPPING,
                    CLUSTER_STATE_RUNNING]:
        # Cluster is being spawned. Idle and recheck the status.
        time.sleep(20.0)
        state = _current_state()

    if state in [CLUSTER_STATE_TERMINATING, CLUSTER_STATE_TERMINATED,
                 CLUSTER_STATE_TERMINATED_WITH_ERRORS]:
        raise ClusterError('EMR Cluster {0} launch failed'.format(self.name))
    elif state == CLUSTER_STATE_WAITING:
        # state WAITING marks readiness to process business steps
        cluster = self.client_b3.describe_cluster(ClusterId=cluster_id)
        master_dns = cluster['Cluster']['MasterPublicDnsName']
        self.logger.info(
            'EMR Cluster Launched Successfully. Master DNS node is {0}'.format(master_dns))
    else:
        self.logger.warning('Unknown state {0} during EMR Cluster launch'.format(state))
    return state
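
# The CLUSTER_STATE_* constants used by the EMR methods above are not shown in
# this snippet. A minimal sketch of the assumed values (normally defined at
# module level), mirroring the cluster state strings returned by the EMR
# DescribeCluster API in Cluster.Status.State:
CLUSTER_STATE_STARTING = 'STARTING'
CLUSTER_STATE_BOOTSTRAPPING = 'BOOTSTRAPPING'
CLUSTER_STATE_RUNNING = 'RUNNING'
CLUSTER_STATE_WAITING = 'WAITING'
CLUSTER_STATE_TERMINATING = 'TERMINATING'
CLUSTER_STATE_TERMINATED = 'TERMINATED'
CLUSTER_STATE_TERMINATED_WITH_ERRORS = 'TERMINATED_WITH_ERRORS'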
def run_pig_step(self, uri_script, **kwargs):
    """ method starts a Pig step on a cluster and monitors its execution
        :raise ClusterError: in case the cluster is not launched
        :return: step state or None if the step failed """
    # `https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-commandrunner.html`_
    # `http://boto3.readthedocs.io/en/latest/reference/services/emr.html#EMR.Client.add_job_flow_steps`_
    if not self.jobflow_id:
        raise ClusterError('EMR Cluster {0} is not launched'.format(self.name))

    self.logger.info('Pig Script Step {')
    try:
        step = {
            'Name': 'SynergyPigStep',
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': [
                    'pig-script', '--run-pig-script', '--args', '-f', uri_script
                ]
            }
        }

        if kwargs:
            properties = [{'Key': '{}'.format(k), 'Value': '{}'.format(v)}
                          for k, v in kwargs.items()]
            step['HadoopJarStep']['Properties'] = properties

            step_args = []
            for k, v in kwargs.items():
                step_args.append('-p')
                step_args.append('{0}={1}'.format(k, v))
            step['HadoopJarStep']['Args'].extend(step_args)

        step_response = self.client_b3.add_job_flow_steps(
            JobFlowId=self.jobflow_id, Steps=[step])
        step_ids = step_response['StepIds']
        assert len(step_ids) == 1
        return self._poll_step(step_ids[0])
    except ClusterError as e:
        self.logger.error('Pig Script Step Error: {0}'.format(e), exc_info=True)
        return None
    except Exception as e:
        self.logger.error('Pig Script Step Unexpected Exception: {0}'.format(e),
                          exc_info=True)
        return None
    finally:
        self.logger.info('}')
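
# Hedged usage sketch: how run_pig_step's kwargs surface inside the Pig script.
# `cluster` stands for an instance of the class above (constructor not shown);
# the bucket and script paths are illustrative only.
def example_pig_submission(cluster):
    # each kwarg becomes a HadoopJarStep property and a Pig '-p name=value'
    # parameter, so the script can reference $input_path and $run_date
    return cluster.run_pig_step('s3://my-bucket/scripts/aggregate.pig',
                                input_path='s3://my-bucket/raw/2018-01-01/',
                                run_date='2018-01-01')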
def _run_step(self, job_details):
    if not self.cluster_details:
        raise ClusterError('Gcp Cluster {0} is not launched'.format(self.cluster_name))

    result = self.dataproc.projects().regions().jobs().submit(
        projectId=self.project_id,
        region=self.cluster_region,
        body=job_details).execute()
    job_id = result['reference']['jobId']
    self.logger.info('Submitted job ID {0}. Waiting for completion'.format(job_id))
    return self._poll_step(job_id)
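
# Hedged sketch of a `job_details` payload of the shape accepted by the
# Dataproc v1 jobs.submit API; the cluster name parameter and the GCS file URI
# are illustrative only.
def example_pyspark_job_details(cluster_name):
    return {
        'job': {
            'placement': {
                'clusterName': cluster_name
            },
            'pysparkJob': {
                'mainPythonFileUri': 'gs://my-bucket/scripts/wordcount.py'
            }
        }
    }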
def _wait_for_cluster(self):
    """ method polls the cluster state and awaits until the cluster is running """
    cluster = self._get_cluster()
    while cluster:
        if cluster['status']['state'] == CLUSTER_STATE_ERROR:
            raise ClusterError('Cluster {0} creation error: {1}'.format(
                self.cluster_name, cluster['status']['details']))
        if cluster['status']['state'] == CLUSTER_STATE_RUNNING:
            self.logger.info('Cluster {0} is running'.format(self.cluster_name))
            break
        else:
            time.sleep(5)
            cluster = self._get_cluster()
    return cluster
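
# `_get_cluster` is referenced above but not shown in this snippet. A minimal
# sketch of one possible implementation, assuming the Dataproc v1 clusters.list
# API and the attributes used elsewhere in this class (self.dataproc,
# self.project_id, self.cluster_region, self.cluster_name). It returns None when
# the cluster does not exist, matching the falsy check in _wait_for_cluster;
# CLUSTER_STATE_ERROR / CLUSTER_STATE_RUNNING above are assumed to map to the
# Dataproc ClusterStatus.State strings 'ERROR' and 'RUNNING'.
def _get_cluster(self):
    result = self.dataproc.projects().regions().clusters().list(
        projectId=self.project_id,
        region=self.cluster_region).execute()
    for cluster in result.get('clusters', []):
        if cluster['clusterName'] == self.cluster_name:
            return cluster
    return None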
def _launch(self):
    """ method launches the cluster and returns when the cluster
        is fully operational and ready to accept business steps
        :see: `http://boto3.readthedocs.io/en/latest/reference/services/emr.html#EMR.Client.run_job_flow`_ """
    self.logger.info('Launching EMR Cluster {0} {{'.format(self.name))
    try:
        response = self.client_b3.run_job_flow(
            Name=self.context.settings['aws_cluster_name'],
            ReleaseLabel='emr-5.12.0',
            Instances={
                'MasterInstanceType': 'm3.xlarge',
                'SlaveInstanceType': 'm3.xlarge',
                'InstanceCount': 3,
                'KeepJobFlowAliveWhenNoSteps': True,
                'TerminationProtected': True,
                'Ec2KeyName': self.context.settings.get('aws_key_name', ''),
            },
            BootstrapActions=[
                {
                    'Name': 'Maximize Spark Default Config',
                    'ScriptBootstrapAction': {
                        'Path': 's3://support.elasticmapreduce/spark/maximize-spark-default-config',
                    }
                },
            ],
            Applications=[
                {'Name': 'Spark'},
                {'Name': 'Pig'},
            ],
            VisibleToAllUsers=True,
            JobFlowRole='EMR_EC2_DefaultRole',
            ServiceRole='EMR_DefaultRole')
        self.logger.info('EMR Cluster Initialization Request Successful.')
        return response['JobFlowId']
    except Exception:
        self.logger.error('EMR Cluster failed to launch', exc_info=True)
        raise ClusterError('EMR Cluster {0} launch failed'.format(self.name))
    finally:
        self.logger.info('}')
def run_spark_step(self, uri_script, language, **kwargs):
    """ method starts a Spark step on a cluster and monitors its execution
        :raise ClusterError: in case the cluster is not launched
        :return: step state or None if the step failed """
    # `https://github.com/dev-86/aws-cli/blob/29756ea294aebc7c854b3d9a2b1a56df28637e11/tests/unit/customizations/emr/test_create_cluster_release_label.py`_
    # `https://docs.aws.amazon.com/emr/latest/ReleaseGuide/emr-commandrunner.html`_
    # `http://boto3.readthedocs.io/en/latest/reference/services/emr.html#EMR.Client.add_job_flow_steps`_
    if not self.jobflow_id:
        raise ClusterError('EMR Cluster {0} is not launched'.format(self.name))

    self.logger.info('Spark Step {')
    try:
        step = {
            'Name': 'SynergyPysparkStep',
            'ActionOnFailure': 'CONTINUE',
            'HadoopJarStep': {
                'Jar': 'command-runner.jar',
                'Args': ['spark-submit', '--deploy-mode', 'cluster', uri_script]
            }
        }

        if kwargs:
            properties = [{'Key': '{}'.format(k), 'Value': '{}'.format(v)}
                          for k, v in kwargs.items()]
            step['HadoopJarStep']['Properties'] = properties

        step_response = self.client_b3.add_job_flow_steps(
            JobFlowId=self.jobflow_id, Steps=[step])
        step_ids = step_response['StepIds']
        assert len(step_ids) == 1
        return self._poll_step(step_ids[0])
    except ClusterError as e:
        self.logger.error('Spark Step Error: {0}'.format(e), exc_info=True)
        return None
    except Exception as e:
        self.logger.error('Spark Step Unexpected Exception: {0}'.format(e),
                          exc_info=True)
        return None
    finally:
        self.logger.info('}')
def launch(self):
    self.logger.info('Launching EMR Cluster: {0} {{'.format(
        self.context.settings['aws_cluster_name']))

    if self.jobflow_id \
            and self._wait_for_cluster(self.jobflow_id) == CLUSTER_STATE_WAITING:
        raise ClusterError(
            'EMR Cluster {0} has already been launched with id {1}. Use it or dispose it.'
            .format(self.name, self.jobflow_id))

    cluster_id = self._get_cluster()
    if cluster_id:
        self.logger.info('Reusing existing EMR Cluster: {0}'.format(cluster_id))
    else:
        cluster_id = self._launch()

    self._wait_for_cluster(cluster_id)
    self.jobflow_id = cluster_id
    self.logger.info('}')
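
# Hedged end-to-end sketch tying the EMR methods above together: launch (or
# reuse) the cluster, run a PySpark step, and surface the terminal state.
# `cluster` stands for an instance of the class above (constructor not shown);
# the script URI is illustrative only.
def example_emr_flow(cluster):
    cluster.launch()
    state = cluster.run_spark_step('s3://my-bucket/scripts/job.py',
                                   language='python')
    if state != STEP_STATE_COMPLETED:
        cluster.logger.warning('Spark step finished in state {0}'.format(state))
    return state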