def script_runner(self, user_pem=None, user_script_name=None, quiet_mode=False):
    """Copy a shell script to every active cluster node and execute it there.

    :param user_pem: path to the PEM key file used for SSH; falls back to
        ``self.config["PemFilePath"]`` when ``None``.
    :param user_script_name: script to run (absolute or relative path);
        falls back to ``self.config["ScriptToRun"]["File"]`` when ``None``.
    :param quiet_mode: when True, print each host's stderr; otherwise print
        each host's stdout.
    :return: list of active host addresses the script was run on.
    :raises ValueError: when the script name or PEM path is not specified,
        or when the EMR ListInstances call does not return HTTP 200.
    """
    script_name = (user_script_name if user_script_name is not None
                   else self.config["ScriptToRun"]["File"])
    pem_path = (user_pem if user_pem is not None
                else self.config["PemFilePath"])

    # Guard clauses replace the original deeply nested if/else pyramid;
    # the same ValueErrors are raised on the same conditions.
    if script_name is None:
        raise ValueError("script runner shell script not specified")
    if pem_path is None:
        raise ValueError(
            'pem_file_path is not specified in emrcliconfig_inst_fleets.yaml "pem_file_path:%s"'
            % pem_path)

    job_flow_id = self.find_job_flow_id()
    response = self.emr.list_instances(ClusterId=job_flow_id)
    response_code = response['ResponseMetadata']['HTTPStatusCode']
    if response_code != 200:
        # Fix: report the actual status code, not the whole response dict.
        raise ValueError(
            "Could not list instances (status code {})".format(response_code))

    hosts = self.active_instances(response)
    print(hosts)
    client = ParallelSSHClient(hosts, user='******', pkey=pem_path)

    # Scripts are always staged under /home/hadoop on the nodes; an absolute
    # source path keeps only its basename, a relative one is resolved against
    # the current working directory.
    if script_name.startswith("/"):  # handle absolute path
        to_script_name = "/home/hadoop/{}".format(
            os.path.basename(script_name))
        from_script_name = script_name
    else:  # handle relative path
        to_script_name = "/home/hadoop/{}".format(script_name)
        from_script_name = os.path.join(os.getcwd(), script_name)

    logger.info("Copying script {} to {}".format(
        from_script_name, to_script_name))
    copy_files = client.copy_file(from_script_name, to_script_name)
    joinall(copy_files, raise_error=True)
    logger.info("Finished copying script {} to {}".format(
        from_script_name, to_script_name))

    logger.info("Running script {}".format(to_script_name))
    output = client.run_command(
        "chmod +x {} && {}".format(to_script_name, to_script_name),
        sudo=True)
    for host, host_output in output.items():
        # NOTE(review): quiet_mode prints stderr while the default prints
        # stdout — this looks inverted but matches install(); confirm intent.
        if quiet_mode:
            for line in host_output.stderr:
                print(line)
        else:
            for line in host_output.stdout:
                print(line)
    logger.info("Finished script {}".format(to_script_name))
    return hosts
def install(self, user_pem=None, quiet_mode=False):
    """Build the local package, copy it to every active node and pip-install it.

    The target cluster must be active (WAITING or RUNNING) and tagged as a
    local/dev cluster (Name contains 'local' or Description contains
    'env=local') — local builds must never land on other clusters.

    :param user_pem: path to the PEM key file used for SSH; falls back to
        ``self.config["PemFilePath"]`` when ``None``.
    :param quiet_mode: when True, print each host's stderr; otherwise print
        each host's stdout.
    :raises ValueError: when the cluster is not active, is not tagged as a
        local cluster, the PEM path is missing, or the EMR ListInstances
        call does not return HTTP 200.
    """
    pem_path = (user_pem if user_pem is not None
                else self.config["PemFilePath"])
    cluster_id = self.find_job_flow_id()
    desc_cluster = self.emr.describe_cluster(ClusterId=cluster_id)
    cluster_state = desc_cluster['Cluster']['Status']['State']
    if cluster_state not in ['WAITING', 'RUNNING']:
        raise ValueError("Cluster is not active")

    # Scan the cluster tags for a marker identifying a local/dev cluster.
    tags_list = desc_cluster['Cluster']['Tags']
    valid_description = ["env=local"]
    valid_names = ['local']
    is_local_cluster = False
    for tag in tags_list:
        if 'Description' in tag['Key'] and any(
                value in tag['Value'] for value in valid_description):
            is_local_cluster = True
            break
        if 'Name' in tag['Key'] and any(name in tag['Value']
                                        for name in valid_names):
            is_local_cluster = True
            break
    # Fix: the original raised when a local tag WAS found (inverted check);
    # a local build must be rejected only when no local tag is present.
    if not is_local_cluster:
        print("Cluster tags should contain Key=Name, Value='local'")
        print("Cluster tags should contain Key=Description, Value='env=local'")
        raise ValueError(
            "Error: Local build can not deployed on this cluster {0}".
            format(cluster_id))

    tar_file_nl = HandleEMRCommands.build()
    tar_file_name = tar_file_nl[0]
    tar_file_location = tar_file_nl[1]

    # Fix: the original silently returned when pem_path was None or the
    # ListInstances call failed; raise explicitly, matching script_runner().
    if pem_path is None:
        raise ValueError(
            'pem_file_path is not specified in emrcliconfig_inst_fleets.yaml "pem_file_path:%s"'
            % pem_path)
    response = self.emr.list_instances(ClusterId=cluster_id)
    response_code = response['ResponseMetadata']['HTTPStatusCode']
    if response_code != 200:
        raise ValueError(
            "Could not list instances (status code {})".format(response_code))

    hosts = self.active_instances(response)
    print(hosts)
    client = ParallelSSHClient(hosts, user='******', pkey=pem_path)
    copy_files = client.copy_file(tar_file_location,
                                  '/home/hadoop/' + tar_file_name)
    joinall(copy_files, raise_error=True)
    # Force-reinstall so a rebuilt package with the same version replaces
    # the previous deployment on every node.
    output = client.run_command(
        "python3 -m pip install --upgrade --no-deps --force-reinstall /home/hadoop/"
        + tar_file_name,
        sudo=True)
    for host, host_output in output.items():
        # NOTE(review): quiet_mode prints stderr while the default prints
        # stdout — this looks inverted but matches script_runner(); confirm.
        if quiet_mode:
            for line in host_output.stderr:
                print(line)
        else:
            for line in host_output.stdout:
                print(line)
    print("Deployed to all nodes")
    return