def handle_state_master_resources_in_progress(self, spot_master_item):
    """Verify the SG, KP and Role/InstanceProfile are created, then finish configuring them.

    The method returns early, without changing the master row, as soon as
    any one of the key pair, security group, role or instance profile is
    not found. Once all four are found it:

    * authorizes the inbound rules carried by the batch job parm item (if
      any) on the security group,
    * builds the policy via ``create_policy`` and attaches it through
      ``add_role_instance_profile_policy``,
    * saves the master row with state code
      ``SpotMasterStateCode.master_role_policy_in_progress``.

    :param spot_master_item: spot master item; read through ``TableSpotMaster`` keys
    :return: None
    """
    def log_progress(suffix=''):
        # Every progress message shares the same header and method-name prefix.
        logger.info(fmt_master_item_msg_hdr(spot_master_item)
                    + 'handle_state_master_resources_in_progress' + suffix)

    log_progress()

    # Key pair
    kp_name = spot_master_item[TableSpotMaster.kp_name]
    ec2_conn = awsext.ec2.connect_to_region(self.region_name, profile_name=self.profile_name)
    key_pair = ec2_conn.find_key_pair(kp_name)
    if key_pair is None:
        return
    log_progress(': kp_name ready')

    # Security group
    vpc_id = spot_master_item[TableSpotMaster.cheapest_vpc_id]
    sg_id = spot_master_item[TableSpotMaster.sg_id]
    vpc_conn = awsext.vpc.connect_to_region(self.region_name, profile_name=self.profile_name)
    group_id, is_group_exists = vpc_conn.is_security_group_exists(vpc_id, group_id=sg_id)
    if not is_group_exists:
        return
    log_progress(': sg_id ready')

    # Role and instance profile (the instance profile is looked up under the role name)
    iam_conn = awsext.iam.connect_to_region(self.region_name, profile_name=self.profile_name)
    role_name = spot_master_item[TableSpotMaster.role_name]
    if not iam_conn.is_role_exists(role_name):
        return
    log_progress(': role_name ready')
    if not iam_conn.is_instance_profile_exists(role_name):
        return
    log_progress(': instance_profile_name ready')

    batch_job_parm = get_batch_job_parm_item(
        spot_master_item[TableSpotMaster.spot_master_uuid],
        self.spot_batch_job_parm_table_name,
        self.region_name,
        self.profile_name,
        attributes=[TableSpotBatchJobParm.raw_batch_job_parm_item])
    raw_batch_job_parm_item = batch_job_parm[TableSpotBatchJobParm.raw_batch_job_parm_item]
    batch_job_parm_item = BatchJobParmItem(stringParmFile=raw_batch_job_parm_item)

    # At this point, all resources have been created - some require additional
    # steps after creation completes.

    # Update the SG with any inbound rules
    inbound_rule_items_serialized = batch_job_parm_item.serialized_inbound_rule_items
    if inbound_rule_items_serialized is not None:
        inbound_rule_items = deserialize_inbound_rule_items(inbound_rule_items_serialized)
        # group_id is the id handed back by is_security_group_exists above
        security_group = vpc_conn.get_security_group(vpc_id, group_id)
        vpc_conn.authorize_inbound_rules(security_group, inbound_rule_items)

    # Create base policy (queue, buckets) and extend with user policy from batch_job_parm_item
    policy = create_policy(batch_job_parm_item)
    policy_json = json.dumps(policy)
    iam_conn.add_role_instance_profile_policy(
        role_name=role_name,
        policy_name=spot_master_item[TableSpotMaster.policy_name],
        policy=policy_json)

    spot_master_row_partial_save(
        self.spot_master_table_name,
        spot_master_item,
        {TableSpotMaster.spot_master_state_code: SpotMasterStateCode.master_role_policy_in_progress},
        region_name=self.region_name,
        profile_name=self.profile_name)
def launch_remote_client(spot_batch_job_parm_table_name, spot_rsa_key_table_name, spot_request_item,
                         region_name='us-east-1', profile_name=None):
    """SSH into remote client, copy files to client, run scripts on client, return results.

    Steps, in order:

    1. Load the batch job parm item and the RSA key for the request's spot master.
    2. Open an SSH connection to the instance's public IP address.
    3. Run the primary service bootstrap command, then the service and user
       bootstrap command files through ``service/clientbootstrap.py``.
    4. Upload the client parm JSON file and, when present, the user job parm file.
    5. Start ``awsspotbatch.client.clientlaunch`` in the background and
       disconnect without waiting for it.

    The SSH connection is always closed, including when a step raises.

    :param spot_batch_job_parm_table_name: table name passed to ``get_batch_job_parm_item``
    :param spot_rsa_key_table_name: table name passed to ``get_rsa_key_item``
    :param spot_request_item: spot request item; read through ``TableSpotRequest`` keys
    :param region_name: (Default value = 'us-east-1')
    :param profile_name: (Default value = None)
    :return: tuple ``(client_bootstrap_service_cmds_results, client_bootstrap_user_cmds_results)``
    """
    spot_batch_job_parm_item = get_batch_job_parm_item(
        spot_request_item[TableSpotRequest.spot_master_uuid],
        spot_batch_job_parm_table_name,
        region_name=region_name,
        profile_name=profile_name)
    batch_job_parm_item = BatchJobParmItem(
        stringParmFile=spot_batch_job_parm_item[TableSpotBatchJobParm.raw_batch_job_parm_item])

    filename_client_parm_json = 'clientparm.json'
    filename_bootstrap_service_cmds = 'bootstrap_service_cmds'
    filename_bootstrap_user_cmds = 'bootstrap_user_cmds'
    # Trailing '&' backgrounds the launcher on the remote host
    cmd_client_launch = 'python -m awsspotbatch.client.clientlaunch ' + filename_client_parm_json + ' &'
    client_parm_json_string = create_client_parm_json_string(spot_request_item, batch_job_parm_item)

    # Get the RSA key, connect to remote instance and launch remote script
    rsa_key_item = get_rsa_key_item(
        spot_rsa_key_table_name,
        spot_request_item[TableSpotRequest.spot_master_uuid],
        region_name=region_name,
        profile_name=profile_name)
    kp_material_dec = decode(kp_enc_key, str(rsa_key_item[TableSpotRSAKey.rsa_key_encoded]))
    key_file_obj = StringIO(kp_material_dec)
    pkey = paramiko.RSAKey.from_private_key(key_file_obj)

    instance_public_ip_address = spot_request_item[TableSpotRequest.instance_public_ip_address]
    instance_username = spot_request_item[TableSpotRequest.instance_username]

    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts unknown host keys without verification.
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())

    def put_string_as_remote_file(contents, remote_filename):
        # Upload a string as a remote file; the SFTP session is closed even if put() fails.
        with create_named_in_memory_temp_file(contents) as temp_file:
            sftp_client = ssh.open_sftp()
            try:
                sftp_client.put(temp_file, remote_filename)
            finally:
                sftp_client.close()

    try:
        ssh.connect(instance_public_ip_address, timeout=10, username=instance_username, pkey=pkey)

        # Bootstrap the system and user command
        client_bootstrap_service_primary_results = run_cmds(
            ssh, [batch_job_parm_item.client_bootstrap_service_primary])
        primary_result = client_bootstrap_service_primary_results[0]
        # An exit status above 2 is logged as severe, but bootstrapping still continues.
        if primary_result['remote_exit_status'] > 2:
            logger.error(
                fmt_request_item_msg_hdr(spot_request_item)
                + 'SEVERE ERROR: client_bootstrap_service_primary failed with remote_exit_status='
                + str(primary_result['remote_exit_status'])
                + ', buf_std_out' + str(primary_result['buf_std_out'])
                + ', buf_std_err' + str(primary_result['buf_std_err']))

        write_cmd_file_to_remote(
            ssh, batch_job_parm_item.client_bootstrap_service_cmds, filename_bootstrap_service_cmds)
        client_bootstrap_service_cmds_results = run_cmds(
            ssh, ['python service/clientbootstrap.py ' + filename_bootstrap_service_cmds])
        write_cmd_file_to_remote(
            ssh, batch_job_parm_item.client_bootstrap_user_cmds, filename_bootstrap_user_cmds)
        client_bootstrap_user_cmds_results = run_cmds(
            ssh, ['python service/clientbootstrap.py ' + filename_bootstrap_user_cmds])

        # Log every command result: service commands first, then user commands
        labelled_results = (
            (' service cmd: ', client_bootstrap_service_cmds_results),
            (' user cmd: ', client_bootstrap_user_cmds_results),
        )
        for cmd_label, cmd_results in labelled_results:
            for cmd_result in cmd_results:
                logger.info(fmt_request_item_msg_hdr(spot_request_item) + cmd_label + cmd_result['cmd'])
                logger.info(fmt_request_item_msg_hdr(spot_request_item)
                            + ' remote_exit_status: ' + str(cmd_result['remote_exit_status']))
                logger.info(fmt_request_item_msg_hdr(spot_request_item)
                            + ' buf_std_out: ' + cmd_result['buf_std_out'])
                logger.info(fmt_request_item_msg_hdr(spot_request_item)
                            + ' buf_std_err: ' + cmd_result['buf_std_err'])

        # put the parm file out to this instance - will have a different
        # SpotRequestUUID and SpotRequestiID for each instance
        put_string_as_remote_file(client_parm_json_string, filename_client_parm_json)

        # write the user job parm item json file to disk - will be the same on every instance
        # user parm file is optional
        if TableSpotBatchJobParm.raw_user_job_parm_item in spot_batch_job_parm_item:
            user_job_parm_item_filename = 'userjobparmitem.json'
            user_job_parm_item_string = spot_batch_job_parm_item[TableSpotBatchJobParm.raw_user_job_parm_item]
            put_string_as_remote_file(user_job_parm_item_string, user_job_parm_item_filename)

        # Don't wait for the clientlaunch to complete - it's running the users batch job
        # and a monitor thread that sends SQS status msgs
        chan = ssh.get_transport().open_session()
        chan.exec_command(cmd_client_launch)

        return client_bootstrap_service_cmds_results, client_bootstrap_user_cmds_results
    finally:
        # Runs on success and on failure, so the connection is never leaked
        ssh.close()