def handle_state_master_resources_in_progress(self, spot_master_item ):
        """ Verify the key pair, security group, role and instance profile exist, then finish configuring them

        Each resource is looked up in turn. If any lookup reports the resource as missing, the
        method returns immediately without saving a new state code. Once all four lookups succeed:

        * inbound rules found in the batch job parm item (if any) are authorized on the security group
        * the policy built by create_policy() is serialized to JSON and added to the role/instance profile
        * the master row is saved with SpotMasterStateCode.master_role_policy_in_progress

        :param spot_master_item: master item supplying the key pair name, VPC id, security group id,
            role name, policy name and spot master uuid
        :return: None

        """
        def log_progress( suffix='' ):
            # Every progress message shares the same header and method-name prefix
            logger.info( fmt_master_item_msg_hdr( spot_master_item ) + 'handle_state_master_resources_in_progress' + suffix )

        log_progress()

        # Key pair
        kp_name = spot_master_item[ TableSpotMaster.kp_name ]
        ec2_conn = awsext.ec2.connect_to_region( self.region_name, profile_name=self.profile_name )
        if ec2_conn.find_key_pair( kp_name ) is None:
            return
        log_progress( ': kp_name ready' )

        # Security group - the lookup returns the group id together with the exists flag
        vpc_id = spot_master_item[ TableSpotMaster.cheapest_vpc_id ]
        sg_id = spot_master_item[ TableSpotMaster.sg_id ]
        vpc_conn = awsext.vpc.connect_to_region( self.region_name, profile_name=self.profile_name )
        group_id, is_group_exists = vpc_conn.is_security_group_exists( vpc_id, group_id=sg_id )
        if not is_group_exists:
            return
        log_progress( ': sg_id ready' )

        # Role and instance profile - the instance profile is looked up under the role name
        iam_conn = awsext.iam.connect_to_region( self.region_name, profile_name=self.profile_name )
        role_name = spot_master_item[ TableSpotMaster.role_name ]
        if not iam_conn.is_role_exists( role_name ):
            return
        log_progress( ': role_name ready' )
        if not iam_conn.is_instance_profile_exists( role_name ):
            return
        log_progress( ': instance_profile_name ready' )

        batch_job_parm = get_batch_job_parm_item( spot_master_item[ TableSpotMaster.spot_master_uuid ],
                                                  self.spot_batch_job_parm_table_name, self.region_name, self.profile_name,
                                                  attributes=[TableSpotBatchJobParm.raw_batch_job_parm_item] )
        raw_batch_job_parm_item = batch_job_parm[ TableSpotBatchJobParm.raw_batch_job_parm_item ]
        batch_job_parm_item = BatchJobParmItem( stringParmFile=raw_batch_job_parm_item )

        # At this point, all resources have been created - some require additional steps after creation completes
        # Update the SG with any inbound rules
        inbound_rule_items_serialized = batch_job_parm_item.serialized_inbound_rule_items
        if inbound_rule_items_serialized is not None:
            inbound_rule_items = deserialize_inbound_rule_items( inbound_rule_items_serialized )
            security_group = vpc_conn.get_security_group( vpc_id, group_id )
            vpc_conn.authorize_inbound_rules( security_group, inbound_rule_items )

        # Create base policy (queue, buckets) and extend with user policy from batch_job_parm_item
        policy_json = json.dumps( create_policy( batch_job_parm_item ) )
        iam_conn.add_role_instance_profile_policy( role_name=role_name,
                                                   policy_name=spot_master_item[ TableSpotMaster.policy_name ],
                                                   policy=policy_json
                                                  )

        # Advance the state only after the rules and policy calls above have returned
        spot_master_row_partial_save( self.spot_master_table_name, spot_master_item,
                                      {TableSpotMaster.spot_master_state_code:SpotMasterStateCode.master_role_policy_in_progress},
                                      region_name=self.region_name, profile_name=self.profile_name )
예제 #2
0
def launch_remote_client( spot_batch_job_parm_table_name, spot_rsa_key_table_name, spot_request_item, region_name='us-east-1', profile_name=None ):
    """ SSH into remote client, SCP files to client, run script on client, return results

    Steps, in order:

    1. Load the batch job parm item and the RSA key item for the request's spot master uuid
    2. Open an SSH connection to the instance's public IP address as the instance username
    3. Run the primary bootstrap command, then the service and user bootstrap command files
    4. Upload the client parm JSON file and, when present, the user job parm item JSON file
    5. Start the client launch command (it ends with ' &') without waiting for it to finish

    The SSH connection is closed before returning, including when any step raises.

    :param spot_batch_job_parm_table_name: table holding the batch job parm item
    :param spot_rsa_key_table_name: table holding the encoded RSA key
    :param spot_request_item: request item supplying spot master uuid, instance public IP address and username
    :param region_name:  (Default value = 'us-east-1')
    :param profile_name:  (Default value = None)
    :return: client_bootstrap_service_cmds_results, client_bootstrap_user_cmds_results - each an iterable of
        dicts with keys 'cmd', 'remote_exit_status', 'buf_std_out' and 'buf_std_err'

    """
    spot_master_uuid = spot_request_item[ TableSpotRequest.spot_master_uuid ]
    spot_batch_job_parm_item = get_batch_job_parm_item( spot_master_uuid, spot_batch_job_parm_table_name,
                                                        region_name=region_name, profile_name=profile_name )
    batch_job_parm_item = BatchJobParmItem( stringParmFile=spot_batch_job_parm_item[ TableSpotBatchJobParm.raw_batch_job_parm_item ] )

    filename_client_parm_json = 'clientparm.json'
    filename_bootstrap_service_cmds = 'bootstrap_service_cmds'
    filename_bootstrap_user_cmds = 'bootstrap_user_cmds'
    cmd_client_launch = 'python -m awsspotbatch.client.clientlaunch ' + filename_client_parm_json + ' &'
    client_parm_json_string = create_client_parm_json_string( spot_request_item, batch_job_parm_item )

    # Get the RSA key, connect to remote instance and launch remote script
    rsa_key_item = get_rsa_key_item( spot_rsa_key_table_name, spot_master_uuid, region_name=region_name, profile_name=profile_name )
    kp_material_dec = decode( kp_enc_key, str( rsa_key_item[ TableSpotRSAKey.rsa_key_encoded ]) )
    pkey = paramiko.RSAKey.from_private_key( StringIO( kp_material_dec ) )

    instance_public_ip_address = spot_request_item[ TableSpotRequest.instance_public_ip_address ]
    instance_username = spot_request_item[ TableSpotRequest.instance_username ]

    ssh = paramiko.SSHClient()
    # NOTE(review): AutoAddPolicy accepts any host key without verification - confirm this is acceptable here
    ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
    try:
        ssh.connect( instance_public_ip_address, timeout=10, username=instance_username, pkey=pkey )

        # Bootstrap the system and user command
        # A primary bootstrap failure is only logged - the remaining steps still run
        client_bootstrap_service_primary_results = run_cmds(ssh, [batch_job_parm_item.client_bootstrap_service_primary] )
        primary_result = client_bootstrap_service_primary_results[0]
        if primary_result['remote_exit_status'] > 2:
            logger.error( fmt_request_item_msg_hdr( spot_request_item ) + 'SEVERE ERROR: client_bootstrap_service_primary failed with remote_exit_status=' +
                          str(primary_result['remote_exit_status']) +
                          ', buf_std_out' + str(primary_result['buf_std_out']) +
                          ', buf_std_err' + str(primary_result['buf_std_err']) )

        write_cmd_file_to_remote( ssh, batch_job_parm_item.client_bootstrap_service_cmds, filename_bootstrap_service_cmds )
        client_bootstrap_service_cmds_results = run_cmds(ssh, ['python service/clientbootstrap.py ' + filename_bootstrap_service_cmds])
        write_cmd_file_to_remote( ssh, batch_job_parm_item.client_bootstrap_user_cmds, filename_bootstrap_user_cmds )
        client_bootstrap_user_cmds_results = run_cmds(ssh, ['python service/clientbootstrap.py ' + filename_bootstrap_user_cmds])

        _log_cmd_results( spot_request_item, 'service', client_bootstrap_service_cmds_results )
        _log_cmd_results( spot_request_item, 'user', client_bootstrap_user_cmds_results )

        # put the parm file out to this instance - will have a different SpotRequestUUID and SpotRequestiID for each instance
        _put_string_as_remote_file( ssh, client_parm_json_string, filename_client_parm_json )

        # write the user job parm item json file to disk - will be the same on every instance
        # user parm file is optional
        if TableSpotBatchJobParm.raw_user_job_parm_item in spot_batch_job_parm_item:
            _put_string_as_remote_file( ssh,
                                        spot_batch_job_parm_item[ TableSpotBatchJobParm.raw_user_job_parm_item ],
                                        'userjobparmitem.json' )

        # Don't wait for the clientlaunch to complete - it's running the users batch job and a monitor thread that sends SQS status msgs
        chan = ssh.get_transport().open_session()
        chan.exec_command( cmd_client_launch )
    finally:
        # Always release the connection, even when a bootstrap or upload step raised
        ssh.close()

    return client_bootstrap_service_cmds_results, client_bootstrap_user_cmds_results


def _log_cmd_results( spot_request_item, cmd_label, cmd_results ):
    """ Log the command, exit status, stdout and stderr of every result in cmd_results

    :param spot_request_item: request item used to build the log message header
    :param cmd_label: word placed before ' cmd: ' in the first log line, e.g. 'service' or 'user'
    :param cmd_results: iterable of dicts with keys 'cmd', 'remote_exit_status', 'buf_std_out', 'buf_std_err'

    """
    for cmd_result in cmd_results:
        logger.info( fmt_request_item_msg_hdr( spot_request_item ) + '   ' + cmd_label + ' cmd: ' + cmd_result['cmd'] )
        logger.info( fmt_request_item_msg_hdr( spot_request_item ) + '      remote_exit_status: ' + str(cmd_result['remote_exit_status']) )
        logger.info( fmt_request_item_msg_hdr( spot_request_item ) + '      buf_std_out: ' + cmd_result['buf_std_out'] )
        logger.info( fmt_request_item_msg_hdr( spot_request_item ) + '      buf_std_err: ' + cmd_result['buf_std_err'] )


def _put_string_as_remote_file( ssh, file_contents, remote_filename ):
    """ Upload file_contents to remote_filename over a new SFTP session, closing the session afterwards

    :param ssh: connected paramiko.SSHClient
    :param file_contents: string passed to create_named_in_memory_temp_file
    :param remote_filename: destination path on the remote host

    """
    with create_named_in_memory_temp_file( file_contents ) as temp_file:
        sftp_client = ssh.open_sftp()
        try:
            sftp_client.put( temp_file, remote_filename )
        finally:
            sftp_client.close()