Esempio n. 1
0
def scale_up(ec2_pool, extra_nodes, instance_type, spot, spot_bid_price):
    """
    Add extra worker nodes to an existing EC2 pool.

    Either launches fixed price instances or places persistent spot requests,
    then saves one EC2Instance / SpotRequest record per item returned by EC2.

    ec2_pool -- the pool to grow; its vpc, key_pair and master are read
    extra_nodes -- number of workers to launch (used as both min and max count)
    instance_type -- EC2 instance type the workers are launched as
    spot -- if true, place spot requests instead of fixed price instances
    spot_bid_price -- bid price for the spot requests; only read when spot is true

    Returns a list of errors, empty on success. When EC2 rejects the launch the
    list holds a (title, message) tuple followed by the EC2ResponseError itself.
    """
    log.debug('Scaling condor pool %s with %d extra nodes'%(ec2_pool.id, extra_nodes))

    errors = []
    #Only the EC2 connection is used here
    _vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)

    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)

    try:
        #Options shared by both launch styles. Built inside the try because
        #the original evaluated get_private_ip() there, so an
        #EC2ResponseError it may raise is still handled below.
        launch_options = {
            'key_name': ec2_pool.key_pair.name,
            'instance_type': instance_type,
            'subnet_id': ec2_pool.vpc.subnet_id,
            'security_group_ids': [ec2_pool.vpc.worker_group_id],
            'user_data': ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
        }

        if not spot:
            #Fix price launch. This is easy.
            log.debug('Launching fixed price instances')
            worker_reservation = ec2_connection.run_instances(ami.id,
                                                              min_count=extra_nodes,
                                                              max_count=extra_nodes,
                                                              **launch_options)
            sleep(3)
            for instance in worker_reservation.instances:
                ec2_instance = EC2Instance()
                ec2_instance.ec2_pool = ec2_pool
                ec2_instance.instance_id = instance.id
                #Record the type that was actually launched, not the pool's
                #initial type, which may differ when scaling up.
                ec2_instance.instance_type = instance_type
                ec2_instance.instance_role = 'worker'
                ec2_instance.save()

        else:
            #We're launching a spot request pool instead.
            log.debug('Launching spot requests')
            worker_requests = ec2_connection.request_spot_instances(str(spot_bid_price),
                                                                    ami.id,
                                                                    type='persistent',
                                                                    count=extra_nodes,
                                                                    **launch_options)
            for request in worker_requests:
                spot_request = SpotRequest(ec2_pool=ec2_pool,
                                           request_id=request.id,
                                           price=request.price,
                                           status_code=request.status.code,
                                           status_message=request.status.message,
                                           state=request.state,
                                           #Same reasoning as above: store the requested type
                                           instance_type=instance_type,
                                           )
                spot_request.save()

    except EC2ResponseError as e:
        #Adjacent literals are used instead of backslash continuations so the
        #source indentation does not end up inside the message.
        errors.append(('Error launching worker instances',
                       'An error occured when launching the worker instances, '
                       'however a master instance was launched successfully. Check your AWS usage limit to ensure you '
                       'are not trying to exceed it. You should either try again to scale the pool up, or terminate it.'))
        errors.append(e)

    #Hand the collected errors back so the caller can report them
    return errors
Esempio n. 2
0
def scale_up(ec2_pool, extra_nodes, instance_type, spot, spot_bid_price):
    """
    Add extra worker nodes to an existing EC2 pool.

    Either launches fixed price instances or places persistent spot requests,
    then saves one EC2Instance / SpotRequest record per item returned by EC2.

    ec2_pool -- the pool to grow; its vpc, key_pair and master are read
    extra_nodes -- number of workers to launch (used as both min and max count)
    instance_type -- EC2 instance type the workers are launched as
    spot -- if true, place spot requests instead of fixed price instances
    spot_bid_price -- bid price for the spot requests; only read when spot is true

    Returns a list of errors, empty on success. When EC2 rejects the launch the
    list holds a (title, message) tuple followed by the EC2ResponseError itself.
    """
    log.debug('Scaling condor pool %s with %d extra nodes'%(ec2_pool.id, extra_nodes))

    errors = []
    #Only the EC2 connection is used here
    _vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)

    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)

    try:
        #Options shared by both launch styles. Built inside the try because
        #the original evaluated get_private_ip() there, so an
        #EC2ResponseError it may raise is still handled below.
        launch_options = {
            'key_name': ec2_pool.key_pair.name,
            'instance_type': instance_type,
            'subnet_id': ec2_pool.vpc.subnet_id,
            'security_group_ids': [ec2_pool.vpc.worker_group_id],
            'user_data': ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
        }

        if not spot:
            #Fix price launch. This is easy.
            log.debug('Launching fixed price instances')
            worker_reservation = ec2_connection.run_instances(ami.id,
                                                              min_count=extra_nodes,
                                                              max_count=extra_nodes,
                                                              **launch_options)
            sleep(3)
            for instance in worker_reservation.instances:
                ec2_instance = EC2Instance()
                ec2_instance.ec2_pool = ec2_pool
                ec2_instance.instance_id = instance.id
                #Record the type that was actually launched, not the pool's
                #initial type, which may differ when scaling up.
                ec2_instance.instance_type = instance_type
                ec2_instance.instance_role = 'worker'
                ec2_instance.save()

        else:
            #We're launching a spot request pool instead.
            log.debug('Launching spot requests')
            worker_requests = ec2_connection.request_spot_instances(str(spot_bid_price),
                                                                    ami.id,
                                                                    type='persistent',
                                                                    count=extra_nodes,
                                                                    **launch_options)
            for request in worker_requests:
                spot_request = SpotRequest(ec2_pool=ec2_pool,
                                           request_id=request.id,
                                           price=request.price,
                                           status_code=request.status.code,
                                           status_message=request.status.message,
                                           state=request.state,
                                           #Same reasoning as above: store the requested type
                                           instance_type=instance_type,
                                           )
                spot_request.save()

    except EC2ResponseError as e:
        #Adjacent literals are used instead of backslash continuations so the
        #source indentation does not end up inside the message.
        errors.append(('Error launching worker instances',
                       'An error occured when launching the worker instances, '
                       'however a master instance was launched successfully. Check your AWS usage limit to ensure you '
                       'are not trying to exceed it. You should either try again to scale the pool up, or terminate it.'))
        errors.append(e)

    return errors
Esempio n. 3
0
def launch_pool(ec2_pool):
    """
    Launch a EC2 pool with the definitions provided by the ec2_pool object

    Launches a single master instance, saves a record for it and stores it as
    ec2_pool.master, waits for the master to report a private IP, and then, if
    ec2_pool.size is greater than zero, launches the workers either as fixed
    price instances or as persistent spot requests (ec2_pool.spot_request).

    Returns a list of errors, empty on success. When EC2 rejects the worker
    launch the list holds a (title, message) tuple followed by the
    EC2ResponseError itself. Errors raised while launching the master are not
    caught here.
    """

    log.debug('Launcing EC2 pool')
    assert isinstance(ec2_pool, EC2Pool)

    errors = []

    #Initiate the connection; only the EC2 connection is used here
    _vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)

    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)

    #Launch the master instance
    #Add the pool details to the launch string
    master_launch_string = ec2_config.MASTER_LAUNCH_STRING
    #And launch
    log.debug('Launching Master node')
    master_reservation = ec2_connection.run_instances(ami.id,
                                               key_name=ec2_pool.key_pair.name,
                                               instance_type=settings.MASTER_NODE_TYPE,
                                               subnet_id=ec2_pool.vpc.subnet_id,
                                               security_group_ids=[ec2_pool.vpc.master_group_id],
                                               user_data=master_launch_string,
                                               min_count=1,#Only 1 instance needed
                                               max_count=1,
                                               )
    #Short pause before reading the reservation
    sleep(2)

    master_instance = master_reservation.instances[0]
    master_ec2_instance = EC2Instance()
    master_ec2_instance.ec2_pool = ec2_pool
    master_ec2_instance.instance_id = master_instance.id
    master_ec2_instance.instance_type = settings.MASTER_NODE_TYPE
    master_ec2_instance.instance_role = 'master'
    master_ec2_instance.save()

    ec2_pool.master = master_ec2_instance
    ec2_pool.last_update_time = now()
    ec2_pool.save()

    #wait until the master has a private ip address
    #sleep in between
    log.debug('Waiting for private IP to be assigned to master node')
    sleep_time=5
    max_retrys=20
    current_try=0
    private_ip = master_ec2_instance.get_private_ip()
    while private_ip is None and current_try<max_retrys:
        sleep(sleep_time)
        current_try+=1
        private_ip = master_ec2_instance.get_private_ip()
    if private_ip is None:
        #Carry on as before, but make the timeout visible: the worker launch
        #string below would otherwise silently be formatted with None.
        log.error('Master instance %s has no private IP after %d retries' % (master_ec2_instance.instance_id, max_retrys))
    sleep(2)

    if ec2_pool.size > 0:
        log.debug('Launching worker nodes')

        #Are we launcing fixed price or spot instances?
        try:
            #Options shared by both launch styles. Built inside the try because
            #the original evaluated get_private_ip() there, so an
            #EC2ResponseError it may raise is still handled below.
            launch_options = {
                'key_name': ec2_pool.key_pair.name,
                'instance_type': ec2_pool.initial_instance_type,
                'subnet_id': ec2_pool.vpc.subnet_id,
                'security_group_ids': [ec2_pool.vpc.worker_group_id],
                'user_data': ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
            }

            if not ec2_pool.spot_request:
                #Fix price launch. This is easy.
                worker_reservation = ec2_connection.run_instances(ami.id,
                                                                  min_count=ec2_pool.size,
                                                                  max_count=ec2_pool.size,
                                                                  **launch_options)
                sleep(3)
                for instance in worker_reservation.instances:
                    ec2_instance = EC2Instance()
                    ec2_instance.ec2_pool = ec2_pool
                    ec2_instance.instance_id = instance.id
                    ec2_instance.instance_type = ec2_pool.initial_instance_type
                    ec2_instance.instance_role = 'worker'
                    ec2_instance.save()

            else:
                #We're launching a spot request pool instead.
                worker_requests = ec2_connection.request_spot_instances(str(ec2_pool.spot_price),
                                                                        ami.id,
                                                                        type='persistent',
                                                                        count=ec2_pool.size,
                                                                        **launch_options)
                for request in worker_requests:
                    spot_request = SpotRequest(ec2_pool=ec2_pool,
                                               request_id=request.id,
                                               price=request.price,
                                               status_code=request.status.code,
                                               status_message=request.status.message,
                                               state=request.state,
                                               instance_type=ec2_pool.initial_instance_type,
                                               )
                    spot_request.save()

        except EC2ResponseError as e:
            #Adjacent literals are used instead of backslash continuations so
            #the source indentation does not end up inside the message.
            errors.append(('Error launching worker instances',
                           'An error occured when launching the worker instances, '
                           'however a master instance was launched successfully. Check your AWS usage limit to ensure you '
                           'are not trying to exceed it. You should either try again to scale the pool up, or terminate it.'))
            errors.append(e)

    #Hand the collected errors back so the caller can report them
    return errors
Esempio n. 4
0
def launch_pool(ec2_pool):
    """
    Launch a EC2 pool with the definitions provided by the ec2_pool object

    Steps, in order:
      1. Launch a single master instance, save a record for it and store it
         as ec2_pool.master.
      2. Wait for the master to report a private IP address.
      3. If ec2_pool.size is greater than zero, launch the workers as fixed
         price instances or persistent spot requests (ec2_pool.spot_request).
      4. Recreate the pool's SQS queue.
      5. Create the SNS alarm topic and subscribe the
         api_terminate_instance_alarm endpoint to it.
      6. Assign an elastic IP to the master and set ec2_pool.address.
      7. Repeatedly try to SSH into the master.

    Returns a list of errors, empty on success. A failed worker launch adds a
    (title, message) tuple followed by the EC2ResponseError; a failed SNS
    subscription adds a (title, message) tuple.

    Any exception raised while assigning the elastic IP is logged and
    re-raised. Errors from launching the master, SQS or SNS topic creation are
    not caught here.

    NOTE(review): alarm_notify_topic_arn, smart_terminate and address are set
    on ec2_pool after its last save() in this function -- presumably the
    caller saves the pool; confirm.
    """

    log.debug('Launcing EC2 pool')
    assert isinstance(ec2_pool, EC2Pool)

    errors = []

    #Initiate the connection; only the EC2 connection is used here
    _vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)

    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)

    #Launch the master instance
    #Add the pool details to the launch string
    master_launch_string = ec2_config.MASTER_LAUNCH_STRING
    #And launch
    log.debug('Launching Master node')
    master_reservation = ec2_connection.run_instances(ami.id,
                                               key_name=ec2_pool.key_pair.name,
                                               instance_type=settings.MASTER_NODE_TYPE,
                                               subnet_id=ec2_pool.vpc.subnet_id,
                                               security_group_ids=[ec2_pool.vpc.master_group_id],
                                               user_data=master_launch_string,
                                               min_count=1,#Only 1 instance needed
                                               max_count=1,
                                               )
    #Short pause before reading the reservation
    sleep(2)

    master_instance = master_reservation.instances[0]
    master_ec2_instance = EC2Instance()
    master_ec2_instance.ec2_pool = ec2_pool
    master_ec2_instance.instance_id = master_instance.id
    master_ec2_instance.instance_type = settings.MASTER_NODE_TYPE
    master_ec2_instance.instance_role = 'master'
    master_ec2_instance.save()

    ec2_pool.master = master_ec2_instance
    ec2_pool.last_update_time = now()
    ec2_pool.save()

    #wait until the master has a private ip address
    #sleep in between
    log.debug('Waiting for private IP to be assigned to master node')
    sleep_time=5
    max_retrys=20
    current_try=0
    private_ip = master_ec2_instance.get_private_ip()
    while private_ip is None and current_try<max_retrys:
        sleep(sleep_time)
        current_try+=1
        private_ip = master_ec2_instance.get_private_ip()
    if private_ip is None:
        #Carry on as before, but make the timeout visible: the worker launch
        #string below would otherwise silently be formatted with None.
        log.error('Master instance %s has no private IP after %d retries' % (master_ec2_instance.instance_id, max_retrys))
    sleep(2)

    if ec2_pool.size > 0:
        log.debug('Launching worker nodes')

        #Are we launcing fixed price or spot instances?
        try:
            #Options shared by both launch styles. Built inside the try because
            #the original evaluated get_private_ip() there, so an
            #EC2ResponseError it may raise is still handled below.
            launch_options = {
                'key_name': ec2_pool.key_pair.name,
                'instance_type': ec2_pool.initial_instance_type,
                'subnet_id': ec2_pool.vpc.subnet_id,
                'security_group_ids': [ec2_pool.vpc.worker_group_id],
                'user_data': ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
            }

            if not ec2_pool.spot_request:
                #Fix price launch. This is easy.
                worker_reservation = ec2_connection.run_instances(ami.id,
                                                                  min_count=ec2_pool.size,
                                                                  max_count=ec2_pool.size,
                                                                  **launch_options)
                sleep(3)
                for instance in worker_reservation.instances:
                    ec2_instance = EC2Instance()
                    ec2_instance.ec2_pool = ec2_pool
                    ec2_instance.instance_id = instance.id
                    ec2_instance.instance_type = ec2_pool.initial_instance_type
                    ec2_instance.instance_role = 'worker'
                    ec2_instance.save()

            else:
                #We're launching a spot request pool instead.
                worker_requests = ec2_connection.request_spot_instances(str(ec2_pool.spot_price),
                                                                        ami.id,
                                                                        type='persistent',
                                                                        count=ec2_pool.size,
                                                                        **launch_options)
                for request in worker_requests:
                    spot_request = SpotRequest(ec2_pool=ec2_pool,
                                               request_id=request.id,
                                               price=request.price,
                                               status_code=request.status.code,
                                               status_message=request.status.message,
                                               state=request.state,
                                               instance_type=ec2_pool.initial_instance_type,
                                               )
                    spot_request.save()

        except EC2ResponseError as e:
            #Adjacent literals are used instead of backslash continuations so
            #the source indentation does not end up inside the message.
            errors.append(('Error launching worker instances',
                           'An error occured when launching the worker instances, '
                           'however a master instance was launched successfully. Check your AWS usage limit to ensure you '
                           'are not trying to exceed it. You should either try again to scale the pool up, or terminate it.'))
            errors.append(e)

    #Create an sqs queue, replacing any existing queue of the same name
    log.debug('Creating SQS for pool')
    sqs_connection = aws_tools.create_sqs_connection(ec2_pool.vpc.access_key)
    queue = sqs_connection.get_queue(ec2_pool.get_queue_name())
    if queue is not None:
        sqs_connection.delete_queue(queue)

    sqs_connection.create_queue(ec2_pool.get_queue_name())

    #Create an SNS topic for instance alarm notifications
    log.debug('Creating SNS topic for alarms')
    sns_connection = aws_tools.create_sns_connection(ec2_pool.vpc.access_key)
    topic_data = sns_connection.create_topic(ec2_pool.get_alarm_notify_topic())

    topic_arn = topic_data['CreateTopicResponse']['CreateTopicResult']['TopicArn']

    log.debug('SNS topic created with arn %s' %topic_arn)

    ec2_pool.alarm_notify_topic_arn = topic_arn
    #And create a  subscription to the api_terminate_instance_alarm endpoint
    termination_notify_url = 'http://' + settings.HOST + str(reverse_lazy('api_terminate_instance_alarm'))

    try:
        sns_connection.subscribe(topic_arn, 'http', termination_notify_url)
    except BotoServerError:
        errors.append(('Error enabling smart termination', 'Smart termination was not successfully enabled'))
        ec2_pool.smart_terminate = False
        #Removing the now useless topic is best effort: a failure is logged
        #but must not abort the launch.
        try:
            sns_connection.delete_topic(topic_arn)
        except Exception as e:
            log.exception(e)
    #Apply an alarm to each of the ec2 instances to notify that they should be shutdown should they be unused
    ##Note, this is now performed when the master node sends a notification back to the server through the API

    #Assign an elastic IP to the master instance
    #(the original comment said "try up to 5 times"; any retrying presumably
    #happens inside assign_ip_address -- confirm)
    log.debug('Assigning elastic IP to master node')
    try:
        elastic_ip = assign_ip_address(master_ec2_instance)
        log.debug('Assigned elastic IP address to instance %s' % master_ec2_instance.instance_id)
    except Exception as e:
        log.error('Error assigning elastic ip to master instance %s' % master_ec2_instance.instance_id)
        log.exception(e)
        #Bare raise keeps the original traceback
        raise

    ec2_pool.address = 'ubuntu@' + str(elastic_ip.public_ip)

    #Check to see if we can ssh in
    #Try a couple of times
    tries = 15
    for _attempt in range(tries):
        log.debug('Testing SSH credentials')
        command = ['ssh', '-o', 'StrictHostKeyChecking=no', '-i', ec2_pool.key_pair.path, ec2_pool.address, 'pwd']

        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env={'DISPLAY' : ''})
        output = process.communicate()

        log.debug('SSH response:')
        log.debug(output)

        if process.returncode == 0:
            log.debug('SSH success')
            break
        sleep(5)
    else:
        #Every attempt failed; the launch still completes as before, but the
        #failure is now visible in the log.
        log.error('Unable to SSH to %s after %d attempts' % (ec2_pool.address, tries))

    return errors