def get_remote_resources(user, key=None):
    """Return an overview of all the aws resources in use

    Scans each AWS access key belonging to user (or only the single key,
    when one is given) and records pending/running EC2 instances and
    elastic IPs in a ResourceOverview.

    Fixes: the original used Python-2-only ``except Exception, e`` syntax
    (the rest of the file uses ``as``) and never returned the overview it
    built.
    """
    if key:
        keys = [key]
    else:
        keys = AWSAccessKey.objects.filter(user=user)
    overview = ResourceOverview()
    for key in keys:
        try:
            vpc_connection, ec2_connection = aws_tools.create_connections(key)
            #Get ec2 count
            instance_reservations = ec2_connection.get_all_instances()
            for reservation in instance_reservations:
                for instance in reservation.instances:
                    if instance.state == 'pending' or instance.state == 'running':
                        overview.add_ec2_instance(key, instance.id)
        except Exception as e:
            #Best-effort: a failing key should not stop the scan.
            #NOTE(review): if create_connections itself fails, ec2_connection
            #below is unbound/stale — the except there swallows the NameError.
            log.exception(e)
        try:
            addresses = ec2_connection.get_all_addresses()
            for address in addresses:
                if address.allocation_id == None:
                    #EC2-classic style address: only a public IP, no allocation id
                    overview.add_elastic_ip(key, None, None, address.public_ip)
                else:
                    overview.add_elastic_ip(key, address.allocation_id, address.association_id, None)
        except Exception as e:
            log.exception(e)
    #Bug fix: previously the collected overview was never returned
    return overview
def release_ip_address_from_instance(ec2_instance):
    """Dissassociate and release the public IP address of the ec2 instance

    Returns the list of exceptions encountered (empty on full success);
    each step is attempted even if an earlier one failed.
    """
    assert isinstance(ec2_instance, EC2Instance)
    vpc_connection, ec2_connection = aws_tools.create_connections(
        ec2_instance.ec2_pool.vpc.access_key)
    collected_errors = []
    try:
        elastic_ip = ElasticIP.objects.get(instance=ec2_instance)
        log.debug('Disassociating IP')
        ec2_connection.disassociate_address(association_id=elastic_ip.association_id)
    except Exception as err:
        log.exception(err)
        collected_errors.append(err)
    try:
        log.debug('Releasing IP')
        ec2_connection.release_address(allocation_id=elastic_ip.allocation_id)
    except Exception as err:
        log.exception(err)
        collected_errors.append(err)
    try:
        #Remove the local record regardless of AWS-side success
        elastic_ip.delete()
    except Exception as err:
        log.exception(err)
        collected_errors.append(err)
    return collected_errors
def get_status(self):
    """Return the state of the VPC tied to self.access_key.

    Opens fresh AWS connections, reads the VPC state, closes the
    connections, and returns the state string. On any failure returns
    'error: <message>' instead of raising.

    Fix: converted Python-2-only ``except Exception, e`` to ``as e`` for
    consistency with the identical method elsewhere in this file.
    """
    try:
        vpc_connection, ec2_connection = aws_tools.create_connections(self.access_key)
        vpc_state = self.get_vpc(vpc_connection).state
        vpc_connection.close()
        ec2_connection.close()
        return vpc_state
    except Exception as e:
        return 'error: ' + str(e)
def get_status(self):
    """Look up and return the state of this object's VPC.

    Any exception is converted into an 'error: ...' string rather than
    propagated.
    """
    try:
        connections = aws_tools.create_connections(self.access_key)
        vpc_conn, ec2_conn = connections
        state = self.get_vpc(vpc_conn).state
        #Close both connections before returning (vpc first, matching
        #the original order)
        for conn in (vpc_conn, ec2_conn):
            conn.close()
        return state
    except Exception as e:
        return 'error: ' + str(e)
def release_ip_address_from_instance(ec2_instance):
    """Dissassociate and release the public IP address of the ec2 instance

    Returns a list of exceptions encountered (empty on success).

    Fixes: Python-2-only ``except Exception, e`` syntax converted to
    ``as e``; the collected errors list is now returned, matching the
    fuller duplicate of this function elsewhere in the file.
    NOTE(review): this version only disassociates — the duplicate also
    releases the address and deletes the ElasticIP record; confirm which
    version is canonical.
    """
    assert isinstance(ec2_instance, EC2Instance)
    vpc_connection, ec2_connection = aws_tools.create_connections(
        ec2_instance.ec2_pool.vpc.access_key)
    errors = []
    try:
        ip = ElasticIP.objects.get(instance=ec2_instance)
        log.debug('Disassociating IP')
        ec2_connection.disassociate_address(association_id=ip.association_id)
    except Exception as e:
        log.exception(e)
        errors.append(e)
    return errors
def release_ip_address(key, allocation_id, association_id=None, public_ip=None):
    """Dissociate and release the IP address with the allocation id and
    optional association id. Alternatively just use public ip.

    Fixes: Python-2-only ``except Exception, e`` syntax converted to
    ``as e``; the release step promised by the docstring was missing here
    (present in the duplicate of this function later in the file) and has
    been restored to match that duplicate.
    """
    vpc_connection, ec2_connection = aws_tools.create_connections(key)
    try:
        if association_id:
            log.debug('Disassociating IP')
            ec2_connection.disassociate_address(association_id=association_id)
        if public_ip:
            log.debug('Disassociating IP')
            ec2_connection.disassociate_address(public_ip=public_ip)
    except Exception as e:
        #Best-effort: still attempt the release below
        log.exception(e)
    try:
        log.debug('Releasing IP')
        if allocation_id:
            ec2_connection.release_address(allocation_id=allocation_id)
        else:
            #EC2-classic addresses are released by public IP
            ec2_connection.release_address(public_ip=public_ip)
    except Exception as e:
        log.exception(e)
def get(self, request, *args, **kwargs):
    """Return EC2 spot price data for an instance type as JSON.

    With ?history=true the full price history is returned as a list of
    (timestamp, price) pairs; otherwise only the most recent price from
    the last ten minutes is returned.
    """
    assert isinstance(request, HttpRequest)
    instance_type = request.GET.get('instance_type')
    key_id = request.GET.get('key_id')
    full_history = request.GET.get('history', False)
    if key_id != 'NULL':
        access_key = AWSAccessKey.objects.get(id=key_id)
    else:
        #Fall back to the first key marked use_for_spotprice_history
        access_key = AWSAccessKey.objects.filter(use_for_spotprice_history=True)[0]
    vpc_connection, ec2_connection = aws_tools.create_connections(access_key)
    if full_history == 'true':
        #Full price history: no start/end times specified
        history = ec2_connection.get_spot_price_history(instance_type=instance_type)
        output = {'price': [(item.timestamp, item.price) for item in history]}
    else:
        #Most recent price point within a 10 minute window
        fmt = '%Y-%m-%dT%H:%M:%SZ'
        utc_now = datetime.datetime.utcnow()
        window_start = utc_now - datetime.timedelta(seconds=600)
        history = ec2_connection.get_spot_price_history(
            start_time=window_start.strftime(fmt),
            end_time=utc_now.strftime(fmt),
            instance_type=instance_type)
        output = {'price': history[0].price}
    return HttpResponse(json.dumps(output), content_type="application/json", status=200)
def terminate_resources(user, resources):
    """Terminate the AWS resources here. These will not correspond to any local model

    Groups the resources in the ResourceOverview by access key, releases
    the elastic IPs first, then terminates the EC2 instances key by key.
    Every resource must belong to user.
    """
    assert isinstance(resources, ResourceOverview)
    instances_by_key = {}
    ips_by_key = {}
    #Group resources by their owning access key
    for access_key, instance_id in resources.ec2_instances:
        assert access_key.user == user
        instances_by_key.setdefault(access_key, []).append(instance_id)
    for access_key, allocation_id, association_id, public_ip in resources.elastic_ips:
        assert access_key.user == user
        ips_by_key.setdefault(access_key, []).append(
            (allocation_id, association_id, public_ip))
    #Release IPs
    for access_key, ip_list in ips_by_key.items():
        for allocation_id, association_id, public_ip in ip_list:
            log.debug('Releasing IP address with allocation ID %s' % allocation_id)
            try:
                if public_ip:
                    ec2_tools.release_ip_address(access_key, None, None, public_ip)
                else:
                    ec2_tools.release_ip_address(access_key, allocation_id, association_id, None)
            except Exception as e:
                log.exception(e)
    #Terminate EC2 instances
    for access_key, id_list in instances_by_key.items():
        log.debug('Terminating %d instances for key %s' % (len(id_list), access_key.name))
        try:
            vpc_connection, ec2_connection = aws_tools.create_connections(access_key)
            ec2_connection.terminate_instances(id_list)
        except Exception as e:
            log.exception(e)
def terminate_pool(ec2_pool):
    """Begin termination of an EC2 pool.

    Refreshes the pool state first, accumulating any exceptions into the
    errors list rather than aborting.

    Fix: converted Python-2-only ``except Exception, e`` to ``as e`` for
    consistency with the rest of the file.
    """
    assert isinstance(ec2_pool, EC2Pool)
    log.debug('Terminating condor pool %s (user %s)' % (ec2_pool.name, ec2_pool.vpc.access_key.user.username))
    #Keep a track of the following errors
    errors = []
    #Create an ec2_connection object
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    assert isinstance(ec2_connection, EC2Connection)
    #First, refresh the status of the pool
    try:
        refresh_pool(ec2_pool)
    except Exception as e:
        log.exception(e)
        errors.append(e)
def terminate_instances(instances):
    """Terminate the selected instances. Will also involve terminating any
    associated alarms and spot requests

    instances: iterable EC2Instances, list or queryset

    Fix: converted Python-2-only ``except Exception, e`` to ``as e`` for
    consistency with the fuller duplicate of this function in the file.
    """
    vpc_connection, ec2_connection = aws_tools.create_connections(
        instances[0].ec2_pool.vpc.access_key)
    #Terminate any spot requests first
    spot_requests_to_terminate = SpotRequest.objects.filter(ec2_instance__in=instances)
    spot_request_ids = [request.request_id for request in spot_requests_to_terminate]
    try:
        if spot_request_ids != []:
            log.debug('Cancelling %d spot requests' % len(spot_request_ids))
            ec2_connection.cancel_spot_instance_requests(request_ids=spot_request_ids)
            for spot_request in spot_requests_to_terminate:
                spot_request.delete()
    except Exception as e:
        log.exception(e)
def create_key_pair(pool):
    """Create a keypair and store it in the users storage directory

    Creates the keypair on AWS, writes the .pem file into
    settings.KEYPAIR_FILEPATH, saves an EC2KeyPair record, and returns it.
    """
    assert isinstance(pool, models.EC2Pool)
    vpc_connection, ec2_connection = aws_tools.create_connections(pool.vpc.access_key)
    keypair_name = 'keypair_%s' % pool.uuid
    aws_keypair = ec2_connection.create_key_pair(keypair_name)
    #The directory where we store the ssh keypairs. Must be writable.
    storage_dir = settings.KEYPAIR_FILEPATH
    pem_path = os.path.join(storage_dir, keypair_name + '.pem')
    aws_keypair.save(storage_dir)
    key_pair = EC2KeyPair(name=keypair_name, path=pem_path)
    key_pair.save()
    return key_pair
def assign_ip_address(ec2_instance):
    """Assign a public IP address to the ec2 instance

    Fixes: ``ec2_instance.condor_pool.vpc.acess_key`` was a double typo —
    the model path used two statements earlier (and everywhere else in the
    file) is ``ec2_instance.ec2_pool.vpc.access_key``, so the old code
    raised AttributeError at runtime. Also converted Python-2-only
    ``except Exception, e`` syntax to ``as e``.
    """
    #Check to see if there are any unassigned IP addresses:
    vpc_connection, ec2_connection = aws_tools.create_connections(
        ec2_instance.ec2_pool.vpc.access_key)
    sleep(2)
    assert isinstance(ec2_instance, EC2Instance)
    ips = ElasticIP.objects.filter(vpc=ec2_instance.ec2_pool.vpc).filter(instance=None)
    sleep_time = 5
    allocate_new = False
    if ips.count() > 0:
        #Use the first IP address
        log.debug('Using existing IP address')
        elastic_ip = ips[0]
        try:
            #Bug fix: was ec2_instance.condor_pool.vpc.acess_key
            release_ip_address(ec2_instance.ec2_pool.vpc.access_key,
                               allocation_id=elastic_ip.allocation_id,
                               association_id=elastic_ip.association_id)
        except Exception as e:
            log.exception(e)
            #Could not recycle the existing address; fall back to allocating
            allocate_new = True
def get(self, request, *args, **kwargs):
    """Serve spot price information (latest price or full history) as JSON."""
    assert isinstance(request, HttpRequest)
    instance_type = request.GET.get('instance_type')
    key_id = request.GET.get('key_id')
    want_full_history = request.GET.get('history', False) == 'true'
    if key_id == 'NULL':
        #No explicit key: use the first key flagged for spot price history
        spotprice_keys = AWSAccessKey.objects.filter(use_for_spotprice_history=True)
        key = spotprice_keys[0]
    else:
        key = AWSAccessKey.objects.get(id=key_id)
    vpc_connection, ec2_connection = aws_tools.create_connections(key)
    if not want_full_history:
        aws_time_format = '%Y-%m-%dT%H:%M:%SZ'
        end = datetime.datetime.utcnow()
        start = end - datetime.timedelta(seconds=600)  #10 minute window
        recent = ec2_connection.get_spot_price_history(
            start_time=start.strftime(aws_time_format),
            end_time=end.strftime(aws_time_format),
            instance_type=instance_type)
        #Most recent price point only
        payload = {'price': recent[0].price}
    else:
        #Full history: no start/end times specified
        full = ec2_connection.get_spot_price_history(instance_type=instance_type)
        payload = {'price': [(point.timestamp, point.price) for point in full]}
    return HttpResponse(json.dumps(payload), content_type="application/json", status=200)
def get_remote_resources(user, key=None):
    """Return an overview of all the aws resources in use

    Walks every access key owned by user (or just the given key) and
    records pending/running EC2 instances and all elastic IPs into a
    ResourceOverview, which is returned.
    """
    target_keys = [key] if key else AWSAccessKey.objects.filter(user=user)
    overview = ResourceOverview()
    for access_key in target_keys:
        try:
            vpc_connection, ec2_connection = aws_tools.create_connections(access_key)
            #Record every pending/running EC2 instance
            for reservation in ec2_connection.get_all_instances():
                for instance in reservation.instances:
                    if instance.state in ('pending', 'running'):
                        overview.add_ec2_instance(access_key, instance.id)
        except Exception as e:
            log.exception(e)
        try:
            #Record elastic IPs; classic addresses carry no allocation id
            for address in ec2_connection.get_all_addresses():
                if address.allocation_id == None:
                    overview.add_elastic_ip(access_key, None, None, address.public_ip)
                else:
                    overview.add_elastic_ip(access_key, address.allocation_id,
                                            address.association_id, None)
        except Exception as e:
            log.exception(e)
    return overview
def terminate_instances(instances):
    """Terminate the selected instances. Will also involve terminating any
    associated alarms and spot requests

    instances: iterable EC2Instances, list or queryset
    """
    vpc_connection, ec2_connection = aws_tools.create_connections(
        instances[0].ec2_pool.vpc.access_key)
    #Terminate any spot requests first
    pending_requests = SpotRequest.objects.filter(ec2_instance__in=instances)
    request_ids = [req.request_id for req in pending_requests]
    try:
        if request_ids:
            log.debug('Cancelling %d spot requests' % len(request_ids))
            ec2_connection.cancel_spot_instance_requests(request_ids=request_ids)
            for req in pending_requests:
                req.delete()
    except Exception as e:
        log.exception(e)
    #Remove any termination alarms attached to the instances
    log.debug('Deleting termination alarms')
    for ec2_instance in instances:
        try:
            cloudwatch_connection = aws_tools.create_cloudwatch_connection(
                ec2_instance.ec2_pool.vpc.access_key)
            if ec2_instance.termination_alarm:
                cloudwatch_connection.delete_alarms([ec2_instance.termination_alarm])
        except Exception as e:
            log.exception(e)
    #Finally terminate the instances themselves
    ids_to_terminate = [ec2_instance.instance_id for ec2_instance in instances]
    log.debug('Terminating instances')
    ec2_connection.terminate_instances(ids_to_terminate)
def release_ip_address(key, allocation_id, association_id=None, public_ip=None):
    """Dissociate and release the IP address with the allocation id and
    optional association id. Alternatively just use public ip.

    Both steps are best-effort: failures are logged and do not propagate.
    """
    vpc_connection, ec2_connection = aws_tools.create_connections(key)
    try:
        if association_id:
            log.debug('Disassociating IP')
            ec2_connection.disassociate_address(association_id=association_id)
        if public_ip:
            log.debug('Disassociating IP')
            ec2_connection.disassociate_address(public_ip=public_ip)
    except Exception as exc:
        log.exception(exc)
    try:
        log.debug('Releasing IP')
        #VPC addresses are released by allocation id, classic ones by public IP
        release_kwargs = ({'allocation_id': allocation_id} if allocation_id
                          else {'public_ip': public_ip})
        ec2_connection.release_address(**release_kwargs)
    except Exception as exc:
        log.exception(exc)
def dispatch(self, request, *args, **kwargs):
    """Confirm and perform deletion of an AWS access key.

    Populates the template context (via kwargs) with the key, the pools
    that would be terminated, and — for original (non-copied) keys — the
    users the key is shared with. When kwargs['confirmed'] is truthy the
    pools are terminated, the VPC deleted (originals only) and the key
    removed, then the user is redirected to the key list.

    NOTE(review): 'confirmed' is assumed to be injected into kwargs by a
    parent dispatch/mixin before this runs — confirm where it is set.
    """
    key_id = self.kwargs['key_id']
    key = AWSAccessKey.objects.get(id=key_id)
    kwargs['key'] = key
    #Only the key's owner may delete it
    assert key.user == request.user
    kwargs['show_loading_screen'] = True
    kwargs['loading_title'] = 'Removing key and associated VPC'
    kwargs['loading_description'] = 'Please be patient and do not navigate away from this page.'
    #Is this an original key or is it a copy?
    if key.copy_of == None:
        original = True
    else:
        original = False
    if original:
        #Build a list of any pools and running jobs that will be terminated
        #when this pool is terminated
        try:
            pools = EC2Pool.objects.filter(vpc__vpc_id = key.vpc.vpc_id)
        except:
            #e.g. the key has no VPC yet
            pools = []
        shared_keys = AWSAccessKey.objects.filter(copy_of=key)
        shared_user_ids = [shared_key.user.id for shared_key in shared_keys]
        kwargs['shared_users'] = User.objects.filter(id__in=shared_user_ids)
    else:
        #A copy of a key: we will not be deleting the real VPC, only pools
        #that reference this copy's VPC record
        try:
            pools = EC2Pool.objects.filter(vpc__id=key.vpc.id)
        except:
            pools = []
    kwargs['pools'] = pools
    errors=[]
    if kwargs['confirmed']:
        #Go through and terminate each of the running pools
        for pool in pools:
            #First, remove any running tasks
            running_tasks = pool.get_running_tasks()
            for task in running_tasks:
                for subtask in task.subtask_set.all():
                    condor_tools.remove_task(subtask)
                task.delete()
            other_tasks = Task.objects.filter(condor_pool=pool).exclude(pk__in=running_tasks)
            #Then 'prune' the remaining tasks to remove the pool as a foreignkey,
            #keeping the pool name as a custom field for display purposes
            for task in other_tasks:
                task.condor_pool = None
                task.set_custom_field('condor_pool_name', pool.name)
                task.save()
            ec2_tools.terminate_pool(pool)
        if original:
            #We also need to delete the vpc (and any associated key copies)
            try:
                related = AWSAccessKey.objects.filter(copy_of=key)
                for related_key in related:
                    related_key.delete()
                if key.vpc != None:
                    vpc_connection, ec2_connection = aws_tools.create_connections(key)
                    errors += (vpc_tools.delete_vpc(key.vpc, vpc_connection, ec2_connection))
                if errors != []:
                    log.exception(errors)
                    request.session['errors'] = aws_tools.process_errors(errors)
            except Exception as e:
                log.exception(e)
            #And delete the key
            key.delete()
        else:
            #Just delete the key object and the vpc
            key.delete()
        return HttpResponseRedirect(reverse_lazy('my_account_keys'))
    #Not confirmed yet: fall through to the normal view rendering
    return super(KeysDeleteView, self).dispatch(request, *args, **kwargs)
def form_valid(self, *args, **kwargs):
    """Validate and store a new AWS access key, then launch its VPC.

    The key may arrive either as an uploaded credentials file or as
    manually entered access_key_id/secret_key fields — but not both.
    After saving, the key is verified with a test API call; on failure
    the key is deleted and the form re-rendered with errors. Finally a
    VPC is created for the key, with the same rollback-on-failure
    behaviour.
    """
    form=kwargs['form']
    #Create the key object and save it.
    #Was a file uploaded? If so, first check if access_key_id and secret_key are blank
    if self.request.FILES.get('access_key_file'):
        if form.cleaned_data['access_key_id'] != '' or form.cleaned_data['secret_key'] != '':
            form._errors[NON_FIELD_ERRORS] = 'Either upload a file or enter the key details manually'
            return self.form_invalid(self, *args, **kwargs)
        #Try and save the key file to a temp file
        temp_file_descriptor, temp_filename = tempfile.mkstemp()
        form_tools.handle_uploaded_file(self.request.FILES['access_key_file'], temp_filename)
        #AWS credential files contain AWSAccessKeyId=... (20 chars) and
        #AWSSecretKey=... (40 chars) lines
        access_key_re = re.compile(r'AWSAccessKeyId\=(?P<access_key>.{20})\n*')
        secret_key_re = re.compile(r'AWSSecretKey\=(?P<secret_key>.{40})\n*')
        access_key=''
        secret_key=''
        temp_file = open(temp_filename, 'r')
        total_read_lines = 5 #How many lines to read before giving up
        line_count = 0
        for line in temp_file.readlines():
            if access_key_re.match(line):
                access_key = access_key_re.match(line).group('access_key')
            elif secret_key_re.match(line):
                secret_key = secret_key_re.match(line).group('secret_key')
            if line_count < total_read_lines:
                line_count +=1 #Count the number of lines. Should be in the first 2 lines anyway, so this gives a bit of leeway
            else:
                break
        temp_file.close()
        os.remove(temp_filename)
        if not (access_key and secret_key):
            form._errors[NON_FIELD_ERRORS] = 'The uploaded access key could not be read'
            return self.form_invalid(self, *args, **kwargs)
        key = AWSAccessKey(access_key_id = access_key, secret_key=secret_key)
    else:
        #Manual entry: both fields must be present
        if form.cleaned_data['access_key_id'] == '' or form.cleaned_data['secret_key'] == '':
            form._errors[NON_FIELD_ERRORS] = 'Either upload a file or enter the key details manually'
            return self.form_invalid(self, *args, **kwargs)
        else:
            key = AWSAccessKey()
            key.access_key_id = form.cleaned_data['access_key_id']
            key.secret_key = form.cleaned_data['secret_key']
    key.user = self.request.user
    key.name = form.cleaned_data['name']
    key.save()
    try:
        #Authenticate the keypair
        vpc_connection, ec2_connection = aws_tools.create_connections(key)
        #Run a test API call
        ec2_connection.get_all_regions()
    except Exception as e:
        #Since we are about to return form_invalid, add the errors directly
        #to the form non field error list
        #kwargs['errors']=aws_tools.process_errors([e])
        key.delete()
        #NOTE(review): assumes e has an .errors list of (code, message)
        #pairs (boto EC2ResponseError shape) — a plain Exception here
        #would raise AttributeError; confirm.
        error_list = [x[1] for x in e.errors]
        form._errors[NON_FIELD_ERRORS] = ErrorList(error_list)
        return self.form_invalid(self, *args, **kwargs)
    #And launch the VPC
    try:
        vpc = vpc_tools.create_vpc(key, vpc_connection, ec2_connection)
    except Exception as e:
        log.exception(e)
        #Roll back whatever was created before re-rendering the form
        try:
            vpc.delete()
        except:
            pass
        try:
            key.delete()
        except:
            pass
        form._errors[NON_FIELD_ERRORS] = 'Error launching VPC for key'
        return self.form_invalid(self, *args, **kwargs)
    return super(KeysAddView, self).form_valid(*args, **kwargs)
def get_ec2_connection(self):
    """Open and return an EC2 connection for this object's pool access key."""
    connections = aws_tools.create_connections(self.ec2_pool.vpc.access_key)
    #create_connections returns (vpc_connection, ec2_connection);
    #only the EC2 half is needed here
    return connections[1]
#NOTE(review): the two loops below appear to be a detached duplicate of the
#tail of terminate_resources() — they reference elastic_ips/ec2_instances,
#which are not defined at this level. Confirm against version control.
#Fix applied: Python-2-only "except Exception, e" converted to "as e".
for key in elastic_ips:
    for allocation_id, association_id, public_ip in elastic_ips[key]:
        log.debug('Releasing IP address with allocation ID %s' % allocation_id)
        try:
            if public_ip:
                ec2_tools.release_ip_address(key, None, None, public_ip)
            else:
                ec2_tools.release_ip_address(key, allocation_id, association_id, None)
        except Exception as e:
            log.exception(e)

#Terminate EC2 instances
for key in ec2_instances:
    log.debug('Terminating %d instances for key %s' % (len(ec2_instances[key]), key.name))
    try:
        vpc_connection, ec2_connection = aws_tools.create_connections(key)
        ec2_connection.terminate_instances(ec2_instances[key])
    except Exception as e:
        log.exception(e)


def health_check(user, key=None):
    """Perform a health check on all AWS resources

    Returns 'healthy' when every EC2 pool owned by user reports healthy,
    otherwise the first non-healthy status encountered.
    NOTE(review): the key parameter is currently unused here — confirm
    whether per-key filtering was intended.
    """
    ec2_pools = EC2Pool.objects.filter(vpc__access_key__user=user)
    for ec2_pool in ec2_pools:
        health = ec2_pool.get_health()
        if health != 'healthy':
            return health
    return 'healthy'
def launch_pool(ec2_pool):
    """ Launch a EC2 pool with the definitions provided by the ec2_pool object

    Sequence: launch the master node, wait for its private IP, launch the
    workers (fixed price or spot), create the pool's SQS queue and SNS
    alarm topic, assign an elastic IP to the master, and finally verify
    SSH access. Returns a list of (title, message)/exception error pairs
    collected along the way; raises only if the elastic IP assignment
    fails.
    """
    log.debug('Launcing EC2 pool')
    assert isinstance(ec2_pool, EC2Pool)
    errors = []
    #Initiate the connection
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)
    #Launch the master instance
    #Add the pool details to the launch string
    master_launch_string = ec2_config.MASTER_LAUNCH_STRING
    #And launch
    log.debug('Launching Master node')
    master_reservation = ec2_connection.run_instances(ami.id,
                                                      key_name=ec2_pool.key_pair.name,
                                                      instance_type=settings.MASTER_NODE_TYPE,
                                                      subnet_id=ec2_pool.vpc.subnet_id,
                                                      security_group_ids=[ec2_pool.vpc.master_group_id],
                                                      user_data=master_launch_string,
                                                      min_count=1,#Only 1 instance needed
                                                      max_count=1,
                                                      )
    #    sleep(2)
    ec2_instances = []
    master_instance = master_reservation.instances[0]
    #Record the master node locally
    master_ec2_instance = EC2Instance()
    master_ec2_instance.ec2_pool = ec2_pool
    master_ec2_instance.instance_id = master_instance.id
    master_ec2_instance.instance_type = settings.MASTER_NODE_TYPE
    master_ec2_instance.instance_role = 'master'
    master_ec2_instance.save()
    ec2_instances.append(master_ec2_instance)
    ec2_pool.master = master_ec2_instance
    ec2_pool.last_update_time = now()
    ec2_pool.save()
    #wait until the master has a private ip address, polling with a
    #sleep in between (up to max_retrys attempts)
    log.debug('Waiting for private IP to be assigned to master node')
    sleep_time=5
    max_retrys=20
    current_try=0
    while master_ec2_instance.get_private_ip() == None and current_try<max_retrys:
        sleep(sleep_time)
        current_try+=1
    sleep(2)
    if ec2_pool.size > 0:
        log.debug('Launching worker nodes')
        #Are we launcing fixed price or spot instances?
        try:
            if not ec2_pool.spot_request:
                #Fix price launch. This is easy.
                worker_reservation = ec2_connection.run_instances(ami.id,
                                                                  key_name=ec2_pool.key_pair.name,
                                                                  instance_type=ec2_pool.initial_instance_type,
                                                                  subnet_id=ec2_pool.vpc.subnet_id,
                                                                  security_group_ids=[ec2_pool.vpc.worker_group_id],
                                                                  user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                                                                  min_count=ec2_pool.size,
                                                                  max_count=ec2_pool.size,
                                                                  )
                sleep(3)
                instances = worker_reservation.instances
                #Record each worker locally
                for instance in instances:
                    ec2_instance = EC2Instance()
                    ec2_instance.ec2_pool = ec2_pool
                    ec2_instance.instance_id = instance.id
                    ec2_instance.instance_type = ec2_pool.initial_instance_type
                    ec2_instance.instance_role = 'worker'
                    ec2_instance.save()
                    ec2_instances.append(ec2_instance)
            else:
                #We're launching a spot request pool instead.
                worker_requests = ec2_connection.request_spot_instances(str(ec2_pool.spot_price), ami.id,
                                                                        type='persistent',
                                                                        count=ec2_pool.size,
                                                                        key_name=ec2_pool.key_pair.name,
                                                                        instance_type=ec2_pool.initial_instance_type,
                                                                        subnet_id=ec2_pool.vpc.subnet_id,
                                                                        security_group_ids=[ec2_pool.vpc.worker_group_id],
                                                                        user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                                                                        )
                #Track each spot request locally; instances appear later
                #when the requests are fulfilled
                for request in worker_requests:
                    spot_request = SpotRequest(ec2_pool=ec2_pool,
                                               request_id=request.id,
                                               price=request.price,
                                               status_code=request.status.code,
                                               status_message=request.status.message,
                                               state=request.state,
                                               instance_type=ec2_pool.initial_instance_type,
                                               )
                    spot_request.save()
        except EC2ResponseError as e:
            #Worker launch failed but the master is up: report, don't raise
            errors.append(('Error launching worker instances', 'An error occured when launching the worker instances, however a master instance was launched successfully. Check your AWS usage limit to ensure you are not trying to exceed it. You should either try again to scale the pool up, or terminate it.'))
            errors.append(e)
    #Create an sqs queue
    log.debug('Creating SQS for pool')
    sqs_connection = aws_tools.create_sqs_connection(ec2_pool.vpc.access_key)
    queue = sqs_connection.get_queue(ec2_pool.get_queue_name())
    #Recreate the queue from scratch if one already exists
    if queue != None:
        sqs_connection.delete_queue(queue)
    sqs_connection.create_queue(ec2_pool.get_queue_name())
    #Create an SNS topic for instance alarm notifications
    log.debug('Creating SNS topic for alarms')
    sns_connection = aws_tools.create_sns_connection(ec2_pool.vpc.access_key)
    topic_data = sns_connection.create_topic(ec2_pool.get_alarm_notify_topic())
    topic_arn = topic_data['CreateTopicResponse']['CreateTopicResult']['TopicArn']
    log.debug('SNS topic created with arn %s' %topic_arn)
    ec2_pool.alarm_notify_topic_arn = topic_arn
    #And create a subscription to the api_terminate_instance_alarm endpoint
    termination_notify_url = 'http://' + settings.HOST + str(reverse_lazy('api_terminate_instance_alarm'))
    try:
        sns_connection.subscribe(topic_arn, 'http', termination_notify_url)
    except BotoServerError as e:
        #Subscription failed: disable smart termination and tidy the topic
        errors.append(('Error enabling smart termination', 'Smart termination was not successfully enabled'))
        try:
            ec2_pool.smart_terminate = False
            sns_connection.delete_topic(topic_arn)
        except:
            pass
    #Apply an alarm to each of the ec2 instances to notify that they should
    #be shutdown should they be unused
    ##Note, this is now performed when the master node sends a notification
    ##back to the server through the API
    #Assign an elastic IP to the master instance
    log.debug('Assigning elastic IP to master node')
    try:
        elastic_ip = assign_ip_address(master_ec2_instance)
        log.debug('Assigned elastic IP address to instance %s' % master_ec2_instance.instance_id)
    except Exception as e:
        #Without a public address the pool is unusable, so propagate
        log.error('Error assigning elastic ip to master instance %s' % master_ec2_instance.instance_id)
        log.exception(e)
        raise e
    ec2_pool.address = 'ubuntu@' + str(elastic_ip.public_ip)
    #Check to see if we can ssh in. Try a couple of times
    tries = 15
    for i in range(tries):
        log.debug('Testing SSH credentials')
        command = ['ssh', '-o', 'StrictHostKeyChecking=no', '-i', ec2_pool.key_pair.path, ec2_pool.address, 'pwd']
        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env={'DISPLAY' : ''})
        output = process.communicate()
        log.debug('SSH response:')
        log.debug(output)
        if process.returncode == 0:
            log.debug('SSH success')
            break
        sleep(5)
    return errors
def refresh_pool(ec2_pool):
    """Refresh the state of each instance in a ec2 pool

    For copied pools, the refresh is performed against the original pool
    and the copy's timestamp is synchronised afterwards. Skips the refresh
    entirely if the pool was updated within the last 3 seconds. Updates
    local SpotRequest and EC2Instance records from the live AWS state,
    falling back to one-by-one lookups when a batched AWS call fails
    because an id no longer exists.
    """
    log.debug('Refreshing pool %s status' % ec2_pool.name)
    #If this pool is a copy, refresh the original instead
    if ec2_pool.copy_of:
        copied_pool = ec2_pool
        ec2_pool = EC2Pool.objects.get(id=ec2_pool.copy_of.id)
    else:
        copied_pool = None
    #If this pool is not an original, then don't refresh.
    log.debug('refreshing status of pool %s' % ec2_pool.name)
    difference = utcnow() - ec2_pool.last_update_time.replace(tzinfo=utc)
    log.debug('Time difference %s' % str(difference))
    if difference < datetime.timedelta(seconds=3):
        log.debug('Pool recently refreshed. Not updating')
        return
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    #Get a list of any spot requests associated with the pool (including
    #requests belonging to copies of this pool)
    spot_requests = SpotRequest.objects.filter(ec2_pool=ec2_pool) | SpotRequest.objects.filter(ec2_pool__copy_of=ec2_pool)
    spot_request_ids = [request.request_id for request in spot_requests]
    try:
        if spot_request_ids != []:
            spot_request_list = ec2_connection.get_all_spot_instance_requests(request_ids=spot_request_ids)
        else:
            spot_request_list = []
    except EC2ResponseError:
        #Perhaps a particular spot request wasn't found? Go through the list
        #the slow way, one request at a time
        spot_request_list = []
        not_found_requests = []
        for spot_request_id in spot_request_ids:
            try:
                spot_instance_request = ec2_connection.get_all_spot_instance_requests(request_ids=[spot_request_id])
                spot_request_list.append(spot_instance_request)
            except:
                log.debug('Spot request %s not found, not updating status' %spot_request_id)
                not_found_requests.append(spot_request_id)
        #Don't do anything with spot requests that weren't found for now
    #Sync each local SpotRequest record with the AWS-side state
    for request in spot_request_list:
        try:
            spot_request = SpotRequest.objects.get(request_id=request.id)
            spot_request.status_code = request.status.code
            spot_request.status_message = request.status.message
            spot_request.state = request.state
            if request.instance_id != None:
                try:
                    ec2_instance = EC2Instance.objects.get(instance_id=request.instance_id)
                except:
                    #The request was fulfilled but we have no local record
                    #yet: create a placeholder with unknown statuses
                    ec2_instance = EC2Instance(ec2_pool=ec2_pool,
                                               instance_type=spot_request.instance_type,
                                               instance_role='worker',
                                               instance_id=request.instance_id,
                                               state='unknown',
                                               instance_status='unknown',
                                               system_status='unknown',
                                               )
                    ec2_instance.save()
                spot_request.ec2_instance = ec2_instance
            else:
                spot_request.ec2_instance = None
            spot_request.save()
        except Exception as e:
            log.exception(e)
    #Now refresh the non-terminated instances of this pool (and its copies)
    instances = EC2Instance.objects.filter(ec2_pool=ec2_pool) | EC2Instance.objects.filter(ec2_pool__copy_of=ec2_pool)
    instances = instances.exclude(state='terminated')
    instance_ids = [instance.instance_id for instance in instances]
    try:
        instance_status_list = ec2_connection.get_all_instance_status(instance_ids)
    except EC2ResponseError:
        #Perhaps an instance wasn't found? If so we'll have to go through
        #the list the slow way
        instance_status_list = []
        not_found_instances = []
        for instance_id in instance_ids:
            try:
                instance_status = ec2_connection.get_all_instance_status([instance_id])[0]
                instance_status_list.append(instance_status)
            except:
                log.debug('Instance %s not found, presuming terminated' % instance_id)
                not_found_instances.append(instance_id)
        #Instances AWS no longer knows about are marked terminated locally
        for instance_id in not_found_instances:
            ec2_instance = EC2Instance.objects.get(instance_id=instance_id)
            ec2_instance.state='terminated'
            ec2_instance.instance_status = 'terminated'
            ec2_instance.system_status = 'terminated'
            ec2_instance.state_transition_reason = 'Unknown'
            ec2_instance.save()
    for status in instance_status_list:
        #assert isinstance(status, )
        log.debug('Refreshing instance %s' % status.id)
        try:
            id=status.id
            ec2_instance = instances.get(instance_id=id)
            if ec2_instance.state!=status.state_name:
                #State changed: record the new state and its AWS-side reason
                ec2_instance.state=status.state_name
                ec2_instance.save()
                instance=ec2_instance.get_instance()
                ec2_instance.state_transition_reason=instance.state_reason
            ec2_instance.instance_status = status.instance_status.status
            ec2_instance.system_status = status.system_status.status
            ec2_instance.save()
        except Exception as e:
            log.exception(e)
    #Add instance termination alarms. Because instance metrics don't appear
    #instantly, we have to do this now, as opposed to when the pool was
    #first launched. If instance alarms have already been added, this step
    #will be quickly skipped
    if ec2_pool.smart_terminate:
        add_instances_alarms(ec2_pool)
    ec2_pool.last_update_time = now()
    ec2_pool.save()
    #Did we just update the status of a copied pool? Sync its timestamp too
    if copied_pool:
        copied_pool.last_update_time = ec2_pool.last_update_time
        copied_pool.save()
def scale_up(ec2_pool, extra_nodes, instance_type, spot, spot_bid_price):
    """Add extra worker nodes to an existing EC2 pool.

    Launches extra_nodes workers of instance_type, either at fixed price
    (spot falsy) or as persistent spot requests at spot_bid_price.
    Returns a list of errors (empty on success); an EC2ResponseError is
    captured into the list rather than raised.
    """
    log.debug('Scaling condor pool %s with %d extra nodes'%(ec2_pool.id, extra_nodes))
    errors = []
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)
    try:
        if not spot:
            #Fix price launch. This is easy.
            log.debug('Launching fixed price instances')
            worker_reservation = ec2_connection.run_instances(ami.id,
                                                              key_name=ec2_pool.key_pair.name,
                                                              instance_type=instance_type,
                                                              subnet_id=ec2_pool.vpc.subnet_id,
                                                              security_group_ids=[ec2_pool.vpc.worker_group_id],
                                                              user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                                                              min_count=extra_nodes,
                                                              max_count=extra_nodes,
                                                              )
            sleep(3)
            instances = worker_reservation.instances
            #Record each new worker locally
            #NOTE(review): instance_type is stored as
            #ec2_pool.initial_instance_type rather than the requested
            #instance_type — confirm whether this is intentional
            for instance in instances:
                ec2_instance = EC2Instance()
                ec2_instance.ec2_pool = ec2_pool
                ec2_instance.instance_id = instance.id
                ec2_instance.instance_type = ec2_pool.initial_instance_type
                ec2_instance.instance_role = 'worker'
                ec2_instance.save()
        else:
            #We're launching a spot request pool instead.
            log.debug('Launching spot requests')
            worker_requests = ec2_connection.request_spot_instances(str(spot_bid_price), ami.id,
                                                                    type='persistent',
                                                                    count=extra_nodes,
                                                                    key_name=ec2_pool.key_pair.name,
                                                                    instance_type=instance_type,
                                                                    subnet_id=ec2_pool.vpc.subnet_id,
                                                                    security_group_ids=[ec2_pool.vpc.worker_group_id],
                                                                    user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                                                                    )
            #Track each spot request locally; instances appear when fulfilled
            for request in worker_requests:
                spot_request = SpotRequest(ec2_pool=ec2_pool,
                                           request_id=request.id,
                                           price=request.price,
                                           status_code=request.status.code,
                                           status_message=request.status.message,
                                           state=request.state,
                                           instance_type=ec2_pool.initial_instance_type,
                                           )
                spot_request.save()
    except EC2ResponseError as e:
        #Report the failure to the caller instead of raising
        errors.append(('Error launching worker instances', 'An error occured when launching the worker instances, however a master instance was launched successfully. Check your AWS usage limit to ensure you are not trying to exceed it. You should either try again to scale the pool up, or terminate it.'))
        errors.append(e)
    return errors
def scale_down(ec2_pool, nodes_to_terminate, instance_type, pricing, spot_price_order, spot_price_custom):
    """Remove up to `nodes_to_terminate` worker nodes from the pool.

    instance_type -- restrict removal to this type, or None for any type.
    pricing -- 'fixed' or 'spot'; selects fixed-price instances or spot
        requests respectively.
    spot_price_order -- 'custom' | 'lowest' | 'highest'; which spot
        requests to cancel first (only relevant for spot pricing).
    spot_price_custom -- exact price to match when spot_price_order == 'custom'.

    Returns a list of accumulated errors ([] on success).
    """
    log.debug('Scaling pool %s down' % ec2_pool.name)
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    errors=[]
    #Filter down instances so that they match the query. Never touch the master.
    instances = EC2Instance.objects.filter(ec2_pool=ec2_pool).exclude(instance_role='master')
    spot_requests = SpotRequest.objects.filter(ec2_pool=ec2_pool)
    if instance_type != None:
        instances = instances.filter(instance_type=instance_type)
        spot_requests = spot_requests.filter(instance_type=instance_type)
    if pricing == 'fixed':
        instances = instances.filter(spotrequest=None)
        spot_requests = spot_requests.none()
    else:
        instances = instances.exclude(spotrequest=None)
    if pricing == 'spot' and spot_price_order == 'custom':
        spot_requests = spot_requests.filter(price=spot_price_custom)
        instances = instances.filter(spotrequest__in=spot_requests)
    elif pricing == 'spot' and spot_price_order == 'lowest':
        spot_requests = spot_requests.order_by('price')
        instances = instances.order_by('spotrequest__price')
    elif pricing == 'spot' and spot_price_order == 'highest':
        spot_requests = spot_requests.order_by('-price')
        instances = instances.order_by('-spotrequest__price')
    #Cap both selections at nodes_to_terminate
    if nodes_to_terminate <= instances.count():
        instances = instances[0:nodes_to_terminate]
    if nodes_to_terminate <= spot_requests.count():
        spot_requests = spot_requests[0:nodes_to_terminate]
    if pricing == 'fixed':
        instances_to_terminate = [instance.instance_id for instance in instances]
    else:
        instances_to_terminate = []
        for spot_request in spot_requests:
            if spot_request.ec2_instance != None:
                instances_to_terminate.append(spot_request.ec2_instance.instance_id)
    #Cancel any spot requests, then terminate the instances
    try:
        spot_request_ids = [request.request_id for request in spot_requests]
        if spot_request_ids != []:
            log.debug('Cancelling %d spot requests'%len(spot_request_ids))
            ec2_connection.cancel_spot_instance_requests(request_ids=spot_request_ids)
            for spot_request in spot_requests:
                spot_request.delete()
        if instances_to_terminate != []:
            #NOTE(review): the guard tests the id list but the model objects in
            #`instances` are what get terminated; these are built from the same
            #filtered selection, but confirm they always line up for spot pools.
            terminate_instances(instances)
    except Exception as e:
        log.exception(e)
        errors.append(e)
    #Return the error list, consistent with scale_up/terminate_pool
    return errors
def scale_down(ec2_pool, nodes_to_terminate, instance_type, pricing, spot_price_order, spot_price_custom):
    """Remove up to `nodes_to_terminate` worker nodes from the pool.

    instance_type -- restrict removal to this type, or None for any type.
    pricing -- 'fixed' or 'spot'; selects fixed-price instances or spot
        requests respectively.
    spot_price_order -- 'custom' | 'lowest' | 'highest'; which spot
        requests to cancel first (only relevant for spot pricing).
    spot_price_custom -- exact price to match when spot_price_order == 'custom'.

    Returns a list of accumulated errors ([] on success).
    """
    log.debug('Scaling pool %s down' % ec2_pool.name)
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    errors=[]
    #Filter down instances so that they match the query. Never touch the master.
    instances = EC2Instance.objects.filter(ec2_pool=ec2_pool).exclude(instance_role='master')
    spot_requests = SpotRequest.objects.filter(ec2_pool=ec2_pool)
    if instance_type != None:
        instances = instances.filter(instance_type=instance_type)
        spot_requests = spot_requests.filter(instance_type=instance_type)
    if pricing == 'fixed':
        instances = instances.filter(spotrequest=None)
        spot_requests = spot_requests.none()
    else:
        instances = instances.exclude(spotrequest=None)
    if pricing == 'spot' and spot_price_order == 'custom':
        spot_requests = spot_requests.filter(price=spot_price_custom)
        instances = instances.filter(spotrequest__in=spot_requests)
    elif pricing == 'spot' and spot_price_order == 'lowest':
        spot_requests = spot_requests.order_by('price')
        instances = instances.order_by('spotrequest__price')
    elif pricing == 'spot' and spot_price_order == 'highest':
        spot_requests = spot_requests.order_by('-price')
        instances = instances.order_by('-spotrequest__price')
    #Cap both selections at nodes_to_terminate
    if nodes_to_terminate <= instances.count():
        instances = instances[0:nodes_to_terminate]
    if nodes_to_terminate <= spot_requests.count():
        spot_requests = spot_requests[0:nodes_to_terminate]
    if pricing == 'fixed':
        instances_to_terminate = [instance.instance_id for instance in instances]
    else:
        instances_to_terminate = []
        for spot_request in spot_requests:
            if spot_request.ec2_instance != None:
                instances_to_terminate.append(spot_request.ec2_instance.instance_id)
    #Cancel any spot requests, then terminate the instances
    try:
        spot_request_ids = [request.request_id for request in spot_requests]
        if spot_request_ids != []:
            log.debug('Cancelling %d spot requests'%len(spot_request_ids))
            ec2_connection.cancel_spot_instance_requests(request_ids=spot_request_ids)
            for spot_request in spot_requests:
                spot_request.delete()
        if instances_to_terminate != []:
            #NOTE(review): the guard tests the id list but the model objects in
            #`instances` are what get terminated; these are built from the same
            #filtered selection, but confirm they always line up for spot pools.
            terminate_instances(instances)
    #Fixed: was Python-2-only `except Exception, e` syntax, a SyntaxError on py3
    except Exception as e:
        log.exception(e)
        errors.append(e)
    #Return the error list, consistent with scale_up/terminate_pool
    return errors
def scale_up(ec2_pool, extra_nodes, instance_type, spot, spot_bid_price):
    """Add extra worker nodes to an already-running EC2 pool.

    Launches `extra_nodes` workers of type `instance_type` as either
    fixed-price instances (spot == False) or persistent spot requests
    (spot == True, bidding `spot_bid_price`), and records each launch in
    the database.

    Returns a list of accumulated errors ([] on success).
    """
    log.debug('Scaling condor pool %s with %d extra nodes'%(ec2_pool.id, extra_nodes))
    errors = []
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)

    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)

    try:
        if not spot:
            #Fixed price launch. This is easy.
            log.debug('Launching fixed price instances')
            worker_reservation = ec2_connection.run_instances(ami.id,
                                                              key_name=ec2_pool.key_pair.name,
                                                              instance_type=instance_type,
                                                              subnet_id=ec2_pool.vpc.subnet_id,
                                                              security_group_ids=[ec2_pool.vpc.worker_group_id],
                                                              user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                                                              min_count=extra_nodes,
                                                              max_count=extra_nodes,
                                                              )
            sleep(3)
            instances = worker_reservation.instances
            for instance in instances:
                ec2_instance = EC2Instance()
                ec2_instance.ec2_pool = ec2_pool
                ec2_instance.instance_id = instance.id
                #Record the type that was actually launched. Previously this
                #stored ec2_pool.initial_instance_type, which is wrong when
                #scaling up with a different type and breaks scale_down's
                #instance_type filtering.
                ec2_instance.instance_type = instance_type
                ec2_instance.instance_role = 'worker'
                ec2_instance.save()
        else:
            #We're launching a spot request pool instead.
            log.debug('Launching spot requests')
            worker_requests = ec2_connection.request_spot_instances(str(spot_bid_price),
                                                                    ami.id,
                                                                    type='persistent',
                                                                    count=extra_nodes,
                                                                    key_name=ec2_pool.key_pair.name,
                                                                    instance_type=instance_type,
                                                                    subnet_id=ec2_pool.vpc.subnet_id,
                                                                    security_group_ids=[ec2_pool.vpc.worker_group_id],
                                                                    user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                                                                    )
            for request in worker_requests:
                spot_request = SpotRequest(ec2_pool=ec2_pool,
                                           request_id=request.id,
                                           price=request.price,
                                           status_code=request.status.code,
                                           status_message=request.status.message,
                                           state=request.state,
                                           #As above: store the requested type, not the pool's initial type
                                           instance_type=instance_type,
                                           )
                spot_request.save()
    #Fixed: was Python-2-only `except EC2ResponseError, e` syntax
    except EC2ResponseError as e:
        errors.append(('Error launching worker instances',
                       'An error occured when launching the worker instances, however a master instance was launched successfully. Check your AWS usage limit to ensure you are not trying to exceed it. You should either try again to scale the pool up, or terminate it.'))
        errors.append(e)
    #Return the error list, consistent with the other pool operations
    return errors
def terminate_pool(ec2_pool):
    """Tear down an entire EC2 pool and all its AWS resources.

    Best-effort teardown: each step is wrapped in its own try/except so a
    failure in one step does not prevent the later cleanup steps from
    running. Order matters: spot requests are cancelled before instances
    are terminated so AWS does not relaunch replacements.

    Returns the list of exceptions encountered ([] on success).
    """
    assert isinstance(ec2_pool, EC2Pool)
    log.debug('Terminating condor pool %s (user %s)' %(ec2_pool.name, ec2_pool.vpc.access_key.user.username))

    #Keep a track of the following errors
    errors=[]

    #Create an ec2_connection object
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    assert isinstance(ec2_connection, EC2Connection)

    #First, refresh the status of the pool so the instance/spot records are current
    try:
        refresh_pool(ec2_pool)
    except Exception as e:
        log.exception(e)
        errors.append(e)

    #Cancel any outstanding spot requests before terminating instances,
    #otherwise persistent requests would relaunch workers
    spot_requests = SpotRequest.objects.filter(ec2_pool=ec2_pool)
    spot_request_ids = [request.request_id for request in spot_requests]
    try:
        log.debug('Cancelling %d spot requests'%len(spot_request_ids))
        if spot_request_ids != []:
            ec2_connection.cancel_spot_instance_requests(request_ids=spot_request_ids)
        for spot_request in spot_requests:
            spot_request.delete()
    except Exception as e:
        log.exception(e)
        errors.append(e)

    #Only instances that are not already going away need terminating
    instances = EC2Instance.objects.filter(ec2_pool=ec2_pool)
    instances = instances.exclude(state='terminated').exclude(state='shutting-down')

    #Disassociate the elastic IP address of the master instance and release it
    try:
        release_ip_address_from_instance(ec2_pool.master)
    except Exception as e:
        log.exception(e)
        errors.append(e)

    try:
        terminate_instances(instances)
    except Exception as e:
        log.exception(e)
        errors.append(e)

    #Remove the AWS-side key pair, then the local key file
    key_pair = ec2_pool.key_pair
    try:
        ec2_connection.delete_key_pair(key_pair.name)
    except Exception as e:
        log.exception(e)
        errors.append(e)

    log.debug('Removing keypair file')
    try:
        os.remove(key_pair.path)
    except Exception as e:
        #Missing file is not fatal - log and carry on
        log.exception(e)
        pass

    #Delete the pool's SQS queue (best effort, errors only logged)
    try:
        log.debug('Deleting SQS queue for pool')
        sqs_connection = aws_tools.create_sqs_connection(ec2_pool.vpc.access_key)
        queue = sqs_connection.get_queue(ec2_pool.get_queue_name())
        if queue != None:
            sqs_connection.delete_queue(queue)
    except Exception as e:
        log.exception(e)

    #Delete the SNS alarm topic (best effort, errors only logged)
    try:
        log.debug('Deleting SQS topic')
        sns_connection = aws_tools.create_sns_connection(ec2_pool.vpc.access_key)
        sns_connection.delete_topic(ec2_pool.alarm_notify_topic_arn)
    except Exception as e:
        log.exception(e)

    #Finally remove the database records
    ec2_pool.delete()
    key_pair.delete()

    log.debug('Pool terminated')
    return errors
def assign_ip_address(ec2_instance):
    """Assign a public (elastic) IP address to the ec2 instance.

    Reuses an unassigned ElasticIP from the instance's VPC when one
    exists, otherwise allocates a new address. Waits for the instance to
    reach the 'running' state, associates the address, records the
    association id, and returns the saved ElasticIP model object.

    Raises the last association error if the address cannot be
    associated after several attempts.
    """
    assert isinstance(ec2_instance, EC2Instance)
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_instance.ec2_pool.vpc.access_key)
    sleep(2)
    #Check to see if there are any unassigned IP addresses:
    ips = ElasticIP.objects.filter(vpc=ec2_instance.ec2_pool.vpc).filter(instance=None)
    sleep_time=5
    allocate_new = False
    if ips.count() > 0:
        #Use the first IP address
        log.debug('Using existing IP address')
        elastic_ip=ips[0]
        try:
            #Bug fix: this previously read ec2_instance.condor_pool.vpc.acess_key
            #(two typos - no such attributes), which always raised AttributeError
            #and forced the exception path below.
            #NOTE(review): releasing an address we intend to reuse looks odd -
            #presumably release_ip_address clears a stale association; confirm.
            release_ip_address(ec2_instance.ec2_pool.vpc.access_key, allocation_id=elastic_ip.allocation_id, association_id=elastic_ip.association_id)
        except Exception as e:
            log.exception(e)
            allocate_new = True
    #Bug fix: this was `elif`, so setting allocate_new above could never
    #actually trigger a fresh allocation - the failed elastic_ip was reused.
    if ips.count() == 0 or allocate_new:
        #We need to allocate a new ip address first
        max_attempts=5
        attempt_count=0
        while attempt_count < max_attempts:
            try:
                log.debug('Allocating new IP address')
                address=ec2_connection.allocate_address('vpc')
                elastic_ip = ElasticIP()
                elastic_ip.allocation_id = address.allocation_id
                elastic_ip.public_ip = address.public_ip
                elastic_ip.vpc = ec2_instance.ec2_pool.vpc
                assert elastic_ip.allocation_id != None
                assert elastic_ip.allocation_id != ''
                break
            except Exception as e:
                #Something is wrong here with the elastic ip - undo and retry
                log.exception(e)
                attempt_count += 1
                try:
                    elastic_ip.delete()
                except:
                    pass
                try:
                    ec2_connection.release_address(allocation_id=address.allocation_id)
                except:
                    pass
                sleep(sleep_time)

    #Wait until the instance is in state running, then associate the ip address
    #Sleep sleep_time seconds between attempts, up to max_attempts times
    max_attempts=20
    attempt_count=0
    log.debug('Associating IP addresss with EC2 instance')
    while attempt_count < max_attempts:
        if ec2_instance.get_state() == 'running':
            log.debug('Instance running')
            sleep(sleep_time) #sleep again, just to be on the safe side
            break
        else:
            log.warning('Instance not running. Sleeping...')
            sleep(sleep_time)
            attempt_count +=1

    #Now try associating an elastic IP
    max_attempts=5
    attempt_count=0
    while attempt_count < max_attempts:
        try:
            assert ec2_connection.associate_address(instance_id=ec2_instance.instance_id, allocation_id=elastic_ip.allocation_id)
            sleep(sleep_time)
            log.debug('IP associated with instance')
            elastic_ip.instance=ec2_instance
            #Use an inelegant workaround to get the association id of the
            #address, since the api doesn't tell us this: reload the address object
            new_address = ec2_connection.get_all_addresses(allocation_ids=[elastic_ip.allocation_id])[0]
            elastic_ip.association_id=new_address.association_id
            elastic_ip.save()
            return elastic_ip
        except Exception as e:
            log.debug('Unable to associate IP address with instance')
            log.debug(e)
            attempt_count += 1
            if attempt_count == max_attempts:
                #Out of retries - propagate the last failure to the caller
                raise e
            sleep(sleep_time)
def form_valid(self, *args, **kwargs):
    """Create and verify a new AWS access key from the submitted form.

    The key may be supplied either as an uploaded credentials file
    (lines of the form AWSAccessKeyId=... / AWSSecretKey=...) or typed
    into the form fields - but not both. The key is saved, then verified
    with a test API call; on failure it is deleted again and the form is
    re-rendered with the AWS errors attached.
    """
    form=kwargs['form']
    #Create the key object and save it
    #Was a file uploaded? If so, first check if access_key_id and secret_key are blank
    if self.request.FILES.get('access_key_file'):
        if form.cleaned_data['access_key_id'] != '' or form.cleaned_data['secret_key'] != '':
            form._errors[NON_FIELD_ERRORS] = 'Either upload a file or enter the key details manually'
            return self.form_invalid(self, *args, **kwargs)
        #Try and save the key file to a temp file
        temp_file_descriptor, temp_filename = tempfile.mkstemp()
        #mkstemp leaves its file descriptor open; close it so we don't leak it
        os.close(temp_file_descriptor)
        form_tools.handle_uploaded_file(self.request.FILES['access_key_file'], temp_filename)

        access_key_re = re.compile(r'AWSAccessKeyId\=(?P<access_key>.{20})\n*')
        secret_key_re = re.compile(r'AWSSecretKey\=(?P<secret_key>.{40})\n*')
        access_key=''
        secret_key=''
        total_read_lines = 5 #How many lines to read before giving up
        line_count = 0
        with open(temp_filename, 'r') as temp_file:
            for line in temp_file.readlines():
                access_match = access_key_re.match(line)
                secret_match = secret_key_re.match(line)
                if access_match:
                    access_key = access_match.group('access_key')
                elif secret_match:
                    secret_key = secret_match.group('secret_key')
                if line_count < total_read_lines:
                    #Keys should be in the first 2 lines anyway, so this gives a bit of leeway
                    line_count +=1
                else:
                    break
        os.remove(temp_filename)

        if not (access_key and secret_key):
            form._errors[NON_FIELD_ERRORS] = 'The uploaded access key could not be read'
            return self.form_invalid(self, *args, **kwargs)
        key = AWSAccessKey(access_key_id = access_key, secret_key=secret_key)
    else:
        if form.cleaned_data['access_key_id'] == '' or form.cleaned_data['secret_key'] == '':
            form._errors[NON_FIELD_ERRORS] = 'Either upload a file or enter the key details manually'
            return self.form_invalid(self, *args, **kwargs)
        else:
            key = AWSAccessKey()
            key.access_key_id = form.cleaned_data['access_key_id']
            key.secret_key = form.cleaned_data['secret_key']

    key.user = self.request.user
    key.name = form.cleaned_data['name']
    key.save()

    try:
        #Authenticate the keypair by running a test API call
        vpc_connection, ec2_connection = aws_tools.create_connections(key)
        ec2_connection.get_all_regions()
    except Exception as e:
        #The key didn't work. Delete it again and surface the errors
        #directly on the form's non-field error list.
        key.delete()
        #Boto exceptions carry an `errors` list of (code, message) pairs;
        #fall back to str(e) for anything else so a non-boto failure is not
        #masked by an AttributeError here.
        error_list = [x[1] for x in getattr(e, 'errors', [])] or [str(e)]
        form._errors[NON_FIELD_ERRORS] = ErrorList(error_list)
        return self.form_invalid(self, *args, **kwargs)
    #NOTE(review): no success return here - the view falls through and returns
    #None; a FormView normally returns a redirect/super().form_valid(form).
    #Confirm against the full file whether the success path was truncated.
    #NOTE(review): self.form_invalid(self, ...) passes `self` twice on a bound
    #method; kept as-is since an overridden form_invalid may expect it.
def dispatch(self, request, *args, **kwargs):
    """Handle deletion of an AWS access key (and, if confirmed, its pools/VPC).

    Looks up the key, asserts ownership, and - when kwargs['confirmed'] is
    set - terminates every pool using the key, prunes their tasks, deletes
    the VPC for original keys (plus any shared copies), and finally deletes
    the key itself. Always redirects back to the account keys page.
    """
    key_id = self.kwargs['key_id']
    key = AWSAccessKey.objects.get(id=key_id)
    kwargs['key'] = key
    assert key.user == request.user
    kwargs['show_loading_screen'] = True
    kwargs['loading_title'] = 'Removing key and associated VPC'
    kwargs['loading_description'] = 'Please be patient and do not navigate away from this page.'
    #Is this an original key or is it a copy
    if key.copy_of == None:
        original = True
    else:
        original = False
    if original:
        #Build a list of any pools and running jobs that will be terminated when this pool is terminated
        try:
            pools = EC2Pool.objects.filter(vpc__vpc_id = key.vpc.vpc_id)
        except Exception:
            #e.g. key.vpc is None - treat as "no pools"
            pools = []
        shared_keys = AWSAccessKey.objects.filter(copy_of=key)
        shared_user_ids = [shared_key.user.id for shared_key in shared_keys]
        kwargs['shared_users'] = User.objects.filter(id__in=shared_user_ids)
    else:
        #A copy of a key. If so, we'll not be deleting the real vpc
        try:
            pools = EC2Pool.objects.filter(vpc__id=key.vpc.id)
        except Exception:
            pools = []
    kwargs['pools'] = pools
    errors=[]
    if kwargs['confirmed']:
        #Go through and terminate each of the running pools
        for pool in pools:
            #First, remove any running tasks
            running_tasks = pool.get_running_tasks()
            for task in running_tasks:
                for subtask in task.subtask_set.all():
                    condor_tools.remove_task(subtask)
                task.delete()
            other_tasks = Task.objects.filter(condor_pool=pool).exclude(pk__in=running_tasks)
            #Then 'prune' the remaining tasks to remove the pool as a foreignkey
            for task in other_tasks:
                task.condor_pool = None
                task.set_custom_field('condor_pool_name', pool.name)
                task.save()
            ec2_tools.terminate_pool(pool)
        if original:
            #We also need to delete the vpc (and any associated key copies)
            try:
                related = AWSAccessKey.objects.filter(copy_of=key)
                for related_key in related:
                    related_key.delete()
                if key.vpc != None:
                    vpc_connection, ec2_connection = aws_tools.create_connections(key)
                    errors += (vpc_tools.delete_vpc(key.vpc, vpc_connection, ec2_connection))
                    if errors != []:
                        log.exception(errors)
                        request.session['errors'] = aws_tools.process_errors(errors)
            #Fixed: was Python-2-only `except Exception, e` syntax
            except Exception as e:
                log.exception(e)
            #And delete the key
            key.delete()
        else:
            #Just delete the key object
            key.delete()
    return HttpResponseRedirect(reverse_lazy('my_account_keys'))
def launch_pool(ec2_pool):
    """Launch an EC2 pool with the definitions provided by the ec2_pool object.

    Launches the master node first, waits for it to get a private IP, then
    launches the worker nodes (fixed price or spot, depending on
    ec2_pool.spot_request). Launched instances/requests are recorded in the
    database as it goes. Errors from the worker launch are accumulated in
    `errors`.
    """
    log.debug('Launching EC2 pool')
    assert isinstance(ec2_pool, EC2Pool)
    errors = []
    #Initiate the connection
    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)
    log.debug('Retrieving machine image')
    ami = get_active_ami(ec2_connection)

    #Launch the master instance
    #Add the pool details to the launch string
    master_launch_string = ec2_config.MASTER_LAUNCH_STRING
    #And launch
    log.debug('Launching Master node')
    master_reservation = ec2_connection.run_instances(ami.id,
                                                      key_name=ec2_pool.key_pair.name,
                                                      instance_type=settings.MASTER_NODE_TYPE,
                                                      subnet_id=ec2_pool.vpc.subnet_id,
                                                      security_group_ids=[ec2_pool.vpc.master_group_id],
                                                      user_data=master_launch_string,
                                                      min_count=1,#Only 1 instance needed
                                                      max_count=1,
                                                      )
    ec2_instances = []

    master_instance = master_reservation.instances[0]
    master_ec2_instance = EC2Instance()
    master_ec2_instance.ec2_pool = ec2_pool
    master_ec2_instance.instance_id = master_instance.id
    master_ec2_instance.instance_type = settings.MASTER_NODE_TYPE
    master_ec2_instance.instance_role = 'master'
    master_ec2_instance.save()
    ec2_instances.append(master_ec2_instance)
    ec2_pool.master = master_ec2_instance
    ec2_pool.last_update_time = now()
    ec2_pool.save()

    #Wait until the master has a private ip address, sleeping in between polls
    log.debug('Waiting for private IP to be assigned to master node')
    sleep_time=5
    max_retrys=20
    current_try=0
    while master_ec2_instance.get_private_ip() == None and current_try<max_retrys:
        sleep(sleep_time)
        current_try+=1
    sleep(2)

    if ec2_pool.size > 0:
        log.debug('Launching worker nodes')
        #Are we launching fixed price or spot instances?
        try:
            if not ec2_pool.spot_request:
                #Fixed price launch. This is easy.
                worker_reservation = ec2_connection.run_instances(ami.id,
                                                                  key_name=ec2_pool.key_pair.name,
                                                                  instance_type=ec2_pool.initial_instance_type,
                                                                  subnet_id=ec2_pool.vpc.subnet_id,
                                                                  security_group_ids=[ec2_pool.vpc.worker_group_id],
                                                                  user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                                                                  min_count=ec2_pool.size,
                                                                  max_count=ec2_pool.size,
                                                                  )
                sleep(3)
                instances = worker_reservation.instances
                for instance in instances:
                    ec2_instance = EC2Instance()
                    ec2_instance.ec2_pool = ec2_pool
                    ec2_instance.instance_id = instance.id
                    ec2_instance.instance_type = ec2_pool.initial_instance_type
                    ec2_instance.instance_role = 'worker'
                    ec2_instance.save()
                    ec2_instances.append(ec2_instance)
            else:
                #We're launching a spot request pool instead.
                worker_requests = ec2_connection.request_spot_instances(str(ec2_pool.spot_price),
                                                                        ami.id,
                                                                        type='persistent',
                                                                        count=ec2_pool.size,
                                                                        key_name=ec2_pool.key_pair.name,
                                                                        instance_type=ec2_pool.initial_instance_type,
                                                                        subnet_id=ec2_pool.vpc.subnet_id,
                                                                        security_group_ids=[ec2_pool.vpc.worker_group_id],
                                                                        user_data=ec2_config.WORKER_LAUNCH_STRING % ec2_pool.master.get_private_ip(),
                                                                        )
                for request in worker_requests:
                    spot_request = SpotRequest(ec2_pool=ec2_pool,
                                               request_id=request.id,
                                               price=request.price,
                                               status_code=request.status.code,
                                               status_message=request.status.message,
                                               state=request.state,
                                               instance_type=ec2_pool.initial_instance_type,
                                               )
                    spot_request.save()
        #Fixed: was Python-2-only `except EC2ResponseError, e` syntax
        except EC2ResponseError as e:
            errors.append(('Error launching worker instances',
                           'An error occured when launching the worker instances, however a master instance was launched successfully. Check your AWS usage limit to ensure you are not trying to exceed it. You should either try again to scale the pool up, or terminate it.'))
            errors.append(e)
def get_ec2_connection(self):
    """Return an EC2 connection built from this pool's access key."""
    connections = aws_tools.create_connections(self.ec2_pool.vpc.access_key)
    #create_connections returns (vpc_connection, ec2_connection)
    return connections[1]
def refresh_pool(ec2_pool):
    """Refresh the state of each instance in a ec2 pool.

    For a copied pool, the refresh is performed against the original pool.
    Skips the refresh entirely if the pool was updated within the last 3
    seconds. Fetches the pool's spot requests from AWS and syncs their
    status/state (and any newly-fulfilled instances) into the database.
    """
    log.debug('Refreshing pool %s status' % ec2_pool.name)
    if ec2_pool.copy_of:
        copied_pool = ec2_pool
        ec2_pool = EC2Pool.objects.get(id=ec2_pool.copy_of.id)
    else:
        copied_pool = None
    #If this pool is not an original, then don't refresh.
    log.debug('refreshing status of pool %s' % ec2_pool.name)

    difference = utcnow() - ec2_pool.last_update_time.replace(tzinfo=utc)
    log.debug('Time difference %s' % str(difference))
    if difference < datetime.timedelta(seconds=3):
        log.debug('Pool recently refreshed. Not updating')
        return

    vpc_connection, ec2_connection = aws_tools.create_connections(ec2_pool.vpc.access_key)

    #Get a list of any spot requests associated with the pool
    spot_requests = SpotRequest.objects.filter(ec2_pool=ec2_pool) | SpotRequest.objects.filter(ec2_pool__copy_of=ec2_pool)
    spot_request_ids = [request.request_id for request in spot_requests]
    try:
        if spot_request_ids != []:
            spot_request_list = ec2_connection.get_all_spot_instance_requests(request_ids=spot_request_ids)
        else:
            spot_request_list = []
    except EC2ResponseError:
        #Perhaps a particular spot request wasn't found? Go through the list the slow way
        spot_request_list = []
        not_found_requests = []
        for spot_request_id in spot_request_ids:
            try:
                spot_instance_request = ec2_connection.get_all_spot_instance_requests(request_ids=[spot_request_id])
                #Bug fix: get_all_spot_instance_requests returns a *list*;
                #this previously used append(), nesting a list inside
                #spot_request_list and making `request.id` below fail.
                spot_request_list.extend(spot_instance_request)
            except Exception:
                log.debug('Spot request %s not found, not updating status' %spot_request_id)
                not_found_requests.append(spot_request_id)
        #Don't do anything with spot requests that weren't found for now

    for request in spot_request_list:
        try:
            spot_request = SpotRequest.objects.get(request_id=request.id)
            spot_request.status_code = request.status.code
            spot_request.status_message = request.status.message
            spot_request.state = request.state
            if request.instance_id != None:
                try:
                    ec2_instance = EC2Instance.objects.get(instance_id=request.instance_id)
                except EC2Instance.DoesNotExist:
                    #Spot request was fulfilled but we haven't seen the
                    #instance yet - create a placeholder record for it
                    ec2_instance = EC2Instance(ec2_pool=ec2_pool,
                                               instance_type=spot_request.instance_type,
                                               instance_role='worker',
                                               instance_id=request.instance_id,
                                               state='unknown',
                                               instance_status='unknown',
                                               system_status='unknown',
                                               )
                    ec2_instance.save()
                spot_request.ec2_instance = ec2_instance
            else:
                spot_request.ec2_instance = None
            spot_request.save()
        #Fixed: was Python-2-only `except Exception, e` syntax
        except Exception as e:
            log.exception(e)