def authorize_security_group(self, parameters, group_id, from_port, to_port,
                             ip_protocol, cidr_ip):
    """Opens a port range on a security group and verifies the rule exists.

    Each attempt asks EC2 to add the rule, then re-reads the security group
    named by parameters[self.PARAM_GROUP] and looks for a rule with the same
    port range and protocol. The method returns as soon as such a rule is
    seen; otherwise it sleeps and tries again until the retries run out.

    Args:
      parameters: A dict that contains the credentials necessary to
        authenticate with AWS, plus the security group name and (optionally)
        the VPC id used to look the group up.
      group_id: A str that contains the id of the group whose ports should be
        opened.
      from_port: An int that names the first port that access should be
        allowed on.
      to_port: An int that names the last port that access should be allowed
        on.
      ip_protocol: A str that indicates if TCP, UDP, or ICMP traffic should be
        allowed.
      cidr_ip: A str that names the IP range that traffic should be allowed
        from.
    Raises:
      AgentRuntimeException: If the security group cannot be found, or if the
        rule is still not visible once all retries are used up.
    """
    announcement = ('Authorizing security group {0} for {1} traffic from '
                    'port {2} to port {3}')
    AppScaleLogger.log(
        announcement.format(group_id, ip_protocol, from_port, to_port))

    conn = self.open_connection(parameters)
    attempts_remaining = self.SECURITY_GROUP_RETRY_COUNT
    while attempts_remaining:
        try:
            conn.authorize_security_group(
                group_id=group_id, from_port=from_port, to_port=to_port,
                cidr_ip=cidr_ip, ip_protocol=ip_protocol)
        except EC2ResponseError:
            # Deliberately ignored: success is decided by the rule lookup
            # below, not by the outcome of this call.
            pass

        try:
            group_info = self.get_security_group_by_name(
                conn, parameters[self.PARAM_GROUP],
                parameters.get(self.PARAM_VPC_ID))
            rule_visible = any(
                int(rule.from_port) == from_port
                and int(rule.to_port) == to_port
                and rule.ip_protocol == ip_protocol
                for rule in group_info.rules)
            if rule_visible:
                return
        except SecurityGroupNotFoundException as not_found:
            raise AgentRuntimeException(not_found.message)

        time.sleep(self.SLEEP_TIME)
        attempts_remaining -= 1

    failure = ("Couldn't authorize {0} traffic from port "
               "{1} to port {2} on CIDR IP {3}")
    raise AgentRuntimeException(
        failure.format(ip_protocol, from_port, to_port, cidr_ip))
def cleanup_state(self, parameters):
    """Removes the keyname and security group created during this AppScale
    deployment.

    The key pair is deleted first. The security group is then looked up by
    name and deleted; EC2 errors during that step are retried up to
    self.SECURITY_GROUP_RETRY_COUNT times with self.SLEEP_TIME seconds between
    attempts. A group that cannot be found is treated as already cleaned up.

    Args:
      parameters: A dict that contains the keyname and security group to
        delete (and, optionally, the VPC id the group lives in).
    Raises:
      AgentRuntimeException: If the security group still cannot be deleted
        once the retries are used up.
    """
    AppScaleLogger.log("Deleting keyname {0}".format(
        parameters[self.PARAM_KEYNAME]))
    conn = self.open_connection(parameters)
    conn.delete_key_pair(parameters[self.PARAM_KEYNAME])

    AppScaleLogger.log("Deleting security group {0}".format(
        parameters[self.PARAM_GROUP]))
    retries_left = self.SECURITY_GROUP_RETRY_COUNT
    while True:
        try:
            sg = self.get_security_group_by_name(
                conn, parameters[self.PARAM_GROUP],
                parameters.get(self.PARAM_VPC_ID))
            conn.delete_security_group(group_id=sg.id)
            return
        except EC2ResponseError as e:
            retries_left -= 1
            # '<=' rather than '==': a retry count configured as 0 (or less)
            # would otherwise step past zero and loop forever.
            if retries_left <= 0:
                raise AgentRuntimeException(
                    'Error deleting security group! Reason: '
                    '{}'.format(e.message))
            # Only wait when another attempt will actually follow.
            time.sleep(self.SLEEP_TIME)
        except SecurityGroupNotFoundException:
            AppScaleLogger.log(
                'Could not find security group {}, skipping '
                'delete.'.format(parameters[self.PARAM_GROUP]))
            return
def create_security_group(self, parameters, group):
    """Creates a security group in AWS and returns it once it can be found.

    Every attempt issues the create request and then looks the group up by
    name (within the VPC from parameters, if one is given). The lookup result
    decides success, so an error from the create call itself is not fatal.

    Args:
      parameters: A dict that contains the credentials necessary to
        authenticate with AWS, and optionally the id of the VPC to create
        the group in.
      group: A str that names the group that should be created.
    Returns:
      The security group object returned by get_security_group_by_name.
    Raises:
      AgentRuntimeException: If the group cannot be found after all retries.
    """
    AppScaleLogger.log('Creating security group: {0}'.format(group))
    conn = self.open_connection(parameters)
    vpc_id = parameters.get(self.PARAM_VPC_ID)

    attempts_remaining = self.SECURITY_GROUP_RETRY_COUNT
    while attempts_remaining:
        try:
            conn.create_security_group(group, 'AppScale security group',
                                       vpc_id)
        except EC2ResponseError:
            # Ignored on purpose; the lookup below is the source of truth.
            pass

        try:
            created_group = self.get_security_group_by_name(
                conn, group, vpc_id)
        except SecurityGroupNotFoundException:
            # Not visible yet: wait, then spend one retry.
            time.sleep(self.SLEEP_TIME)
            attempts_remaining -= 1
        else:
            return created_group

    raise AgentRuntimeException(
        "Couldn't create security group with name {0}".format(group))
def create_security_group(self, parameters, group):
    """Creates a security group in AWS and waits until it can be queried.

    Every attempt issues the create request and then asks EC2 for the group
    by name. The query result decides success, so an error from the create
    call itself is not fatal.

    Args:
      parameters: A dict that contains the credentials necessary to
        authenticate with AWS.
      group: A str that names the group that should be created.
    Raises:
      AgentRuntimeException: If the group cannot be queried after all retries.
    """
    AppScaleLogger.log('Creating security group: {0}'.format(group))
    conn = self.open_connection(parameters)

    attempts_remaining = self.SECURITY_GROUP_RETRY_COUNT
    while attempts_remaining:
        try:
            conn.create_security_group(group, 'AppScale security group')
        except EC2ResponseError:
            # Ignored on purpose; the query below is the source of truth.
            pass

        try:
            conn.get_all_security_groups(group)
        except EC2ResponseError:
            # Query failed: wait, then spend one retry.
            time.sleep(self.SLEEP_TIME)
            attempts_remaining -= 1
        else:
            return

    raise AgentRuntimeException(
        "Couldn't create security group with name {0}".format(group))
def configure_instance_security(self, parameters):
    """Creates a GCE network and firewall with the specified name, and opens
    the ports on that firewall as needed for AppScale.

    We expect both the network and the firewall to not exist before this
    point, to avoid accidentally placing AppScale instances from different
    deployments in the same network and firewall (thus enabling them to see
    each other's web traffic).

    Before touching the network, a fresh local SSH key is generated for the
    keyname and registered through create_ssh_key if does_ssh_key_exist
    reports it as missing.

    Args:
      parameters: A dict with keys for each parameter needed to connect to
        Google Compute Engine, and an additional key indicating the name of
        the network and firewall that we should create in GCE.
    Returns:
      True, if the named network and firewall was created successfully.
    Raises:
      AgentRuntimeException: If an SSH key for the keyname is already present
        locally, or if the named network or firewall already exist in GCE.
    """
    AppScaleLogger.log("Verifying that SSH key exists locally")
    keyname = parameters[self.PARAM_KEYNAME]
    private_key = LocalState.LOCAL_APPSCALE_PATH + keyname
    public_key = private_key + ".pub"

    # Refuse to overwrite an existing key pair for this keyname.
    if os.path.exists(private_key) or os.path.exists(public_key):
        raise AgentRuntimeException("SSH key already found locally - please " +
                                    "use a different keyname")

    LocalState.generate_rsa_key(keyname, parameters[self.PARAM_VERBOSE])

    ssh_key_exists, all_ssh_keys = self.does_ssh_key_exist(parameters)
    if not ssh_key_exists:
        self.create_ssh_key(parameters, all_ssh_keys)

    if self.does_network_exist(parameters):
        raise AgentRuntimeException("Network already exists - please use a " +
                                    "different group name.")

    if self.does_firewall_exist(parameters):
        raise AgentRuntimeException("Firewall already exists - please use a " +
                                    "different group name.")

    network_url = self.create_network(parameters)
    self.create_firewall(parameters, network_url)

    # Honor the documented contract; previously the method fell off the end
    # and returned None even on success.
    return True
def handle_failure(self, msg):
    """Reports an error through AppScaleLogger, then aborts by raising.

    Args:
      msg: The error message to log and to carry in the raised exception.
    Raises:
      AgentRuntimeException: Always; it is built from msg.
    """
    AppScaleLogger.log(msg)
    failure = AgentRuntimeException(msg)
    raise failure
def handle_failure(self, msg):
    """Records an error at debug level, then aborts by raising.

    Args:
      msg: The error message to log and to carry in the raised exception.
    Raises:
      AgentRuntimeException: Always; it is built from msg.
    """
    logging.debug(msg)
    failure = AgentRuntimeException(msg)
    raise failure
def ensure_operation_succeeds(self, gce_service, auth_http, response,
                              project_id):
    """Polls a GCE operation until it reports a status of 'DONE'.

    Callers should use this function whenever they perform a destructive
    operation in Google Compute Engine (creating or deleting a resource), so
    that follow-up requests only run once the earlier one has completed. It
    is not needed for read-only checks such as seeing if a resource exists.

    The operation is re-fetched through zoneOperations() when the response
    carries a 'zone' field and through globalOperations() otherwise. Polling
    also stops if a fetch returns an empty response.

    Args:
      gce_service: An apiclient.discovery.Resource that is a connection valid
        for requests to Google Compute Engine for the given user.
      auth_http: A HTTP connection that has been signed with the given user's
        Credentials, and is authorized with the GCE scope.
      response: A dict that contains the operation that we want to ensure has
        succeeded, referenced by a unique ID (the 'name' field).
      project_id: A str that identifies the GCE project that requests should
        be billed to.
    Raises:
      AgentRuntimeException: If a polled response contains an 'error' entry;
        the message joins every reported error message with newlines.
    """
    status = response['status']
    while status != 'DONE' and response:
        lookup = {'project': project_id, 'operation': response['name']}

        # Per-zone operations are addressed by the last path segment of the
        # zone URL; everything else is a global operation.
        if 'zone' in response:
            lookup['zone'] = response['zone'].rsplit('/', 1)[-1]
            operations = gce_service.zoneOperations()
        else:
            operations = gce_service.globalOperations()

        response = operations.get(**lookup).execute(http=auth_http)
        if not response:
            continue

        status = response['status']
        if 'error' in response:
            details = "\n".join(
                problem['message'] for problem in response['error']['errors'])
            raise AgentRuntimeException(str(details))
def configure_instance_security(self, parameters):
    """Configure the resource group and resource providers needed before
    virtual machines can be spawned.

    This method is called before starting virtual machines. It generates a
    fresh local SSH key for the keyname, creates the resource group through
    create_resource_group, and registers the compute and network resource
    providers with the subscription.

    Args:
      parameters: A dict containing values necessary to authenticate with the
        underlying cloud.
    Returns:
      True, once every step has completed. Failures are reported through
      exceptions rather than a False return value.
    Raises:
      AgentRuntimeException: If an SSH key for the keyname is already present
        locally.
    """
    credentials = self.open_connection(parameters)
    resource_group = parameters[self.PARAM_RESOURCE_GROUP]
    storage_account = parameters[self.PARAM_STORAGE_ACCOUNT]
    zone = parameters[self.PARAM_ZONE]
    subscription_id = parameters[self.PARAM_SUBSCRIBER_ID]

    AppScaleLogger.log("Verifying that SSH key exists locally.")
    keyname = parameters[self.PARAM_KEYNAME]
    private_key = LocalState.LOCAL_APPSCALE_PATH + keyname
    public_key = private_key + ".pub"

    # Refuse to overwrite an existing key pair for this keyname.
    if os.path.exists(private_key) or os.path.exists(public_key):
        raise AgentRuntimeException("SSH key already found locally - please "
                                    "use a different keyname.")

    LocalState.generate_rsa_key(keyname, parameters[self.PARAM_VERBOSE])

    AppScaleLogger.log("Configuring network for machine/s under "
                       "resource group '{0}' with storage account '{1}' "
                       "in zone '{2}'".format(resource_group, storage_account,
                                              zone))

    # Create the resource group that the deployment's resources live in.
    self.create_resource_group(parameters, credentials)

    resource_client = ResourceManagementClient(credentials, subscription_id)
    resource_client.providers.register(self.MICROSOFT_COMPUTE_RESOURCE)
    resource_client.providers.register(self.MICROSOFT_NETWORK_RESOURCE)

    # Honor the documented contract; previously the method fell off the end
    # and returned None even on success.
    return True
def run_instances(self, count, parameters, security_configured,
                  public_ip_needed):
    """Spawns the specified number of EC2 instances using the parameters
    provided.

    This method is blocking in that it waits until the requested VMs are
    properly booted up. However if the requested VMs cannot be procured
    within self.MAX_VM_CREATION_TIME seconds, this method stops waiting and
    works with whatever instances have shown up so far. (Also see
    documentation for the BaseAgent class)

    Args:
      count: Number of VMs to spawned.
      parameters: A dictionary of parameters. This must contain 'keyname',
        'group', 'image_id' and 'instance_type' parameters.
      security_configured: Uses this boolean value as an heuristic to detect
        brand new AppScale deployments.
      public_ip_needed: A boolean, specifies whether to launch with a public
        ip or not.
    Returns:
      A tuple of the form (instances, public_ips, private_ips)
    Raises:
      AgentRuntimeException: Raised (directly or via handle_failure) when the
        keyname belongs to a terminated instance, describe_instances keeps
        returning nothing for a non-fresh deployment, the VPC security group
        cannot be found, no public IPs show up in time, or EC2 returns an
        error response.
    """
    image_id = parameters[self.PARAM_IMAGE_ID]
    instance_type = parameters[self.PARAM_INSTANCE_TYPE]
    keyname = parameters[self.PARAM_KEYNAME]
    group = parameters[self.PARAM_GROUP]
    zone = parameters[self.PARAM_ZONE]

    # In case of autoscaling, the server side passes these parameters as a
    # string, so this check makes sure that spot instances are only created
    # when the flag is True.
    spot = parameters[self.PARAM_SPOT] in ['True', 'true', True]

    AppScaleLogger.log("Starting {0} machines with machine id {1}, with " \
      "instance type {2}, keyname {3}, in security group {4}, in availability" \
      " zone {5}".format(count, image_id, instance_type, keyname, group, zone))

    if spot:
        AppScaleLogger.log("Using spot instances")
    else:
        AppScaleLogger.log("Using on-demand instances")

    start_time = datetime.datetime.now()
    active_public_ips = []
    active_private_ips = []
    active_instances = []

    # Make sure we do not have terminated instances using the same keyname.
    instances = self.__describe_instances(parameters)
    term_instance_info = self.__get_instance_info(instances, 'terminated',
                                                  keyname)
    if len(term_instance_info[2]):
        self.handle_failure('SSH keyname {0} is already registered to a '\
                            'terminated instance. Please change the "keyname" '\
                            'you specified in your AppScalefile to a different '\
                            'value. If the keyname was autogenerated, erase it '\
                            'to have a new one generated for you.'.format(keyname))

    try:
        # Snapshot the instances that already exist so that the newly started
        # ones can be told apart from them later on.
        attempts = 1
        while True:
            instance_info = self.describe_instances(parameters)
            active_public_ips = instance_info[0]
            active_private_ips = instance_info[1]
            active_instances = instance_info[2]

            # If security has been configured on this agent just now,
            # that's an indication that this is a fresh cloud deployment.
            # As such it's not expected to have any running VMs.
            if len(active_instances) > 0 or security_configured:
                break
            elif attempts == self.DESCRIBE_INSTANCES_RETRY_COUNT:
                self.handle_failure('Failed to invoke describe_instances')
            attempts += 1

        # Get subnet from parameters.
        subnet = parameters.get(self.PARAM_SUBNET_ID)

        network_interfaces = None
        groups = None

        conn = self.open_connection(parameters)

        # A subnet indicates we're using VPC Networking.
        if subnet:
            # Get security group by name.
            try:
                sg = self.get_security_group_by_name(
                    conn, group, parameters[self.PARAM_VPC_ID])
            except SecurityGroupNotFoundException as e:
                raise AgentRuntimeException(e.message)

            # Create network interface specification.
            network_interface = NetworkInterfaceSpecification(
                associate_public_ip_address=public_ip_needed,
                groups=[sg.id], subnet_id=subnet)
            network_interfaces = NetworkInterfaceCollection(network_interface)
        else:
            groups = [group]

        if spot:
            # Fall back to a computed price when none was supplied.
            price = parameters[self.PARAM_SPOT_PRICE] or \
                self.get_optimal_spot_price(conn, instance_type, zone)

            conn.request_spot_instances(
                str(price), image_id, key_name=keyname,
                instance_type=instance_type, count=count, placement=zone,
                security_groups=groups, network_interfaces=network_interfaces)
        else:
            conn.run_instances(
                image_id, count, count, key_name=keyname,
                instance_type=instance_type, placement=zone,
                security_groups=groups, network_interfaces=network_interfaces)

        instance_ids = []
        public_ips = []
        private_ips = []
        end_time = datetime.datetime.now() + datetime.timedelta(
            0, self.MAX_VM_CREATION_TIME)

        while datetime.datetime.now() < end_time:
            AppScaleLogger.log("Waiting for your instances to start...")
            public_ips, private_ips, instance_ids = self.describe_instances(
                parameters)

            # If we need a public ip, make sure we actually get one.
            if public_ip_needed and not self.diff(public_ips, private_ips):
                time.sleep(self.SLEEP_TIME)
                continue

            # Keep only what was not already present before this call.
            public_ips = self.diff(public_ips, active_public_ips)
            private_ips = self.diff(private_ips, active_private_ips)
            instance_ids = self.diff(instance_ids, active_instances)
            if count == len(public_ips):
                break
            time.sleep(self.SLEEP_TIME)

        if not public_ips:
            self.handle_failure('No public IPs were able to be procured '
                                'within the time limit')

        if len(public_ips) != count:
            for index, public_ip in enumerate(public_ips):
                if public_ip == '0.0.0.0':
                    instance_to_term = instance_ids[index]
                    # The two halves of this message used to be joined
                    # without a space ("addressand").
                    AppScaleLogger.log(
                        'Instance {0} failed to get a public IP address '
                        'and is being terminated'.format(instance_to_term))
                    conn.terminate_instances([instance_to_term])

        end_time = datetime.datetime.now()
        total_time = end_time - start_time
        if spot:
            AppScaleLogger.log("Started {0} spot instances in {1} seconds" \
                               .format(count, total_time.seconds))
        else:
            AppScaleLogger.log("Started {0} on-demand instances in {1} seconds" \
                               .format(count, total_time.seconds))
        return instance_ids, public_ips, private_ips
    except EC2ResponseError as exception:
        self.handle_failure('EC2 response error while starting VMs: ' +
                            exception.error_message)