def wait_until(state):
    """Block until `instance_count` instances of the service reach `state`.

    Re-fetches the ASG between checks and gives up with OperationTimedOut
    after RETRY_COUNT attempts.
    """
    asg = self.get(network, service_name)

    def in_state(service, state):
        # All instances, across every subnetwork, currently in `state`.
        return [instance
                for subnetwork in service.subnetworks
                for instance in subnetwork.instances
                if instance.state == state]

    retries = 0
    while len(in_state(asg, state)) < instance_count:
        logger.info(
            "Waiting for instance creation for service %s. %s of %s running",
            service_name, len(in_state(asg, state)), instance_count)
        logger.debug("Waiting for instance creation in asg: %s", asg)
        asg = self.get(network, service_name)
        retries += 1
        if retries > RETRY_COUNT:
            raise OperationTimedOut(
                "Timed out waiting for ASG to be created")
        time.sleep(RETRY_DELAY)
    logger.info("Success! %s of %s instances running.",
                len(in_state(asg, state)), instance_count)
def apply(self, resource_definition):
    """Return the single subnet matching `resource_definition`.

    Raises DisallowedOperationException when the definition does not
    resolve to exactly one subnet.
    """
    matched = self.get(resource_definition)
    if len(matched) != 1:
        raise DisallowedOperationException(
            "Cannot apply, matched more than one subnet!: %s" % matched)
    subnet = matched[0]
    logger.info("Applying subnet: %s", subnet)
    return subnet
def get(self, resource_definition):
    """Look up the security group backing the given firewall definition.

    Returns a list of matching Firewall objects (zero or one elements).
    Raises DisallowedOperationException when no network selector is given
    and BadEnvironmentStateException when multiple groups share the name.
    """
    ec2 = self.driver.client("ec2")
    firewall = resource_definition
    logger.info("Getting firewall: %s", firewall)
    # Validate the selector *before* resolving the network.  The original
    # code called self.network.get(firewall.network.name) first, which
    # raised AttributeError instead of the intended exception when the
    # network selector was missing; it also did the lookup twice.
    if not firewall.network or not firewall.network.name:
        raise DisallowedOperationException(
            "Network selector required when getting firewall")
    network = self.network.get(firewall.network.name)
    search_filters = [
        {'Name': 'vpc-id', 'Values': [network.network_id]},
        {'Name': 'group-name', 'Values': [firewall.name]},
    ]
    security_groups = ec2.describe_security_groups(Filters=search_filters)
    if len(security_groups["SecurityGroups"]) > 1:
        raise BadEnvironmentStateException(
            "Found multiple security groups with name %s, in vpc %s: %s" %
            (firewall.name, network.network_id, security_groups))
    return [
        Firewall(version=firewall.version, name=sg["GroupName"],
                 network=firewall.network, id=sg["GroupId"])
        for sg in security_groups["SecurityGroups"]
    ]
def apply(self, resource_definition):
    """Return the single firewall matching `resource_definition`.

    Raises DisallowedOperationException when the definition does not
    resolve to exactly one firewall.
    """
    matched = self.get(resource_definition)
    if len(matched) != 1:
        raise DisallowedOperationException(
            "Cannot apply, matched more than one firewall!: %s" % matched)
    firewall = matched[0]
    logger.info("Applying firewall: %s", firewall)
    return firewall
def attempt_delete_security_group(security_group_id):
    """Try to delete the security group; return False if it is still in use.

    Returns True on success.  A DependencyViolation means something still
    references the group, so the caller should retry later.  Any other
    client error is re-raised.
    """
    try:
        ec2.delete_security_group(GroupId=security_group_id)
        return True
    except ClientError as client_error:
        # Fixed typo in the log message ("Recieved" -> "Received").
        logger.info("Received exception destroying security group: %s",
                    client_error)
        if (client_error.response["Error"]["Code"] ==
                "DependencyViolation"):
            return False
        raise client_error
def get_availability_zones(self):
    """Return the name of every availability zone in the current region."""
    ec2 = self.driver.client("ec2")
    if self.mock:
        # NOTE: Moto does not support describe_availability_zones, so the
        # mock AWS provider gets a fixed set of zones to keep tests passing.
        logger.info("Returning hard coded azs for mock AWS provider")
        return ["us-east-1a", "us-east-1b", "us-east-1c"]
    response = ec2.describe_availability_zones()
    return [zone["ZoneName"] for zone in response["AvailabilityZones"]]
def create(self, resource_definition):
    """Create the security group backing the given firewall definition."""
    firewall = resource_definition
    logger.info("Creating firewall: %s", firewall)
    ec2 = self.driver.client("ec2")
    network = self.network.get(firewall.network.name)
    created = ec2.create_security_group(
        VpcId=network.network_id,
        GroupName=firewall.name,
        Description="Firewall:%s created by Cloudless" % firewall.name)
    return Firewall(version=firewall.version,
                    name=firewall.name,
                    id=created["GroupId"],
                    network=firewall.network)
def destroy_launch_configuration(self, asg_name):
    """Delete the launch configuration named `asg_name`, tolerating absence.

    A ValidationError whose message says the launch configuration was not
    found means it is already gone and is treated as success; any other
    client error is re-raised.
    """
    autoscaling = self.driver.client("autoscaling")
    try:
        autoscaling.delete_launch_configuration(
            LaunchConfigurationName=str(asg_name))
    except ClientError as client_error:
        # Fixed typo in the log message ("Recieved" -> "Received").
        logger.info("Received exception destroying launch configuration: %s",
                    client_error)
        # AWS signals "already deleted" only via this exact message text, so
        # it must be matched verbatim.
        msg = "Launch configuration name not found - Launch configuration %s not found" % (
            str(asg_name))
        if (client_error.response["Error"]["Code"] == "ValidationError"
                and client_error.response["Error"]["Message"] == msg):
            logger.debug("Launch configuration: %s already deleted",
                         str(asg_name))
        else:
            raise client_error
def create(self, resource_definition):
    """Create the subnets backing `resource_definition`; return a SubnetModel."""
    subnet = resource_definition
    logger.info("Creating subnet: %s", subnet)
    network = self._get_network(subnet.network)
    # Spread the subnet across 3 availability zones.
    created = self.subnetwork.create_from_args(
        network.name, network.id, network.cidr_block, subnet.name, 3,
        subnet.size)
    subnet_info = [{"id": item.subnetwork_id,
                    "availability_zone": item.availability_zone,
                    "cidr_block": item.cidr_block}
                   for item in created]
    return SubnetModel(
        version=subnet.version,
        name=subnet.name,
        subnets=subnet_info,
    )
def create(self, subnetwork_name, subnet_cidr, availability_zone, dc_id,
           retry_count, retry_delay):
    """
    Provision a single subnet with a route table and the proper tags.
    """
    ec2 = self.driver.client("ec2")
    new_subnet = ec2.create_subnet(CidrBlock=subnet_cidr,
                                   AvailabilityZone=availability_zone,
                                   VpcId=dc_id)
    subnet_id = new_subnet["Subnet"]["SubnetId"]

    # Each subnet gets a dedicated route table.
    route_table = ec2.create_route_table(VpcId=dc_id)
    ec2.associate_route_table(
        RouteTableId=route_table["RouteTable"]["RouteTableId"],
        SubnetId=subnet_id)

    # Tagging can race with subnet creation, so keep retrying until the
    # tagged subnet shows up in describe_subnets (or we run out of retries).
    attempt = 0
    while attempt < retry_count:
        try:
            ec2.create_tags(Resources=[subnet_id],
                            Tags=[{"Key": "Name",
                                   "Value": subnetwork_name}])
            described = ec2.describe_subnets(Filters=[
                {'Name': "vpc-id", 'Values': [dc_id]},
                {'Name': "tag:Name", 'Values': [subnetwork_name]}])
            found_ids = [entry["SubnetId"]
                         for entry in described["Subnets"]]
            if subnet_id in found_ids:
                break
            time.sleep(float(retry_delay))
        except ec2.exceptions.ClientError as client_error:
            logger.info("Caught exception creating tags: %s", client_error)
            time.sleep(float(retry_delay))
        attempt = attempt + 1
    if attempt >= retry_count:
        raise OperationTimedOut("Cannot find created Subnet: %s" % subnet_id)
    return new_subnet["Subnet"]
def delete_referencing_rules(self, vpc_id, security_group_id):
    """
    Removes all rules referencing the given security group in the given
    VPC, so it can be safely deleted.
    """
    ec2 = self.driver.client("ec2")
    logger.info("Deleting rules referencing %s in %s", security_group_id,
                vpc_id)
    security_groups = ec2.describe_security_groups(
        Filters=[{'Name': 'vpc-id', 'Values': [vpc_id]}])
    for security_group in security_groups["SecurityGroups"]:
        # Log the group actually being inspected.  The original logged the
        # target security_group_id for every group, which was misleading.
        logger.info("Checking security group: %s",
                    security_group["GroupId"])
        for rule in security_group["IpPermissions"]:
            for uigp in rule["UserIdGroupPairs"]:
                if "GroupId" not in uigp:
                    continue
                if uigp["GroupId"] != security_group_id:
                    continue
                rule_to_remove = {}
                # Rules for IpProtocol "-1" (all traffic) carry no port
                # range; indexing FromPort/ToPort unconditionally raised
                # KeyError for them, so copy the ports only when present.
                if "FromPort" in rule:
                    rule_to_remove["FromPort"] = rule["FromPort"]
                if "ToPort" in rule:
                    rule_to_remove["ToPort"] = rule["ToPort"]
                rule_to_remove["IpProtocol"] = rule["IpProtocol"]
                rule_to_remove["UserIdGroupPairs"] = [{
                    "GroupId": uigp["GroupId"]
                }]
                logger.info("Revoking rule: %s in security group %s",
                            rule_to_remove, security_group)
                ec2.revoke_security_group_ingress(
                    GroupId=security_group["GroupId"],
                    IpPermissions=[rule_to_remove])
def add(self, source, destination, port):
    """
    Adds a route from "source" to "destination".
    """
    logger.debug("Adding path from %s to %s", source, destination)
    if self.has_access(source, destination, port):
        logger.info("Service %s already has access to %s on port: %s",
                    source, destination, port)
        return True
    # Currently controlling egress in AWS is not supported. All egress is always allowed.
    if not isinstance(destination, Service):
        raise DisallowedOperationException(
            "Destination must be a cloudless.types.networking.Service object"
        )
    dest_sg_id, _, _, src_ip_permissions = self._extract_service_info(
        source, destination, port)
    self.driver.client("ec2").authorize_security_group_ingress(
        GroupId=dest_sg_id, IpPermissions=src_ip_permissions)
    return Path(destination.network, source, destination, "tcp", port)
def destroy(self, image):
    """
    Destroy a image given the provided image object.
    """
    ec2 = self.driver.client("ec2")
    raw_images = ec2.describe_images(Owners=["self"])
    logger.debug("Images: %s", raw_images)

    # Collect the EBS snapshots backing this image so they can be removed
    # after the image itself is deregistered.
    snapshot_ids = []
    for raw_image in raw_images["Images"]:
        if raw_image["ImageId"] != image.image_id:
            continue
        for bdm in raw_image["BlockDeviceMappings"]:
            snapshot_id = bdm.get("Ebs", {}).get("SnapshotId")
            if snapshot_id:
                snapshot_ids.append(snapshot_id)

    logger.info("Deregistering image: %s", image.image_id)
    ec2.deregister_image(ImageId=image.image_id)
    for snapshot_id in snapshot_ids:
        logger.info("Deleting snapshot: %s", snapshot_id)
        ec2.delete_snapshot(SnapshotId=snapshot_id)

    def retry_if_timeout(exception):
        """
        Checks if this exception is just because we haven't converged yet.
        """
        return isinstance(exception, OperationTimedOut)

    @retry(wait_fixed=RETRY_DELAY, stop_max_attempt_number=RETRY_COUNT,
           retry_on_exception=retry_if_timeout)
    def wait_for_destroyed(image_name):
        logger.info("Waiting for image: %s to be destroyed", image_name)
        if self.get(image.name):
            raise OperationTimedOut(
                "Timed out waiting for image %s to be gone." % image_name)
        logger.info("Success, did not find image: %s", image_name)

    wait_for_destroyed(image.name)
    return True
def delete_by_name(self, vpc_id, security_group_name, retries, retry_delay):
    """Delete the security group named `security_group_name` in `vpc_id`.

    Returns True when the group is absent or deleted; raises
    BadEnvironmentStateException when the name is ambiguous.
    """
    ec2 = self.driver.client("ec2")
    logger.info("Deleting security group %s in %s", security_group_name,
                vpc_id)
    response = ec2.describe_security_groups(Filters=[
        {'Name': 'vpc-id', 'Values': [vpc_id]},
        {'Name': 'group-name', 'Values': [security_group_name]}])
    groups = response["SecurityGroups"]
    if not groups:
        # Nothing to delete counts as success.
        return True
    if len(groups) > 1:
        raise BadEnvironmentStateException(
            "Found multiple security groups with name %s, in vpc %s: %s" %
            (security_group_name, vpc_id, response))
    group_id = groups[0]["GroupId"]
    self.delete_referencing_rules(vpc_id, group_id)
    return self.delete_with_retries(group_id, retries, retry_delay)
def destroy_with_args(self, network_name, network_id, subnetwork_name):
    """
    Destroy all networks represented by this object.  Also destroys the
    underlying VPC if it's empty.

    Steps:

    1. Discover the current VPC.
    2. Destroy route tables.
        2.a. Disassociate and delete route table.
        2.b. Delete non referenced internet gateways.
    3. Delete all subnets.
    4. Wait until subnets are deleted.

    Raises OperationTimedOut if the subnets do not disappear in time.
    """
    ec2 = self.driver.client("ec2")
    subnet_ids = [
        subnet_info.subnetwork_id
        for subnet_info in self.get_with_args(network_name, subnetwork_name)
    ]

    # 1. Discover the current VPC.
    dc_id = network_id

    # 2. Destroy route tables.
    def delete_route_table(route_table):
        # 2.a. Disassociate and delete route table.
        for association in route_table["Associations"]:
            ec2.disassociate_route_table(
                AssociationId=association["RouteTableAssociationId"])
        ec2.delete_route_table(RouteTableId=route_table["RouteTableId"])
        # 2.b. Delete non referenced internet gateways.
        for route in route_table["Routes"]:
            if "GatewayId" in route and route["GatewayId"] != "local":
                igw_id = route["GatewayId"]
                if not self.internet_gateways.route_count(dc_id, igw_id):
                    ec2.detach_internet_gateway(InternetGatewayId=igw_id,
                                                VpcId=dc_id)
                    ec2.delete_internet_gateway(InternetGatewayId=igw_id)

    for subnet_id in subnet_ids:
        subnet_filter = {
            'Name': 'association.subnet-id',
            'Values': [subnet_id]
        }
        route_tables = ec2.describe_route_tables(Filters=[subnet_filter])
        if len(route_tables["RouteTables"]) > 1:
            raise BadEnvironmentStateException(
                "Expected to find at most one route table associated "
                "with: %s, output: %s" % (subnet_id, route_tables))
        if len(route_tables["RouteTables"]) == 1:
            delete_route_table(route_tables["RouteTables"][0])

    # 3. Delete all subnets.
    for subnet_id in subnet_ids:
        self.subnets.delete(subnet_id, RETRY_COUNT, RETRY_DELAY)

    # 4. Wait until subnets are deleted.
    def remaining_ids():
        # Subnet IDs still present in the VPC.
        described = ec2.describe_subnets(
            Filters=[{'Name': 'vpc-id', 'Values': [dc_id]}])
        return [subnet["SubnetId"] for subnet in described["Subnets"]]

    remaining_subnet_ids = remaining_ids()
    retries = 0
    while any(i in subnet_ids for i in remaining_subnet_ids):
        logger.info("Found remaining subnets: %s", remaining_subnet_ids)
        # The original loop silently gave up after 720 one-second retries,
        # leaving deletion half-finished without any error.  Now a stuck
        # deletion surfaces as OperationTimedOut.
        if retries >= 720:
            raise OperationTimedOut(
                "Timed out waiting for subnets to be deleted: %s" %
                remaining_subnet_ids)
        time.sleep(1)
        remaining_subnet_ids = remaining_ids()
        retries = retries + 1
def destroy(self, service):
    """
    Destroy a group of instances described by "service".

    Raises OperationTimedOut if instances or the ASG do not go away in time.
    """
    logger.debug("Attempting to destroy: %s", service)
    # Capture identifying info up front: `service` is re-fetched below and
    # may become None once the ASG is gone, which previously caused an
    # AttributeError when service.network was dereferenced after the loops.
    network = service.network
    service_name = service.name
    asg_name = AsgName(network=network.name, subnetwork=service_name)

    asg = self._discover_asg(network.name, service_name)
    if asg:
        self.asg.destroy_auto_scaling_group_instances(asg_name)

    # Wait for instances to be gone.  Need to do this before we can delete
    # the actual ASG otherwise it will error.
    def instance_list(service, state):
        # All instances NOT in `state`.
        return [instance
                for subnetwork in service.subnetworks
                for instance in subnetwork.instances
                if instance.state != state]

    service = self.get(network, service_name)
    logger.debug("Found service: %s", service)
    retries = 0
    while service and instance_list(service, "terminated"):
        logger.info(
            "Waiting for instance termination in service %s. %s still terminating",
            service_name, len(instance_list(service, "terminated")))
        logger.debug("Waiting for instance termination in asg: %s", service)
        service = self.get(network, service_name)
        retries = retries + 1
        if retries > RETRY_COUNT:
            raise OperationTimedOut("Timed out waiting for ASG scale down")
        time.sleep(RETRY_DELAY)
    logger.info("Success! All instances terminated.")

    asg = self._discover_asg(network.name, service_name)
    if asg:
        self.asg.destroy_auto_scaling_group(asg_name)

    # Wait for ASG to be gone.  Need to wait for this because it's a
    # dependency of the launch configuration.
    asg = self._discover_asg(network.name, service_name)
    retries = 0
    while asg:
        logger.debug("Waiting for asg deletion: %s", asg)
        asg = self._discover_asg(network.name, service_name)
        retries = retries + 1
        if retries > RETRY_COUNT:
            raise OperationTimedOut("Timed out waiting for ASG deletion")
        time.sleep(RETRY_DELAY)

    vpc_id = network.network_id
    lc_security_group = self.asg.get_launch_configuration_security_group(
        network.name, service_name)
    self.asg.destroy_launch_configuration(asg_name)
    if lc_security_group:
        logger.debug("Deleting referencing rules of sg: %s",
                     lc_security_group)
        self.security_groups.delete_referencing_rules(vpc_id,
                                                      lc_security_group)
        logger.debug("Attempting to delete sg: %s", lc_security_group)
        self.security_groups.delete_with_retries(lc_security_group,
                                                 RETRY_COUNT, RETRY_DELAY)
    else:
        logger.debug("Attempting to delete sg by name: %s", str(asg_name))
        self.security_groups.delete_by_name(vpc_id, str(asg_name),
                                            RETRY_COUNT, RETRY_DELAY)
    self.subnetwork.destroy(network, service_name)
def delete(self, resource_definition):
    """Delete every subnetwork described by `resource_definition`."""
    subnet = resource_definition
    logger.info("Deleting subnet: %s", subnet)
    network = self._get_network(subnet.network)
    return self.subnetwork.destroy_with_args(network.name, network.id,
                                             subnet.name)
def delete(self, resource_definition):
    """Delete the security group backing the given firewall definition."""
    firewall = resource_definition
    logger.info("Deleting firewall: %s", firewall)
    network = self.network.get(firewall.network.name)
    # 10 attempts with a 5 second delay between them.
    return self._delete_by_name(network.network_id, firewall.name, 10, 5)
def destroy(self, network):
    """
    Destroy a network given the provided network object.
    """
    ec2 = self.driver.client("ec2")

    # Check to see if we have any subnets, otherwise bail out
    subnets = ec2.describe_subnets(Filters=[
        {'Name': 'vpc-id', 'Values': [network.network_id]}])
    if subnets["Subnets"]:
        message = "Found subnets in network, cannot delete: %s" % subnets
        logger.error(message)
        raise DisallowedOperationException(message)

    # Delete internet gateway if it's no longer referenced
    igw = ec2.describe_internet_gateways(Filters=[
        {'Name': 'attachment.vpc-id', 'Values': [network.network_id]}])
    igw_id = None
    if len(igw["InternetGateways"]) == 1:
        igw_id = igw["InternetGateways"][0]["InternetGatewayId"]
    elif len(igw["InternetGateways"]) > 1:
        raise Exception(
            "Invalid response from describe_internet_gateways: %s" % igw)
    if igw_id and not self.internet_gateways.route_count(
            network.network_id, igw_id):
        ec2.detach_internet_gateway(InternetGatewayId=igw_id,
                                    VpcId=network.network_id)
        ec2.delete_internet_gateway(InternetGatewayId=igw_id)

    # Since we check above that there are no subnets, and therefore nothing
    # deployed in this VPC, for now assume it is safe to delete.
    security_groups = ec2.describe_security_groups(Filters=[
        {'Name': 'vpc-id', 'Values': [network.network_id]}])
    for security_group in security_groups["SecurityGroups"]:
        # The "default" group belongs to the VPC and goes away with it.
        if security_group["GroupName"] == "default":
            continue
        logger.info("Deleting security group: %s",
                    security_group["GroupName"])
        ec2.delete_security_group(GroupId=security_group["GroupId"])

    # Delete internet gateway, also safe because our subnets are gone.
    igws = ec2.describe_internet_gateways(Filters=[
        {'Name': 'attachment.vpc-id', 'Values': [network.network_id]}])
    logger.info("Deleting internet gateways: %s", igws)
    for igw in igws["InternetGateways"]:
        logger.info("Deleting internet gateway: %s", igw)
        igw_id = igw["InternetGatewayId"]
        ec2.detach_internet_gateway(InternetGatewayId=igw_id,
                                    VpcId=network.network_id)
        ec2.delete_internet_gateway(InternetGatewayId=igw_id)

    # Now, actually delete the VPC
    try:
        deletion_result = ec2.delete_vpc(VpcId=network.network_id)
    except ec2.exceptions.ClientError as client_error:
        if client_error.response['Error']['Code'] == 'DependencyViolation':
            logger.info("Dependency violation deleting VPC: %s",
                        client_error)
        raise client_error
    return deletion_result
def create(self, name, service):
    """
    Create new image named "name" from "service".
    """
    ec2 = self.driver.client("ec2")
    instances = [instance
                 for subnetwork in service.subnetworks
                 for instance in subnetwork.instances]
    if len(instances) != 1:
        raise DisallowedOperationException(
            "Service must have exactly one instance, found %s" % instances)
    instance_id = instances[0].instance_id

    def get_instance(instance_id):
        # Fetch the single raw EC2 instance record for this id.
        reservations = ec2.describe_instances(InstanceIds=[instance_id])
        raw_instances = [instance
                         for reservation in reservations["Reservations"]
                         for instance in reservation["Instances"]]
        if len(raw_instances) != 1:
            raise DisallowedOperationException(
                "Service must have exactly one instance, found %s" %
                raw_instances)
        return raw_instances[0]

    # First, stop instances to prevent the state from changing while we're snapshotting.
    logger.info("Stopping instance: %s", instance_id)
    autoscaling = self.driver.client("autoscaling")

    # Must detach from autoscaling group otherwise our instance will get
    # terminated.  See https://stackoverflow.com/a/28883869.
    #
    # Also see https://github.com/getcloudless/cloudless/issues/20.
    def detach_from_asg(service, instance_id):
        asg_name = str(AsgName(network=service.network.name,
                               subnetwork=service.name))
        autoscaling.update_auto_scaling_group(AutoScalingGroupName=asg_name,
                                              MinSize=0)
        self.asg.wait_for_in_service(asg_name, instance_id)
        autoscaling.detach_instances(InstanceIds=[instance_id],
                                     AutoScalingGroupName=asg_name,
                                     ShouldDecrementDesiredCapacity=True)

    detach_from_asg(service, instance_id)

    def retry_if_timeout(exception):
        """
        Checks if this exception is just because we haven't converged yet.
        """
        return isinstance(exception, OperationTimedOut)

    ec2.stop_instances(InstanceIds=[instance_id])

    @retry(wait_fixed=RETRY_DELAY, stop_max_attempt_number=RETRY_COUNT,
           retry_on_exception=retry_if_timeout)
    def wait_for_stopped(instance_id):
        raw_instance = get_instance(instance_id)
        logger.debug("Current state: %s", raw_instance)
        if raw_instance["State"]["Name"] != "stopped":
            raise OperationTimedOut(
                "Timed out waiting for instance: %s to stop" % instance_id)

    wait_for_stopped(instance_id)

    # Get information about the instance's block device
    def get_blockdev_info():
        raw_instance = get_instance(instance_id)
        logger.debug("Getting blockdev info from: %s", raw_instance)
        if len(raw_instance["BlockDeviceMappings"]) != 1:
            raise DisallowedOperationException(
                "Currently only support saving instances with one blockdev, found %s"
                % (raw_instance))
        volume_id = raw_instance["BlockDeviceMappings"][0]["Ebs"]["VolumeId"]
        volumes = ec2.describe_volumes(VolumeIds=[volume_id])
        if len(volumes["Volumes"]) != 1:
            raise BadEnvironmentStateException(
                "Found two volumes with the same id: %s" % volumes)
        volume = volumes["Volumes"][0]
        return {
            "DeviceName": raw_instance["BlockDeviceMappings"][0]["DeviceName"],
            "Ebs": {
                "Encrypted": volume["Encrypted"],
                "DeleteOnTermination": True,
                "VolumeSize": volume["Size"],
                "VolumeType": volume["VolumeType"]
            }
        }

    block_device = get_blockdev_info()

    # Save the image and return image data
    def get_image(image_id):
        images = ec2.describe_images(ImageIds=[image_id])
        if len(images["Images"]) != 1:
            raise BadEnvironmentStateException(
                "Expected exactly one image, found %s" % images)
        return images["Images"][0]

    @retry(wait_fixed=RETRY_DELAY, stop_max_attempt_number=RETRY_COUNT,
           retry_on_exception=retry_if_timeout)
    def wait_for_available(image_id):
        image = get_image(image_id)
        logger.debug("Current image state: %s", image)
        if image["State"] != "available":
            raise OperationTimedOut(
                "Timed out waiting for image %s to be available." % image_id)

    logger.info("Creating image from instance: %s", instance_id)
    image_id = ec2.create_image(InstanceId=instance_id,
                                Name=name,
                                BlockDeviceMappings=[block_device])
    wait_for_available(image_id["ImageId"])
    logger.info("Created image: %s", image_id["ImageId"])
    image = get_image(image_id["ImageId"])

    # Terminate the instance so it doesn't cause us to fail deleting our service. This is
    # unfortunately brittle and if something fails before this point we'll be in this weird
    # state where the security group will have a dependency. That's not acceptable, but really
    # it depends on fixing: https://github.com/getcloudless/cloudless/issues/20 because the ASG
    # only reports the running instances and that's how the service destroy discovers them.
    ec2.terminate_instances(InstanceIds=[instance_id])
    return Image(image_id=image["ImageId"],
                 name=image["Name"],
                 created_at=image["CreationDate"])
def wait_for_destroyed(image_name):
    """Raise OperationTimedOut while the image still exists.

    Intended to be wrapped in a retry decorator by the caller; succeeding
    means the image could no longer be found.
    """
    logger.info("Waiting for image: %s to be destroyed", image_name)
    # Use the image_name parameter consistently; the original queried the
    # closed-over image.name while formatting messages with image_name,
    # which only worked because callers passed image.name anyway.
    if self.get(image_name):
        raise OperationTimedOut(
            "Timed out waiting for image %s to be gone." % image_name)
    logger.info("Success, did not find image: %s", image_name)