def test_configurations():
    """Test for method for get, fetch, delete."""
    # region for our tests
    aws_region = 'us-east-1'
    account_id = '111122223333'
    object_id = 'foo'

    # create dummy ec2 instance so we can figure out account id
    ec2_client = boto3.client('ec2', region_name=aws_region)
    ec2_client.run_instances(ImageId='ami-123abc', MinCount=1, MaxCount=5)

    # create a mock table
    mocks.create_dynamodb(aws_region)
    ctx = utils.MockContext()

    # make sure we successfully created the table
    dynamo_resource = boto3.resource('dynamodb', region_name=aws_region)
    cfg_table = dynamo_resource.Table('ebs_snapshot_configuration')
    assert cfg_table.table_status == "ACTIVE"

    # sample configuration to round-trip through the table
    config_data = {
        "match": {
            "instance-id": "i-abc12345",
            "tag:plant": "special_flower",
            "tag:Name": "legacy_server"
        },
        "snapshot": {
            "retention": "6 days",
            "minimum": 6,
            "frequency": "13 hours"
        }
    }

    # put it in the table, be sure it succeeded
    stored = dynamo.store_configuration(aws_region, object_id, account_id,
                                        config_data)
    assert stored != {}

    # now list everything, be sure it was present
    listed = dynamo.list_configurations(ctx, aws_region)
    assert listed == [config_data]

    # now get that specific one
    assert dynamo.get_configuration(ctx, aws_region, object_id,
                                    account_id) == config_data

    # be sure another get for invalid item returns none
    assert dynamo.get_configuration(ctx, aws_region, 'abc',
                                    account_id) is None

    # be sure it returns in a list
    assert object_id in dynamo.list_ids(ctx, aws_region)

    # now delete it and confirm both list and get return nothing
    dynamo.delete_configuration(aws_region, object_id, account_id)
    assert dynamo.get_configuration(ctx, aws_region, object_id,
                                    account_id) is None
    assert dynamo.list_configurations(ctx, aws_region) == []
def test_store_bad_configuration():
    """Test for storing a bad config."""
    # region for our tests
    aws_region = 'us-east-1'

    # create dummy ec2 instance so we can figure out account id
    ec2_client = boto3.client('ec2', region_name=aws_region)
    ec2_client.run_instances(ImageId='ami-123abc', MinCount=1, MaxCount=5)
    aws_account_id = AWS_MOCK_ACCOUNT
    object_id = 'foo'

    # create a mock table
    mocks.create_dynamodb(aws_region)
    ctx = utils.MockContext()

    # make sure we successfully created the table
    dynamo_resource = boto3.resource('dynamodb', region_name=aws_region)
    cfg_table = dynamo_resource.Table('ebs_snapshot_configuration')
    assert cfg_table.table_status == "ACTIVE"

    # put some bad data in -- top-level key is deliberately misnamed
    config_data = {
        "match_bad_name": {
            "instance-id": "i-abc12345",
            "tag:plant": "special_flower",
            "tag:Name": "legacy_server"
        }
    }

    # this should blow up
    with pytest.raises(Exception):
        dynamo.store_configuration(aws_region, object_id, aws_account_id,
                                   config_data)

    # now force it
    cfg_table.put_item(
        Item={
            'aws_account_id': aws_account_id,
            'id': object_id,
            'configuration': "{, 123 bare words :: }"
        })

    # now watch it blow up on listing them
    with pytest.raises(EbsSnapperError):
        dynamo.list_configurations(ctx, aws_region, aws_account_id)

    # now blow up on fetching a specific one by Key
    with pytest.raises(EbsSnapperError):
        dynamo.get_configuration(ctx, aws_region, object_id, aws_account_id)
def test_list_missing_configurations():
    """Test for method of the same name."""
    # region for our tests
    aws_region = 'us-east-1'

    # create dummy ec2 instance so we can figure out account id
    ec2_client = boto3.client('ec2', region_name=aws_region)
    ec2_client.run_instances(ImageId='ami-123abc', MinCount=1, MaxCount=5)

    # NOTE(review): the table is never created in this test, so any of the
    # statements below may be the one that raises; pytest.raises(Exception)
    # accepts whichever fires first -- confirm this breadth is intentional
    with pytest.raises(Exception):
        # make sure we successfully created the table
        dynamo_resource = boto3.resource('dynamodb', region_name=aws_region)
        cfg_table = dynamo_resource.Table('ebs_snapshot_configuration')
        assert cfg_table.table_status == "ACTIVE"

        dynamo.list_configurations(aws_region)
def ensure_cloudwatch_rule_for_replication(context, installed_region='us-east-1'):
    """Be sure replication is running, or not running, based on configs.

    Reads every configuration from DynamoDB; if any of them sets
    ``replication: yes``, the CloudWatch Events rule that drives
    replication is enabled, otherwise it is disabled. No-ops when the
    rule is already in the desired state.

    :param context: Lambda-style context object passed through to helpers
    :param installed_region: region holding the rule and the config table
    """
    client = boto3.client('events', region_name=installed_region)
    cw_rule_name = utils.find_replication_cw_event_rule(context)
    current_state = client.describe_rule(Name=cw_rule_name)

    configurations = dynamo.list_configurations(context, installed_region)

    # replication is wanted if any single config opts in
    replication = any(
        cfg.get('replication') == 'yes' for cfg in configurations)

    if replication and current_state['State'] == 'DISABLED':
        # LOG.warn is a deprecated alias; use warning()
        LOG.warning('Enabling snapshot replication due to configuration.')
        client.enable_rule(Name=cw_rule_name)
    elif not replication and current_state['State'] == 'ENABLED':
        LOG.warning('Disabling snapshot replication due to configuration.')
        client.disable_rule(Name=cw_rule_name)
def perform_snapshot(context, region, installed_region='us-east-1'):
    """Check the region and instance, and see if we should take any snapshots

    :param context: Lambda-style context object, used for timeout checks
    :param region: region whose instances are reviewed for due snapshots
    :param installed_region: region holding the DynamoDB configuration table
    """
    LOG.info('Reviewing snapshots in region %s', region)

    # fetch these, in case we need to figure out what applies to an instance
    configurations = dynamo.list_configurations(context, installed_region)
    LOG.debug('Fetched all possible configuration rules from DynamoDB')

    # build a list of any IDs (anywhere) that we should ignore
    ignore_ids = utils.build_ignore_list(configurations)

    # setup some lookup tables
    cache_data = utils.build_cache_maps(context, configurations, region,
                                        installed_region)
    all_instances = cache_data['instance_id_to_data']
    instance_configs = cache_data['instance_id_to_config']
    volume_snap_recent = cache_data['volume_id_to_most_recent_snapshot_date']

    for instance_id in set(all_instances.keys()):
        # before we go do some work
        if timeout_check(context, 'perform_snapshot'):
            break

        if instance_id in ignore_ids:
            continue

        # NOTE(review): direct [] lookup -- assumes every cached instance
        # also has a config entry; a missing key would raise KeyError
        snapshot_settings = instance_configs[instance_id]

        # parse out snapshot settings
        retention, frequency = utils.parse_snapshot_settings(snapshot_settings)

        # grab the data about this instance id, if we don't already have it
        instance_data = all_instances[instance_id]

        ami_id = instance_data['ImageId']
        LOG.info('Reviewing snapshots in region %s on instance %s', region,
                 instance_id)

        for dev in instance_data.get('BlockDeviceMappings', []):
            # before we go make a bunch more API calls
            if timeout_check(context, 'perform_snapshot'):
                break

            # we probably should have been using volume keys from one of the
            # caches here, but since we're not, we're going to have to check here too
            LOG.debug('Considering device %s', dev)
            volume_id = dev['Ebs']['VolumeId']

            if volume_id in ignore_ids:
                continue

            # find snapshots; recent is None if this volume has none cached
            recent = volume_snap_recent.get(volume_id)
            now = datetime.datetime.now(dateutil.tz.tzutc())

            # snapshot due?
            if should_perform_snapshot(frequency, now, volume_id, recent):
                LOG.debug('Performing snapshot for %s, calculating tags',
                          volume_id)
            else:
                LOG.debug('NOT Performing snapshot for %s', volume_id)
                continue

            # perform actual snapshot and create tag: retention + now() as a Y-M-D
            delete_on_dt = now + retention
            delete_on = delete_on_dt.strftime('%Y-%m-%d')

            # merge instance tags and volume tags into the tag set the
            # new snapshot should carry
            volume_data = utils.get_volume(volume_id, region=region)
            expected_tags = utils.calculate_relevant_tags(
                instance_data.get('Tags', None), volume_data.get('Tags', None))

            utils.snapshot_and_tag(
                instance_id,
                ami_id,
                volume_id,
                delete_on,
                region,
                additional_tags=expected_tags)
def clean_snapshot(context, region, default_min_snaps=5, installed_region='us-east-1'):
    """Check the region see if we should clean up any snapshots.

    Scans snapshots tagged ``DeleteOn`` with a date between seven days ago
    and today, and deletes them unless the owning volume would drop below
    its configured minimum snapshot count (or ``default_min_snaps`` when
    no configuration applies).

    :param context: Lambda-style context object, used for timeout checks
    :param region: region whose snapshots are evaluated for deletion
    :param default_min_snaps: minimum snapshots to keep per volume when
        no per-instance configuration specifies one
    :param installed_region: region holding the DynamoDB configuration table
    """
    LOG.info('clean_snapshot in region %s', region)

    # fetch these, in case we need to figure out what applies to an instance
    configurations = dynamo.list_configurations(context, installed_region)
    LOG.debug('Fetched all possible configuration rules from DynamoDB')

    # build a list of any IDs (anywhere) that we should ignore
    ignore_ids = utils.build_ignore_list(configurations)

    # figure out if we're in an account-wide mode where we ignore retention and
    # destroy all snapshots with a delete_on value that we want to delete
    ignore_retention_enabled = utils.ignore_retention_enabled(configurations)

    cache_data = utils.build_cache_maps(context, configurations, region,
                                        installed_region)
    instance_configs = cache_data['instance_id_to_config']
    all_volumes = cache_data['volume_id_to_instance_id']
    volume_snap_count = cache_data['volume_id_to_snapshot_count']

    # figure out what dates we want to nuke
    today = datetime.date.today()
    delete_on_values = []
    for i in range(0, 8):  # seven days ago until today
        del_date = today + timedelta(days=-i)
        delete_on_values.append(del_date.strftime('%Y-%m-%d'))

    # setup counters before we start
    deleted_count = 0

    # setup our filters
    filters = [
        {
            'Name': 'tag-key',
            'Values': ['DeleteOn']
        },
        {
            'Name': 'tag-value',
            'Values': delete_on_values
        },
    ]
    params = {'Filters': filters}

    # paginate the snapshot list
    tag_paginator = utils.build_snapshot_paginator(params, region)
    for page in tag_paginator:
        # stop if we're running out of time
        if timeout_check(context, 'clean_snapshot'):
            break

        # if we don't get even a page of results, or missing hash key, skip
        # BUG FIX: original used 'and' here, so a truthy page without a
        # 'Snapshots' key slipped past and raised KeyError below
        if not page or 'Snapshots' not in page:
            continue

        for snap in page['Snapshots']:
            # stop if we're running out of time
            if timeout_check(context, 'clean_snapshot'):
                break

            # ugly comprehension to strip out a tag; the filters above
            # guarantee a DeleteOn tag exists on every returned snapshot
            delete_on = [
                r['Value'] for r in snap['Tags'] if r.get('Key') == 'DeleteOn'
            ][0]

            # volume for snapshot
            snapshot_volume = snap['VolumeId']
            minimum_snaps = default_min_snaps

            if snapshot_volume in ignore_ids:
                continue

            try:
                # given volume id, get the instance for it
                volume_instance = all_volumes.get(snapshot_volume, None)

                # minimum required
                if volume_instance is not None:
                    snapshot_settings = instance_configs.get(
                        volume_instance, None)
                    if snapshot_settings is not None:
                        try:
                            minimum_snaps = int(
                                snapshot_settings['snapshot']['minimum'])
                        except ValueError:
                            raise Exception(
                                "Minimum number of snaps configured is not an integer."
                            )

                # current number of snapshots
                if snapshot_volume in volume_snap_count:
                    no_snaps = volume_snap_count[snapshot_volume]
                else:
                    raise Exception(
                        'Could not count snapshots, missing volume')

                # if we have less than the minimum, don't delete this one
                if no_snaps <= minimum_snaps:
                    LOG.warning('Not deleting snapshot %s from %s (%s)',
                                snap['SnapshotId'], region, delete_on)
                    LOG.warning('Only %s snapshots exist, below minimum of %s',
                                no_snaps, minimum_snaps)
                    continue

            # was a bare 'except:', which also swallowed SystemExit and
            # KeyboardInterrupt; Exception keeps the intended best-effort
            # behavior (including the Exceptions raised just above)
            except Exception:
                # if we couldn't figure out a minimum of snapshots,
                # don't clean this up -- these could be orphaned snapshots
                LOG.warning(
                    'Error analyzing snapshot %s from %s, skipping... (%s)',
                    snap['SnapshotId'], region, delete_on)

                # skip this loop iteration unless ignore_retention_enabled
                if not ignore_retention_enabled:
                    continue

            log_snapcount = volume_snap_count.get(snapshot_volume, 'unknown') \
                if volume_snap_count else None
            LOG.warning('Deleting snapshot %s from %s (%s, count=%s > %s)',
                        snap['SnapshotId'], region, delete_on, log_snapcount,
                        minimum_snaps)
            deleted_count += utils.delete_snapshot(snap['SnapshotId'], region)

    if deleted_count <= 0:
        LOG.warning('No snapshots were cleaned up for the entire region %s',
                    region)
    else:
        LOG.info(
            'Function clean_snapshots_tagged completed, deleted count: %s',
            str(deleted_count))

    LOG.info('Function clean_snapshot completed')
def perform_replication(context, region, installed_region='us-east-1'):
    """Check the region and instance, and see if we should clean or create copies

    NOTE(review): this definition is shadowed by a later function of the
    same name in this file; only the last definition is effective at import
    time -- confirm whether this earlier version should be removed.

    :param context: Lambda-style context object, used for timeout checks
    :param region: region whose snapshots are cleaned and/or copied
    :param installed_region: region holding the DynamoDB configuration table
    """
    LOG.info('Performing snapshot replication in region %s', region)

    # TL;DR -- always try to clean up first, before making new copies.

    # build a list of ignore IDs, just in case they are relevant here
    configurations = dynamo.list_configurations(context, installed_region)
    ignore_ids = utils.build_ignore_list(configurations)
    LOG.debug('Fetched all configured ignored IDs rules from DynamoDB')

    # 1. collect snapshots from this region
    relevant_tags = ['replication_src_region', 'replication_dst_region']
    found_snapshots = utils.build_replication_cache(context, relevant_tags,
                                                   configurations, region,
                                                   installed_region)

    # 2. evaluate snapshots that were copied to this region, if source not found, delete
    for snapshot in found_snapshots.get('replication_src_region', []):
        snapshot_id = snapshot['SnapshotId']
        snapshot_description = snapshot['Description']

        if timeout_check(context, 'perform_replication'):
            break

        if snapshot_id in ignore_ids:
            continue

        # snapshots still copying (or failed) are left alone
        if snapshot['State'] in ['pending', 'error']:
            LOG.warn('Skip cleaning up this snapshot ' + snapshot_id +
                     ' due to ' + snapshot['State'] + ' state: ' +
                     snapshot_description)
            continue

        LOG.info('Working on cleaning up this snapshot ' + snapshot_id +
                 ' (if needed): ' + snapshot_description)

        # what region did this come from?
        # assumes the replication_src_region tag is always present on these
        # snapshots (index [0] would raise otherwise) -- guaranteed by the
        # cache being keyed on that tag
        tag_pairs = snapshot.get('Tags', [])
        region_tag_pair = [
            x for x in tag_pairs
            if x.get('Key', None) == 'replication_src_region'
        ]
        region_tag_value = region_tag_pair[0].get('Value')

        # what snapshot id did this come from?
        snapshotid_tag_pair = [
            x for x in tag_pairs
            if x.get('Key', None) == 'replication_snapshot_id'
        ]
        snapshotid_tag_value = snapshotid_tag_pair[0].get('Value')

        ec2_source = boto3.client('ec2', region_name=region_tag_value)

        try:
            found_originals = ec2_source.describe_snapshots(
                SnapshotIds=[snapshotid_tag_value
                             ],  # we think the original snapshot id is this
                Filters=[
                    # where it gets copied to should be us
                    {
                        'Name': 'tag:replication_dst_region',
                        'Values': [region]
                    },
                ])
        except Exception as err:
            # the original was already deleted; treat as "no originals"
            if 'InvalidSnapshot.NotFound' in str(err):
                found_originals = {'Snapshots': []}
            else:
                raise err

        num_found = len(found_originals.get('Snapshots', []))
        if num_found > 0:
            LOG.info('Not removing this snapshot ' + snapshot_id + ' from ' +
                     region + ' since snapshot_id ' + snapshotid_tag_value +
                     ' was already found in ' + region_tag_value)
            continue

        # ax it!
        LOG.warn('Removing this snapshot ' + snapshot_id + ' from ' + region +
                 ' since snapshot_id ' + snapshotid_tag_value +
                 ' was not found in ' + region_tag_value)
        utils.delete_snapshot(snapshot_id, region)

    # 3. evaluate snapshots that should be copied from this region, if dest not found, copy and tag
    for snapshot in found_snapshots.get('replication_dst_region', []):
        snapshot_id = snapshot['SnapshotId']
        snapshot_description = snapshot['Description']

        if timeout_check(context, 'perform_replication'):
            break

        if snapshot_id in ignore_ids:
            continue

        if snapshot['State'] in ['pending', 'error']:
            LOG.warn('Skip copying this snapshot ' + snapshot_id + ' due to ' +
                     snapshot['State'] + ' state: ' + snapshot_description)
            continue

        LOG.info('Working on copying this snapshot ' + snapshot_id +
                 ' (if needed): ' + snapshot_description)

        # what region should this be mapped to?
        tag_pairs = snapshot.get('Tags', [])
        region_tag_pair = [
            x for x in tag_pairs
            if x.get('Key', None) == 'replication_dst_region'
        ]
        region_tag_value = region_tag_pair[0].get('Value')

        # does it already exist in the target region?
        ec2_destination = boto3.client('ec2', region_name=region_tag_value)
        found_replicas = ec2_destination.describe_snapshots(Filters=[
            # came from our region originally
            {
                'Name': 'tag:replication_src_region',
                'Values': [region]
            },
            # came from our snapshot originally
            {
                'Name': 'tag:replication_snapshot_id',
                'Values': [snapshot_id]
            }
        ])
        num_found = len(found_replicas.get('Snapshots', []))
        if num_found > 0:
            LOG.info('Not creating more snapshots, since snapshot_id ' +
                     snapshot_id + ' was already found in ' + region_tag_value)
            continue

        # we need to make one in the target region
        LOG.warn('Creating a new snapshot, since snapshot_id ' + snapshot_id +
                 ' was not already found in ' + region_tag_value)
        utils.copy_snapshot_and_tag(context, region, region_tag_value,
                                    snapshot_id, snapshot_description)
def perform_replication(context, region, installed_region='us-east-1'):
    """Check the region and instance, and see if we should clean or create copies.

    Builds per-region snapshot caches first, then (a) deletes replicas in
    this region whose source snapshot no longer exists, and (b) copies
    snapshots from this region to their configured destination region when
    no replica exists there yet.

    :param context: Lambda-style context object, used for timeout checks
    :param region: region whose snapshots are cleaned and/or copied
    :param installed_region: region holding the DynamoDB configuration table
    """
    LOG.info('Performing snapshot replication in region %s', region)

    # TL;DR -- always try to clean up first, before making new copies.

    # build a list of ignore IDs, just in case they are relevant here
    configurations = dynamo.list_configurations(context, installed_region)
    ignore_ids = utils.build_ignore_list(configurations)
    LOG.debug('Fetched all configured ignored IDs rules from DynamoDB')

    # 1. collect snapshots from this region
    snap_cached_src_regions = []
    snap_cached_dst_regions = []
    src_snap_list = []
    replication_snap_list = []
    relevant_tags = ['replication_src_region', 'replication_dst_region']
    found_snapshots = utils.build_replication_cache(
        context, relevant_tags, configurations, region, installed_region
    )

    # 1a. build snapshot cache from all source regions
    for snapshot_regions in found_snapshots.get('replication_src_region', []):
        # what region did this come from?
        tag_pairs = snapshot_regions.get('Tags', [])
        region_tag_pair = [x for x in tag_pairs
                           if x.get('Key') == 'replication_src_region']
        region_tag_value = region_tag_pair[0].get('Value')
        if region_tag_value not in snap_cached_src_regions:
            LOG.info('Caching snapshots in source region: %s', region_tag_value)
            snap_cached_src_regions.append(region_tag_value)
            ec2_source = boto3.client('ec2', region_name=region_tag_value)
            try:
                response = ec2_source.describe_snapshots(
                    Filters=[{'Name': 'tag:replication_dst_region',
                              'Values': [region]}]
                )
                mysnaps = response['Snapshots']
            except Exception as err:
                if 'InvalidSnapshot.NotFound' in str(err):
                    # BUG FIX: was {'Snapshots', []} -- a set literal whose
                    # list element is unhashable (TypeError at runtime);
                    # the loop below expects a list of snapshot dicts
                    mysnaps = []
                else:
                    raise err
            for snap in mysnaps:
                src_snap_list.append(snap['SnapshotId'])
            LOG.info('Caching completed for source region: ' +
                     region_tag_value + ': cache size: ' +
                     str(len(src_snap_list)))
            # throttle: avoid hammering the EC2 API across regions
            sleep(1)

    # 1b. build snapshot cache for all destination regions
    for snapshot_regions in found_snapshots.get('replication_dst_region', []):
        # which region is destination
        tag_pairs = snapshot_regions.get('Tags', [])
        region_tag_pair = [x for x in tag_pairs
                           if x.get('Key') == 'replication_dst_region']
        region_tag_value = region_tag_pair[0].get('Value')
        if region_tag_value not in snap_cached_dst_regions:
            LOG.info('Caching snapshots in destination region: %s',
                     region_tag_value)
            snap_cached_dst_regions.append(region_tag_value)
            ec2_source = boto3.client('ec2', region_name=region_tag_value)
            try:
                response = ec2_source.describe_snapshots(
                    Filters=[{'Name': 'tag:replication_src_region',
                              'Values': [region]}]
                )
                mysnaps = response['Snapshots']
            except Exception as err:
                if 'InvalidSnapshot.NotFound' in str(err):
                    # BUG FIX: same set-literal mistake as in 1a above
                    mysnaps = []
                else:
                    raise err
            for snap in mysnaps:
                for tags in snap['Tags']:
                    if tags["Key"] == 'replication_snapshot_id':
                        replication_snap_list.append(tags["Value"])
            LOG.info('Caching completed for destination region: ' +
                     region_tag_value + ': cache size: ' +
                     str(len(replication_snap_list)))
            sleep(1)

    # 2. evaluate snapshots that were copied to this region, if source not found, delete
    for snapshot in found_snapshots.get('replication_src_region', []):
        snapshot_id = snapshot['SnapshotId']
        snapshot_description = snapshot['Description']

        if timeout_check(context, 'perform_replication'):
            break

        if snapshot_id in ignore_ids:
            continue

        if snapshot['State'] in ['pending', 'error']:
            LOG.warning('Skip cleaning up this snapshot ' + snapshot_id +
                        ' due to ' + snapshot['State'] + ' state: ' +
                        snapshot_description)
            continue

        LOG.info('Working on cleaning up this snapshot ' + snapshot_id +
                 ' (if needed): ' + snapshot_description)

        # what region did this come from?
        tag_pairs = snapshot.get('Tags', [])
        region_tag_pair = [x for x in tag_pairs
                           if x.get('Key') == 'replication_src_region']
        region_tag_value = region_tag_pair[0].get('Value')

        # what snapshot id did this come from?
        snapshotid_tag_pair = [x for x in tag_pairs
                               if x.get('Key') == 'replication_snapshot_id']
        snapshotid_tag_value = snapshotid_tag_pair[0].get('Value')

        if snapshotid_tag_value in src_snap_list:
            LOG.info('Not removing this snapshot ' + snapshot_id + ' from ' +
                     region + ' since snapshot_id ' + snapshotid_tag_value +
                     ' was found in ' + region_tag_value)
            continue

        # ax it!
        LOG.warning('Removing this snapshot ' + snapshot_id + ' from ' +
                    region + ' since snapshot_id ' + snapshotid_tag_value +
                    ' was not found in ' + region_tag_value)
        utils.delete_snapshot(snapshot_id, region)
        sleep(2)

    # 3. evaluate snapshots that should be copied from this region, if dest not found, copy and tag
    for snapshot in found_snapshots.get('replication_dst_region', []):
        snapshot_id = snapshot['SnapshotId']
        snapshot_description = snapshot['Description']

        if timeout_check(context, 'perform_replication'):
            break

        if snapshot_id in ignore_ids:
            continue

        if snapshot['State'] in ['pending', 'error']:
            LOG.warning('Skip copying this snapshot ' + snapshot_id +
                        ' due to ' + snapshot['State'] + ' state: ' +
                        snapshot_description)
            continue

        LOG.info('Working on copying this snapshot ' + snapshot_id +
                 ' (if needed): ' + snapshot_description)

        # what region should this be mapped to?
        tag_pairs = snapshot.get('Tags', [])
        region_tag_pair = [x for x in tag_pairs
                           if x.get('Key') == 'replication_dst_region']
        region_tag_value = region_tag_pair[0].get('Value')

        name_tag_pair = [x for x in tag_pairs if x.get('Key') == 'Name']
        # adds an empty dict so indexing works even when no Name tag is present
        name_tag_pair.append({})
        name_tag_value = name_tag_pair[0].get('Value')

        # does it already exist in the target region?
        if snapshot_id in replication_snap_list:
            LOG.info('Not creating more snapshots, since snapshot_id ' +
                     snapshot_id + ' was already found in ' + region_tag_value)
            continue

        # we need to make one in the target region
        LOG.warning('Creating a new snapshot, since snapshot_id ' +
                    snapshot_id + ' was not already found in ' +
                    region_tag_value)
        utils.copy_snapshot_and_tag(
            context, region, region_tag_value, name_tag_value,
            snapshot_id, snapshot_description)
def sanity_check(context, installed_region='us-east-1', aws_account_id=None):
    """Retrieve configuration from DynamoDB and return array of dictionary objects.

    Cross-checks the deployment: S3 bucket and DynamoDB table existence,
    configurations that match no instances, and instances tagged for backup
    that have no matching configuration. Each problem found is appended to
    the returned list as a human-readable string.

    :param context: Lambda-style context object passed through to helpers
    :param installed_region: region holding the bucket and config table
    :param aws_account_id: account to check; discovered from instances if None
    :returns: list of finding strings (empty when everything checks out)
    """
    findings = []

    # determine aws account id
    if aws_account_id is None:
        found_owners = utils.get_owner_id(context)
    else:
        found_owners = [aws_account_id]

    if len(found_owners) <= 0:
        findings.append(
            'There are no instances I could find on this account. ' +
            'Cannot figure out the account number without any instances. ' +
            'Without account number, cannot figure out what to name the S3 bucket or stack.'
        )
        return findings
    else:
        aws_account = found_owners[0]

    # The bucket does not exist or you have no access
    bucket_exists = None
    try:
        s3_client = boto3.client('s3', region_name=installed_region)
        ebs_bucket_name = 'ebs-snapper-{}'.format(aws_account)
        s3_client.head_bucket(Bucket=ebs_bucket_name)
        bucket_exists = True
    except ClientError:
        bucket_exists = False

    # Configurations exist but tags do not
    configurations = []
    dynamodb_exists = None
    try:
        configurations = dynamo.list_configurations(context, installed_region)
        dynamodb_exists = True
    except ClientError:
        configurations = []
        dynamodb_exists = False

    # we're going across all regions, but store these in one
    regions = utils.get_regions(must_contain_instances=True)
    ignored_tag_values = ['false', '0', 'no']
    found_config_tag_values = []
    found_backup_tag_values = []

    # check out all the configs in dynamodb
    for config in configurations:

        # if it's missing the match section, ignore it
        if not utils.validate_snapshot_settings(config):
            findings.append(
                "Found a snapshot configuration that isn't valid: {}".format(
                    str(config)))
            continue

        # build a boto3 filter to describe instances with
        configuration_matches = config['match']
        filters = utils.convert_configurations_to_boto_filter(
            configuration_matches)

        # PY3 FIX: dict.iteritems() does not exist on Python 3;
        # items() behaves identically for this use on both versions
        for k, v in configuration_matches.items():
            if str(v).lower() in ignored_tag_values:
                continue
            to_add = '{}, value:{}'.format(k, v)
            found_config_tag_values.append(to_add)

        # if we ended up with no boto3 filters, we bail so we don't snapshot everything
        if len(filters) <= 0:
            LOG.warning('Could not convert configuration match to a filter: %s',
                        configuration_matches)
            findings.append(
                "Found a snapshot configuration that couldn't be converted to a filter"
            )
            continue

        filters.append({
            'Name': 'instance-state-name',
            'Values': ['running', 'stopped']
        })

        found_instances = None
        for r in regions:
            ec2 = boto3.client('ec2', region_name=r)
            instances = ec2.describe_instances(Filters=filters)
            res_list = instances.get('Reservations', [])

            for reservation in res_list:
                inst_list = reservation.get('Instances', [])
                if len(inst_list) > 0:
                    found_instances = True
                    break

            # Look at all the tags on instances
            found_tag_data = ec2.describe_tags(Filters=[{
                'Name': 'resource-type',
                'Values': ['instance']
            }])

            for tag in found_tag_data.get('Tags', []):
                k = tag['Key']
                v = tag['Value']

                if str(v).lower() in ignored_tag_values:
                    continue

                to_add = 'tag:{}, value:{}'.format(k, v)
                if k.lower() == 'backup' and to_add not in found_backup_tag_values:
                    found_backup_tag_values.append(to_add)

        if not found_instances:
            long_config = []
            for k, v in configuration_matches.items():
                long_config.append('{}, value:{}'.format(k, v))
            findings.append(
                "{} was configured, but didn't match any instances".format(
                    ", ".join(long_config)))

    if len(found_backup_tag_values) > 0 or len(found_config_tag_values) > 0:
        if not (bucket_exists and dynamodb_exists):
            # TYPO FIX in user-facing message: "Configuations" -> "Configurations"
            findings.append(
                'Configurations or tags are present, but EBS snapper not fully deployed'
            )

    if bucket_exists and dynamodb_exists and len(configurations) == 0:
        findings.append(
            'No configurations existed for this account, but ebs-snapper was deployed'
        )

    # tagged instances without any config
    for s in found_backup_tag_values:
        if s not in found_config_tag_values:
            findings.append(
                '{} was tagged on an instance, but no configuration exists'.
                format(s))

    LOG.debug("configs: %s", str(found_config_tag_values))
    LOG.debug("tags: %s", str(found_backup_tag_values))

    return findings