def ec2_service(self):
    """
    Returns the EC2 service instance of this object, creating it on first use.
    The created instance is stored in self._ec2_service and reused by later calls.
    :return: the stored (or newly created) EC2 service instance
    """
    existing = self._ec2_service
    if existing is not None:
        # already created by an earlier call
        return existing

    self._ec2_service = services.create_service(
        "ec2",
        session=self._session_,
        service_retry_strategy=get_default_retry_strategy("ec2", context=self._context_))
    return self._ec2_service
def get_instances():
    """
    Lists the instances with the requested ids, returning only their tags and instance id.
    Uses self, instance_ids and region from the enclosing scope; when region is None the
    region of the action (self._region_) is used.
    :return: list with the selected {Tags, InstanceId} data for each instance
    """
    retry_strategy = get_default_retry_strategy("ec2", context=self._context_)
    ec2 = services.create_service("ec2",
                                  session=self._session_,
                                  service_retry_strategy=retry_strategy)

    # fall back to the region of the action if no explicit region was given
    target_region = self._region_ if region is None else region

    found = ec2.describe(services.ec2_service.INSTANCES,
                         InstanceIds=instance_ids,
                         region=target_region,
                         tags=True,
                         select="Reservations[*].Instances[].{Tags:Tags,InstanceId:InstanceId}")
    return list(found)
def get_source_snapshot():
    """
    Retrieves the source snapshot (self.source_snapshot_id) from the source region.
    Uses self from the enclosing scope.
    :return: result of ec2.get for the snapshot, restricted to snapshots restorable by "self"
    """
    retry_strategy = get_default_retry_strategy("ec2", context=self._context_)
    ec2 = services.create_service("ec2",
                                  session=self._session_,
                                  service_retry_strategy=retry_strategy)

    # select on the id of the source snapshot only
    snapshot_id_filter = {
        "Name": "snapshot-id",
        "Values": [self.source_snapshot_id]
    }
    return ec2.get(services.ec2_service.SNAPSHOTS,
                   region=self.source_region,
                   RestorableByUserIds=["self"],
                   Filters=[snapshot_id_filter])
def is_completed(self, _, start_results):
    """
    Checks whether the snapshot copy that was started earlier has finished.
    The id of the copy is read from the start results and looked up in the destination region,
    selecting only snapshots in state 'completed'. While no such snapshot is found None is returned.
    :param _: not used
    :param start_results: json text returned when the copy was started, holds "copied-snapshot-id"
    :return: safe_json of the completed snapshot, None if the copy is not completed yet
    """
    # the start result is handed over as text, this action stores it as json
    start_data = json.loads(start_results)

    # service instance used to look up the copied snapshot
    ec2 = services.create_service(
        "ec2",
        session=self.session,
        service_retry_strategy=get_default_retry_strategy("ec2", context=self.context))

    copied_snapshot_id = start_data["copied-snapshot-id"]

    # only a snapshot with the id returned by the copy call that is in state completed is selected
    id_filter = {
        "Name": "snapshot-id",
        "Values": [copied_snapshot_id]
    }
    copied_snapshot = ec2.get("Snapshots",
                              region=self.destination_region,
                              select="Snapshots[?State=='completed']",
                              OwnerIds=["self"],
                              Filters=[id_filter])

    if copied_snapshot is None:
        # copy is still in progress
        self.logger.info(INFO_COPY_PENDING, copied_snapshot_id, self.destination_region)
        return None

    # copy has completed
    self.logger.info(INFO_CHECK_COMPLETED_RESULT, copied_snapshot)
    self.logger.info(INFO_COPY_COMPLETED,
                     self.source_snapshot_id,
                     self.source_region,
                     copied_snapshot_id,
                     self.destination_region)
    return safe_json(copied_snapshot)
def _account_service_sessions(self, service_name):
    """
    Yields a service instance for each account/role that must be handled.

    If the event names a specific account, only that account is handled: the account the code
    runs in gets a service without a role, any other account gets a service for each
    cross-account role of the task that belongs to that account. An error is logged when no
    role of the task matches the account.

    If the event does not name an account, a service is yielded for this account (unless the
    task disables it) followed by one for each cross-account role of the task.

    :param service_name: name of the service to create instances for
    :return: generator of service instances
    """
    account = self._event.get(handlers.HANDLER_SELECT_ARGUMENTS, {}).get(handlers.HANDLER_EVENT_ACCOUNT)
    retry_strategy = get_default_retry_strategy(service=service_name, context=self._context)

    if account is None:
        # no specific account requested, handle this account and all cross-account roles
        if self.task.get(handlers.TASK_THIS_ACCOUNT, True):
            yield services.create_service(service_name=service_name,
                                          service_retry_strategy=retry_strategy)
        for role in self.task.get(handlers.TASK_CROSS_ACCOUNT_ROLES, []):
            yield services.create_service(service_name=service_name,
                                          role_arn=role,
                                          service_retry_strategy=retry_strategy)
        return

    if account == AwsService.get_aws_account():
        # use the same retry strategy as every other service created by this method
        yield services.create_service(service_name=service_name,
                                      service_retry_strategy=retry_strategy)
        return

    # The error must only be logged when none of the roles matches the account. The previous
    # `else` had no `break` to pair with, so it also fired when a matching role had been used.
    role_found = False
    for role in self.task[actions.ACTION_CROSS_ACCOUNT]:
        if AwsService.account_from_role_arn(role) == account:
            role_found = True
            yield services.create_service(service_name=service_name,
                                          role_arn=role,
                                          service_retry_strategy=retry_strategy)

    if not role_found:
        self._logger.error(MSG_NO_CROSS_ACCOUNT_ROLE, self.task[handlers.TASK_NAME], account)
def execute(self):
    """
    Deletes the snapshots in self.snapshots that are outside the retention policy.

    If self.retention_days is not 0, snapshots with a start time before (now - retention_days)
    are deleted. Otherwise, per volume, only the self.retention_count most recent snapshots are
    kept. Deletion stops early when self.time_out() reports a timeout.

    :return: self.result, extended with the deleted snapshot ids per region, the number of
        snapshots, the number of deleted snapshots and the metrics data. In dryrun mode an
        exception raised by the delete call is stored in result["delete_snapshot"] instead of
        being raised.
    """

    def get_start_time(sn):
        # StartTime is either a datetime or its text representation
        if isinstance(sn["StartTime"], datetime):
            return sn["StartTime"]
        return dateutil.parser.parse(sn["StartTime"])

    def snapshots_to_delete():

        def by_retention_days():
            # snapshots started before this moment are deleted
            delete_before_dt = self._datetime_.utcnow().replace(
                tzinfo=pytz.timezone("UTC")) - timedelta(days=int(self.retention_days))
            self._logger_.info(INF_RETENTION_DAYS, delete_before_dt)

            for sn in sorted(self.snapshots, key=lambda snap: snap["Region"]):
                snapshot_dt = get_start_time(sn)
                if snapshot_dt < delete_before_dt:
                    self._logger_.debug(DEBUG_SN_RETENTION_DAYS_DELETE,
                                        sn["SnapshotId"],
                                        snapshot_dt,
                                        sn["VolumeId"],
                                        self.retention_days)
                    yield sn
                else:
                    # The previous call passed `sn, s, ["VolumeId"]`, which referenced the
                    # undefined name `s`.
                    # NOTE(review): argument order assumed to be snapshot id, volume id,
                    # start time, threshold - confirm against DEBUG_SN_RETENTION_DAYS_KEEP.
                    self._logger_.debug(DEBUG_SN_RETENTION_DAYS_KEEP,
                                        sn["SnapshotId"],
                                        sn["VolumeId"],
                                        snapshot_dt,
                                        delete_before_dt.isoformat())

        def by_retention_count():
            self._logger_.info(INF_KEEP_RETENTION_COUNT, self.retention_count)

            # most recent snapshots first, grouped per volume
            sorted_snapshots = sorted(self.snapshots,
                                      key=lambda snap: (snap["VolumeId"], snap["StartTime"]),
                                      reverse=True)
            volume = None
            count_for_volume = 0
            for sn in sorted_snapshots:
                if sn["VolumeId"] != volume:
                    # first snapshot of the next volume, restart counting
                    volume = sn["VolumeId"]
                    count_for_volume = 0

                count_for_volume += 1
                if count_for_volume > self.retention_count:
                    self._logger_.debug(DEBUG_SN_DELETE_RETENTION_COUNT,
                                        sn["SnapshotId"],
                                        sn["StartTime"],
                                        sn["VolumeId"],
                                        count_for_volume)
                    yield sn
                else:
                    self._logger_.debug(DEBUG_SN_KEEP_RETENTION_COUNT,
                                        sn["SnapshotId"],
                                        sn["StartTime"],
                                        sn["VolumeId"],
                                        count_for_volume)

        if self.retention_days != 0:
            return list(by_retention_days())
        return list(by_retention_count())

    self._logger_.info("{}, version {}", self.properties[ACTION_TITLE], self.properties[ACTION_VERSION])

    deleted_count = 0

    self._logger_.debug("Snapshots : {}", self.snapshots)

    snapshot_id = ""

    # guard against an empty snapshot list, indexing [0] would raise IndexError
    volume_id = self.snapshots[0].get("VolumeId", "") if self.snapshots else ""
    self._logger_.info(INF_SNAPSHOTS_FOR_VOLUME,
                       ",".join(["{} ({})".format(s["SnapshotId"], s["StartTime"]) for s in self.snapshots]),
                       volume_id)

    ec2 = services.create_service(
        "ec2",
        session=self._session_,
        service_retry_strategy=get_default_retry_strategy("ec2", context=self._context_))

    for deleted_snapshot in snapshots_to_delete():

        if self.time_out():
            break

        # make sure the list of deleted snapshots for this region exists, also if the
        # result already has a "deleted" entry without this region
        if "deleted" not in self.result:
            self.result["deleted"] = {}
        if self._region_ not in self.result["deleted"]:
            self.result["deleted"][self._region_] = []

        try:
            snapshot_id = deleted_snapshot["SnapshotId"]
            if ec2.get(services.ec2_service.SNAPSHOTS,
                       region=self._region_,
                       SnapshotIds=[snapshot_id]) is None:
                # snapshot was removed since it was selected
                self._logger_.info(INF_NO_LONGER_AVAILABLE, snapshot_id)
            else:
                self.ec2_client.delete_snapshot_with_retries(
                    DryRun=self.dryrun,
                    SnapshotId=snapshot_id,
                    _expected_boto3_exceptions_=["InvalidSnapshot.NotFound"])
                time.sleep(0.2)
                deleted_count += 1
                copied = deleted_snapshot.get("IsCopied", False)
                self._logger_.info(INF_SNAPSHOT_DELETED,
                                   "copied " if copied else "",
                                   snapshot_id,
                                   deleted_snapshot["VolumeId"])
                self.result["deleted"][self._region_].append(snapshot_id)

        except ClientError as ex_client:
            # a snapshot that no longer exists is not an error for this action
            if ex_client.response.get("Error", {}).get("Code", "") == "InvalidSnapshot.NotFound":
                self._logger_.info(INF_SNAPSHOT_NOT_FOUND, snapshot_id)
            else:
                raise
        except Exception as ex:
            if self.dryrun:
                # in dryrun mode the exception is reported in the result instead of raised
                self._logger_.debug(str(ex))
                self.result["delete_snapshot"] = str(ex)
                return self.result
            else:
                raise

    self.result.update({
        "snapshots": len(self.snapshots),
        "snapshots-deleted": deleted_count,
        METRICS_DATA: build_action_metrics(self, DeletedSnapshots=deleted_count)
    })

    return self.result
def _select_parameters(self, event_name, task):
    """
    Builds the parameters used to select the snapshots for the event that is handled.

    - snapshot created for a volume: select by the id of the source volume (self._source)
    - snapshot copied for a volume: look up the copied snapshot, read the source volume id from
      its tags and select all snapshots that belong to that volume, either by volume id or by
      the source volume tag
    - any other event: select the snapshots listed in the resources of the event

    :param event_name: not used, the name is read with self._event_name()
    :param task: not used
    :return: dictionary with select parameters, None if no snapshots could be determined
    """
    if self._event_name() == EBS_SNAPSHOT_FOR_VOLUME_CREATED:
        volume_filter = {
            "Name": "volume-id",
            "Values": [self._source]
        }
        return {
            "Filters": [volume_filter],
            "_expected_boto3_exceptions_": ["InvalidVolume.NotFound"]
        }

    if self._event_name() != EBS_SNAPSHOT_FOR_VOLUME_COPIED:
        # default, the snapshots are the resources of the event
        return {
            "SnapshotIds": [r.split("/")[-1] for r in self._event.get("resources")],
            "_expected_boto3_exceptions_": ["InvalidSnapshot.NotFound"]
        }

    # event for a copied snapshot
    ec2 = services.create_service(
        "ec2",
        role_arn=self._role_executing_triggered_task,
        service_retry_strategy=get_default_retry_strategy("ec2", context=self._context))

    try:
        source_volume = None
        copied_snapshot_id = self._event["detail"]["snapshot_id"].split("/")[-1]

        # the copied snapshot, including its tags
        copied_snapshot = ec2.get(services.ec2_service.SNAPSHOTS,
                                  SnapshotIds=[copied_snapshot_id],
                                  OwnerIds=["self"],
                                  region=self._event_region(),
                                  tags=True,
                                  _expected_boto3_exceptions_=["InvalidSnapshot.NotFound"])

        if copied_snapshot is not None:
            # the id of the source volume is stored in a tag of the copy
            copy_tags = copied_snapshot.get("Tags", {})
            source_volume = copy_tags.get(marker_snapshot_tag_source_source_volume_id(), None)

        if source_volume is None:
            self._logger.warning(WARN_SOURCE_VOLUME_NOT_FOUND, copied_snapshot_id)
            return None

        # snapshots that were created from the source volume
        snapshots = list(
            ec2.describe(services.ec2_service.SNAPSHOTS,
                         region=self._event_region(),
                         OwnerIds=["self"],
                         Filters=[{
                             "Name": "volume-id",
                             "Values": [source_volume]
                         }],
                         _expected_boto3_exceptions_=["InvalidVolume.NotFound"]))

        # snapshots that are tagged with the source volume
        snapshots += list(
            ec2.describe(services.ec2_service.SNAPSHOTS,
                         region=self._event_region(),
                         OwnerIds=["self"],
                         tags=True,
                         Filters=[{
                             "Name": "tag:" + actions.marker_snapshot_tag_source_source_volume_id(),
                             "Values": [source_volume]
                         }],
                         _expected_boto3_exceptions_=["InvalidVolume.NotFound"]))

        if len(snapshots) == 0:
            return None

        # a snapshot can be found by both selections, keep each id once
        unique_ids = list(set([found["SnapshotId"] for found in snapshots]))

        return {
            handlers.HANDLER_EVENT_RESOURCE_NAME: services.ec2_service.SNAPSHOTS,
            handlers.HANDLER_EVENT_REGIONS: [self._event_region()],
            "SnapshotIds": unique_ids,
            "_expected_boto3_exceptions_": ["InvalidSnapshot.NotFound"]
        }

    except Exception as ex:
        self._logger.error(ERR_GETTING_SOURCE_VOLUME,
                           self._event.get("detail", {}).get("source", ""),
                           ex)
        return None
def is_completed(self, snapshot_create_data):
    """
    Checks if the copy of the snapshot has completed and, if so, finishes the action.

    After completion the create volume permissions are granted, the shared copies and the shared
    source snapshot are tagged, and the source snapshot is either deleted or tagged.

    :param snapshot_create_data: data returned when the copy was started; read keys are
        "already-copied", "not-longer-available", "copy-snapshot-id" and "tags"
    :return: self.result if there was nothing to copy, the copied snapshot if the copy has
        completed, None if the copy is still pending (or in a state that is not handled)
    """

    def delete_source_after_copy():
        # removes the source snapshot once the copy is available
        self._logger_.info(INF_DELETING_SNAPSHOT, self.source_snapshot_id)
        self.ec2_source_client.delete_snapshot_with_retries(SnapshotId=self.source_snapshot_id)
        self._logger_.info(INF_SNAPSHOT_DELETED, self.source_snapshot_id, self.source_region)

    def source_tags(copy_id, source_tags_param):
        # builds the tags for the source snapshot from the template in the named parameter
        placeholders = {
            TAG_PLACEHOLDER_COPIED_SNAPSHOT_ID: copy_id,
            TAG_PLACEHOLDER_COPIED_REGION: self._destination_region_
        }
        built = {}
        built.update(self.build_tags_from_template(parameter_name=source_tags_param,
                                                   region=self.source_region,
                                                   tag_variables=placeholders))
        return built

    def set_source_snapshot_tags(copy_id):
        tags_for_source = source_tags(copy_id, PARAM_SOURCE_TAGS)
        if len(tags_for_source) == 0:
            return

        self._logger_.info(INF_CREATE_SOURCE_TAGS, tags_for_source, self._account_)
        tagging.set_ec2_tags(ec2_client=self.ec2_source_client,
                             resource_ids=[self.source_snapshot_id],
                             tags=tags_for_source,
                             logger=self._logger_)
        self._logger_.info(INF_TAGS_CREATED)

    def grant_create_volume_permissions(snap_id):
        # nothing to grant if no accounts are configured
        if self.accounts_with_create_permissions is None or len(self.accounts_with_create_permissions) == 0:
            return

        permission_args = {
            "CreateVolumePermission": {
                "Add": [{"UserId": a.strip()} for a in self.accounts_with_create_permissions]
            },
            "SnapshotId": snap_id
        }
        try:
            self.ec2_destination_client.modify_snapshot_attribute_with_retries(**permission_args)
            self._logger_.info(INF_SETTING_CREATE_VOLUME_PERMISSIONS,
                               ", ".join(self.accounts_with_create_permissions))
        except Exception as ex:
            raise_exception(ERR_SETTING_CREATE_VOLUME_PERMISSIONS,
                            self.accounts_with_create_permissions,
                            ex)

    def tag_shared_snapshots(tags, snap_id):
        # creates the tags for the copied snapshot in the accounts the snapshot is shared with
        if len(tags) == 0 or not self.tag_snapshots_in_shared_accounts:
            return
        if self.accounts_with_create_permissions in ["", None]:
            return

        for account in self.accounts_with_create_permissions:
            tagging_session = self.get_action_session(account=account,
                                                      param_name=PARAM_DESTINATION_ACCOUNT_TAG_ROLENAME,
                                                      logger=self._logger_)
            if tagging_session is None:
                self._logger_.error(ERR_TAGS_NOT_SET_IN_ACCOUNT, account)
                continue

            try:
                account_client = get_client_with_retries(service_name="ec2",
                                                         methods=["create_tags", "delete_tags"],
                                                         context=self._context_,
                                                         region=self.get(PARAM_DESTINATION_REGION),
                                                         session=tagging_session,
                                                         logger=self._logger_)
                tagging.set_ec2_tags(ec2_client=account_client,
                                     resource_ids=[snap_id],
                                     tags=tags,
                                     logger=self._logger_)
                self._logger_.info(INF_CREATE_SHARED_TAGS, tags, account)
            except Exception as ex:
                raise_exception(ERR_SETTING_SHARED_TAGS, account, str(ex))

    def tag_shared_source_snapshot(copy_id):
        # creates the tags for a shared source snapshot in the account that owns it
        tags_for_owner = source_tags(copy_id, PARAM_SOURCE_SHARED_BY_TAGS)
        if len(tags_for_owner) == 0 or not self.tag_snapshots_in_source_account:
            return

        # only snapshots that are shared by another account are tagged
        if self.owner == self.get_account_for_task():
            self._logger_.debug("Account {} is owner, no tags set for snapshot {} in account of owner",
                                self._account_,
                                self.source_snapshot_id)
            return

        tagging_session = self.get_action_session(account=self.owner,
                                                  param_name=PARAM_SOURCE_ACCOUNT_TAG_ROLE_NAME,
                                                  logger=self._logger_)
        if tagging_session is None:
            self._logger_.error(ERR_TAGS_NOT_SET_IN_ACCOUNT, self.owner)
            return

        try:
            self._logger_.info(INF_CREATE_SHARED_ACCOUNT_SNAPSHOT_TAGS,
                               tags_for_owner,
                               self.source_snapshot_id,
                               self.owner)
            owner_client = get_client_with_retries(service_name="ec2",
                                                   methods=["create_tags", "delete_tags"],
                                                   context=self._context_,
                                                   region=self.source_region,
                                                   session=tagging_session,
                                                   logger=self._logger_)
            tagging.set_ec2_tags(ec2_client=owner_client,
                                 resource_ids=[self.source_snapshot_id],
                                 tags=tags_for_owner,
                                 logger=self._logger_)
        except Exception as ex:
            raise_exception(ERR_SETTING_SOURCE_SHARED_TAGS, self.owner, str(ex))

    # nothing was started if the snapshot was already copied
    if snapshot_create_data.get("already-copied", False):
        self._logger_.info(INF_COMPLETE_ALREADY_COPIED, self.source_snapshot_id)
        return self.result

    # or if the source snapshot was no longer available
    if snapshot_create_data.get("not-longer-available", False):
        self._logger_.info(INF_COMPLETED_NOT_LONGER_AVAILABLE, self.source_snapshot_id)
        return self.result

    # service instance used to look up the copy
    ec2 = services.create_service(
        "ec2",
        session=self._session_,
        service_retry_strategy=get_default_retry_strategy("ec2", context=self._context_))

    copy_snapshot_id = snapshot_create_data["copy-snapshot-id"]

    # look up the snapshot with the id that was returned when the copy was started
    copied_snapshot = ec2.get(services.ec2_service.SNAPSHOTS,
                              region=self._destination_region_,
                              OwnerIds=["self"],
                              Filters=[{
                                  "Name": "snapshot-id",
                                  "Values": [copy_snapshot_id]
                              }])

    if copied_snapshot is None:
        state = None
    else:
        self._logger_.debug(INF_CHECK_COMPLETED_RESULT, copied_snapshot)
        state = copied_snapshot["State"]

    if copied_snapshot is None or state == SNAPSHOT_STATE_PENDING:
        self._logger_.info(INF_COPY_PENDING, copy_snapshot_id, self._destination_region_)
        return None

    if state == SNAPSHOT_STATE_ERROR:
        # remove the marker tag from the source snapshot before reporting the failure
        copied_tag_name = Ec2CopySnapshotAction.marker_tag_copied_to(self._task_)
        self.ec2_source_client.delete_tags_with_retries(Resources=[self.source_snapshot_id],
                                                        Tags=[{"Key": copied_tag_name}])
        raise_exception(ERR_COPY_SNAPSHOT)

    if state == SNAPSHOT_STATE_COMPLETED:
        self._logger_.info(INF_COPY_COMPLETED,
                           self.source_snapshot_id,
                           self.source_region,
                           copy_snapshot_id,
                           self._destination_region_)

        grant_create_volume_permissions(copy_snapshot_id)
        tag_shared_snapshots(snapshot_create_data.get("tags", {}), copy_snapshot_id)
        tag_shared_source_snapshot(copy_snapshot_id)

        if self.delete_after_copy:
            delete_source_after_copy()
        else:
            set_source_snapshot_tags(copy_snapshot_id)

        # short pause, as the count used by the EC2 service for the limit on the number of
        # concurrent snapshot copies is sometimes delayed
        time.sleep(5)

        return copied_snapshot

    return None
def handle_request(self):
    """
    Handles the select resources request. Selects the resources for the task in every assumed
    role (account) and region, and creates new task actions for the selected resources according
    to the aggregation level of the action (resource, region, account or task).

    :return: safe_dict with the "datetime", the "running-time" in seconds and the "dispatched-tasks"
        (the action items that were added)
    """

    def filter_by_action_filter(srv, used_role, r):
        # Passes the resource to the optional select-and-process method of the action class.
        # Returns the (possibly modified) resource, or None if that method returned None.
        filter_method = getattr(self.action_class,
                                actions.SELECT_AND_PROCESS_RESOURCE_METHOD, None)
        if filter_method is not None:
            r = filter_method(srv, self._logger, self._resource_name, r,
                              self._context, self.task, used_role)
            if r is None:
                self._logger.debug(
                    DEBUG_FILTER_METHOD, self.action_class.__name__,
                    actions.SELECT_AND_PROCESS_RESOURCE_METHOD)
                return None
            else:
                self._logger.debug(
                    DEBUG_FILTERED_RESOURCE, self.action_class.__name__,
                    actions.SELECT_AND_PROCESS_RESOURCE_METHOD,
                    safe_json(r, indent=3))
        # no filter method: the resource is returned unchanged
        return r

    def is_selected_resource(aws_service, resource, used_role, taskname,
                             tags_filter, does_resource_supports_tags):
        # Returns the resource if it is selected for the task, None otherwise.
        # Selection is by task name in the task tag (no tag filter) or by the tag filter,
        # followed by the optional filter method of the action class.

        # No tags then just use filter method if any
        if not does_resource_supports_tags:
            self._logger.debug(DEBUG_RESOURCE_NO_TAGS, resource)
            return filter_by_action_filter(srv=aws_service,
                                           used_role=used_role,
                                           r=resource)

        tags = resource.get("Tags", {})

        # name of the tag that holds the list of tasks for this resource
        tagname = self._task_tag

        if tags_filter is None:
            # test if name of the task is in list of tasks in tag value
            if (tagname not in tags) or (taskname not in tagging.split_task_list(
                    tags[tagname])):
                self._logger.debug(
                    DEBUG_RESOURCE_NOT_SELECTED,
                    safe_json(resource, indent=2), taskname, ','.join(
                        ["'{}'='{}'".format(t, tags[t]) for t in tags]))
                return None
            self._logger.debug(DEBUG_SELECTED_BY_TASK_NAME_IN_TAG_VALUE,
                               safe_json(resource, indent=2), tagname, taskname)
        else:
            # using a tag filter, * means any tag
            if tags_filter != tagging.tag_filter_set.WILDCARD_CHAR:
                # test if there are any tags matching the tag filter
                if not TagFilterExpression(tags_filter).is_match(tags):
                    self._logger.debug(
                        DEBUG_RESOURCE_NOT_SELECTED_TAG_FILTER,
                        safe_json(resource, indent=2), taskname, ','.join([
                            "'{}'='{}'".format(t, tags[t]) for t in tags
                        ]))
                    return None
                # tag_filter_str is the variable set in the body of handle_request below
                self._logger.debug(DEBUG_SELECTED_BY_TAG_FILTER,
                                   safe_json(resource, indent=2), tags,
                                   tag_filter_str, taskname)
            else:
                self._logger.debug(DEBUG_SELECTED_WILDCARD_TAG_FILTER,
                                   safe_json(resource, indent=2), taskname)
                return filter_by_action_filter(srv=aws_service,
                                               used_role=used_role,
                                               r=resource)

        return filter_by_action_filter(srv=aws_service,
                                       used_role=used_role,
                                       r=resource)

    def resource_batches(resources):
        """
        Returns resources as chunks of self.batch_size items. If the action class has an optional
        custom aggregation function then the resources are aggregated first using this function
        before applying the batch size. Without a batch size each aggregate is yielded as a whole.
        :param resources: resources to process
        :return: Generator for blocks of resource items
        """
        aggregate_func = getattr(self.action_class,
                                 actions.CUSTOM_AGGREGATE_METHOD, None)

        for i in aggregate_func(
                resources, self.task_parameters,
                self._logger) if aggregate_func is not None else [
                    resources
                ]:
            if self.batch_size is None:
                yield i
            else:
                first = 0
                while first < len(i):
                    yield i[first:first + self.batch_size]
                    first += self.batch_size

    def setup_tag_filtering(t_name):
        # Returns a tuple (select_tag, tag_filter_string): the single tag that can be used
        # to optimise the select (or None) and the tag filter of the task (or None).

        # get optional tag filter
        no_select_by_tags = self.action_properties.get(
            actions.ACTION_NO_TAG_SELECT, False)
        if no_select_by_tags:
            tag_filter_string = tagging.tag_filter_set.WILDCARD_CHAR
        else:
            tag_filter_string = self.task.get(handlers.TASK_TAG_FILTER)

        # set if only a single tag is required for selecting the resources, it is used to optimise the select
        select_tag = None
        if tag_filter_string is None:
            self._logger.debug(DEBUG_SELECT_BY_TASK_NAME, self._resource_name,
                               self._task_tag, t_name)
            select_tag = self._task_tag
        elif tag_filter_string == tagging.tag_filter_set.WILDCARD_CHAR:
            self._logger.debug(DEBUG_SELECT_ALL_RESOURCES, self._resource_name)
        else:
            self._logger.debug(DEBUG_TAG_FILTER_USED_TO_SELECT_RESOURCES,
                               self._resource_name)
            # build the tag expression that is used to filter the resources
            tag_filter_expression = TagFilterExpression(tag_filter_string)
            # the keys of the used tags
            tag_filter_expression_tag_keys = list(
                tag_filter_expression.get_filter_keys())
            # if there is only a single tag then we can optimize by just filtering on that specific tag
            if len(tag_filter_expression_tag_keys) == 1 and \
                    tagging.tag_filter_set.WILDCARD_CHAR not in tag_filter_expression_tag_keys[0]:
                select_tag = tag_filter_expression_tag_keys[0]
        return select_tag, tag_filter_string

    def add_aggregated(aggregated_resources):
        # create tasks action for aggregated resources , optionally split in batch size chunks
        # assumed_role is the loop variable of the roles loop in the body of handle_request
        for ra in resource_batches(aggregated_resources):
            if self._check_can_execute(ra):
                action_item = self.actions_tracking.add_task_action(
                    task=self.task,
                    assumed_role=assumed_role,
                    action_resources=ra,
                    task_datetime=self.task_dt,
                    source=self.source,
                    task_group=self.task_group)

                self._logger.debug(DEBUG_ADDED_AGGREGATED_RESOURCES_TASK,
                                   action_item[handlers.TASK_TR_ID], len(ra),
                                   self._resource_name,
                                   self.task[handlers.TASK_NAME])

                self._logger.debug("Added item\n{}",
                                   safe_json(action_item, indent=3))

                yield action_item

    def add_as_individual(resources):
        # assumed_role is the loop variable of the roles loop in the body of handle_request
        for ri in resources:
            # task action for each selected resource
            if self._check_can_execute([ri]):
                action_item = self.actions_tracking.add_task_action(
                    task=self.task,
                    assumed_role=assumed_role,
                    action_resources=ri,
                    task_datetime=self.task_dt,
                    source=self.source,
                    task_group=self.task_group)

                self._logger.debug(DEBUG_ADD_SINGLE_RESOURCE_TASK,
                                   action_item[handlers.TASK_TR_ID],
                                   self._resource_name,
                                   self.task[handlers.TASK_NAME])
                self._logger.debug("Added item\n{}",
                                   safe_json(action_item, indent=3))
                yield action_item

    try:
        task_items = []
        start = datetime.now()

        self._logger.debug(DEBUG_EVENT, safe_json(self._event, indent=3))
        self._logger.debug(DEBUG_ACTION,
                           safe_json(self.action_properties, indent=3))

        self._logger.info(INFO_SELECTED_RESOURCES, self._resource_name,
                          self.service, self.task[handlers.TASK_NAME])
        self._logger.info(INFO_AGGR_LEVEL, self.aggregation_level)

        task_level_aggregated_resources = []

        args = self._build_describe_argument()

        # tags are only retrieved and used for selection if the resource supports tags
        service_resource_with_tags = services.create_service(
            self.service).resources_with_tags
        if self._resource_name == "":
            supports_tags = len(service_resource_with_tags) != 0
        else:
            supports_tags = self._resource_name.lower() in [
                r.lower() for r in service_resource_with_tags
            ]

        args["tags"] = supports_tags
        self._logger.info(INFO_USE_TAGS_TO_SELECT,
                          "R" if supports_tags else "No r")

        task_name = self.task[handlers.TASK_NAME]
        count_resource_items = 0
        selected_resource_items = 0

        select_on_tag, tag_filter_str = setup_tag_filtering(task_name)

        filter_func = getattr(self.action_class,
                              actions.FILTER_RESOURCE_METHOD, None)

        # timer to guard selection time and log warning if getting close to lambda timeout
        if self._context is not None:
            self.start_timer(REMAINING_TIME_AFTER_DESCRIBE)
        try:

            for assumed_role in self._task_assumed_roles():
                retry_strategy = get_default_retry_strategy(
                    service=self.service, context=self._context)

                service = services.create_service(
                    service_name=self.service,
                    service_retry_strategy=retry_strategy,
                    role_arn=assumed_role)

                if self.is_timed_out():
                    break

                # contains resources for account
                account_level_aggregated_resources = []

                self._logger.info(INFO_ACCOUNT, service.aws_account)
                if assumed_role not in [None, ""]:
                    self._logger.info(INFO_ASSUMED_ROLE, assumed_role)

                for region in self._regions:

                    # test for timeouts
                    if self.is_timed_out():
                        break

                    # handle region passed in the event
                    if region is not None:
                        args["region"] = region
                    else:
                        if "region" in args:
                            del args["region"]

                    # resources can be passed in the event by event handlers
                    all_resources = self._event.get(
                        handlers.HANDLER_SELECT_RESOURCES, None)

                    if all_resources is None:

                        # actions can have an optional method to select resources
                        action_custom_describe_function = getattr(
                            self.action_class, "describe_resources", None)
                        if action_custom_describe_function is not None and self.use_custom_select:
                            all_resources = action_custom_describe_function(
                                service, self.task, region)
                        else:
                            # select resources from the service
                            self._logger.debug(DEBUG_SELECT_PARAMETERS,
                                               self._resource_name,
                                               self.service, args)
                            # selecting a list of all resources in this account/region
                            all_resources = list(
                                service.describe(
                                    self._resource_name,
                                    filter_func=filter_func,
                                    select_on_tag=select_on_tag,
                                    **args))
                        # test for timeout
                        # NOTE(review): nesting level of this check and of the count below is
                        # assumed to be inside the "all_resources is None" branch - confirm
                        if self.is_timed_out():
                            break

                        count_resource_items += len(all_resources)

                    self._logger.info(INFO_RESOURCES_FOUND, len(all_resources))

                    # select resources that are processed by the task
                    selected_resources = []
                    for sr in all_resources:
                        sel = is_selected_resource(
                            aws_service=service,
                            resource=sr,
                            used_role=assumed_role,
                            taskname=task_name,
                            tags_filter=tag_filter_str,
                            does_resource_supports_tags=supports_tags)
                        if sel is not None:
                            selected_resources.append(sel)

                    selected_resource_items += len(selected_resources)

                    # display found and selected resources
                    if len(all_resources) > 0:
                        self._logger.info(INFO_RESOURCES_SELECTED,
                                          len(selected_resources))
                        if len(selected_resources) == 0:
                            continue

                    # delete tags if not needed by the action
                    if not self.keep_tags:
                        for res in selected_resources:
                            if "Tags" in res:
                                del res["Tags"]

                    # add resources to total list of resources for this task
                    if self.aggregation_level == actions.ACTION_AGGREGATION_TASK:
                        task_level_aggregated_resources += selected_resources

                    # add resources to list of resources for this account
                    if self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT:
                        account_level_aggregated_resources += selected_resources

                    # add batch(es) of resources for this region
                    if self.aggregation_level == actions.ACTION_AGGREGATION_REGION and len(
                            selected_resources) > 0:
                        task_items += list(
                            add_aggregated(selected_resources))

                    # no aggregation, add each individual resource
                    if self.aggregation_level == actions.ACTION_AGGREGATION_RESOURCE and len(
                            selected_resources) > 0:
                        task_items += list(
                            add_as_individual(selected_resources))

                # at the end of the region loop, check if aggregated resources for account need to be added
                if self.aggregation_level == actions.ACTION_AGGREGATION_ACCOUNT and len(
                        account_level_aggregated_resources) > 0:
                    task_items += list(
                        add_aggregated(account_level_aggregated_resources))

            # at the end of the accounts loop, check if aggregated resources for task need to be added
            if self.aggregation_level == actions.ACTION_AGGREGATION_TASK and len(
                    task_level_aggregated_resources) > 0:
                task_items += list(
                    add_aggregated(task_level_aggregated_resources))
        except Exception as ex:
            raise_exception(ERR_SELECTING_TASK_RESOURCES,
                            self.task[handlers.TASK_NAME], ex)

        finally:
            if self._timer is not None:
                # cancel timer used to avoid timeouts when selecting resources
                self._timer.cancel()
                if self.is_timed_out():
                    raise_exception(ERR_TIMEOUT_SELECTING_RESOURCES,
                                    self._resource_name, self.service,
                                    task_name)

                # new timer that guards the time used for storing the added actions
                self.start_timer(REMAINING_TIME_AFTER_STORE)

                self.actions_tracking.flush(self._timeout_event)
                if self.is_timed_out():
                    raise_exception(
                        ERR_CREATING_TASKS_FOR_SELECTED_RESOURCES, task_name)
                self._timer.cancel()
            else:
                # no timer is used, flush without a timeout event
                self.actions_tracking.flush()

        self._logger.info(INFO_ADDED_ITEMS, len(task_items),
                          self.task[handlers.TASK_NAME])

        running_time = float((datetime.now() - start).total_seconds())
        self._logger.info(INFO_RESULT, running_time)

        if self.metrics:
            put_task_select_data(task_name=task_name,
                                 items=count_resource_items,
                                 selected_items=selected_resource_items,
                                 logger=self._logger,
                                 selection_time=running_time)

        return safe_dict({
            "datetime": datetime.now().isoformat(),
            "running-time": running_time,
            "dispatched-tasks": task_items
        })

    finally:
        # the logger is always flushed, also if the selection failed
        self._logger.flush()
def is_completed(self, _, start_results):
    """
    Tests if the create snapshot actions have been completed. This method uses the ids of the
    created snapshots and tests their state. As long as at least one snapshot is pending the
    method returns None.
    :param start_results: Result of the execute method that started the creation of the snapshots,
        passed in as json formatted text
    :param _: not used
    :return: safe_json of the test result (instance id and state of the snapshot of each volume),
        None if at least one snapshot is in pending state
    :raises Exception: if one or more of the snapshots are in the error state
    """

    # start result data is passed in as text, for this action it is json formatted
    snapshot_create_data = json.loads(start_results)

    self.logger.debug("Start result data is {}", start_results)

    # Volumes for which no snapshot was started have no snapshot id. These must be skipped as a
    # None value makes the join below fail and is not a valid filter value.
    snapshot_ids = [
        snapshot_id for snapshot_id in (
            volume.get("create_snapshot", {}).get("SnapshotId")
            for volume in snapshot_create_data.get("volumes", {}).values())
        if snapshot_id is not None
    ]

    self.logger.info("Checking status of snapshot(s) {}", ",".join(snapshot_ids))

    if not snapshot_ids:
        # no snapshots were started so there is nothing to wait for, an empty list of ids
        # can not be used as value for the snapshot-id filter
        self.logger.info(INFO_COMPLETED)
        return safe_json({
            "InstanceId": snapshot_create_data["instance"],
            "Volumes": []
        })

    # create service instance to test if snapshots are available
    ec2 = services.create_service(
        "ec2",
        session=self.session,
        service_retry_strategy=get_default_retry_strategy("ec2", context=self.context))

    # get the snapshots with the ids that were returned from the CreateSnapshot API call
    snapshots = ec2.describe("Snapshots",
                             OwnerIds=["self"],
                             Filters=[{
                                 "Name": "snapshot-id",
                                 "Values": snapshot_ids
                             }])

    test_result = {
        "InstanceId": snapshot_create_data["instance"],
        "Volumes": [{
            "VolumeId": s["VolumeId"],
            "SnapshotId": s["SnapshotId"],
            "State": s["State"],
            "Progress": s["Progress"]
        } for s in snapshots]
    }

    self.logger.info(INFO_STATE_SNAPSHOTS, json.dumps(test_result))

    # wait until all snapshots are no longer pending
    if any(volume["State"] == SNAPSHOT_STATE_PENDING for volume in test_result["Volumes"]):
        self.logger.info(INFO_CREATION_PENDING)
        return None

    # collect possible failed snapshots
    # NOTE(review): the constant name SNAPHOT_STATE_ERROR is kept as spelled in the original
    # code - confirm it matches the module-level definition
    failed = [volume for volume in test_result["Volumes"] if volume["State"] == SNAPHOT_STATE_ERROR]

    if len(failed) > 0:
        s = ",".join([
            ERR_FAILED_SNAPSHOT.format(volume["SnapshotId"], volume["VolumeId"])
            for volume in failed
        ])
        raise Exception(s)

    self.logger.info(INFO_COMPLETED)
    return safe_json(test_result)
def execute(self):
    """
    Starts the creation of snapshots for the volumes of the instance.

    The root volume is handled if self.backup_root_device is set, the other volumes if
    self.backup_data_devices is set. A volume is skipped if it does not match the optional
    volume tag filter or if it still has a snapshot in the pending state.

    :return: self.result, extended with the "start-time" and the metrics data
    """

    def volume_has_active_snapshots(ec2_service, vol_id):
        # tests if there are snapshots of the volume that are still in the pending state
        volume_snapshots = list(
            ec2_service.describe(services.ec2_service.SNAPSHOTS,
                                 OwnerIds=["self"],
                                 region=self.instance["Region"],
                                 Filters=[
                                     {
                                         "Name": "volume-id", "Values": [vol_id]
                                     }
                                 ]))

        active = [s["SnapshotId"] for s in volume_snapshots if s.get("State", "") == "pending"]

        if len(active) > 0:
            self._logger_.info(INFO_PENDING_SNAPSHOTS, vol_id, ",".join(active))
            return True

        return False

    def snapshot_volume_if_selected(vol_id):
        # starts a snapshot for the volume if it passes the tag filter and has no pending snapshots
        tags_of_volume = volume_tags.get(vol_id, {})
        if self.volume_tag_filter is None or self.volume_tag_filter.is_match(tags_of_volume):
            if volume_has_active_snapshots(ec2, vol_id):
                self._logger_.error(ERR_SNAPSHOT_PENDING, vol_id)
            else:
                self.create_volume_snapshot(vol_id)
        else:
            self._logger_.info(INF_SKIP_VOLUME_TAG_FILTER, vol_id, tags_of_volume)

    self._logger_.info("{}, version {}", self.properties[ACTION_TITLE], self.properties[ACTION_VERSION])

    self._logger_.info(INFO_START_SNAPSHOT_ACTION, self.instance_id, self._account_, self._region_, self._task_)
    self._logger_.debug("Instance block device mappings are {}", self.instance["BlockDeviceMappings"])

    ec2 = services.create_service("ec2", session=self._session_,
                                  service_retry_strategy=get_default_retry_strategy("ec2", context=self._context_))

    # the tags of the volumes are only needed if a tag filter is used
    if self.volume_tag_filter is not None:
        volume_data = ec2.describe(services.ec2_service.VOLUMES,
                                   VolumeIds=list(self.volumes.keys()),
                                   tags=True,
                                   region=self._region_)
        volume_tags = {k["VolumeId"]: k.get("Tags", {}) for k in volume_data}
    else:
        volume_tags = {}

    if self.backup_root_device:
        if self.root_volume is None:
            self._logger_.warning(WARN_ROOT_NOT_FOUND, self.instance_id, ",".join(self.volumes))
        else:
            snapshot_volume_if_selected(self.root_volume)

    if self.backup_data_devices:
        for volume in [v for v in self.volumes if v != self.root_volume]:
            snapshot_volume_if_selected(volume)

    self.result["start-time"] = self._datetime_.now().isoformat()

    started = list(self.result.get("volumes", {}).values())
    self.result[METRICS_DATA] = build_action_metrics(
        action=self,
        CreatedSnapshots=len(started),
        # entries without snapshot data or size count as 0, a None value would make sum fail
        SnapshotsSizeTotal=sum(volume.get("create_snapshot", {}).get("VolumeSize") or 0 for volume in started))

    return self.result
def is_completed(self, snapshot_create_data):
    """
    Tests if the snapshots that were started for the volumes of the instance are completed.

    The ids of the started snapshots are read from the "volumes" entries in the create data. As long
    as snapshots are pending, or not all snapshots are visible yet within 5 minutes after the start,
    None is returned. When none of the snapshots is pending anymore the tags are set on the instance
    and the volumes, the create volume permissions are granted and the snapshots are tagged in the
    accounts they are shared with.

    :param snapshot_create_data: result data of the step that started the snapshots, must contain
    "instance" and "start-time", and may contain "volumes" and "snapshots"
    :return: Dictionary with the instance id and the state of the snapshot for each volume, None if
    not completed yet
    :raises Exception: if snapshots failed or are missing, or if tags could not be set
    """

    def grant_create_volume_permissions(snap_ids):
        """
        Grants the configured accounts permission to create volumes from the snapshots
        :param snap_ids: ids of the snapshots
        :return: None
        """
        if self.accounts_with_create_permissions is not None and len(self.accounts_with_create_permissions) > 0:

            args = {
                "CreateVolumePermission": {
                    "Add": [{"UserId": a.strip()} for a in self.accounts_with_create_permissions]
                }
            }

            # same permissions for every snapshot, only the snapshot id differs per call
            for snapshot_id in snap_ids:
                args["SnapshotId"] = snapshot_id

                try:
                    self.ec2_client.modify_snapshot_attribute_with_retries(**args)
                    self._logger_.info(INFO_SETTING_CREATE_VOLUME_PERMISSIONS,
                                       ", ".join(self.accounts_with_create_permissions))
                    self.result["create-volume-access-accounts"] = [a.strip() for a in
                                                                    self.accounts_with_create_permissions]
                except Exception as ex:
                    raise_exception(ERR_SETTING_CREATE_VOLUME_PERMISSIONS, self.accounts_with_create_permissions, ex)

    def tag_shared_snapshots(snapshot_data, snap_ids):
        """
        Sets the tags on the snapshots in the accounts the snapshots are shared with
        :param snapshot_data: dictionary, keyed by snapshot id, that holds the tags for a snapshot in its "tags" entry
        :param snap_ids: ids of the snapshots
        :return: None
        """
        if self.accounts_with_create_permissions not in ["", None] and self.tag_shared_snapshots:

            for account in self.accounts_with_create_permissions:

                # use the stripped account id, same as is done when granting the create volume permissions
                account = account.strip()

                session_for_tagging = self.get_action_session(account=account,
                                                              param_name=PARAM_SHARED_ACCOUNT_TAGGING_ROLENAME,
                                                              logger=self._logger_)

                if session_for_tagging is None:
                    self._logger_.error(ERR_TAGS_NOT_SET_IN_ACCOUNT, account)
                    continue

                try:
                    ec2_client = get_client_with_retries(service_name="ec2",
                                                         methods=[
                                                             "create_tags",
                                                             "delete_tags"
                                                         ],
                                                         context=self._context_,
                                                         region=self._region_,
                                                         session=session_for_tagging,
                                                         logger=self._logger_)
                    for snap_id in snap_ids:
                        tags = snapshot_data.get(snap_id, {}).get("tags", None)
                        if tags is not None:
                            self._logger_.info(INFO_SET_SNAPSHOT_TAGS_SHARED, safe_json(tags, indent=3), snap_id, account,
                                               self._region_)
                            tagging.set_ec2_tags(ec2_client=ec2_client,
                                                 resource_ids=[snap_id],
                                                 tags=tags,
                                                 logger=self._logger_)
                except Exception as ex:
                    raise Exception(ERR_SETTING_SHARED_TAGS.format(account, str(ex)))

    def set_volume_tags(volume_id, snap_id):
        """
        Sets the tags, built from the volume tags parameter, on a volume
        :param volume_id: id of the volume
        :param snap_id: id of the snapshot of the volume, available as placeholder in the tags
        :return: None
        """
        tags = self.build_tags_from_template(parameter_name=PARAM_VOLUME_TAGS,
                                             tag_variables={
                                                 TAG_PLACEHOLDER_VOLUME_SNAPSHOT: snap_id
                                             })

        if len(tags) > 0:
            try:
                tagging.set_ec2_tags(ec2_client=self.ec2_client,
                                     resource_ids=[volume_id],
                                     tags=tags,
                                     logger=self._logger_)

                self._logger_.info(INFO_SET_VOLUME_TAGS, safe_json(tags, indent=3), volume_id)
            except Exception as ex:
                raise Exception(ERR_SETTING_VOLUME_TAGS.format(self.instance_id, ex))

    def set_instance_tags(snap_ids):
        """
        Sets the tags, built from the instance tags parameter, on the instance
        :param snap_ids: ids of the snapshots, available as sorted comma separated list placeholder in the tags
        :return: None
        """
        tags = self.build_tags_from_template(parameter_name=PARAM_INSTANCE_TAGS,
                                             tag_variables={
                                                 TAG_PLACEHOLDER_INSTANCE_SNAPSHOTS: ','.join(sorted(snap_ids))
                                             })
        if len(tags) > 0:
            try:
                self.set_ec2_instance_tags_with_event_loop_check(instance_ids=[self.instance_id],
                                                                 tags_to_set=tags,
                                                                 client=self.ec2_client,
                                                                 region=self._region_)

                self._logger_.info(INFO_SET_INSTANCE_TAGS, safe_json(tags, indent=3), self.instance_id)
            except Exception as ex:
                raise Exception(ERR_SETTING_INSTANCE_TAGS.format(self.instance_id, ex))

    # collect the ids of the started snapshots, an entry without a snapshot id cannot be checked and
    # would break both the logging below and the snapshot-id filter
    snapshot_ids = []
    for vol_id, volume in snapshot_create_data.get("volumes", {}).items():
        started_snapshot_id = volume.get("create_snapshot", {}).get("SnapshotId")
        if started_snapshot_id is None:
            self._logger_.warning("No snapshot id found in create data for volume {}", vol_id)
            continue
        snapshot_ids.append(started_snapshot_id)

    self._logger_.info(INFO_CHECKING_SNAPSHOT_STATUS, ",".join(snapshot_ids))

    # nothing was started, so there is nothing to wait for
    if len(snapshot_ids) == 0:
        return {
            "InstanceId": snapshot_create_data["instance"],
            "Volumes": []
        }

    # create service instance to test if snapshots are available
    ec2 = services.create_service("ec2", session=self._session_,
                                  service_retry_strategy=get_default_retry_strategy("ec2", context=self._context_))

    # get the snapshots with the ids that were returned from the CreateSnapshot API call
    snapshots = list(ec2.describe(services.ec2_service.SNAPSHOTS,
                                  OwnerIds=["self"],
                                  region=self.instance["Region"],
                                  Filters=[
                                      {
                                          "Name": "snapshot-id", "Values": snapshot_ids
                                      }
                                  ]))

    if len(snapshots) != len(snapshot_ids):
        # allow 5 minutes for all snapshots to appear
        start_time = dateutil.parser.parse(snapshot_create_data["start-time"])
        if self._datetime_.now() - start_time < timedelta(minutes=5):
            self._logger_.info(INFO_NOT_ALL_IN_PROGRESS)
            return None

    test_result = {
        "InstanceId": snapshot_create_data["instance"],
        "Volumes": [{
            "VolumeId": s["VolumeId"],
            "SnapshotId": s["SnapshotId"],
            "State": s["State"],
            "Progress": s["Progress"]
        } for s in snapshots]
    }

    self._logger_.info(INFO_STATE_SNAPSHOTS, safe_json(test_result, indent=3))

    # wait until all snapshots are no longer pending
    for volume in test_result["Volumes"]:
        if volume["State"] == SNAPSHOT_STATE_PENDING:
            self._logger_.info(INFO_CREATION_PENDING)
            return None

    # collect possible failed snapshots
    failed = [volume for volume in test_result["Volumes"] if volume["State"] == SNAPSHOT_STATE_ERROR]

    if len(failed) > 0:
        failed_message = ",".join([ERR_FAILED_SNAPSHOT.format(volume["SnapshotId"], volume["VolumeId"])
                                   for volume in failed])
        raise Exception(failed_message)

    # snapshots that still did not show up after the 5 minutes they were allowed to appear
    if len(snapshot_ids) != len(snapshots):
        created_snapshots = [s["SnapshotId"] for s in snapshots]
        raise Exception(ERR_MISSING_SNAPSHOTS.format(",".join([s for s in snapshot_ids if s not in created_snapshots])))

    snapshot_ids = [s["SnapshotId"] for s in snapshots]

    # set tags on source instance
    set_instance_tags(snapshot_ids)

    # set tags on the volumes of the snapshots
    for s in snapshots:
        set_volume_tags(volume_id=s["VolumeId"], snap_id=s["SnapshotId"])

    # set permissions to create volumes from snapshots
    grant_create_volume_permissions(snapshot_ids)

    # tag resources in accounts the snapshots are shared with
    tag_shared_snapshots(snapshot_create_data.get("snapshots", {}), snapshot_ids)

    self._logger_.info(INFO_COMPLETED)
    return test_result