def disable(self, message="", power_off_instances=True, notify_sources=True):
    """
    Mark this CloudAccount as disabled and perform operations to make it so.

    Disabling records a new "power_off" event for every related powered-on
    instance and asks the related content_object (e.g. AwsCloudAccount) to
    apply any cloud-specific changes.

    Args:
        message (string): status message to set on the Sources Application
        power_off_instances (bool): when False, skip creating power_off
            instance events while still running the rest of the disable
            logic. Used on account deletion, because creating the instance
            event in the same transaction as deleting the account causes
            Django errors.
        notify_sources (bool): whether to notify sources about this
            operation. Should always be True except for very special cases.
    """
    logger.info(_("Attempting to disable %(account)s"), {"account": self})

    if self.is_enabled:
        self.is_enabled = False
        self.save()
        if power_off_instances:
            self._power_off_instances(power_off_time=get_now())
        self.content_object.disable()

    # Sources is notified even if the account was already disabled so that
    # the availability message stays current.
    if notify_sources:
        sources.notify_application_availability(
            self.user.username, self.platform_application_id, "unavailable", message
        )

    logger.info(_("Finished disabling %(account)s"), {"account": self})
def delete_inactive_users():
    """
    Delete all inactive User objects.

    A User is considered to be inactive if all of the following are true:
    - the User has no related CloudAccount objects
    - the User is not a superuser
    - the User's date joined is more than
      settings.DELETE_INACTIVE_USERS_MIN_AGE seconds old

    Note: only the superuser and age conditions are filtered here; the
    CloudAccount condition is presumably enforced by _delete_user — confirm
    there if changing this query.
    """
    oldest_allowed_date_joined = get_now() - timedelta(
        seconds=settings.DELETE_INACTIVE_USERS_MIN_AGE
    )
    users = User.objects.filter(
        is_superuser=False, date_joined__lt=oldest_allowed_date_joined
    )
    total_user_count = users.count()
    deleted_user_count = 0
    logger.info(
        _(
            "Found %(total_user_count)s not-superuser Users joined before "
            "%(date_joined)s."
        ),
        {
            "total_user_count": total_user_count,
            "date_joined": oldest_allowed_date_joined,
        },
    )
    for user in users:
        # _delete_user returns truthy only when the user was actually deleted.
        if _delete_user(user):
            deleted_user_count += 1
    logger.info(
        _(
            "Successfully deleted %(deleted_user_count)s of %(total_user_count)s "
            "users."
        ),
        {
            "deleted_user_count": deleted_user_count,
            "total_user_count": total_user_count,
        },
    )
def enable(self):
    """
    Mark this CloudAccount as enabled and perform operations to make it so.

    This has the side effect of calling the related content_object (e.g.
    AwsCloudAccount) to make any cloud-specific changes. If the
    cloud-specific function fails, we roll back our state change and return
    False; the exception is logged, not re-raised.

    Returns:
        bool: True if the account was enabled successfully, False if the
        cloud-specific enable step failed and the transaction was rolled
        back. (Previously the success path implicitly returned None, which
        is falsy just like the failure path's False — callers could not
        distinguish the outcomes.)
    """
    logger.info(
        _("'is_enabled' is %(is_enabled)s before enabling %(cloudaccount)s"),
        {"is_enabled": self.is_enabled, "cloudaccount": self},
    )
    if not self.is_enabled:
        self.is_enabled = True
        self.enabled_at = get_now()
        self.save()
    try:
        self.content_object.enable()
        # Delete stale ConcurrentUsage when a cloud account is enabled.
        ConcurrentUsage.objects.filter(user=self.user, date=get_today()).delete()
    except Exception as e:
        # All failure notifications should happen during the failure
        logger.info(e)
        # Roll back is_enabled/enabled_at; assumes we are inside an atomic
        # block managed by the caller.
        transaction.set_rollback(True)
        return False
    sources.notify_application_availability(
        self.user.username, self.platform_application_id, "available"
    )
    return True
def generate_dummy_block_device_mapping(
    device_name=None,
    device_type="Ebs",
    attach_time=None,
    delete_on_termination=True,
    status="attached",
    volume_id=None,
):
    """
    Generate block device mapping to imitate part of 'describe instances' API response.

    Args:
        device_name (str): Optional known DeviceName value; randomly generated
            if not given.
        device_type (str): Optional known device type key for nested status
            details; defaults to "Ebs".
        attach_time (str): Optional known AttachTime value; defaults to the
            current time's ISO-8601 string if not given.
        delete_on_termination (bool): Optional known DeleteOnTermination value;
            defaults to True.
        status (str): Optional known Status; defaults to "attached". Pass None
            explicitly to pick a random in-progress status.
        volume_id (str): Optional known VolumeId value; randomly generated if
            not given.

    Returns:
        dict: Well-formed BlockDeviceMapping data structure. Example:
            {
                "DeviceName": "/dev/xvda",
                "Ebs": {
                    "AttachTime": "2020-10-08T19:07:23+00:00",
                    "DeleteOnTermination": true,
                    "Status": "attached",
                    "VolumeId": "vol-06c61265cb97c1e1e"
                }
            }
    """
    if device_name is None:
        device_index = random.randint(0, 100)
        device_name = misc.generate_device_name(device_index)
    if attach_time is None:
        attach_time = misc.get_now().isoformat()
    if status is None:
        status = random.choice(["attaching", "attached", "detaching"])
    if volume_id is None:
        volume_id = generate_dummy_volume_id()
    mapping = {
        "DeviceName": device_name,
        device_type: {
            "AttachTime": attach_time,
            "DeleteOnTermination": delete_on_termination,
            "Status": status,
            "VolumeId": volume_id,
        },
    }
    return mapping
def test_delete_inactive_users_ignores_superusers(self):
    """Test delete_inactive_users ignores superusers."""
    joined = misc.get_now() - timedelta(
        seconds=settings.DELETE_INACTIVE_USERS_MIN_AGE + 10
    )
    for account_number in range(1, USERS_COUNT + 1):
        User.objects.create_user(
            account_number, date_joined=joined, is_superuser=True
        )
    self.assertEqual(User.objects.count(), USERS_COUNT)

    tasks.delete_inactive_users()

    # Superusers must survive even though they are old and have no accounts.
    self.assertEqual(User.objects.count(), USERS_COUNT)
def test_delete_inactive_users_ignores_users_with_cloudaccount(self):
    """Test delete_inactive_users ignores Users having any CloudAccount."""
    joined = misc.get_now() - timedelta(
        seconds=settings.DELETE_INACTIVE_USERS_MIN_AGE + 10
    )
    for account_number in range(1, USERS_COUNT + 1):
        owner = User.objects.create_user(account_number, date_joined=joined)
        api_helper.generate_cloud_account(user=owner)
    self.assertEqual(User.objects.count(), USERS_COUNT)
    self.assertEqual(CloudAccount.objects.count(), USERS_COUNT)

    tasks.delete_inactive_users()

    # Users that own a CloudAccount are active and must not be deleted.
    self.assertEqual(User.objects.count(), USERS_COUNT)
def test_inspect_pending_images(self):
    """
    Test that only old "pending" images are found and reinspected.

    Note that we effectively time-travel here to points in the past to
    create the account, images, and instances. This is necessary because
    updated_at is automatically set by Django and cannot be manually set,
    but we need things with specific older updated_at times.
    """
    now = get_now()
    one_day_ago = now - datetime.timedelta(days=1)
    one_hour_ago = now - datetime.timedelta(seconds=60 * 60)

    with clouditardis(one_day_ago):
        account = account_helper.generate_cloud_account()
        image_old_inspected = account_helper.generate_image()
        image_old_pending = account_helper.generate_image(
            status=MachineImage.PENDING
        )
        # An instance exists using the old inspected image.
        account_helper.generate_instance(
            cloud_account=account, image=image_old_inspected
        )
        # An instance exists using the old pending image.
        instance_old_pending = account_helper.generate_instance(
            cloud_account=account, image=image_old_pending
        )
        # Another instance uses the same old pending image, but the image
        # should still only be reinspected once regardless of how many
        # instances used it.
        account_helper.generate_instance(
            cloud_account=account, image=image_old_pending
        )

    with clouditardis(one_hour_ago):
        image_new_inspected = account_helper.generate_image()
        image_new_pending = account_helper.generate_image(
            status=MachineImage.PENDING
        )
        # An instance exists using the new inspected image.
        account_helper.generate_instance(
            cloud_account=account, image=image_new_inspected
        )
        # An instance exists using the new pending image, but it should not
        # trigger inspection because the image is not old enough.
        account_helper.generate_instance(
            cloud_account=account, image=image_new_pending
        )

    # Only the old pending image should be started for (re)inspection.
    expected_calls = [
        call(
            account.content_object.account_arn,
            image_old_pending.content_object.ec2_ami_id,
            instance_old_pending.content_object.region,
        )
    ]
    with patch.object(tasks, "start_image_inspection") as mock_start:
        tasks.inspect_pending_images()
    mock_start.assert_has_calls(expected_calls, any_order=True)
def create_events(self, instances, options):
    """
    Create random events for the list of Instances.

    Args:
        instances (list): instances for which to create events
        options (dict): command options

    Returns:
        tuple[int, dict] of events_count, runs_counts
    """
    since = options["since"]
    if not since.tzinfo:
        # Treat a naive "since" datetime as UTC.
        since = since.replace(tzinfo=tz.tzutc())
    now = get_now()
    seconds = int((now - since).total_seconds())

    # Force reasonable not-negative defaults and convert to integer seconds.
    min_run_secs = int(options["min_run_hours"] * 3600)
    mean_run_secs = max(min_run_secs, int(options["mean_run_hours"] * 3600))
    max_run_secs = 2 * mean_run_secs - min_run_secs
    max_run_count = options["mean_run_count"] * 2
    max_secs_between_runs = int(options["mean_hours_between_runs"] * 3600 * 2)

    events_count = 0
    runs_counts = collections.defaultdict(int)
    if max_run_count and max_run_secs:
        progress = tqdm(
            instances, desc="Spawn instance event progress", unit="events"
        )
        for instance in progress:
            events_count += self.create_events_for_instance(
                instance,
                since,
                now,
                seconds,
                max_run_count,
                max_secs_between_runs,
                min_run_secs,
                max_run_secs,
                runs_counts,
                options["cloud_type"],
            )
    return events_count, runs_counts
def test_delete_inactive_users(self):
    """Test delete_inactive_users deletes inactive Users and related objects."""
    joined = misc.get_now() - timedelta(
        seconds=settings.DELETE_INACTIVE_USERS_MIN_AGE + 10
    )
    for account_number in range(1, USERS_COUNT + 1):
        user = User.objects.create_user(account_number, date_joined=joined)
        ConcurrentUsage.objects.create(
            date=joined, user_id=user.id, maximum_counts=[]
        )
        ConcurrentUsageCalculationTask.objects.create(
            user_id=user.id, date=joined, task_id=f"{_faker.uuid4()}"
        )
    self.assertEqual(User.objects.count(), USERS_COUNT)
    self.assertEqual(ConcurrentUsage.objects.count(), USERS_COUNT)
    self.assertEqual(ConcurrentUsageCalculationTask.objects.count(), USERS_COUNT)

    tasks.delete_inactive_users()

    # The inactive Users and everything related to them should be gone.
    self.assertEqual(User.objects.count(), 0)
    self.assertEqual(ConcurrentUsage.objects.count(), 0)
    self.assertEqual(ConcurrentUsageCalculationTask.objects.count(), 0)
def inspect_pending_images():
    """
    (Re)start inspection of images in PENDING, PREPARING, or INSPECTING status.

    This generally should not be necessary for most images, but if an image
    inspection fails to proceed normally, this function will attempt to run
    it through inspection again.

    This function runs atomically in a transaction to protect against the
    risk of it being called multiple times simultaneously which could result
    in the same image being found and getting multiple inspection tasks.
    """
    cutoff = get_now() - timedelta(seconds=settings.INSPECT_PENDING_IMAGES_MIN_AGE)
    images = MachineImage.objects.filter(
        status__in=[
            MachineImage.PENDING,
            MachineImage.PREPARING,
            MachineImage.INSPECTING,
        ],
        instance__aws_instance__region__isnull=False,
        updated_at__lt=cutoff,
    ).distinct()
    logger.info(
        _(
            "Found %(number)s images for inspection that have not updated "
            "since %(updated_time)s"
        ),
        {"number": images.count(), "updated_time": cutoff},
    )
    for image in images:
        # The queryset guarantees at least one AWS instance with a region.
        instance = image.instance_set.filter(
            aws_instance__region__isnull=False
        ).first()
        start_image_inspection(
            instance.cloud_account.content_object.account_arn,
            image.content_object.ec2_ami_id,
            instance.content_object.region,
        )
def __init__(self):
    """Initialize all the data for the examples."""
    # Instance type definitions are needed so generated instances resolve
    # to known vcpu/memory values in API responses.
    api_helper.generate_instance_type_definitions(cloud_type="aws")
    api_helper.generate_instance_type_definitions(cloud_type="azure")
    self.customer_account_number = "100001"
    self.customer_user = util_helper.get_test_user(
        self.customer_account_number, is_superuser=False
    )
    # Fixed join date keeps example output stable across runs.
    self.customer_user.date_joined = util_helper.utc_dt(2019, 1, 1, 0, 0, 0)
    self.customer_user.save()
    self.customer_client = api_helper.SandboxedRestClient()
    self.customer_client._force_authenticate(self.customer_user)
    self.internal_client = api_helper.SandboxedRestClient(
        api_root="/internal/api/cloudigrade/v1"
    )
    self.internal_client._force_authenticate(self.customer_user)
    self.customer_arn = util_helper.generate_dummy_arn()

    # Times to use for various account and event activity.
    self.now = get_now()
    self.this_morning = self.now.replace(hour=0, minute=0, second=0, microsecond=0)
    self.yesterday = self.this_morning - timedelta(days=1)
    self.last_month = self.this_morning - timedelta(days=31)
    self.last_week = self.this_morning - timedelta(days=7)
    self.three_days_ago = self.this_morning - timedelta(days=3)
    self.two_days_ago = self.this_morning - timedelta(days=2)
    self.two_weeks_ago = self.this_morning - timedelta(weeks=2)
    self.tomorrow = self.this_morning + timedelta(days=1)
    self.next_week = self.this_morning + timedelta(weeks=1)

    ######################################
    # Generate AWS and Azure accounts for the customer user.
    self.aws_customer_account = api_helper.generate_cloud_account(
        arn=util_helper.generate_dummy_arn(),
        user=self.customer_user,
        name="greatest account ever",
        created_at=self.two_weeks_ago,
    )
    self.azure_customer_account = api_helper.generate_cloud_account(
        user=self.customer_user,
        name="meh account",
        created_at=self.two_weeks_ago,
        cloud_type="azure",
        azure_subscription_id=str(seeded_uuid4()),
        azure_tenant_id=str(seeded_uuid4()),
    )
    self.customer_instances = [
        api_helper.generate_instance(self.aws_customer_account),
        api_helper.generate_instance(self.aws_customer_account),
        api_helper.generate_instance(self.aws_customer_account),
        api_helper.generate_instance(self.azure_customer_account, cloud_type="azure"),
        api_helper.generate_instance(self.azure_customer_account, cloud_type="azure"),
        api_helper.generate_instance(self.azure_customer_account, cloud_type="azure"),
    ]

    # Generate events so we can see customer activity in the responses.
    # These events represent the first two AWS instances and all three Azure
    # instances starting one week ago, stopping three days ago, and starting
    # again yesterday. (The third AWS instance gets no events.)
    self.events = []
    for instance in self.customer_instances[:2]:
        self.events.extend(
            api_helper.generate_instance_events(
                instance,
                [
                    (self.last_week, self.three_days_ago),
                    (self.yesterday, None),
                ],
            )
        )
    for instance in self.customer_instances[3:6]:
        self.events.extend(
            api_helper.generate_instance_events(
                instance,
                [
                    (self.last_week, self.three_days_ago),
                    (self.yesterday, None),
                ],
                cloud_type="azure",
            )
        )

    # Build the runs for the created events.
    # Note: this crude and *direct* implementation of Run-saving should be
    # replaced as we continue porting pilot functionality and (eventually)
    # better general-purpose Run-handling functions materialize.
    normalized_runs = normalize_runs(models.InstanceEvent.objects.all())
    for normalized_run in normalized_runs:
        run = models.Run(
            start_time=normalized_run.start_time,
            end_time=normalized_run.end_time,
            machineimage_id=normalized_run.image_id,
            instance_id=normalized_run.instance_id,
            instance_type=normalized_run.instance_type,
            memory=normalized_run.instance_memory,
            vcpu=normalized_run.instance_vcpu,
        )
        run.save()

    # Force all images to have RHEL detected ("7.7")
    self.images = list(
        set(
            instance.machine_image
            for instance in self.customer_instances
            if instance.machine_image is not None
        )
    )
    for image in self.images:
        image.inspection_json = json.dumps(
            {
                "rhel_enabled_repos_found": True,
                "rhel_version": "7.7",
                "syspurpose": {
                    "role": "Red Hat Enterprise Linux Server",
                    "service_level_agreement": "Premium",
                    "usage": "Development/Test",
                },
            }
        )
        image.status = image.INSPECTED
        image.region = "us-east-1"
        image.save()

    # Pre-calculate concurrent usage data for upcoming requests.
    # Calculate each day from "last week" (oldest date we use in example
    # requests) through next week.
    the_date = self.last_week.date()
    one_day_delta = timedelta(days=1)
    while the_date <= self.next_week.date():
        task_id = f"calculate-concurrent-usage-{seeded_uuid4()}"
        models.ConcurrentUsageCalculationTask.objects.create(
            user_id=self.customer_user.id,
            date=the_date.isoformat(),
            task_id=task_id,
            status=models.ConcurrentUsageCalculationTask.COMPLETE,
        )
        calculate_max_concurrent_usage(the_date, self.customer_user.id)
        the_date = the_date + one_day_delta
def save_instance_events(awsinstance, instance_data, events=None):
    """
    Save provided events, and create the instance object if it does not exist.

    Note: This function assumes the images related to the instance events have
    already been created and saved.

    Args:
        awsinstance (AwsInstance): The Instance is associated with these
            InstanceEvents.
        instance_data (dict): Dictionary containing instance information.
        events (list[dict]): List of dicts representing Events to be saved.
            If None, a single synthetic "power_on" event is created instead.

    Returns:
        AwsInstance: Object representing the saved instance.
    """
    # Local import — presumably to avoid a circular import with api.tasks;
    # confirm before hoisting to module level.
    from api.tasks import process_instance_event

    if events is None:
        # No event history was provided: record that the instance is powered
        # on as of right now.
        with transaction.atomic():
            awsevent = AwsInstanceEvent.objects.create(
                subnet=instance_data["SubnetId"],
                instance_type=instance_data["InstanceType"],
            )
            InstanceEvent.objects.create(
                event_type=InstanceEvent.TYPE.power_on,
                occurred_at=get_now(),
                instance=awsinstance.instance.get(),
                content_object=awsevent,
            )
            # This get is separate from the create to ensure the relationship
            # exists correctly even though it shouldn't strictly be necessary.
            event = awsevent.instance_event.get()
            process_instance_event(event)
    else:
        logger.info(
            _("Saving %(count)s new event(s) for %(instance)s"),
            {"count": len(events), "instance": awsinstance},
        )
        # Process events in chronological order.
        events = sorted(events, key=lambda e: e["occurred_at"])
        have_instance_type = False
        for e in events:
            # Special case for "power on" events! If we have never saved the
            # instance type before, we need to try to get the type from the
            # described instance and use that on the event.
            if (
                have_instance_type is False
                and e["event_type"] == InstanceEvent.TYPE.power_on
                and e["instance_type"] is None
                and not AwsInstanceEvent.objects.filter(
                    instance_event__instance__aws_instance=awsinstance,
                    instance_event__occurred_at__lte=e["occurred_at"],
                    instance_type__isnull=False,
                ).exists()
            ):
                instance_type = instance_data.get("InstanceType")
                logger.info(
                    _(
                        "Setting type %(instance_type)s for %(event_type)s "
                        "event at %(occurred_at)s from EC2 instance ID "
                        "%(ec2_instance_id)s"
                    ),
                    {
                        "instance_type": instance_type,
                        "event_type": e.get("event_type"),
                        "occurred_at": e.get("occurred_at"),
                        "ec2_instance_id": awsinstance.ec2_instance_id,
                    },
                )
                e["instance_type"] = instance_type
                have_instance_type = True
            awsevent = AwsInstanceEvent(
                subnet=e["subnet"], instance_type=e["instance_type"]
            )
            awsevent.save()
            instance = awsinstance.instance.get()
            event = InstanceEvent(
                instance=instance,
                event_type=e["event_type"],
                occurred_at=e["occurred_at"],
                content_object=awsevent,
            )
            event.save()
            # Need to reload event from DB, otherwise occurred_at is passed
            # as a string instead of a datetime object.
            event.refresh_from_db()
            process_instance_event(event)
    return awsinstance
def check_cluster_instances_age(instance_ids):
    """
    Check the age of the given ECS cluster EC2 instance IDs.

    This function returns nothing, but it will log an error for any instance
    that exists with a launch time longer ago than the configured limit.

    Args:
        instance_ids (list): list of EC2 instance IDs
    """
    if not instance_ids:
        return
    for instance_id in instance_ids:
        logger.info(_("Inspection cluster instance exists: %s"), instance_id)
    instances = describe_cluster_instances(instance_ids)
    age_limit = settings.INSPECTION_CLUSTER_INSTANCE_AGE_LIMIT
    now = get_now()
    for (ec2_instance_id, described_instance) in instances.items():
        state = described_instance.get("State", {}).get("Name")
        launch_time = described_instance.get("LaunchTime")
        if not launch_time:
            # A described instance with no LaunchTime is unexpected; log it
            # and move on since age cannot be computed.
            logger.error(
                _(
                    "Inspection cluster instance %(ec2_instance_id)s has state "
                    "%(state)s but no launch time."
                ),
                {"ec2_instance_id": ec2_instance_id, "state": state},
            )
            continue
        # Age in seconds, rounded to one decimal place for readable logs.
        launch_age = round((now - launch_time).total_seconds(), 1)
        if launch_age > age_limit:
            logger.error(
                _(
                    "Inspection cluster instance %(ec2_instance_id)s has state "
                    "%(state)s and launched %(launch_age)s seconds ago at "
                    "%(launch_time)s. This exceeds our configured limit of "
                    "%(age_limit)s seconds by %(delta)s seconds."
                ),
                {
                    "ec2_instance_id": ec2_instance_id,
                    "state": state,
                    "launch_time": launch_time,
                    "launch_age": launch_age,
                    "age_limit": age_limit,
                    "delta": round(launch_age - age_limit, 1),
                },
            )
        else:
            logger.debug(
                _(
                    "Inspection cluster instance %(ec2_instance_id)s has state "
                    "%(state)s and launched %(launch_age)s seconds ago at "
                    "%(launch_time)s. This fits within our configured limit of "
                    "%(age_limit)s seconds by %(delta)s seconds."
                ),
                {
                    "ec2_instance_id": ec2_instance_id,
                    "state": state,
                    "launch_time": launch_time,
                    "launch_age": launch_age,
                    "age_limit": age_limit,
                    "delta": round(age_limit - launch_age, 1),
                },
            )
def generate_cloud_account(  # noqa: C901
    arn=None,
    aws_account_id=None,
    user=None,
    name=None,
    created_at=None,
    platform_authentication_id=None,
    platform_application_id=None,
    platform_source_id=None,
    is_enabled=True,
    enabled_at=None,
    verify_task=None,
    generate_verify_task=True,
    cloud_type=AWS_PROVIDER_STRING,
    azure_subscription_id=None,
    azure_tenant_id=None,
):
    """
    Generate a CloudAccount for testing.

    Any optional arguments not provided will be randomly generated.

    Args:
        arn (str): Optional ARN.
        aws_account_id (12-digit string): Optional AWS account ID.
        user (User): Optional Django auth User to be this account's owner.
        name (str): Optional name for this account.
        created_at (datetime): Optional creation datetime for this account.
        platform_authentication_id (int): Optional platform source
            authentication ID.
        platform_application_id (int): Optional platform source application ID.
        platform_source_id (int): Optional platform source source ID.
        is_enabled (bool): Optional should the account be enabled.
        enabled_at (datetime): Optional enabled datetime for this account;
            defaults to created_at.
        verify_task (PeriodicTask): Optional Celery verify task for this
            account (AWS only).
        generate_verify_task (bool): Optional should a verify_task be
            generated here (AWS only).
        cloud_type (str): Str denoting cloud type, defaults to "aws".
        azure_subscription_id (str): optional uuid str for azure subscription id
        azure_tenant_id (str): optional uuid str for azure tenant id

    Returns:
        CloudAccount: The created Cloud Account.
    """
    if user is None:
        user = helper.generate_test_user()
    if name is None:
        name = str(uuid.uuid4())
    if created_at is None:
        created_at = get_now()
    if enabled_at is None:
        enabled_at = created_at
    if platform_authentication_id is None:
        platform_authentication_id = _faker.pyint()
    if platform_application_id is None:
        platform_application_id = _faker.pyint()
    if platform_source_id is None:
        platform_source_id = _faker.pyint()

    if cloud_type == AZURE_PROVIDER_STRING:
        if azure_subscription_id is None:
            azure_subscription_id = uuid.uuid4()
        if azure_tenant_id is None:
            azure_tenant_id = uuid.uuid4()
        cloud_provider_account = AzureCloudAccount.objects.create(
            subscription_id=azure_subscription_id, tenant_id=azure_tenant_id
        )
    # default to AWS
    else:
        if arn is None:
            arn = helper.generate_dummy_arn(account_id=aws_account_id)
        if verify_task is None and generate_verify_task:
            # Mirror the production periodic task that re-verifies ARN
            # permissions on an interval.
            schedule, _ = IntervalSchedule.objects.get_or_create(
                every=settings.SCHEDULE_VERIFY_VERIFY_TASKS_INTERVAL,
                period=IntervalSchedule.SECONDS,
            )
            verify_task, _ = PeriodicTask.objects.get_or_create(
                interval=schedule,
                name=f"Verify {arn}.",
                task="api.clouds.aws.tasks.verify_account_permissions",
                kwargs=json.dumps(
                    {
                        "account_arn": arn,
                    }
                ),
                defaults={"start_time": created_at},
            )
        cloud_provider_account = AwsCloudAccount.objects.create(
            account_arn=arn,
            aws_account_id=aws.AwsArn(arn).account_id,
            verify_task=verify_task,
        )

    # created_at is auto-set on create, so override it with a second save.
    cloud_provider_account.created_at = created_at
    cloud_provider_account.save()

    cloud_account = CloudAccount.objects.create(
        user=user,
        name=name,
        content_object=cloud_provider_account,
        platform_authentication_id=platform_authentication_id,
        platform_application_id=platform_application_id,
        platform_source_id=platform_source_id,
        is_enabled=is_enabled,
    )
    cloud_account.created_at = created_at
    cloud_account.save()
    if enabled_at:
        cloud_account.enabled_at = enabled_at
        cloud_account.save()

    return cloud_account