def put_metric_alarms(self, resource: Resource, topic_arn: str):
    """
    Put the CloudWatch alarms for the first monitor of ``resource``.

    One alarm is put for each entry of ``monitor.monitor_values`` (one per
    monitoring level of the metric). Every alarm sends its ALARM action to
    ``topic_arn``; the alarm whose level reports ``is_lowest_level()`` also
    sends its OK (recovery) action to the same topic.

    :param resource: resource whose ``monitors[0]`` describes the alarms
    :param topic_arn: ARN used for ``AlarmActions`` (and ``OKActions``)
    :return: None
    """
    monitor = resource.monitors[0]

    for level_value in monitor.monitor_values:
        alarm = {
            "AlarmName": CloudWatch.NARUKO_ALARM_NAME.format(
                resource.get_service_name(),
                resource.resource_id,
                monitor.metric.name,
                level_value.level.name,
            ),
            "ActionsEnabled": monitor.enabled,
            "AlarmActions": [topic_arn],
            "MetricName": monitor.metric.name,
            "Namespace": resource.get_namespace(),
            "Statistic": monitor.statistic,
            "Dimensions": [
                {
                    "Name": resource.get_id_name(),
                    "Value": resource.resource_id,
                },
            ],
            "Period": monitor.period,
            # NOTE(review): fixed at 1 here, while Monitor objects elsewhere
            # are built with an evaluation_period argument -- confirm whether
            # that value is meant to be used instead.
            "EvaluationPeriods": 1,
            "Threshold": level_value.value,
            "ComparisonOperator": monitor.metric.comparison_operator,
        }

        # Only the lowest level alarm also notifies on recovery.
        if level_value.level.is_lowest_level():
            alarm["OKActions"] = [topic_arn]

        self.client.put_metric_alarm(**alarm)
def get_metric_data(self,
                    monitor_graph: MonitorGraph,
                    resource: Resource,
                    token: str = None):
    """
    Request metric data for one metric of ``resource``.

    A single metric query is sent, identified by the lower-cased metric name
    and scoped to the resource through one dimension. Results are requested
    in ascending timestamp order with at most 500 datapoints.

    :param monitor_graph: supplies metric name, period, stat and time range
    :param resource: supplies namespace, dimension name and resource id
    :param token: when truthy, passed as ``NextToken`` to continue a
        previous request
    :return: the raw response of ``client.get_metric_data``
    """
    query = {
        "Id": monitor_graph.metric_name.lower(),
        "MetricStat": {
            "Metric": {
                "Namespace": resource.get_namespace(),
                "MetricName": monitor_graph.metric_name,
                "Dimensions": [
                    {
                        "Name": resource.get_id_name(),
                        "Value": resource.resource_id,
                    },
                ],
            },
            "Period": monitor_graph.period,
            "Stat": monitor_graph.stat,
        },
    }
    request = {
        "MetricDataQueries": [query],
        "StartTime": monitor_graph.start_time,
        "EndTime": monitor_graph.end_time,
        "ScanBy": "TimestampAscending",
        "MaxDatapoints": 500,
    }
    if token:
        request["NextToken"] = token

    return self.client.get_metric_data(**request)
def describe_resource_monitors(self, resource: Resource):
    """
    Build one ``Monitor`` per metric of ``resource`` from its alarms.

    Alarms whose name starts with the resource-specific prefix are fetched
    and grouped by ``MetricName``. For every metric returned by
    ``resource.get_metrics()`` a ``Monitor`` is created: thresholds are
    keyed by monitoring level, the common settings are read from the first
    alarm of the metric, and the status is derived from the levels whose
    alarm is currently in the ALARM state. A metric without alarms gets
    status ``UNSET``, ``None`` thresholds and ``None`` settings.

    :param resource: resource whose alarms are inspected
    :return: list of ``Monitor`` objects, one per metric
    """
    name_prefix = CloudWatch.NARUKO_ALARM_NAME_SPECIFY_INSTANCE.format(
        resource.get_service_name(), resource.resource_id)
    response = self.client.describe_alarms(AlarmNamePrefix=name_prefix)

    # Group the alarms by the metric they watch.
    alarms_by_metric = {}
    for alarm in response["MetricAlarms"]:
        alarms_by_metric.setdefault(alarm["MetricName"], []).append(alarm)

    monitors = []
    for metric in resource.get_metrics():
        # All alarms (every level) of this metric; a single empty dict
        # stands in when no alarm has been set for it.
        metric_alarms = alarms_by_metric.get(metric, [{}])
        first_alarm = metric_alarms[0]

        # Defaults used when the metric has no alarm.
        status_of_metric = Monitor.MonitorStatus.UNSET
        thresholds = {
            level.value: None for level in MonitorValue.MonitorLevel
        }

        if first_alarm:
            levels_by_state = {
                state.name: [] for state in CloudWatch.AlarmState
            }
            for metric_alarm in metric_alarms:
                # The monitoring level is the last '-' separated part of
                # the alarm name.
                level = metric_alarm["AlarmName"].rsplit('-', 1)[1]
                thresholds[level.lower()] = metric_alarm["Threshold"]
                # Sort the level under the alarm's current state.
                levels_by_state[metric_alarm["StateValue"]].append(level)

            # Highest status among the levels currently in ALARM; per the
            # original note, no firing alarm is expected to yield OK.
            firing_levels = levels_by_state[CloudWatch.AlarmState.ALARM.name]
            status_of_metric = Monitor.MonitorStatus.max([
                Monitor.MonitorStatus[level.upper()]
                for level in firing_levels
            ])

        monitors.append(
            Monitor(metric_name=metric,
                    values=thresholds,
                    enabled=first_alarm.get("ActionsEnabled"),
                    period=first_alarm.get("Period"),
                    evaluation_period=first_alarm.get("EvaluationPeriods"),
                    statistic=first_alarm.get("Statistic"),
                    status=status_of_metric))

    return monitors
def list(self, request, tenant_pk=None, aws_env_pk=None, region_pk=None,
         service_pk=None, resource_pk=None):
    """
    Return the serialized schedules of one resource.

    Responds with 400 when the lookup or the use case raises TypeError,
    ValueError, KeyError, ClientError or NarukoException, and with 404 when
    a model lookup raises ObjectDoesNotExist. Any other exception is logged
    and re-raised.

    :return: ``Response`` with a list of serialized schedules (200) or an
        empty error response (400 / 404)
    """
    log = NarukoLogging(request)
    logger = log.get_logger(__name__)
    logger.info("START: list")

    try:
        with transaction.atomic():
            tenant = TenantModel.objects.get(id=tenant_pk)
            aws_env = AwsEnvironmentModel.objects.get(
                id=aws_env_pk, tenant=tenant)
            resource = Resource.get_service_resource(
                region_pk, service_pk, resource_pk)
            use_case = ControlScheduleUseCase(log)
            schedules = use_case.fetch_schedules(
                request.user, tenant, aws_env, resource)
    except (TypeError, ValueError, KeyError, ClientError,
            NarukoException) as err:
        # The request data is invalid.
        logger.exception(err)
        return Response(status=status.HTTP_400_BAD_REQUEST)
    except ObjectDoesNotExist as err:
        # The AWS environment does not exist.
        logger.exception(err)
        return Response(status=status.HTTP_404_NOT_FOUND)
    except Exception as err:
        logger.exception(err)
        raise

    # Reached only when no exception was raised above.
    logger.info("END: list")
    payload = [schedule.serialize() for schedule in schedules]
    return Response(data=payload, status=status.HTTP_200_OK)
def list(self, request, tenant_pk=None, aws_env_pk=None, region_pk=None,
         service_pk=None, resource_pk=None):
    """
    Return the serialized monitors of one resource.

    Responds with 400 when the lookup or the use case raises TypeError,
    ValueError, KeyError or NarukoException, and with 404 when the AWS
    environment lookup raises ObjectDoesNotExist. Any other exception is
    logged and re-raised.

    :return: ``Response`` with a list of serialized monitors (200) or an
        empty error response (400 / 404)
    """
    log = NarukoLogging(request)
    logger = log.get_logger(__name__)
    logger.info("START: list")

    try:
        aws = AwsEnvironmentModel.objects.get(
            id=aws_env_pk, tenant_id=tenant_pk)
        resource = Resource.get_service_resource(
            region_pk, service_pk, resource_pk)
        use_case = ControlMonitorUseCase(log)
        monitors = use_case.fetch_monitors(request.user, aws, resource)
    except (TypeError, ValueError, KeyError, NarukoException) as err:
        # The request data is invalid.
        logger.exception(err)
        return Response(status=status.HTTP_400_BAD_REQUEST)
    except ObjectDoesNotExist as err:
        # The AWS environment does not exist.
        logger.exception(err)
        return Response(status=status.HTTP_404_NOT_FOUND)
    except Exception as err:
        logger.exception(err)
        raise

    # Reached only when no exception was raised above.
    logger.info("END: list")
    payload = [monitor.serialize() for monitor in monitors]
    return Response(data=payload, status=status.HTTP_200_OK)
def graph(self, request, tenant_pk=None, aws_env_pk=None, region_pk=None,
          service_pk=None, resource_pk=None, pk=None):
    """
    Return the serialized graph data of metric ``pk`` for one resource.

    The request body is expanded into ``MonitorGraph`` keyword arguments.
    Responds with 400 when TypeError, ValueError, KeyError or
    NarukoException is raised, and with 404 on ObjectDoesNotExist. Any
    other exception is logged and re-raised.

    :return: ``Response`` with the serialized graph (200) or an empty error
        response (400 / 404)
    """
    log = NarukoLogging(request)
    logger = log.get_logger(__name__)
    logger.info("START: graph")

    try:
        aws_environment = AwsEnvironmentModel.objects.get(
            id=aws_env_pk, tenant_id=tenant_pk)
        resource = Resource.get_service_resource(
            region_pk, service_pk, resource_pk)
        requested_graph = MonitorGraph(metric_name=pk, **request.data)
        use_case = ControlMonitorUseCase(log)
        monitor_graph = use_case.graph(
            request.user, resource, aws_environment, requested_graph)
    except (TypeError, ValueError, KeyError, NarukoException) as err:
        # The request data is invalid.
        logger.exception(err)
        return Response(status=status.HTTP_400_BAD_REQUEST)
    except ObjectDoesNotExist as err:
        # The AWS environment does not exist.
        logger.exception(err)
        return Response(status=status.HTTP_404_NOT_FOUND)
    except Exception as err:
        logger.exception(err)
        raise

    # Reached only when no exception was raised above.
    logger.info("END: graph")
    return Response(data=monitor_graph.serialize(),
                    status=status.HTTP_200_OK)
def stop_resource(self,
                  request_user: UserModel,
                  aws_environment: AwsEnvironmentModel,
                  resource: Resource):
    """
    Stop ``resource`` after checking the caller's permissions.

    :param request_user: user issuing the request
    :param aws_environment: AWS environment the resource is stopped in
    :param resource: resource to stop
    :raises PermissionDenied: when the user does not belong to the
        environment's tenant or does not have the AWS environment
    :return: None
    """
    self.logger.info("START: stop_resource")

    tenant = aws_environment.tenant
    belongs_to_tenant = request_user.is_belong_to_tenant(tenant)
    if not belongs_to_tenant:
        message = (
            "request user is not belong to tenant. user_id:{} tenant_id:{}"
        ).format(request_user.id, tenant.id)
        raise PermissionDenied(message)

    owns_environment = request_user.has_aws_env(aws_environment)
    if not owns_environment:
        message = "request user doesn't have aws environments. id:{}".format(
            request_user.id)
        raise PermissionDenied(message)

    resource.stop(aws_environment)

    self.logger.info("END: stop_resource")
def test_create_monitor(self, use_case: mock.Mock):
    """
    POSTing a monitor returns 200 and the serialized saved monitor.

    ``use_case`` is the mocked use-case class; its ``save_monitor`` is set
    up to return a resource carrying the expected monitor.
    """
    client = APIClient()
    client.force_authenticate(
        user=UserModel.objects.get(email="test_email"))

    # Look up the id of Company1.
    tenant = TenantModel.objects.get(
        tenant_name="test_tenant_users_in_tenant_1")
    tenant_id = tenant.id
    # Look up the id of the AWS environment.
    aws_id = AwsEnvironmentModel.objects.get(aws_account_id="test_aws1").id

    # Prepare the mock.
    save_monitor = use_case.return_value.save_monitor
    saved_resource = Resource.get_service_resource(
        "ap-northeast-1", "ec2", "i-123456789012")
    saved_monitor = Monitor(
        "test_name",
        {"caution": 60, "danger": 90},
        True,
        300,
        1,
        'Average',
    )
    saved_resource.monitors.append(saved_monitor)
    save_monitor.return_value = saved_resource

    # Run the code under test.
    request_body = {
        "metric_name": "test_name",
        "values": {"caution": 60, "danger": 90},
        "enabled": True,
        "period": 300,
        "evaluation_period": 1,
        "statistic": 'Average',
    }
    response = client.post(
        path=self.api_path.format(tenant_id, aws_id),
        data=request_body,
        format='json')

    use_case.assert_called_once()
    save_monitor.assert_called_once()

    expected_body = {
        "metric_name": "test_name",
        "values": {"danger": 90, "caution": 60},
        "enabled": True,
        "period": 300,
        "evaluation_period": 1,
        "statistic": 'Average',
        "comparison_operator": "GreaterThanOrEqualToThreshold",
        "status": None,
    }
    self.assertEqual(response.status_code, 200)
    self.assertEqual(response.data, expected_body)
def addResource(_id, userId, courseId, resName="name", resPath="",
                resIntroduction=""):
    """
    Save a ``Resource`` object describing the file at ``resPath``.

    The file size is stored in MiB rounded to two decimals and the upload
    time is the current local time formatted as ``YYYY-MM-DD HH:MM:SS``.

    :param _id: id given to the new resource
    :param userId: stored as ``upload_user_id``
    :param courseId: stored as ``course_id``
    :param resName: stored as ``name``
    :param resPath: path of the file; also stored as ``link``
    :param resIntroduction: stored as ``introduce``
    :return: "添加成功!" after saving, or "找不到文件!" when the size of
        ``resPath`` cannot be read (missing or inaccessible path)
    """
    # Ask for the size directly instead of checking existence first: this
    # removes the window in which the file could vanish between the check
    # and the read. ValueError covers paths such as ones with embedded NUL
    # bytes, which the former exists() check also reported as missing.
    try:
        size_in_bytes = os.path.getsize(resPath)
    except (OSError, ValueError):
        return "找不到文件!"

    fsize = round(size_in_bytes / float(1024 * 1024), 2)
    # strftime() without a time tuple formats the current local time.
    curTime = time.strftime('%Y-%m-%d %H:%M:%S')

    temp = Resource(id=_id,
                    name=resName,
                    link=resPath,
                    introduce=resIntroduction,
                    size=fsize,
                    uploadTime=curTime,
                    upload_user_id=userId,
                    course_id=courseId)
    temp.save()
    return "添加成功!"
def describe_load_balancer(self, name: str):
    """
    Return an 'elb' resource object for the load balancer ``name``.

    No request is sent to the client here: the resource is built from this
    object's region and ``name``, which is used both as the resource id and
    as the display name.

    :param name: load balancer name
    :return: the resource with ``name`` set
    """
    load_balancer = Resource.get_service_resource(self.region, 'elb', name)
    load_balancer.name = name
    return load_balancer
def graph(self,
          request_user: UserModel,
          resource: Resource,
          aws: AwsEnvironmentModel,
          monitor_graph: MonitorGraph):
    """
    Fetch chart data for one metric of ``resource``.

    :param request_user: user issuing the request
    :param resource: resource the metric belongs to
    :param aws: AWS environment used to build the CloudWatch client
    :param monitor_graph: describes the requested metric
    :raises PermissionDenied: when the user does not have ``aws``
    :raises ObjectDoesNotExist: when the metric is not among
        ``resource.get_metrics()``
    :return: the result of ``CloudWatch.get_chart``
    """
    self.logger.info("START: graph")

    # Is this an AWS account the user may use?
    if not request_user.has_aws_env(aws):
        denied_message = (
            "request user can't use aws account. user_id: {}, aws_id: {}"
        ).format(request_user.id, aws.id)
        raise PermissionDenied(denied_message)

    metric_name = monitor_graph.metric_name
    if metric_name not in resource.get_metrics():
        missing_message = (
            "service doesn't have metric service_type: {} metric: {}"
        ).format(resource.get_service_name(), monitor_graph.metric_name)
        raise ObjectDoesNotExist(missing_message)

    cloud_watch = CloudWatch(aws, resource.region)
    chart = cloud_watch.get_chart(monitor_graph, resource)

    self.logger.info("END: graph")
    return chart
def describe_instance(self, instance_id: str):
    """
    Describe one DB instance and return it as a resource object.

    The first entry of ``DBInstances`` in the response is used; its
    ``DBInstanceStatus`` becomes the resource state and ``instance_id`` is
    used as the resource name.

    :param instance_id: DB instance identifier
    :return: resource with ``state`` and ``name`` set
    """
    described = self.client.describe_db_instances(
        DBInstanceIdentifier=instance_id)
    db_instance = described['DBInstances'][0]
    status_of_instance = db_instance.get('DBInstanceStatus')

    db_resource = Resource.get_service_resource(
        self.region, self._service_name(), instance_id)
    db_resource.state = status_of_instance
    db_resource.name = instance_id
    return db_resource
def create_backup(self,
                  request_user: UserModel,
                  aws_environment: AwsEnvironmentModel,
                  resource: Resource,
                  no_reboot: bool):
    """
    Create a backup of ``resource`` after checking the caller's permissions.

    :param request_user: user issuing the request
    :param aws_environment: AWS environment the backup is created in
    :param resource: resource to back up
    :param no_reboot: forwarded to ``resource.create_backup``
    :raises PermissionDenied: when the user does not belong to the
        environment's tenant or does not have the AWS environment
    :return: the value returned by ``resource.create_backup``
    """
    self.logger.info("START: create_backup")

    tenant = aws_environment.tenant
    belongs_to_tenant = request_user.is_belong_to_tenant(tenant)
    if not belongs_to_tenant:
        message = (
            "request user is not belong to tenant. user_id:{} tenant_id:{}"
        ).format(request_user.id, tenant.id)
        raise PermissionDenied(message)

    owns_environment = request_user.has_aws_env(aws_environment)
    if not owns_environment:
        message = "request user doesn't have aws environments. id:{}".format(
            request_user.id)
        raise PermissionDenied(message)

    created_backup_id = resource.create_backup(aws_environment,
                                               no_reboot=no_reboot)

    self.logger.info("END: create_backup")
    return created_backup_id
def list(self, request, tenant_pk=None, aws_env_pk=None, region_pk=None,
         service_pk=None, resource_pk=None):
    """
    Return the serialized monitors of one resource.

    Exceptions are not handled here and propagate to the caller.

    :return: ``Response`` (200) with a list of serialized monitors
    """
    log = NarukoLogging(request)
    logger = log.get_logger(__name__)
    logger.info("START: list")

    aws = AwsEnvironmentModel.objects.get(id=aws_env_pk,
                                          tenant_id=tenant_pk)
    resource = Resource.get_service_resource(region_pk, service_pk,
                                             resource_pk)
    use_case = ControlMonitorUseCase(log)
    monitors = use_case.fetch_monitors(request.user, aws, resource)

    logger.info("END: list")
    payload = [monitor.serialize() for monitor in monitors]
    return Response(data=payload, status=status.HTTP_200_OK)
def list(self, request, tenant_pk=None, aws_env_pk=None, region_pk=None,
         service_pk=None, resource_pk=None):
    """
    Return the serialized schedules of one resource.

    Exceptions are not handled here and propagate to the caller.

    :return: ``Response`` (200) with a list of serialized schedules
    """
    log = NarukoLogging(request)
    logger = log.get_logger(__name__)
    logger.info("START: list")

    tenant = TenantModel.objects.get(id=tenant_pk)
    aws_env = AwsEnvironmentModel.objects.get(id=aws_env_pk, tenant=tenant)
    resource = Resource.get_service_resource(region_pk, service_pk,
                                             resource_pk)
    use_case = ControlScheduleUseCase(log)
    schedules = use_case.fetch_schedules(request.user, tenant, aws_env,
                                         resource)

    logger.info("END: list")
    payload = [schedule.serialize() for schedule in schedules]
    return Response(data=payload, status=status.HTTP_200_OK)
def describe_instance(self, instance_id: str):
    """
    Describe one instance and return it as a resource object.

    Instances are filtered by ``instance-id`` and the first instance of the
    first reservation in the response is used. The resource name is the
    value of the ``Name`` tag, falling back to ``instance_id`` when the tag
    is absent; the resource state is the instance's ``State.Name``.

    :param instance_id: id of the instance to describe
    :return: resource with ``name`` and ``state`` set
    """
    id_filter = {'Name': 'instance-id', 'Values': [instance_id]}
    described = self.client.describe_instances(Filters=[id_filter])
    first_instance = described['Reservations'][0]['Instances'][0]

    tags = self.convert_tag(first_instance.get("Tags", []))
    display_name = tags.get("Name", instance_id)
    state_name = first_instance.get('State', {}).get('Name')

    instance_resource = Resource.get_service_resource(
        self.region, self._service_name(), instance_id)
    instance_resource.name = display_name
    instance_resource.state = state_name
    return instance_resource
def create(self, request, tenant_pk=None, aws_env_pk=None, region_pk=None,
           service_pk=None, resource_pk=None):
    """
    Save a monitor built from the request body for one resource.

    The request body is expanded into ``Monitor`` keyword arguments and the
    monitor is appended to the resource before it is handed to the use
    case. Exceptions are not handled here and propagate to the caller.

    :return: ``Response`` (200) with the serialized first monitor of the
        resource returned by the use case
    """
    log = NarukoLogging(request)
    logger = log.get_logger(__name__)
    logger.info("START: create")

    aws_environment = AwsEnvironmentModel.objects.get(id=aws_env_pk,
                                                      tenant_id=tenant_pk)
    resource = Resource.get_service_resource(region_pk, service_pk,
                                             resource_pk)
    new_monitor = Monitor(**request.data)
    resource.monitors.append(new_monitor)

    use_case = ControlMonitorUseCase(log)
    resource = use_case.save_monitor(request.user, resource,
                                     aws_environment)

    logger.info("END: create")
    payload = resource.monitors[0].serialize()
    return Response(data=payload, status=status.HTTP_200_OK)
def graph(self, request, tenant_pk=None, aws_env_pk=None, region_pk=None,
          service_pk=None, resource_pk=None, pk=None):
    """
    Return the serialized graph data of metric ``pk`` for one resource.

    The request body is expanded into ``MonitorGraph`` keyword arguments.
    Exceptions are not handled here and propagate to the caller.

    :return: ``Response`` (200) with the serialized graph
    """
    log = NarukoLogging(request)
    logger = log.get_logger(__name__)
    logger.info("START: graph")

    aws_environment = AwsEnvironmentModel.objects.get(id=aws_env_pk,
                                                      tenant_id=tenant_pk)
    resource = Resource.get_service_resource(region_pk, service_pk,
                                             resource_pk)
    requested_graph = MonitorGraph(metric_name=pk, **request.data)

    use_case = ControlMonitorUseCase(log)
    filled_graph = use_case.graph(request.user, resource, aws_environment,
                                  requested_graph)

    logger.info("END: graph")
    return Response(data=filled_graph.serialize(),
                    status=status.HTTP_200_OK)
def fetch_resources(self,
                    request_user: UserModel,
                    aws_environment: AwsEnvironmentModel,
                    region: str) -> list:
    """
    List the resources of ``region`` together with their alarm status.

    Resources are read in batches from ``ResourceGroupTagging``. The alarm
    statuses are fetched from CloudWatch once, on the first non-empty
    batch, and then used for every resource.

    :param request_user: user issuing the request
    :param aws_environment: AWS environment to read from
    :param region: region to read from
    :raises PermissionDenied: when the user does not belong to the
        environment's tenant or does not have the AWS environment
    :return: list of resources, each with ``status`` set
    """
    self.logger.info("START: fetch resources")

    tenant = aws_environment.tenant
    if not request_user.is_belong_to_tenant(tenant):
        message = (
            "request user is not belong to tenant. user_id:{} tenant_id:{}"
        ).format(request_user.id, aws_environment.tenant.id)
        raise PermissionDenied(message)

    if not request_user.has_aws_env(aws_environment):
        message = "request user doesn't have aws environments. id:{}".format(
            request_user.id)
        raise PermissionDenied(message)

    tagging = ResourceGroupTagging(aws_environment=aws_environment,
                                   region=region)
    self.logger.info("ResourceGroupTagging Client Created.")

    collected = []
    status_by_service = None
    for batch in tagging.get_resources(Resource.get_all_services()):
        self.logger.info("got resource tags")

        # Fetch the alarm statuses lazily, only once a batch has content.
        if status_by_service is None and batch:
            cloud_watch = CloudWatch(aws_environment=aws_environment,
                                     region=region)
            status_by_service = cloud_watch.get_resources_status()
            self.logger.info("got cloudwatch alarms")

        for tagged_resource in batch:
            self.logger.info("resource tag convert response")
            # A resource without any alarm is reported as unset.
            service_statuses = status_by_service[
                tagged_resource.get_service_name()]
            tagged_resource.status = service_statuses.get(
                tagged_resource.resource_id, "UNSET")
            collected.append(tagged_resource)

    self.logger.info("END: fetch resources")
    return collected
def target_info(resource: Resource, aws_env: AwsEnvironmentModel):
    """
    Build an underscore-joined identifier for a monitored metric.

    The parts are, in order: environment name, AWS account id, region,
    service name, resource id and the metric name of the resource's first
    monitor.

    :param resource: resource whose ``monitors[0]`` names the metric
    :param aws_env: AWS environment the resource belongs to
    :return: the identifier string
    """
    parts = (
        aws_env.name,
        aws_env.aws_account_id,
        resource.region,
        resource.get_service_name(),
        resource.resource_id,
        resource.monitors[0].metric.name,
    )
    return "{}_{}_{}_{}_{}_{}".format(*parts)
def target_info(aws_env: AwsEnvironmentModel, resource: Resource):
    """
    Build an underscore-joined identifier for a resource.

    The parts are, in order: environment name, AWS account id, region,
    service name and resource id.

    :param aws_env: AWS environment the resource belongs to
    :param resource: resource to identify
    :return: the identifier string
    """
    parts = (
        aws_env.name,
        aws_env.aws_account_id,
        resource.region,
        resource.get_service_name(),
        resource.resource_id,
    )
    return "{}_{}_{}_{}_{}".format(*parts)