예제 #1
0
 def test_it_leaves_other_values_untouched(self):
     """Check the non-profiling settings after disable_profiling() has been called."""
     self.agent_config_merger.disable_profiling()

     # Nothing below changes the configuration, so a single snapshot is enough.
     current = AgentConfiguration.get()
     assert current.sampling_interval == timedelta(milliseconds=1)
     assert current.minimum_time_reporting == timedelta(seconds=1)
     assert current.reporting_interval == timedelta(minutes=1)
     assert current.max_stack_depth == 998
예제 #2
0
 def assert_init_values(self):
     """Assert that the current agent configuration holds the initial values expected by these tests."""
     initial = AgentConfiguration.get()
     assert initial.should_profile is True
     assert initial.sampling_interval == timedelta(milliseconds=1)
     assert initial.minimum_time_reporting == timedelta(seconds=1)
     assert initial.reporting_interval == timedelta(minutes=1)
     assert initial.max_stack_depth == 998
예제 #3
0
 def test_agent_configuration_when_configure_agent_throws_error(self):
     """
     Refresh the configuration while configure_agent answers with a simulated HTTP 500 and check
     that profiling stays enabled with a 13 second sampling interval.
     """
     stubber = self.client_stubber
     stubber.add_client_error(
         'configure_agent',
         service_message='Simulated error in configure_agent call',
         http_status_code=500)

     with stubber:
         self.subject.refresh_configuration()

         # presumably these are the values installed by the test setup -- confirm against the fixture
         after_refresh = AgentConfiguration.get()
         assert after_refresh.should_profile is True
         assert after_refresh.sampling_interval == timedelta(seconds=13)
예제 #4
0
 def _refresh_configuration(self):
     """
     Refresh the configuration through the collector, record whether profiling should run, and
     point the scheduler at the matching delay.
     """
     self.collector.refresh_configuration()
     should_profile = AgentConfiguration.get().should_profile
     self.is_profiling_in_progress = should_profile

     # The provider is a callable so the scheduler reads the configuration each time it needs a delay.
     if should_profile:
         def next_delay():
             return AgentConfiguration.get().sampling_interval
     else:
         # if we should not profile we can simply wait for the reporting interval and call again at that time.
         def next_delay():
             return AgentConfiguration.get().reporting_interval

     self.scheduler.update_delay_provider(next_delay)
예제 #5
0
    def test_default_values_are_overridden_at_merge_with(self):
        """Merge a configure_agent response into a defaults-only merger and check the resulting values."""
        merger = AgentConfigurationMerger(default=self.config)
        self.assert_init_values()

        merger.merge_with(configure_agent_response=self.configure_agent_response)

        # Fetch the configuration only after the merge so the checks see the merged values.
        merged = AgentConfiguration.get()
        assert merged.should_profile is False
        assert merged.sampling_interval == timedelta(milliseconds=2000)
        assert merged.minimum_time_reporting == timedelta(milliseconds=21000)
        assert merged.reporting_interval == timedelta(seconds=123)
        assert merged.max_stack_depth == 1001
예제 #6
0
    def __init__(self, environment=None):
        """
        Wire up the sampler, the scheduler and the collaborators taken from the environment.

        :param environment: dependency container dictionary for the current profiler. Keys read here:
            - "timer": (optional) stored as ``self.timer``; None when the key is absent
            - "sampler": (optional) sampler to use; when missing or falsy a Sampler is built from the environment
            - "initial_sampling_interval": (required) initial delay the scheduler takes before starting to sample
            - "profiler_thread_name": (required) thread name used for running the scheduler
            - "collector": (required) collector object, stored as ``self.collector``
            - "profiler_disabler": (required) stored as ``self.profiler_disabler``
            Other entries (e.g. sampling_interval, killswitch_filepath) are not read by this method;
            presumably they are consumed by the objects built from the same environment -- confirm.
        :raises KeyError: when one of the required keys is missing from the environment
        """
        # Build a fresh dict per call; a `dict()` default is created once at definition time and
        # would be shared by every instance constructed without an explicit environment.
        if environment is None:
            environment = {}

        self.timer = environment.get("timer")
        self.sampler = environment.get("sampler") or Sampler(
            environment=environment)

        # The delay provider is a callable so the scheduler reads the sampling interval from the
        # current configuration each time it asks for a delay.
        self.scheduler = Scheduler(
            command=self._profiling_command,
            delay_provider=lambda: AgentConfiguration.get().sampling_interval,
            initial_delay=environment["initial_sampling_interval"],
            thread_name=environment["profiler_thread_name"])
        self.collector = environment["collector"]
        self.profiler_disabler = environment["profiler_disabler"]
        self.is_profiling_in_progress = False
        self._first_execution = True
    def is_overall_cpu_usage_limit_reached(self, profile=None):
        """
        Overall cpu limit check covering the overhead of the full sampling cycle:
        refresh config -> (sample -> aggregate) * n -> profile submission. This is expected to be
        called after profile submission.

        :param profile: profile whose active time is the reference duration; a falsy profile returns False
        :return: True when the total of the "runProfiler" metric, as a percentage of the profile's
            active seconds, is greater than or equal to the configured cpu limit; False otherwise,
            including when the metric is missing or has too few measures.
        """
        run_metric = self.timer.metrics.get("runProfiler")
        if not profile or not run_metric:
            return False
        if run_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
            return False

        active_seconds = profile.get_active_millis_since_start() / 1000
        used_time_percentage = 100 * run_metric.total / active_seconds
        cpu_limit_percentage = AgentConfiguration.get().cpu_limit_percentage

        if used_time_percentage >= cpu_limit_percentage:
            logger.debug(self.timer.metrics)
            # The active time is read again here, so the logged value is the one at logging time.
            logger.debug("Profile active seconds since start: {:.2f} s".format(
                profile.get_active_millis_since_start() / 1000))
            logger.info(
                "Profiler overall cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler."
                .format(used_time_percentage, cpu_limit_percentage))
            return True

        return False
 def test_it_sets_all_parameters(self):
     """Build a ProfilerDisabler from the test environment and check the values it exposes."""
     self.env['memory_limit_bytes'] = 42
     self.disabler = ProfilerDisabler(self.env)

     disabler = self.disabler
     assert disabler.memory_limit_bytes == 42
     assert disabler.killswitch.killswitch_filepath == 'path_to_my_kill_switch'
     assert disabler.cpu_usage_check.timer == self.timer

     configured_limit = AgentConfiguration.get().cpu_limit_percentage
     assert configured_limit == DEFAULT_CPU_LIMIT_PERCENTAGE
 def reset(self):
     """Replace self.profile with a new profile from the factory, then reset the timer."""
     # The sampling interval is taken from the configuration at the time of the reset.
     interval_seconds = AgentConfiguration.get().sampling_interval.total_seconds()
     started_at = current_milli_time(clock=self.clock)

     self.profile = self.profile_factory(
         profiling_group_name=self.profiling_group_name,
         sampling_interval_seconds=interval_seconds,
         host_weight=self.host_weight,
         start=started_at,
         clock=self.clock)
     self.timer.reset()
예제 #10
0
 def test_when_backend_sends_validation_exception_it_stops_the_profiling(
         self):
     """A ValidationException from configure_agent must leave should_profile set to False."""
     stubber = self.client_stubber
     stubber.add_client_error(
         'configure_agent',
         service_message='Simulated error in configure_agent call',
         service_error_code='ValidationException')

     with stubber:
         self.subject.refresh_configuration()
         should_profile = AgentConfiguration.get().should_profile
         assert should_profile is False
예제 #11
0
 def test_when_backends_sends_resource_not_found_it_stops_the_profiling_in_non_lambda_case(
         self):
     """A ResourceNotFoundException from configure_agent must leave should_profile set to False."""
     stubber = self.client_stubber
     stubber.add_client_error(
         'configure_agent',
         service_message='Simulated error in configure_agent call',
         service_error_code='ResourceNotFoundException')

     with stubber:
         self.subject.refresh_configuration()
         should_profile = AgentConfiguration.get().should_profile
         assert should_profile is False
예제 #12
0
 def test_configure_agent_calls_the_client(self):
     """Stub a configure_agent response and check that refreshing applies its values."""
     agent_parameters = {
         'SamplingIntervalInMilliseconds': '91000',
         'MinimumTimeForReportingInMilliseconds': '60000',
         'MaxStackDepth': '1001'
     }
     response = {
         'configuration': {
             'agentParameters': agent_parameters,
             'periodInSeconds': 123,
             'shouldProfile': False
         }
     }
     self.client_stubber.add_response('configure_agent', response)

     with self.client_stubber:
         self.subject.refresh_configuration()

         refreshed = AgentConfiguration.get()
         assert refreshed.should_profile is False
         # 91000 milliseconds in the response correspond to 91 seconds.
         assert refreshed.sampling_interval.total_seconds() == 91
예제 #13
0
    def is_cpu_usage_limit_reached(self, profile=None):
        """
        Check whether the average duration of the "runProfiler" timer metric, relative to the
        average sampling interval, has reached the configured cpu limit.

        :param profile: (optional) current profile, forwarded to _get_average_sampling_interval_seconds
        :return: True when the used time percentage is greater than or equal to the configured
            cpu_limit_percentage; False otherwise, including when the metric is missing or has fewer
            than MINIMUM_MEASURES_IN_DURATION_METRICS measures.
        """
        profiler_metric = self.timer.metrics.get("runProfiler")
        if not profiler_metric or profiler_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
            return False

        sampling_interval_seconds = self._get_average_sampling_interval_seconds(
            profile)
        used_time_percentage = 100 * profiler_metric.average(
        ) / sampling_interval_seconds

        # Read the limit once so the value that is compared and the value that is logged are
        # always the same, as the other cpu usage checks do.
        cpu_limit_percentage = AgentConfiguration.get().cpu_limit_percentage

        if used_time_percentage >= cpu_limit_percentage:
            logger.debug(self.timer.metrics)
            logger.info(
                "Profiler cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler."
                .format(used_time_percentage, cpu_limit_percentage))
            return True

        return False
예제 #14
0
 def test_when_backends_sends_resource_not_found_it_does_not_stop_the_profiling_in_lambda_case(
         self):
     """
     With the two lambda environment variables set, a ResourceNotFoundException from
     configure_agent must leave should_profile set to True.
     """
     stubber = self.client_stubber
     stubber.add_client_error(
         'configure_agent',
         service_message='Simulated error in configure_agent call',
         service_error_code='ResourceNotFoundException')

     # NOTE(review): these variables are not restored in this method; presumably a fixture or
     # teardown elsewhere cleans them up -- confirm, otherwise they leak into later tests.
     os.environ[LAMBDA_TASK_ROOT] = 'test-task-root'
     os.environ[LAMBDA_RUNTIME_DIR] = 'test-dir'

     with stubber:
         self.subject.refresh_configuration()
         should_profile = AgentConfiguration.get().should_profile
         assert should_profile is True
예제 #15
0
 def reset(self):
     """Reset the errors metadata and the timer, then replace self.profile with a new profile."""
     self.errors_metadata.reset()
     self.timer.reset()

     # The sampling interval is taken from the configuration at the time of the reset.
     interval_seconds = AgentConfiguration.get().sampling_interval.total_seconds()
     started_at = current_milli_time(clock=self.clock)
     debug_info = AgentDebugInfo(self.errors_metadata, self.agent_start_time,
                                 self.timer)

     self.profile = self.profile_factory(
         profiling_group_name=self.profiling_group_name,
         sampling_interval_seconds=interval_seconds,
         host_weight=self.host_weight,
         start=started_at,
         agent_debug_info=debug_info,
         clock=self.clock)
예제 #16
0
    def test_a_user_override_is_not_overridden_at_merge(self):
        """
        Build a merger with user overrides, merge a configure_agent response into it, and check
        that the sampling interval stays at 9 seconds while the other values change.
        """
        merger = AgentConfigurationMerger(default=self.config,
                                          user_overrides=self.overide_config)

        before_merge = AgentConfiguration.get()
        assert before_merge.should_profile is True
        assert before_merge.sampling_interval == timedelta(seconds=9)
        assert before_merge.minimum_time_reporting == timedelta(seconds=1)
        assert before_merge.reporting_interval == timedelta(minutes=1)
        assert before_merge.max_stack_depth == 998

        merger.merge_with(configure_agent_response=self.configure_agent_response)

        # Fetch the configuration again: the checks below must see the state after the merge.
        after_merge = AgentConfiguration.get()
        assert after_merge.should_profile is False
        assert after_merge.sampling_interval == timedelta(seconds=9)
        assert after_merge.minimum_time_reporting == timedelta(milliseconds=21000)
        assert after_merge.reporting_interval == timedelta(seconds=123)
        assert after_merge.max_stack_depth == 1001
    def _setup_final_environment(self, environment, environment_override):
        """
        Apply the overrides to the environment, fill in the entries that are still missing, and
        return the result wrapped in an UnmodifiableDict.

        :param environment: environment dictionary; it is updated in place
        :param environment_override: entries that replace or extend those in the environment
        :return: UnmodifiableDict built from the completed environment
        """
        environment.update(environment_override)

        # set additional parameters if needed (costly default init or depend on other parameters)
        if environment.get('initial_sampling_interval') is None:
            # Draw a uniform random delay between 0 and the configured sampling interval.
            max_delay_seconds = AgentConfiguration.get().sampling_interval.total_seconds()
            environment['initial_sampling_interval'] = datetime.timedelta(
                seconds=SystemRandom().uniform(0, max_delay_seconds))

        # The profiler's own thread is always part of the excluded threads.
        own_thread_name = environment['profiler_thread_name']
        environment['excluded_threads'] = frozenset({own_thread_name}).union(
            environment['excluded_threads'])

        # TODO delay metadata lookup until we need it
        if not environment.get('agent_metadata'):
            environment['agent_metadata'] = AgentMetadata()
        if not environment.get('collector'):
            environment['collector'] = self._select_collector(environment)
        if not environment.get('profiler_disabler'):
            environment["profiler_disabler"] = ProfilerDisabler(environment)

        return UnmodifiableDict(environment)
    def test_when_orchestrator_says_no_to_profiler(self):
        """
        With should_profile set to False, the scheduler's next delay must be the reporting
        interval (151 seconds) and the collector must not receive any sample.
        """
        reporting_seconds = 151
        self.agent_configuration = AgentConfiguration(
            should_profile=False,
            sampling_interval=timedelta(seconds=2),
            reporting_interval=timedelta(seconds=reporting_seconds))

        # calling start in this test, it will start the scheduler and because initial delay is 0 it will execute now
        self.profiler_runner.start()

        # still it is safer to wait until the new config has been applied
        def config_is_applied():
            return AgentConfiguration.get().reporting_interval.total_seconds() == reporting_seconds

        def scheduler_uses_reporting_interval():
            return self.profiler_runner.scheduler._get_next_delay_seconds() == reporting_seconds

        wait_for(config_is_applied)
        wait_for(scheduler_uses_reporting_interval)

        assert self.profiler_runner.scheduler._get_next_delay_seconds() == reporting_seconds
        self.mock_collector.add.assert_not_called()
예제 #19
0
 def assert_initial_values():
     """Assert that the current agent configuration holds the initial values expected by these tests."""
     initial = AgentConfiguration.get()
     assert initial.should_profile is True
     assert initial.sampling_interval == timedelta(seconds=1)
     assert initial.reporting_interval == timedelta(minutes=13)
     assert initial.minimum_time_reporting == timedelta(minutes=6)
     assert initial.max_stack_depth == 2345
     assert initial.cpu_limit_percentage == 29
예제 #20
0
    def sample(self):
        """
        Collect the stack traces of the threads selected for sampling in the current Python instance.

        The threads to sample are chosen by _threads_to_sample_from, and the excluded threads are
        passed on to _get_stacks. Any exception raised while sampling propagates to the caller.

        :return: Sample holding the stacks, the number of threads selected for sampling, and the
            total number of threads seen
        """
        every_thread = self._get_all_threads()
        seen_count = len(every_thread)
        selected_threads = self._threads_to_sample_from(every_thread)
        attempted_count = len(selected_threads)

        depth_limit = AgentConfiguration.get().max_stack_depth
        stacks = self._get_stacks(threads_to_sample=selected_threads,
                                  excluded_threads=self._excluded_threads,
                                  max_depth=depth_limit)

        # Memory usage optimization: drop the thread references before building the result.
        del every_thread, selected_threads

        return Sample(stacks=stacks,
                      attempted_sample_threads_count=attempted_count,
                      seen_threads_count=seen_count)
    def is_sampling_cpu_usage_limit_reached(self, profile=None):
        """
        Check whether the average duration of the "sampleAndAggregate" timer metric, relative to
        the average sampling interval, has reached the configured cpu limit.

        :param profile: (optional) current profile, forwarded to _get_average_sampling_interval_seconds
        :return: True when the used time percentage is greater than or equal to the configured
            cpu_limit_percentage; False otherwise, including when the metric is missing or has too
            few measures.
        """
        sampling_metric = self.timer.metrics.get("sampleAndAggregate")
        if not sampling_metric:
            return False
        if sampling_metric.counter < MINIMUM_MEASURES_IN_DURATION_METRICS:
            return False

        sampling_interval_seconds = self._get_average_sampling_interval_seconds(profile)
        used_time_percentage = 100 * sampling_metric.average() / sampling_interval_seconds
        cpu_limit_percentage = AgentConfiguration.get().cpu_limit_percentage

        if used_time_percentage >= cpu_limit_percentage:
            logger.debug(self.timer.metrics)
            logger.debug("Sampling interval seconds: {:.2f} s".format(
                sampling_interval_seconds))
            logger.info(
                "Profiler sampling cpu usage limit reached: {:.2f} % (limit: {:.2f} %), will stop CodeGuru Profiler."
                .format(used_time_percentage, cpu_limit_percentage))
            return True

        return False
예제 #22
0
    def test_beta_endpoint_call_report_and_refresh_and_overrides_default_agent_configuration(
            self):
        """
        Report a profile through an SdkReporter built on a defaults-only merger, refresh the
        configuration, and check the values it holds afterwards.
        """
        self.environment["agent_config_merger"] = AgentConfigurationMerger(
            default=self.agent_config)

        reporter = SdkReporter(self.environment)
        reporter.setup()
        self.assert_initial_values()
        assert reporter.report(self.profile) is True

        reporter.refresh_configuration()

        # Fetch the configuration only after the refresh so the checks see the refreshed values.
        refreshed = AgentConfiguration.get()
        assert refreshed.should_profile is True
        assert refreshed.sampling_interval == timedelta(seconds=1)
        assert refreshed.reporting_interval == timedelta(minutes=5)
        assert refreshed.minimum_time_reporting == timedelta(seconds=60)
        assert refreshed.max_stack_depth == 1000
        assert refreshed.cpu_limit_percentage == 10
    def test_live_profiling(self):
        """
        Start a Profiler, let it run for 4 seconds, stop it, and check that samples were added,
        a profile was posted, and configure_agent was called.
        """
        # Three methods are patched to return False for the whole test: the two reporting-time
        # checks on AgentConfiguration and SdkReporter.check_create_pg_called_during_submit_profile.
        with \
                patch(
                    "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
                    return_value=False), \
                patch(
                    "codeguru_profiler_agent.sdk_reporter.sdk_reporter.SdkReporter.check_create_pg_called_during_submit_profile",
                    return_value=False), \
                patch(
                    "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration._is_reporting_interval_smaller_than_minimum_allowed",
                    return_value=False):

            # Zero initial delay, 1 second sampling interval, 2 second reporting interval.
            profiler = Profiler(
                profiling_group_name=DUMMY_TEST_PROFILING_GROUP_NAME,
                region_name='eu-west-2',
                environment_override={
                    "initial_sampling_interval": timedelta(),
                    "sampling_interval": timedelta(seconds=1),
                    "reporting_interval": timedelta(seconds=2),
                    'agent_metadata':
                    AgentMetadata(fleet_info=DefaultFleetInfo())
                })

            client = profiler._profiler_runner.collector.reporter.codeguru_client_builder.codeguru_client
            aggregator = profiler._profiler_runner.collector

            # The overrides must be visible in the configuration before the profiler starts.
            assert AgentConfiguration.get().sampling_interval == timedelta(
                seconds=1)
            assert AgentConfiguration.get().reporting_interval == timedelta(
                seconds=2)

            # post_agent_profile, configure_agent and add are wrapped so their calls can be counted.
            # NOTE(review): the two AgentConfiguration patches below repeat patches that are
            # already active from the outer `with` -- confirm whether the repetition is needed.
            with \
                    patch.object(client, "post_agent_profile",
                                 wraps=client.post_agent_profile) as wrapped_post_agent_profile, \
                    patch.object(client, "configure_agent",
                                 wraps=client.configure_agent) as wrapped_configure_agent, \
                    patch.object(aggregator, "add",
                                 wraps=aggregator.add) as wrapped_add, \
                    patch(
                        "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
                        return_value=False), \
                    patch(
                        "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration._is_reporting_interval_smaller_than_minimum_allowed",
                        return_value=False):

                # An explicit return_value takes precedence over `wraps`, so configure_agent
                # returns this canned response instead of calling the wrapped client method.
                wrapped_configure_agent.return_value = {
                    "configuration": {
                        "agentParameters": {
                            "SamplingIntervalInMilliseconds": "100",
                            "MinimumTimeForReportingInMilliseconds": "1000",
                            "MaxStackDepth": "1000",
                            "MemoryUsageLimitPercent": "29"
                        },
                        "periodInSeconds": 2,
                        "shouldProfile": True
                    }
                }

                # Stop the profiler even when one of the start assertions fails.
                try:
                    start_status = profiler.start()
                    assert start_status
                    assert profiler.is_running()
                    time.sleep(4)
                finally:
                    profiler.stop()

                # We should see at least 2 samples in 4 seconds as the sequence should happen in the order of
                # initial delay (1 second)
                # After 1 second, no flush -> sample
                # After 2 seconds, it attempt to flush (possibly succeed) -> sample/ no sample
                # After 3 seconds, it attempt to flush (must succeed if it did not flush before) -> no sample/ sample
                # After 4 seconds, no flush -> sample (if profiler has not stopped yet)
                # NOTE(review): initial_sampling_interval is set to zero above, so "initial delay
                # (1 second)" presumably refers to the first sampling interval -- confirm.
                assert wrapped_add.call_count >= 2
                assert wrapped_post_agent_profile.call_count >= 1
                assert wrapped_configure_agent.call_count >= 1
                # The sampling interval is still 1 second although the canned response asks for
                # 100 ms; presumably values passed through environment_override act as user
                # overrides that the response does not replace -- confirm.
                assert AgentConfiguration.get().sampling_interval == timedelta(
                    seconds=1)
                assert AgentConfiguration.get(
                ).reporting_interval == timedelta(seconds=2)
 def test_given_override_is_used(self):
     """After invoking the handler, the configured cpu limit percentage must be 42."""
     self.handler({}, self.context)

     configured_limit = AgentConfiguration.get().cpu_limit_percentage
     assert configured_limit == 42
 def test_it_throws_error_at_calling_get_when_singleton_is_none(self):
     """AgentConfiguration.get() must raise ValueError when the module-level _singleton is None."""
     config_module = codeguru_profiler_agent.reporter.agent_configuration
     setattr(config_module, "_singleton", None)

     with pytest.raises(ValueError):
         AgentConfiguration.get()
예제 #26
0
 def _get_average_sampling_interval_seconds(profile):
     """
     Return the average time between samples, in seconds.

     :param profile: current profile, or None
     :return: the profile's active milliseconds divided by its sample count, converted to seconds;
         the configured sampling interval when the profile is None or has fewer than
         MINIMUM_SAMPLES_IN_PROFILE samples
     """
     too_few_samples = profile is None or profile.total_sample_count < MINIMUM_SAMPLES_IN_PROFILE
     if too_few_samples:
         return AgentConfiguration.get().sampling_interval.total_seconds()

     millis_per_sample = profile.get_active_millis_since_start() / profile.total_sample_count
     return millis_per_sample / 1000
 def _is_over_reporting_interval(self, now):
     """Ask the configuration whether the time since the last report attempt exceeds the reporting interval."""
     since_last_attempt = now - self.last_report_attempted
     return AgentConfiguration.get().is_over_reporting_interval(since_last_attempt)
 def _is_under_min_reporting_time(self, now):
     """Ask the configuration whether the time since the last report attempt is under the minimum reporting time."""
     since_last_attempt = now - self.last_report_attempted
     return AgentConfiguration.get().is_under_min_reporting_time(since_last_attempt)
예제 #29
0
 def test_it_sets_should_profile_to_false(self):
     """disable_profiling() must leave should_profile set to False."""
     self.agent_config_merger.disable_profiling()

     should_profile = AgentConfiguration.get().should_profile
     assert should_profile is False
    def test_live_profiling(self):
        """
        Start a Profiler, let it run for 3 seconds, stop it, and check that samples were added,
        a profile was posted, and configure_agent was called.
        """
        # The two reporting-time checks on AgentConfiguration are patched to return False for
        # the whole test.
        with \
                patch(
                    "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
                    return_value=False), \
                patch(
                    "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration._is_reporting_interval_smaller_than_minimum_allowed",
                    return_value=False):

            # Zero initial delay, 1 second sampling interval, 2 second reporting interval.
            profiler = Profiler(
                profiling_group_name=DUMMY_TEST_PROFILING_GROUP_NAME,
                region_name='eu-west-2',
                environment_override={
                    "initial_sampling_interval": timedelta(),
                    "sampling_interval": timedelta(seconds=1),
                    "reporting_interval": timedelta(seconds=2),
                    'agent_metadata':
                    AgentMetadata(fleet_info=DefaultFleetInfo())
                })

            client = profiler._profiler_runner.collector.reporter.codeguru_client_builder.codeguru_client
            aggregator = profiler._profiler_runner.collector

            # The overrides must be visible in the configuration before the profiler starts.
            assert AgentConfiguration.get().sampling_interval == timedelta(
                seconds=1)
            assert AgentConfiguration.get().reporting_interval == timedelta(
                seconds=2)

            # post_agent_profile, configure_agent and add are wrapped so their calls can be counted.
            # NOTE(review): the two AgentConfiguration patches below repeat patches that are
            # already active from the outer `with` -- confirm whether the repetition is needed.
            with \
                    patch.object(client, "post_agent_profile",
                                 wraps=client.post_agent_profile) as wrapped_post_agent_profile, \
                    patch.object(client, "configure_agent",
                                 wraps=client.configure_agent) as wrapped_configure_agent, \
                    patch.object(aggregator, "add",
                                 wraps=aggregator.add) as wrapped_add, \
                    patch(
                        "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
                        return_value=False), \
                    patch(
                        "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration._is_reporting_interval_smaller_than_minimum_allowed",
                        return_value=False):

                # An explicit return_value takes precedence over `wraps`, so configure_agent
                # returns this canned response instead of calling the wrapped client method.
                wrapped_configure_agent.return_value = {
                    "configuration": {
                        "agentParameters": {
                            "SamplingIntervalInMilliseconds": "100",
                            "MinimumTimeForReportingInMilliseconds": "1000",
                            "MaxStackDepth": "1000",
                            "MemoryUsageLimitPercent": "29"
                        },
                        "periodInSeconds": 2,
                        "shouldProfile": True
                    }
                }

                # Stop the profiler even when one of the start assertions fails.
                try:
                    start_status = profiler.start()
                    assert start_status
                    assert profiler.is_running()
                    time.sleep(3)
                finally:
                    profiler.stop()

                # At least 3 samples are expected within the 3 second run.
                assert wrapped_add.call_count >= 3
                assert wrapped_post_agent_profile.call_count >= 1
                assert wrapped_configure_agent.call_count >= 1
                # The sampling interval is still 1 second although the canned response asks for
                # 100 ms; presumably values passed through environment_override act as user
                # overrides that the response does not replace -- confirm.
                assert AgentConfiguration.get().sampling_interval == timedelta(
                    seconds=1)
                assert AgentConfiguration.get(
                ).reporting_interval == timedelta(seconds=2)