def __init__(self, reporter, environment=None):
    """
    :param reporter: reporter object used for reporting profiles
    :param environment: dependency container dictionary for the current profiler
    :param profiling_group_name: (required inside environment) name of the profiling group
    :param memory_limit_bytes: (required inside environment) memory limit (Bytes) for profiler
    :param host_weight: (required inside environment) scale factor used to rescale the profile collected
        in this host to make the profile representative of the whole fleet
    :param timer: (required inside environment) timer to be used for metrics
    :param errors_metadata: (required inside environment) metadata capturing errors in the current profile.
    :param profile_factory: (inside environment) the factory used to create profiles; default Profile.
    :param clock: (inside environment) clock to be used; default is time.time
    """
    # Fix: the previous default `environment=dict()` was a mutable default
    # argument, evaluated once at definition time and shared across calls.
    # Using None and creating a fresh dict keeps behavior identical (missing
    # required keys still raise KeyError) without the shared-state hazard.
    if environment is None:
        environment = {}
    self.reporter = reporter
    self.profiling_group_name = environment["profiling_group_name"]
    self.host_weight = environment["host_weight"]
    self.timer = environment["timer"]
    self.errors_metadata = environment["errors_metadata"]
    # `or` fallbacks: explicit None (or falsy) entries fall back to defaults.
    self.profile_factory = environment.get("profile_factory") or Profile
    self.clock = environment.get("clock") or time.time
    self.profile = None
    self.memory_limit_bytes = environment["memory_limit_bytes"]
    self.last_report_attempted = current_milli_time(clock=self.clock)
    self.agent_start_time = current_milli_time(clock=self.clock)
    self.reset()
def resume(self):
    """Mark the profile as running again and fold the paused duration into the total.

    No-op when the profile is already running (resume() can be called while active).
    """
    if self.last_resume is None:
        now = current_milli_time(clock=self._clock)
        self._paused_ms += now - self.last_pause
        self.last_resume = now
        self.last_pause = None
def _check_memory_limit(self): if self.profile.get_memory_usage_bytes() > self.memory_limit_bytes: if self._is_under_min_reporting_time( current_milli_time(clock=self.clock)): raise OverMemoryLimitException( "Profiler memory usage limit has been reached") self.flush(force=True)
def assert_profile_is_reset(profile_factory, clock):
    """Verify the factory was called exactly once with a fresh profile's expected arguments."""
    expected_kwargs = dict(
        profiling_group_name=TEST_PROFILING_GROUP_NAME,
        host_weight=TEST_HOST_WEIGHT,
        sampling_interval_seconds=TEST_SAMPLING_INTERVAL,
        start=current_milli_time(clock),
        clock=clock)
    profile_factory.assert_called_once_with(**expected_kwargs)
def reset(self):
    """Discard the current profile, start a fresh one, and reset the timer."""
    # Sampling interval comes from the live agent configuration at reset time.
    interval_seconds = AgentConfiguration.get().sampling_interval.total_seconds()
    self.profile = self.profile_factory(
        profiling_group_name=self.profiling_group_name,
        sampling_interval_seconds=interval_seconds,
        host_weight=self.host_weight,
        start=current_milli_time(clock=self.clock),
        clock=self.clock)
    self.timer.reset()
def get_active_millis_since_start(self):
    """
    Return the total "active" wall clock time since start, in milliseconds.

    In AWS Lambda the process can be frozen; time spent frozen must not be
    counted here. On EC2 or other hosts this is simply wall clock time since
    start. If an "end" time has been set, the active window is [start, end];
    otherwise it is [start, now] (or [start, last_pause] while paused).
    """
    if self.last_pause is not None:
        window_end = self.last_pause
    elif self._end is not None:
        window_end = self._end
    else:
        window_end = current_milli_time(clock=self._clock)
    return window_end - self.start - self._paused_ms
def test_it_samples_and_saves_a_profile_to_a_file(self):
    # End-to-end test: run a real Profiler in "file" reporting mode and check
    # the JSON profile it writes to disk.
    # Bypass the minimum-reporting-time check so stop() flushes immediately.
    with \
            patch(
                "codeguru_profiler_agent.reporter.agent_configuration.AgentConfiguration.is_under_min_reporting_time",
                return_value=False):
        file_prefix = str(Path(self.temporary_directory, FILE_PREFIX))
        test_start_time = time_utils.current_milli_time()
        # initial_sampling_interval=timedelta() makes the first sample happen
        # right away so even a short run produces data.
        profiler = Profiler(
            profiling_group_name=DUMMY_TEST_PROFILING_GROUP_NAME,
            environment_override={
                "initial_sampling_interval": timedelta(),
                "reporting_mode": "file",
                "file_prefix": file_prefix,
                'agent_metadata': AgentMetadata(fleet_info=DefaultFleetInfo())
            })
        try:
            profiler.start()
        finally:
            # stop() triggers the flush that writes the profile file.
            profiler.stop()
        test_end_time = time_utils.current_milli_time()
        # The only file in the temp dir is the profile that was just written.
        resulting_profile_path = str(
            Path(self.temporary_directory,
                 os.listdir(self.temporary_directory)[0]))
        with (open(resulting_profile_path)) as profiling_result_file:
            resulting_json = json.loads(profiling_result_file.read())
            self.assert_valid_agent_metadata(resulting_json["agentMetadata"])
            # Profile start/end must fall inside the test's own time window.
            assert test_start_time <= resulting_json[
                "start"] <= resulting_json["end"] <= test_end_time
            # The sampled call graph must contain the helper thread's frames
            # in parent -> child order.
            assert frames_in_callgraph_are_in_expected_order(
                resulting_json["callgraph"],
                "test.help_utils:HelperThreadRunner:dummy_parent_method",
                "test.help_utils:HelperThreadRunner:dummy_method")
def reset(self):
    """Reset error metadata and the timer, then start a brand-new profile.

    The fresh profile carries agent debug info (errors, start time, timer)
    so reported profiles include diagnostic context.
    """
    self.errors_metadata.reset()
    self.timer.reset()
    # Sampling interval is read from the live agent configuration.
    interval_seconds = AgentConfiguration.get().sampling_interval.total_seconds()
    self.profile = self.profile_factory(
        profiling_group_name=self.profiling_group_name,
        sampling_interval_seconds=interval_seconds,
        host_weight=self.host_weight,
        start=current_milli_time(clock=self.clock),
        agent_debug_info=AgentDebugInfo(self.errors_metadata,
                                        self.agent_start_time,
                                        self.timer),
        clock=self.clock)
def add(self, sample):
    """
    Merge a Sample into the call graph and move the profile end time
    forward to the time of this (latest) sample.
    """
    self.total_attempted_sample_threads_count += sample.attempted_sample_threads_count
    self.total_seen_threads_count += sample.seen_threads_count
    self.total_sample_count += 1
    for single_stack in sample.stacks:
        self._insert_stack(single_stack)
    self.end = current_milli_time(clock=self._clock)
def flush(self, force=False, reset=True):
    """
    Report the current profile if it is due, and optionally reset it.

    :param force: report (and reset) regardless of the reporting interval
    :param reset: start a new profile after a successful report
    :return: True if a profile was reported, False otherwise
    """
    now = current_milli_time(clock=self.clock)
    # Nothing to do unless forced or the reporting interval has elapsed.
    if not (force or self._is_over_reporting_interval(now)):
        return False
    reported = False
    if self._is_under_min_reporting_time(now):
        # Too young to be meaningful: drop rather than report.
        logger.info(
            "Dropping the profile as it is under the minimum reporting time"
        )
    else:
        self._report_profile(now)
        reported = True
    # A forced flush always resets; otherwise reset only after a report.
    if force or (reset and reported):
        self.reset()
    return reported
def get_active_millis_since_start(self):
    """
    Return the total "active" wall clock time since start, in milliseconds.

    In AWS Lambda the process can be frozen; time spent frozen must not be
    counted. On EC2 or other hosts this is simply wall clock time since start.

    self.end is deliberately NOT used as the end of the window: self.end is
    updated whenever a sample is added, so a pause occurring after the last
    sample would still be subtracted even though it lies outside the window,
    under-counting active time (possibly into negative values). Example:

    ------------------------------------------------------------------
    |          |       |    |    |       |          |       |  ||||||
    S          P       R    P    R       SE         P       R  REPORT
    s1_________|___p1___|____|____p2___|____e1____|___p3__|__________|

    S - Start, P - Pause, R - Resume, SE - self.end (last sample),
    REPORT - active time calculation. Using e1 as the end would give
    e1-s1-(p1+p2+p3), but pause p3 happened after e1. Tracking only the
    pauses that precede the last sample would be exact, but the extra
    bookkeeping is not worth the complexity; instead the window ends at the
    current time, or at last_pause while the profile is currently paused.
    """
    if self.last_pause is not None:
        window_end = self.last_pause
    else:
        window_end = current_milli_time(clock=self._clock)
    active_time_millis_since_start = window_end - self.start - self._paused_ms
    logger.debug(
        "Active time since start is {activeTime} which is calculated using start: {start}, end: {end}, last_pause: {last_pause}, paused_ms: {paused_ms}, last_resume: {last_resume}"
        .format(activeTime=active_time_millis_since_start,
                start=self.start,
                end=self._end,
                last_pause=self.last_pause,
                paused_ms=self._paused_ms,
                last_resume=self.last_resume))
    return active_time_millis_since_start
def pause(self):
    """Record the pause point; no-op when a pause is already in progress."""
    if self.last_pause is None:
        self.last_resume = None
        self.last_pause = current_milli_time(clock=self._clock)
from unittest.mock import MagicMock
from botocore.stub import Stubber, ANY
from codeguru_profiler_agent.reporter.agent_configuration import AgentConfigurationMerger
from codeguru_profiler_agent.agent_metadata.agent_metadata import AgentMetadata, DefaultFleetInfo
from codeguru_profiler_agent.agent_metadata.aws_lambda import LAMBDA_EXECUTION_ENV, \
    LAMBDA_MEMORY_SIZE_ENV, LAMBDA_TASK_ROOT, LAMBDA_RUNTIME_DIR
from codeguru_profiler_agent.reporter.agent_configuration import AgentConfiguration
from codeguru_profiler_agent.sdk_reporter.sdk_reporter import SdkReporter
from codeguru_profiler_agent.sdk_reporter.profile_encoder import ProfileEncoder
from codeguru_profiler_agent.model.profile import Profile
from codeguru_profiler_agent.codeguru_client_builder import CodeGuruClientBuilder

# Shared fixtures used by the tests below.
profiling_group_name = "test-ProfilingGroup-name"
autocreated_test_lambda_profiling_group_name = "aws-lambda-testLambdaName"
# NOTE(review): `current_milli_time` is used here but is not imported in the
# visible lines — presumably imported elsewhere in this file; verify.
profile = Profile(profiling_group_name, 1.0, 0.5, current_milli_time())


class TestSdkReporter:
    def before(self):
        # Per-test setup: build a real CodeGuru client and stub its HTTP layer
        # so no network calls are made.
        # NOTE(review): `boto3` is used here but not imported in the visible
        # lines — presumably imported elsewhere in this file; verify.
        codeguru_client_builder = CodeGuruClientBuilder(
            environment={"aws_session": boto3.session.Session()})
        self.client_stubber = Stubber(codeguru_client_builder.codeguru_client)
        self.clear_lambda_specific_environment_variables_for_test_run()
        # Mocked encoder writes a fixed marker payload instead of a real
        # encoded profile, so tests can recognize the reporter's output.
        profile_encoder = MagicMock(name="profile_encoder", spec=ProfileEncoder)
        profile_encoder.encode.side_effect = lambda **args: args[
            "output_stream"].write(b"test-profile-encoder-output")