def test_multi_process_multi_epochs(self):
    metric1 = Metric("fakemetric1", "faketype1")
    metric2 = Metric("fakemetric2", "faketype2")

    # epoch 1
    self.monitor.record_metrics(111, {metric1: 1.2})
    self.monitor.record_metrics(222, {metric2: 2.87})

    expected_history = {111: {metric1: [1.2]}, 222: {metric2: [2.87]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {})

    # epoch 2
    self.monitor.record_metrics(111, {metric1: 1.6})
    self.monitor.record_metrics(222, {metric2: 2.92})

    expected_history = {
        111: {metric1: [1.2, 1.6]},
        222: {metric2: [2.87, 2.92]},
    }
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {})
def test_multi_process_single_epoch(self):
    metric1 = Metric("fakemetric1", "faketype1")
    metric2 = Metric("fakemetric2", "faketype2")

    self.monitor.record_metrics(111, {metric1: 1.2})
    self.monitor.record_metrics(222, {metric2: 2.87})

    expected_history = {111: {metric1: [1.2]}, 222: {metric2: [2.87]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {})
def test_multiple_process_multiple_epochs(self):
    metric1 = Metric("fakemetric1", "faketype1")
    metric2 = Metric("fakemetric2", "faketype2")

    # epoch 1
    metrics1 = {metric1: 21}
    metrics2 = {metric2: 100.0}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {1: {metric1: [21]}, 2: {metric2: [100.0]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(
        self.monitor._ProcessMonitor__aggregated_metrics,
        {metric1: 21, metric2: 100.0},
    )

    # epoch 2
    # before epoch 2, the reset is called for absolute metrics
    self.monitor._reset_absolute_metrics()

    metrics1 = {metric1: 21.11}
    metrics2 = {metric2: 100.11}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {
        1: {metric1: [21, 21.11]},
        2: {metric2: [100.0, 100.11]},
    }
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(
        self.monitor._ProcessMonitor__aggregated_metrics,
        {metric1: 21.11, metric2: 100.11},
    )
def test_single_process_multiple_epoch(self):
    metric = Metric("fakemetric", "faketype")

    # epoch 1
    metrics = {metric: 21}
    self.monitor.record_metrics(555, metrics)

    self.monitor._ProcessMonitor__pids = [555]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {555: {metric: [21]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {metric: 21})

    # epoch 2
    # before epoch 2, the reset is called for absolute metrics
    self.monitor._reset_absolute_metrics()

    metrics = {metric: 21.5}
    self.monitor.record_metrics(555, metrics)

    self.monitor._ProcessMonitor__pids = [555]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {555: {metric: [21, 21.5]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {metric: 21.5})
def get_agent_status_metrics(process):
    # type: (psutil.Process) -> dict
    """
    Retrieve additional agent related metrics utilizing the agent status
    functionality.
    """
    result = {}

    # Request json status format
    agent_data_path = os.path.expanduser("~/scalyr-agent-dev/data")
    status_format_file = os.path.join(agent_data_path, "status_format")

    with open(status_format_file, "w") as fp:
        fp.write(six.text_type("json"))

    # Ask the agent to dump the metrics
    os.kill(process.pid, signal.SIGUSR1)

    # Wait a bit for the agent to write the metrics, then parse them
    time.sleep(2)

    status_file = os.path.join(agent_data_path, "last_status")

    with open(status_file, "r") as fp:
        content = fp.read()

    content = json.loads(content)

    # NOTE: Currently we only capture gc metrics
    metric_gc_garbage = Metric(name="app.gc.garbage", _type=None)
    result[metric_gc_garbage] = content.get("gc_stats", {}).get("garbage", 0)

    return result
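# Example usage (a sketch; ``agent_pid`` is a hypothetical pid of a running
# agent process, and the agent must support dumping its status on SIGUSR1):
#
#   import psutil
#
#   process = psutil.Process(agent_pid)
#   metrics = get_agent_status_metrics(process)
#   # e.g. {Metric(name="app.gc.garbage", _type=None): 0}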
def test_single_process_single_epoch(self):
    metric = Metric("fakemetric", "faketype")

    metrics = {metric: 21}
    self.monitor.record_metrics(555, metrics)

    expected_history = {555: {metric: [21]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {})
def test_multiple_process_multiple_epochs_cumulative_metrics_all_process_death(
    self,
):
    """
    Same as test_multiple_process_multiple_epochs_cumulative_metrics_one_process_death
    but all processes die after epoch 2
    """
    metric1 = Metric("app.cpu", "system")

    # epoch 1
    metrics1 = {metric1: 20}
    metrics2 = {metric1: 40}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {1: {metric1: [20]}, 2: {metric1: [40]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {metric1: 0})

    # epoch 2
    # before epoch 2, the reset is called for absolute metrics
    self.monitor._reset_absolute_metrics()

    metrics1 = {metric1: 25}
    metrics2 = {metric1: 46}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {1: {metric1: [20, 25]}, 2: {metric1: [40, 46]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(
        self.monitor._ProcessMonitor__aggregated_metrics,
        {metric1: (25 - 20) + (46 - 40)},
    )

    # epoch 3
    self.monitor._reset_absolute_metrics()

    metrics1 = {metric1: 23}
    metrics2 = {metric1: 43}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    # Process 1 and 2 die.. boom.
    # We should ensure the total running value for the metric doesn't go down.
    self.monitor._ProcessMonitor__pids = []
    self.monitor._calculate_aggregated_metrics()

    # we only keep the last 2 historical values
    expected_history = {1: {metric1: [25, 23]}, 2: {metric1: [46, 43]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(
        self.monitor._ProcessMonitor__aggregated_metrics,
        {metric1: (25 - 20) + (46 - 40)},
    )
def test_single_process_single_epoch(self):
    metric = Metric("fakemetric", "faketype")

    metrics = {metric: 21}
    self.monitor.record_metrics(555, metrics)

    self.monitor._ProcessMonitor__pids = [555]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {555: {metric: [21]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {metric: 21})
def get_additional_psutil_metrics(process):
    # type: (psutil.Process) -> Dict[Metric, T_metric_value]
    """
    Capture any additional metrics which are currently not exposed via the
    Linux Process Metrics tracker, using psutil.
    """
    # Capture and calculate shared and private memory usage
    metric_shared = Metric(name="app.mem.bytes", _type="resident_shared")
    metric_private = Metric(name="app.mem.bytes", _type="resident_private")

    result = {
        metric_shared: 0,
        metric_private: 0,
    }  # type: Dict[Metric, T_metric_value]

    memory_maps = process.memory_maps()

    for memory_map in memory_maps:
        result[metric_shared] += memory_map.shared_clean + memory_map.shared_dirty
        result[metric_private] += memory_map.private_clean + memory_map.private_dirty

    return result
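# Example usage (a sketch; ``pid`` is a hypothetical pid, and reading
# process.memory_maps() requires Linux and sufficient permissions):
#
#   import psutil
#
#   process = psutil.Process(pid)
#   metrics = get_additional_psutil_metrics(process)
#   # e.g. {Metric("app.mem.bytes", "resident_shared"): ...,
#   #       Metric("app.mem.bytes", "resident_private"): ...}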
def test_multiple_process_multiple_epochs_cumulative_metrics(self):
    metric1 = Metric("app.cpu", "system")

    # epoch 1
    metrics1 = {metric1: 20}
    metrics2 = {metric1: 40}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {1: {metric1: [20]}, 2: {metric1: [40]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {metric1: 0})

    # epoch 2
    # before epoch 2, the reset is called for absolute metrics
    self.monitor._reset_absolute_metrics()

    metrics1 = {metric1: 22}
    metrics2 = {metric1: 44}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {1: {metric1: [20, 22]}, 2: {metric1: [40, 44]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(
        self.monitor._ProcessMonitor__aggregated_metrics,
        {metric1: (22 - 20) + (44 - 40)},
    )

    # epoch 3
    self.monitor._reset_absolute_metrics()

    metrics1 = {metric1: 25}
    metrics2 = {metric1: 48}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    # we only keep the last 2 historical values
    expected_history = {1: {metric1: [22, 25]}, 2: {metric1: [44, 48]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(
        self.monitor._ProcessMonitor__aggregated_metrics,
        {metric1: (22 - 20) + (44 - 40) + (25 - 22) + (48 - 44)},
    )
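# How the cumulative aggregation above works (an informal sketch derived from
# the assertions in this test): for a cumulative metric such as app.cpu, each
# _calculate_aggregated_metrics() call adds the per-process delta between the
# two most recent samples to a running total, e.g. after epoch 3:
#
#   pid 1: (22 - 20) + (25 - 22) = 5
#   pid 2: (44 - 40) + (48 - 44) = 8
#   total: 5 + 8 = 13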
def test_single_process_multiple_epochs(self):
    metric = Metric("fakemetric", "faketype")

    # epoch 1
    self.monitor.record_metrics(777, {metric: 1.2})

    expected_history = {777: {metric: [1.2]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {})

    # epoch 2
    self.monitor.record_metrics(777, {metric: 1.9})

    expected_history = {777: {metric: [1.2, 1.9]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {})
def test_basic_namedtuple_access(self):
    m = Metric("abc", 123)

    # Ensure name and type fields are present
    self.assertEqual(m.name, "abc")
    self.assertEqual(m.type, 123)

    # Non-existent attribute
    self.assertRaises(AttributeError, lambda: m.asdf)  # pylint: disable=no-member

    # Ensure the value cannot be mutated
    def mutate():
        m.name = "mutated value"

    self.assertRaises(AttributeError, mutate)
def test_gather_sample(self):
    stat_reader = StatReader(pid=1, monitor_id=1, logger=None)
    stat_reader._StatReader__get_uptime_ms = lambda: 5 * 60 * 60 * 1000

    stat_file = os.path.join(FIXTURES_DIR, "proc_1125_stat")

    with open(stat_file, "r") as fp:
        result = stat_reader.gather_sample(stat_file=fp)

    self.assertEqual(len(result), 7)
    self.assertEqual(result[Metric("app.cpu", "user")], 0)
    self.assertEqual(result[Metric("app.cpu", "system")], 277)
    self.assertEqual(result[Metric("app.uptime", None)], 17993090)
    self.assertEqual(result[Metric("app.nice", None)], -20.0)
    self.assertEqual(result[Metric("app.threads", None)], 1)
    self.assertEqual(result[Metric("app.mem.majflt", None)], 0)
    self.assertEqual(result[Metric("app.io.wait", None)], 0)
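# Note on the fixture (an inference from the expected values above, not from
# the StatReader source): proc_1125_stat appears to be a verbatim copy of a
# /proc/<pid>/stat file, so app.cpu user/system presumably map to its
# utime/stime fields, app.nice, app.threads and app.mem.majflt to the nice,
# num_threads and majflt fields, and app.uptime to the stubbed uptime
# (5 h = 18,000,000 ms) minus the process start time from the starttime field.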
def test_metric_as_object_key(self):
    """
    This protects against a bug introduced in v2.0.47 whereby the Metric
    object didn't implement the __hash__ and __eq__ dunder methods.

    Metric is repeatedly used as a dictionary key during linux process
    metrics generation. Without those dunder methods, the key/val is never
    replaced and the dictionary keeps growing. (Reference AGENT-142)
    """
    dd = {}

    for x in range(1000):
        dd[Metric("name1a", "name1b")] = x
        dd[Metric("name2a", "name2b")] = x
        dd[Metric("name3a", "name3b")] = x
        dd[Metric("name4a", "name4b")] = x
        dd[Metric("name5a", "name5b")] = x

    self.assertEqual(len(dd), 5)
    self.assertEqual(dd[Metric("name1a", "name1b")], 999)
    self.assertEqual(dd[Metric("name2a", "name2b")], 999)
    self.assertEqual(dd[Metric("name3a", "name3b")], 999)
    self.assertEqual(dd[Metric("name4a", "name4b")], 999)
    self.assertEqual(dd[Metric("name5a", "name5b")], 999)

    dd = {}

    for x in range(1000):
        dd[Metric("name1a", "name1b")] = 1
        dd[Metric("name2a", "name2b")] = 1
        dd[Metric("name3a", "name3b")] = 1
        dd[Metric("name4a", "name4b")] = 1
        dd[Metric("name5a", "name5b")] = 1

    self.assertEqual(len(dd), 5)
    self.assertEqual(dd[Metric("name1a", "name1b")], 1)
    self.assertEqual(dd[Metric("name2a", "name2b")], 1)
    self.assertEqual(dd[Metric("name3a", "name3b")], 1)
    self.assertEqual(dd[Metric("name4a", "name4b")], 1)
    self.assertEqual(dd[Metric("name5a", "name5b")], 1)
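# For context, a minimal sketch of a hashable Metric consistent with how
# these tests use it (positional Metric("abc", 123), keyword
# Metric(name=..., _type=...), read-only .name / .type access). This is an
# illustrative assumption, not the actual implementation in
# scalyr_agent.builtin_monitors.linux_process_metrics:
#
#   import collections
#
#   _MetricTuple = collections.namedtuple("MetricTuple", ["name", "type"])
#
#   def Metric(name, _type):
#       # A namedtuple gets value-based __eq__ and __hash__ for free,
#       # which is what makes Metric safe to use as a dictionary key.
#       return _MetricTuple(name, _type)
#
#   assert Metric("app.cpu", "user") == Metric("app.cpu", "user")
#   assert hash(Metric("app.cpu", "user")) == hash(Metric("app.cpu", "user"))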
import logging

from scalyr_agent.builtin_monitors.linux_process_metrics import ProcessTracker
from scalyr_agent.builtin_monitors.linux_process_metrics import Metric

from utils import initialize_logging
from utils import add_common_parser_arguments
from utils import parse_auth_credentials
from utils import parse_commit_date
from utils import send_payload_to_codespeed

logger = logging.getLogger(__name__)

METRICS_GAUGES = {
    # CPU usage related metrics
    "cpu_threads": Metric(name="app.threads", _type=None),
    # Memory usage related metrics
    "memory_usage_rss": Metric(name="app.mem.bytes", _type="resident"),
    "memory_usage_rss_shared": Metric(name="app.mem.bytes", _type="resident_shared"),
    "memory_usage_rss_private": Metric(name="app.mem.bytes", _type="resident_private"),
    "memory_usage_vms": Metric(name="app.mem.bytes", _type="vmsize"),
    # IO related metrics
    "io_open_fds": Metric(name="app.io.fds", _type="open"),
    # GC related metrics
    "gc_garbage": Metric(name="app.gc.garbage", _type=None),
}
def test_multiple_process_multiple_epochs_cumulative_metrics_one_process_death(
    self,
):
    """
    Same as test_multiple_process_multiple_epochs_cumulative_metrics but one
    process dies after epoch 2
    """
    metric1 = Metric("app.cpu", "system")

    # epoch 1
    metrics1 = {metric1: 21}
    metrics2 = {metric1: 100.0}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {1: {metric1: [21]}, 2: {metric1: [100.0]}}
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(self.monitor._ProcessMonitor__aggregated_metrics, {metric1: 0})

    # epoch 2
    # before epoch 2, the reset is called for absolute metrics
    self.monitor._reset_absolute_metrics()

    metrics1 = {metric1: 30.1}
    metrics2 = {metric1: 100.2}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    self.monitor._ProcessMonitor__pids = [1, 2]
    self.monitor._calculate_aggregated_metrics()

    expected_history = {
        1: {metric1: [21, 30.1]},
        2: {metric1: [100.0, 100.2]},
    }
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(
        self.monitor._ProcessMonitor__aggregated_metrics,
        {metric1: (30.1 - 21) + (100.2 - 100.0)},
    )

    # epoch 3
    self.monitor._reset_absolute_metrics()

    metrics1 = {metric1: 26.0}
    metrics2 = {metric1: 103}

    self.monitor.record_metrics(1, metrics1)
    self.monitor.record_metrics(2, metrics2)

    # Process 1 dies.. boom
    self.monitor._ProcessMonitor__pids = [2]
    self.monitor._calculate_aggregated_metrics()

    # we only keep the last 2 historical values
    expected_history = {
        1: {metric1: [30.1, 26.0]},
        2: {metric1: [100.2, 103]},
    }
    self.assertEqual(self.monitor._ProcessMonitor__metrics_history, expected_history)
    self.assertEqual(
        self.monitor._ProcessMonitor__aggregated_metrics,
        {metric1: (30.1 - 21) + (100.2 - 100.0) + (103 - 100.2)},
    )
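# The invariant exercised by the two *_process_death tests (an informal
# summary of their assertions): once a pid disappears from __pids, its
# already-accumulated deltas remain in __aggregated_metrics, so the running
# total for a cumulative metric never decreases. After process 1 dies in
# epoch 3 above, its contribution stays frozen at (30.1 - 21) while process 2
# keeps adding (103 - 100.2).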