def get_attributes(app, app_env_data):
    """
    Collect the attributes of an application into a flat dictionary.

    :param app: application data (json)
    :param app_env_data: environment data for the application (json)
    :return: dict (attribute: value)
    """
    attributes = {'app_id': app['id'], 'name': app['name']}

    # Only the first entry of the attempt list is read.
    first_attempt = app['attempts'][0]
    attempt_fields = (
        ('start_time', 'startTime'),
        ('end_time', 'endTime'),
        ('duration', 'duration'),
        ('spark_user', 'sparkUser'),
        ('completed', 'completed'),
    )
    for attribute_name, json_key in attempt_fields:
        attributes[attribute_name] = first_attempt[json_key]

    # Values taken from the environment data rather than the application itself.
    attributes['runtime'] = get_prop(app_env_data, 'runtime')
    attributes['spark_properties'] = get_prop(app_env_data, 'sparkProperties')
    attributes['spark_command'] = get_system_property(
        app_env_data, app['id'], 'sun.java.command')

    attributes['mode'] = app['mode']
    return attributes
def test_get_prop():
    """
    Check utils.get_prop on top-level keys, nested key paths and missing keys.

    Missing keys (at any depth exercised here) are expected to yield None.
    """
    obj1 = {
        'prop1': 'val1',
        'prop2': {
            'sub1': 'val3',
            'sub2': {
                'subsub1': 'val5',
                'subsub2': 'val6'
            }
        }
    }

    assert utils.get_prop(obj1, "prop1") == "val1"
    assert utils.get_prop(obj1, "prop3") is None
    assert utils.get_prop(obj1, 'prop2', 'sub1') == 'val3'
    assert utils.get_prop(obj1, 'prop2', 'sub3') is None
    # Compare by value: identity ("is") against a string literal only passes
    # when the interpreter happens to intern both strings.
    assert utils.get_prop(obj1, 'prop2', 'sub2', 'subsub2') == 'val6'
def get_attributes(app_id, stage, stage_job_mapping):
    """
    Collect the attributes of a stage into a flat dictionary.

    :param app_id: application id (string)
    :param stage: stage data (json)
    :param stage_job_mapping: mapping of jobs to stages as dict(stage_key: job_key)
    :return: dict (attribute: value)
    """
    stage_id = stage['stageId']
    # The stage key doubles as the lookup key into stage_job_mapping.
    stage_key = f"{app_id}_{stage_id}"

    attributes = {
        'stage_key': stage_key,
        'app_id': app_id,
        'status': stage['status'],
        'stage_id': stage_id,
        'attempt_id': stage['attemptId'],
        'job_key': stage_job_mapping[stage_key],
    }

    # Fields read by direct indexing (a missing key raises KeyError).
    counter_fields = (
        ('num_tasks', 'numTasks'),
        ('num_active_tasks', 'numActiveTasks'),
        ('num_complete_tasks', 'numCompleteTasks'),
        ('num_failed_tasks', 'numFailedTasks'),
        ('num_killed_tasks', 'numKilledTasks'),
        ('num_completed_indices', 'numCompletedIndices'),
        ('executor_run_time', 'executorRunTime'),
        ('executor_cpu_time', 'executorCpuTime'),
    )
    for attribute_name, json_key in counter_fields:
        attributes[attribute_name] = stage[json_key]

    # Fields read through get_prop instead of direct indexing.
    looked_up_fields = (
        ('submission_time', 'submissionTime'),
        ('first_task_launched_time', 'firstTaskLaunchedTime'),
        ('completion_time', 'completionTime'),
        ('failure_reason', 'failureReason'),
    )
    for attribute_name, json_key in looked_up_fields:
        attributes[attribute_name] = get_prop(stage, json_key)

    remaining_fields = (
        ('input_bytes', 'inputBytes'),
        ('input_records', 'inputRecords'),
        ('output_bytes', 'outputBytes'),
        ('output_records', 'outputRecords'),
        ('shuffle_read_bytes', 'shuffleReadBytes'),
        ('shuffle_read_records', 'shuffleReadRecords'),
        ('shuffle_write_bytes', 'shuffleWriteBytes'),
        ('shuffle_write_records', 'shuffleWriteRecords'),
        ('memory_bytes_spilled', 'memoryBytesSpilled'),
        ('disk_bytes_spilled', 'diskBytesSpilled'),
        ('name', 'name'),
        ('details', 'details'),
        ('scheduling_pool', 'schedulingPool'),
        ('rdd_ids', 'rddIds'),
        ('accumulator_updates', 'accumulatorUpdates'),
        ('killed_tasks_summary', 'killedTasksSummary'),
    )
    for attribute_name, json_key in remaining_fields:
        attributes[attribute_name] = stage[json_key]

    return attributes
def get_attributes(app_id, executor):
    """
    Collect the attributes of an executor into a flat dictionary.

    :param app_id: application id (string)
    :param executor: executor data (json)
    :return: dict (attribute: value)
    """
    executor_id = executor['id']
    attributes = {
        'executor_key': f"{app_id}_{executor_id}",
        'app_id': app_id,
        'id': executor_id,
    }

    # Fields read by direct indexing (a missing key raises KeyError).
    indexed_fields = (
        ('host_port', 'hostPort'),
        ('is_active', 'isActive'),
        ('rdd_blocks', 'rddBlocks'),
        ('memory_used', 'memoryUsed'),
        ('disk_used', 'diskUsed'),
        ('total_cores', 'totalCores'),
        ('max_tasks', 'maxTasks'),
        ('active_tasks', 'activeTasks'),
        ('failed_tasks', 'failedTasks'),
        ('total_duration', 'totalDuration'),
        ('total_gc_time', 'totalGCTime'),
        ('total_input_bytes', 'totalInputBytes'),
        ('total_shuffle_read', 'totalShuffleRead'),
        ('total_shuffle_write', 'totalShuffleWrite'),
        ('is_blacklisted', 'isBlacklisted'),
        ('max_memory', 'maxMemory'),
        ('add_time', 'addTime'),
    )
    for attribute_name, json_key in indexed_fields:
        attributes[attribute_name] = executor[json_key]

    # Fields read through get_prop, addressed by a (possibly nested) key path.
    looked_up_fields = (
        ('remove_time', ('removeTime',)),
        ('remove_reason', ('removeReason',)),
        ('executor_stdout_log', ('executorLogs', 'stdout')),
        ('executor_stderr_log', ('executorLogs', 'stderr')),
        ('used_on_heap_storage_memory', ('memoryMetrics', 'usedOnHeapStorageMemory')),
        ('used_off_heap_storage_memory', ('memoryMetrics', 'usedOffHeapStorageMemory')),
        ('total_on_heap_storage_memory', ('memoryMetrics', 'totalOnHeapStorageMemory')),
        ('total_off_heap_storage_memory', ('memoryMetrics', 'totalOffHeapStorageMemory')),
    )
    for attribute_name, key_path in looked_up_fields:
        attributes[attribute_name] = get_prop(executor, *key_path)

    attributes['blacklisted_in_stages'] = executor['blacklistedInStages']
    return attributes
def __init__(self, attributes):
    """
    Initialise a Job from an attribute dictionary.

    :param attributes: dictionary {name: value} containing the attributes
    """
    attribute_names = (
        "job_key",
        "app_id",
        "job_id",
        "submission_time",
        "completion_time",
        "status",
        "num_tasks",
        "num_active_tasks",
        "num_completed_tasks",
        "num_skipped_tasks",
        "num_failed_tasks",
        "num_killed_tasks",
        "num_completed_indices",
        "num_active_stages",
        "num_completed_stages",
        "num_skipped_stages",
        "num_failed_stages",
        "killed_tasks_summary",
    )
    # Every instance attribute carries the same name as the key it is read from.
    for attribute_name in attribute_names:
        setattr(self, attribute_name, get_prop(attributes, attribute_name))
def __init__(self, attributes):
    """
    Initialise a StageExecutor from an attribute dictionary.

    :param attributes: dictionary {name: value} containing the attributes
    """
    attribute_names = (
        "stage_executor_key",
        "stage_key",
        "executor_key",
        "executor_id",
        "task_time",
        "failed_tasks",
        "succeeded_tasks",
        "killed_tasks",
        "input_bytes",
        "input_records",
        "output_bytes",
        "output_records",
        "shuffle_read",
        "shuffle_read_records",
        "shuffle_write",
        "shuffle_write_records",
        "memory_bytes_spilled",
        "disk_bytes_spilled",
        "is_blacklisted_for_stage",
    )
    # Every instance attribute carries the same name as the key it is read from.
    for attribute_name in attribute_names:
        setattr(self, attribute_name, get_prop(attributes, attribute_name))
def __init__(self, attributes):
    """
    Create an Application object.

    Attribute values are read from ``attributes`` via get_prop; the metric
    slots start out empty and the severity bookkeeping starts at
    Severity.NONE.

    :param attributes: dictionary {name: value} containing the attributes
    """
    self.app_id = get_prop(attributes, 'app_id')
    self.name = get_prop(attributes, 'name')
    self.start_time = get_prop(attributes, 'start_time')
    self.end_time = get_prop(attributes, 'end_time')
    self.duration = get_prop(attributes, 'duration')
    self.spark_user = get_prop(attributes, 'spark_user')
    self.completed = get_prop(attributes, 'completed')
    self.runtime = get_prop(attributes, 'runtime')
    self.spark_properties = get_prop(attributes, 'spark_properties')
    self.spark_command = get_prop(attributes, 'spark_command')
    self.mode = get_prop(attributes, 'mode')

    self.is_processed = False

    # Metric slots, all unset on construction.
    self.stage_failure_metric = None
    self.stage_skew_metric = None
    self.stage_disk_spill_metric = None
    self.job_metrics = None
    self.driver_gc_time_metric = None
    self.executor_gc_time_metric = None
    self.serializer_metric = None
    self.dynamic_allocation_metric = None
    self.min_max_executors_metric = None
    self.yarn_queue_metric = None
    self.memory_config_metric = None
    self.core_number_metric = None

    # NOTE(review): duration must be numeric here; if 'duration' is absent and
    # get_prop yields None, the division raises TypeError - confirm callers
    # always supply it. Presumably duration is in milliseconds - verify.
    self.duration_formatted = fmt_time(self.duration / 1000)

    # this dict should serve as an overview of metrics with severity >= LOW
    self.metrics_overview = {}  # key: metric, value: severity
    self.overall_severity = Severity.NONE
def __init__(self, attributes):
    """
    Initialise a StageStatistics object from an attribute dictionary.

    :param attributes: dictionary {name: value} containing the attributes
    """
    attribute_names = (
        "stage_key",
        "quantiles",
        "executor_deserialize_time",
        "executor_deserialize_cpu_time",
        "executor_run_time",
        "executor_cpu_time",
        "result_size",
        "jvm_gc_time",
        "result_serialization_time",
        "getting_result_time",
        "scheduler_delay",
        "peak_execution_memory",
        "memory_bytes_spilled",
        "disk_bytes_spilled",
        "bytes_read",
        "records_read",
        "bytes_written",
        "records_written",
        "shuffle_read_bytes",
        "shuffle_read_records",
        "shuffle_remote_blocks_fetched",
        "shuffle_local_blocks_fetched",
        "shuffle_fetch_wait_time",
        "shuffle_remote_bytes_read",
        "shuffle_remote_bytes_read_to_disk",
        "shuffle_total_blocks_fetched",
        "shuffle_write_bytes",
        "shuffle_write_records",
        "shuffle_write_time",
    )
    # Every instance attribute carries the same name as the key it is read from.
    for attribute_name in attribute_names:
        setattr(self, attribute_name, get_prop(attributes, attribute_name))
def __init__(self, attributes):
    """
    Initialise a Stage from an attribute dictionary.

    :param attributes: dictionary {name: value} containing the attributes
    """
    attribute_names = (
        "stage_key",
        "app_id",
        "status",
        "stage_id",
        "attempt_id",
        "job_key",
        "num_tasks",
        "num_active_tasks",
        "num_complete_tasks",
        "num_failed_tasks",
        "num_killed_tasks",
        "num_completed_indices",
        "executor_run_time",
        "executor_cpu_time",
        "submission_time",
        "first_task_launched_time",
        "completion_time",
        "failure_reason",
        "input_bytes",
        "input_records",
        "output_bytes",
        "output_records",
        "shuffle_read_bytes",
        "shuffle_read_records",
        "shuffle_write_bytes",
        "shuffle_write_records",
        "memory_bytes_spilled",
        "disk_bytes_spilled",
        "name",
        "details",
        "scheduling_pool",
        "rdd_ids",
        "accumulator_updates",
        "killed_tasks_summary",
    )
    # Every instance attribute carries the same name as the key it is read from.
    for attribute_name in attribute_names:
        setattr(self, attribute_name, get_prop(attributes, attribute_name))
def get_attributes(stage_key, task, app_id):
    """
    Collect the attributes of a task into a flat dictionary.

    :param stage_key: stage key (string)
    :param task: task data (json)
    :param app_id: application id (string)
    :return: dict (attribute: value)
    """
    task_id = task['taskId']
    attributes = {
        'task_key': f"{stage_key}_{task_id}",
        'stage_key': stage_key,
        'task_id': task_id,
        'index': task['index'],
        'attempt': task['attempt'],
        'launch_time': task['launchTime'],
        'duration': task['duration'],
        'executor_key': f"{app_id}_{task['executorId']}",
        'host': task['host'],
        'status': task['status'],
        'error_message': get_prop(task, 'errorMessage'),
        'task_locality': task['taskLocality'],
        'speculative': task['speculative'],
        'accumulator_updates': task['accumulatorUpdates'],
    }

    # Key paths below are relative to the task's 'taskMetrics' entry.
    metric_paths = (
        ('executor_deserialize_time', ('executorDeserializeTime',)),
        ('executor_deserialize_cpu_time', ('executorDeserializeCpuTime',)),
        ('executor_run_time', ('executorRunTime',)),
        ('executor_cpu_time', ('executorCpuTime',)),
        ('result_size', ('resultSize',)),
        ('jvm_gc_time', ('jvmGcTime',)),
        ('result_serialization_time', ('resultSerializationTime',)),
        ('memory_bytes_spilled', ('memoryBytesSpilled',)),
        ('disk_bytes_spilled', ('diskBytesSpilled',)),
        ('peak_execution_memory', ('peakExecutionMemory',)),
        ('bytes_read', ('inputMetrics', 'bytesRead')),
        ('records_read', ('inputMetrics', 'recordsRead')),
        ('bytes_written', ('outputMetrics', 'bytesWritten')),
        ('records_written', ('outputMetrics', 'recordsWritten')),
        ('shuffle_remote_blocks_fetched', ('shuffleReadMetrics', 'remoteBlocksFetched')),
        ('shuffle_local_blocks_fetched', ('shuffleReadMetrics', 'localBlocksFetched')),
        ('shuffle_fetch_wait_time', ('shuffleReadMetrics', 'fetchWaitTime')),
        ('shuffle_remote_bytes_read', ('shuffleReadMetrics', 'remoteBytesRead')),
        ('shuffle_remote_bytes_read_to_disk', ('shuffleReadMetrics', 'remoteBytesReadToDisk')),
        ('shuffle_local_bytes_read', ('shuffleReadMetrics', 'localBytesRead')),
        ('shuffle_records_read', ('shuffleReadMetrics', 'recordsRead')),
        ('shuffle_bytes_written', ('shuffleWriteMetrics', 'bytesWritten')),
        ('shuffle_write_time', ('shuffleWriteMetrics', 'writeTime')),
        ('shuffle_records_written', ('shuffleWriteMetrics', 'recordsWritten')),
    )
    for attribute_name, key_path in metric_paths:
        attributes[attribute_name] = get_prop(task, 'taskMetrics', *key_path)

    return attributes
def __init__(self, attributes):
    """
    Initialise a task entity from an attribute dictionary.

    :param attributes: dictionary {name: value} containing the attributes
    """
    attribute_names = (
        'task_key',
        'stage_key',
        'task_id',
        'index',
        'attempt',
        'launch_time',
        'duration',
        'executor_key',
        'host',
        'status',
        'error_message',
        'task_locality',
        'speculative',
        'accumulator_updates',
        'executor_deserialize_time',
        'executor_deserialize_cpu_time',
        'executor_run_time',
        'executor_cpu_time',
        'result_size',
        'jvm_gc_time',
        'result_serialization_time',
        'memory_bytes_spilled',
        'disk_bytes_spilled',
        'peak_execution_memory',
        'bytes_read',
        'records_read',
        'bytes_written',
        'records_written',
        'shuffle_remote_blocks_fetched',
        'shuffle_local_blocks_fetched',
        'shuffle_fetch_wait_time',
        'shuffle_remote_bytes_read',
        'shuffle_remote_bytes_read_to_disk',
        'shuffle_local_bytes_read',
        'shuffle_records_read',
        'shuffle_bytes_written',
        'shuffle_write_time',
        'shuffle_records_written',
    )
    # Every instance attribute carries the same name as the key it is read from.
    for attribute_name in attribute_names:
        setattr(self, attribute_name, get_prop(attributes, attribute_name))
def __init__(self, attributes):
    """
    Initialise an Executor from an attribute dictionary.

    :param attributes: dictionary {name: value} containing the attributes
    """
    attribute_names = (
        "executor_key",
        "app_id",
        "id",
        "host_port",
        "is_active",
        "rdd_blocks",
        "memory_used",
        "disk_used",
        "total_cores",
        "max_tasks",
        "active_tasks",
        "failed_tasks",
        "total_duration",
        "total_gc_time",
        "total_input_bytes",
        "total_shuffle_read",
        "total_shuffle_write",
        "is_blacklisted",
        "max_memory",
        "add_time",
        "remove_time",
        "remove_reason",
        "executor_stdout_log",
        "executor_stderr_log",
        "used_on_heap_storage_memory",
        "used_off_heap_storage_memory",
        "total_on_heap_storage_memory",
        "total_off_heap_storage_memory",
        "blacklisted_in_stages",
    )
    # Every instance attribute carries the same name as the key it is read from.
    for attribute_name in attribute_names:
        setattr(self, attribute_name, get_prop(attributes, attribute_name))